Skip to content
Snippets Groups Projects
Commit 6c10a062 authored by Anton Gusev
Browse files

add logging.debug for BooksSpider

parent 6b39c55f
No related branches found
No related tags found
5 merge requests: !5 Develop, !4 Develop, !3 Develop, !2 Develop, !1 Develop
import scrapy
from scrapy import Spider
import re
from items.BookItem import BookItem
from database.connectors.BookConnector import BookConnector
import logging
class BooksSpider(BookConnector, scrapy.Spider):
class BooksSpider(Spider, BookConnector):
name = 'books'
start_urls = ['http://books.toscrape.com/']
page = 1
def parse(self, response):
    """Yield requests for every book on a listing page, then for the next page.

    Args:
        response: the listing-page response; it is queried with CSS
            selectors and used to build follow-up requests.

    Yields:
        One request per book link (handled by ``self.parse_book``) and,
        when a "next" link exists, one request for the following listing
        page (handled by ``self.parse`` again).
    """
    self.logger.debug('Current page: %s', self.page)

    # Follow links to book pages. Each link is followed exactly once and
    # carries its position on the page in ``meta`` so that parse_book can
    # report it in its debug output.
    book_links = response.css('div.image_container a::attr(href)')
    for idx, href in enumerate(book_links):  # TODO delete enumerate
        yield response.follow(href, self.parse_book, meta={'idx': idx})

    # Pagination: the page counter only advances when a next page exists.
    next_page = response.css('li.next a::attr(href)').get()
    if next_page is not None:
        self.page += 1
        yield response.follow(next_page, callback=self.parse)
def parse_book(self, response):
self.logger.debug('Index book in page: {}'.format(response.meta.get('idx')))
book = BookItem()
table_data = response.css('table td::text').getall()
......@@ -50,4 +55,4 @@ class BooksSpider(BookConnector, scrapy.Spider):
if 'Four' in class_all:
return 4
if 'Five' in class_all:
return 5
return 5
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment