Skip to content
Snippets Groups Projects
Commit 6c10a062 authored by Anton Gusev
Browse files

add logging.debug for BooksSpider

parent 6b39c55f
No related branches found
No related tags found
5 merge requests: !5 Develop, !4 Develop, !3 Develop, !2 Develop, !1 Develop
import scrapy from scrapy import Spider
import re import re
from items.BookItem import BookItem from items.BookItem import BookItem
from database.connectors.BookConnector import BookConnector from database.connectors.BookConnector import BookConnector
import logging
class BooksSpider(BookConnector, scrapy.Spider): class BooksSpider(Spider, BookConnector):
name = 'books' name = 'books'
start_urls = ['http://books.toscrape.com/'] start_urls = ['http://books.toscrape.com/']
page = 1
def parse(self, response): def parse(self, response):
self.logger.debug('Current page: {}'.format(self.page))
# follow links to book pages # follow links to book pages
for idx, href in enumerate(response.css('div.image_container a::attr(href)')): # TODO delete enumerate for idx, href in enumerate(response.css('div.image_container a::attr(href)')): # TODO delete enumerate
yield response.follow(href, self.parse_book) yield response.follow(href, self.parse_book, meta={'idx': idx})
# pagination # pagination
next_page = response.css('li.next a::attr(href)').get() next_page = response.css('li.next a::attr(href)').get()
if next_page is not None: if next_page is not None:
self.page += 1
yield response.follow(next_page, callback=self.parse) yield response.follow(next_page, callback=self.parse)
def parse_book(self, response): def parse_book(self, response):
self.logger.debug('Index book in page: {}'.format(response.meta.get('idx')))
book = BookItem() book = BookItem()
table_data = response.css('table td::text').getall() table_data = response.css('table td::text').getall()
...@@ -50,4 +55,4 @@ class BooksSpider(BookConnector, scrapy.Spider): ...@@ -50,4 +55,4 @@ class BooksSpider(BookConnector, scrapy.Spider):
if 'Four' in class_all: if 'Four' in class_all:
return 4 return 4
if 'Five' in class_all: if 'Five' in class_all:
return 5 return 5
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment