Newer
Older
from database.connectors.BookConnector import BookConnector
self.logger.debug('Current page: {}'.format(self.page))
for idx, href in enumerate(response.css('div.image_container a::attr(href)')): # TODO delete enumerate
yield response.follow(href, self.parse_book, meta={'idx': idx})
# pagination
next_page = response.css('li.next a::attr(href)').get()
if next_page is not None:
yield response.follow(next_page, callback=self.parse)
def parse_book(self, response):
    """Scrape a single book's product page and yield one populated ``BookItem``.

    Fields are read from the page header, the product description, the
    active image and the product-information table. The table cells are
    consumed by position: 0 -> upc, 1 -> product type, 2 -> price excluding
    tax, 3 -> price including tax, 4 -> tax, 5 -> availability text,
    6 -> review count.

    Args:
        response: The response for the book's detail page. ``meta['idx']``,
            when present, is only used for debug logging.

    Yields:
        BookItem: the item built from the page.

    Raises:
        IndexError: if the information table has fewer than seven cells.
        ValueError: if the review-count cell is not an integer.
    """
    self.logger.debug('Index book in page: {}'.format(response.meta.get('idx')))

    table_data = response.css('table td::text').getall()

    # ``.get()`` returns None when the selector matches nothing; calling
    # ``.split`` on it would raise AttributeError, so pass an empty class
    # list to the rating helper instead.
    rating_attr = response.css('p.star-rating::attr(class)').get()
    rating_classes = rating_attr.split(' ') if rating_attr is not None else []

    # ``urljoin`` with a missing/empty src resolves to the page URL itself,
    # which is not an image; emit no image URL in that case.
    image_src = response.css('div.active img::attr(src)').get()
    image_urls = [response.urljoin(image_src)] if image_src else []

    # The availability cell may contain no number at all; treat that as
    # zero units rather than failing on ``None.group()``.
    stock_match = re.search(r'\d+', table_data[5])
    in_stock = int(stock_match.group()) if stock_match else 0

    # Price cells are split into a leading currency symbol and the amount.
    # NOTE(review): assumes the symbol is exactly one character - confirm.
    price_excl_tax = table_data[2]

    book = BookItem()
    book.update({
        'title': response.css('div.product_main h1::text').get(),
        'description': response.xpath("//*[@id='product_description']/following::p/text()").get(),
        'image_urls': image_urls,
        'rating': self._get_rating(rating_classes),
        'upc': table_data[0],
        'product_type': table_data[1],
        'price_excl_tax': price_excl_tax[1:],
        'price_incl_tax': table_data[3][1:],
        'tax': table_data[4][1:],
        'in_stock': in_stock,
        'count_reviews': int(table_data[6]),
        'currency_type': price_excl_tax[0],
        'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get(),
    })
    yield book
def _get_rating(self, class_all):
if 'One' in class_all:
return 1
if 'Two' in class_all:
return 2
if 'Three' in class_all:
return 3
if 'Four' in class_all:
return 4
if 'Five' in class_all: