From 061c126d8c056048b6633261ff3a2b80576fcadc Mon Sep 17 00:00:00 2001 From: Gusev Anton <gusev_aa@groupbwt.com> Date: Thu, 13 Jun 2019 18:41:32 +0300 Subject: [PATCH] add url field to Book model --- src/database/models/Book.py | 3 ++- ...e_books_table.py => 3217c19ef3a6_create_books_table.py} | 7 ++++--- src/items/BookItem.py | 2 ++ src/spiders/books_spider.py | 5 +++-- 4 files changed, 11 insertions(+), 6 deletions(-) rename src/database/versions/{6c88430b162c_create_books_table.py => 3217c19ef3a6_create_books_table.py} (95%) diff --git a/src/database/models/Book.py b/src/database/models/Book.py index ec6d5e4..f49555a 100644 --- a/src/database/models/Book.py +++ b/src/database/models/Book.py @@ -8,6 +8,7 @@ Base = declarative_base() class Book(Base, Model): __tablename__ = 'books' id = Column(Integer, primary_key=True) + url = Column(String(255)) title = Column(String(255), nullable=False, index=True) description = Column(Text) @@ -15,7 +16,7 @@ class Book(Base, Model): rating = Column(SmallInteger, index=True) upc = Column(String(32), unique=True) - product_type = Column(String(32), index=True) # -> Books + product_type = Column(String(32), index=True) # -> Books price_excl_tax = Column(Numeric(6, 2), index=True) price_incl_tax = Column(Numeric(6, 2), index=True) tax = Column(Numeric(6, 2), index=True) diff --git a/src/database/versions/6c88430b162c_create_books_table.py b/src/database/versions/3217c19ef3a6_create_books_table.py similarity index 95% rename from src/database/versions/6c88430b162c_create_books_table.py rename to src/database/versions/3217c19ef3a6_create_books_table.py index bcacf12..c819ceb 100644 --- a/src/database/versions/6c88430b162c_create_books_table.py +++ b/src/database/versions/3217c19ef3a6_create_books_table.py @@ -1,8 +1,8 @@ """create books table -Revision ID: 6c88430b162c +Revision ID: 3217c19ef3a6 Revises: -Create Date: 2019-06-13 11:50:35.583817 +Create Date: 2019-06-13 18:27:06.732796 """ from alembic import op @@ -10,7 +10,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -revision = '6c88430b162c' +revision = '3217c19ef3a6' down_revision = None branch_labels = None depends_on = None @@ -20,6 +20,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table('books', sa.Column('id', sa.Integer(), nullable=False), + sa.Column('url', sa.String(length=255), nullable=True), sa.Column('title', sa.String(length=255), nullable=False), sa.Column('description', sa.Text(), nullable=True), sa.Column('image_path', sa.String(length=255), nullable=True), diff --git a/src/items/BookItem.py b/src/items/BookItem.py index c8bacda..2387741 100644 --- a/src/items/BookItem.py +++ b/src/items/BookItem.py @@ -9,6 +9,8 @@ import scrapy class BookItem(scrapy.Item): + url = scrapy.Field() + title = scrapy.Field() description = scrapy.Field() image_urls = scrapy.Field() diff --git a/src/spiders/books_spider.py b/src/spiders/books_spider.py index 11dc3b5..e14ce74 100644 --- a/src/spiders/books_spider.py +++ b/src/spiders/books_spider.py @@ -13,7 +13,7 @@ class BooksSpider(Spider, BookConnector): self.logger.debug('Current page: {}'.format(self.page)) # follow links to book pages - for idx, href in enumerate(response.css('div.image_container a::attr(href)')): # TODO delete enumerate + for idx, href in enumerate(response.css('div.image_container a::attr(href)')): yield response.follow(href, self.parse_book, meta={'idx': idx}) # pagination @@ -41,7 +41,8 @@ class BooksSpider(Spider, BookConnector): 'in_stock': int(re.search(r'\d+', table_data[5]).group()), 'count_reviews': int(table_data[6]), 'currency_type': table_data[2][0], - 'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get() + 'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get(), + 'url': response.url, }) yield book -- GitLab