diff --git a/src/database/models/Book.py b/src/database/models/Book.py index ec6d5e48d47748b975933b8ae02eb838e3c04954..f49555ac4922260b906d065942523ac3c7dc953e 100644 --- a/src/database/models/Book.py +++ b/src/database/models/Book.py @@ -8,6 +8,7 @@ Base = declarative_base() class Book(Base, Model): __tablename__ = 'books' id = Column(Integer, primary_key=True) + url = Column(String(255)) title = Column(String(255), nullable=False, index=True) description = Column(Text) @@ -15,7 +16,7 @@ class Book(Base, Model): rating = Column(SmallInteger, index=True) upc = Column(String(32), unique=True) - product_type = Column(String(32), index=True) # -> Books + product_type = Column(String(32), index=True) # -> Books price_excl_tax = Column(Numeric(6, 2), index=True) price_incl_tax = Column(Numeric(6, 2), index=True) tax = Column(Numeric(6, 2), index=True) diff --git a/src/database/versions/6c88430b162c_create_books_table.py b/src/database/versions/3217c19ef3a6_create_books_table.py similarity index 95% rename from src/database/versions/6c88430b162c_create_books_table.py rename to src/database/versions/3217c19ef3a6_create_books_table.py index bcacf122929f401707323ab840f61da51465992e..c819ceb744c027c8b6b941a4c3b82c68f116b862 100644 --- a/src/database/versions/6c88430b162c_create_books_table.py +++ b/src/database/versions/3217c19ef3a6_create_books_table.py @@ -1,8 +1,8 @@ """create books table -Revision ID: 6c88430b162c +Revision ID: 3217c19ef3a6 Revises: -Create Date: 2019-06-13 11:50:35.583817 +Create Date: 2019-06-13 18:27:06.732796 """ from alembic import op @@ -10,7 +10,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -revision = '6c88430b162c' +revision = '3217c19ef3a6' down_revision = None branch_labels = None depends_on = None @@ -20,6 +20,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table('books', sa.Column('id', sa.Integer(), nullable=False), + sa.Column('url', sa.String(length=255), nullable=True), sa.Column('title', sa.String(length=255), nullable=False), sa.Column('description', sa.Text(), nullable=True), sa.Column('image_path', sa.String(length=255), nullable=True), diff --git a/src/items/BookItem.py b/src/items/BookItem.py index c8bacda94a8e8deb4fc75dfc99779bd7c797e24c..2387741f6dc7aef0ecbca30d28d318160ab2ecb3 100644 --- a/src/items/BookItem.py +++ b/src/items/BookItem.py @@ -9,6 +9,8 @@ import scrapy class BookItem(scrapy.Item): + url = scrapy.Field() + title = scrapy.Field() description = scrapy.Field() image_urls = scrapy.Field() diff --git a/src/spiders/books_spider.py b/src/spiders/books_spider.py index 11dc3b5ce1cf351dc4669566c8338fcf64f080c1..e14ce74d2cd4020291a64f31c70a444e0caf5547 100644 --- a/src/spiders/books_spider.py +++ b/src/spiders/books_spider.py @@ -13,7 +13,7 @@ class BooksSpider(Spider, BookConnector): self.logger.debug('Current page: {}'.format(self.page)) # follow links to book pages - for idx, href in enumerate(response.css('div.image_container a::attr(href)')): # TODO delete enumerate + for idx, href in enumerate(response.css('div.image_container a::attr(href)')): yield response.follow(href, self.parse_book, meta={'idx': idx}) # pagination @@ -41,7 +41,8 @@ class BooksSpider(Spider, BookConnector): 'in_stock': int(re.search(r'\d+', table_data[5]).group()), 'count_reviews': int(table_data[6]), 'currency_type': table_data[2][0], - 'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get() + 'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get(), + 'url': response.url, }) yield book