From a92af197308a7a4264218b6c823b09dcef81a22a Mon Sep 17 00:00:00 2001 From: Gusev Anton <gusev_aa@groupbwt.com> Date: Wed, 12 Jun 2019 19:13:11 +0300 Subject: [PATCH] change book model --- src/database/README | 1 + src/database/env.py | 83 +++++++++++++++++++ src/database/models/Book.py | 27 ++++++ src/database/models/Model.py | 7 ++ src/database/models/__init__.py | 0 src/database/script.py.mako | 25 ++++++ .../versions/f36bd8e8d8c1_add_books_table.py | 46 ++++++++++ src/items/BookItem.py | 9 +- src/spiders/books_spider.py | 23 ++--- 9 files changed, 205 insertions(+), 16 deletions(-) create mode 100644 src/database/README create mode 100644 src/database/env.py create mode 100644 src/database/models/Book.py create mode 100644 src/database/models/Model.py create mode 100644 src/database/models/__init__.py create mode 100644 src/database/script.py.mako create mode 100644 src/database/versions/f36bd8e8d8c1_add_books_table.py diff --git a/src/database/README b/src/database/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/src/database/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/src/database/env.py b/src/database/env.py new file mode 100644 index 0000000..62621fb --- /dev/null +++ b/src/database/env.py @@ -0,0 +1,83 @@ + +from logging.config import fileConfig + +from sqlalchemy import engine_from_config +from sqlalchemy import pool + +from alembic import context +import os +import sys + + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +# target_metadata = None + +# added +sys.path.insert(0, '\\'.join(os.path.dirname(os.path.abspath(__file__)).split('\\')[:-1])) +from database.models.Book import Book +target_metadata = [Book.metadata] + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/src/database/models/Book.py b/src/database/models/Book.py new file mode 100644 index 0000000..8110e56 --- /dev/null +++ b/src/database/models/Book.py @@ -0,0 +1,27 @@ +from sqlalchemy import Column, Integer, String, Text, SmallInteger, Numeric +from sqlalchemy.ext.declarative import declarative_base +from .Model import Model + +Base = declarative_base() + + +class Book(Base, Model): + __tablename__ = 'books' + id = Column(Integer, primary_key=True) + + title = Column(String(255), unique=True, nullable=False, index=True) + description = Column(Text) + image_path = Column(String(255)) + rating = Column(SmallInteger) + + upc = Column(String(32)) + product_type = Column(String(32)) # -> Books + price_excl_tax = Column(Numeric(6, 2)) + price_incl_tax = Column(Numeric(6, 2)) + tax = Column(Numeric(6, 2)) + in_stock = Column(Integer) + count_reviews = Column(Integer) + + category = Column(String(32)) + + currency_type = Column(String(4)) diff --git a/src/database/models/Model.py b/src/database/models/Model.py new file mode 100644 index 0000000..5cdcba2 --- /dev/null +++ b/src/database/models/Model.py @@ -0,0 +1,7 @@ + + +class Model: + def update(self, dct): + for key, value in dct.items(): + if hasattr(self, key): + setattr(self, key, value) \ No newline at end of file diff --git a/src/database/models/__init__.py b/src/database/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/database/script.py.mako b/src/database/script.py.mako new file mode 100644 index 0000000..916ce7f --- /dev/null +++ b/src/database/script.py.mako @@ -0,0 +1,25 @@ +# -*- coding: UTF-8 -*- +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/src/database/versions/f36bd8e8d8c1_add_books_table.py b/src/database/versions/f36bd8e8d8c1_add_books_table.py new file mode 100644 index 0000000..42df59c --- /dev/null +++ b/src/database/versions/f36bd8e8d8c1_add_books_table.py @@ -0,0 +1,46 @@ +"""add books table + +Revision ID: f36bd8e8d8c1 +Revises: +Create Date: 2019-06-12 18:58:58.292063 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'f36bd8e8d8c1' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('books', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('image_path', sa.String(length=255), nullable=True), + sa.Column('rating', sa.SmallInteger(), nullable=True), + sa.Column('upc', sa.String(length=32), nullable=True), + sa.Column('product_type', sa.String(length=32), nullable=True), + sa.Column('price_excl_tax', sa.Numeric(precision=6, scale=2), nullable=True), + sa.Column('price_incl_tax', sa.Numeric(precision=6, scale=2), nullable=True), + sa.Column('tax', sa.Numeric(precision=6, scale=2), nullable=True), + sa.Column('in_stock', sa.Integer(), nullable=True), + sa.Column('count_reviews', sa.Integer(), nullable=True), + sa.Column('category', sa.String(length=32), nullable=True), + sa.Column('currency_type', sa.String(length=4), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_books_title'), 'books', ['title'], unique=True) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_books_title'), table_name='books') + op.drop_table('books') + # ### end Alembic commands ### diff --git a/src/items/BookItem.py b/src/items/BookItem.py index 205efbc..2387741 100644 --- a/src/items/BookItem.py +++ b/src/items/BookItem.py @@ -13,7 +13,8 @@ class BookItem(scrapy.Item): title = scrapy.Field() description = scrapy.Field() - image = scrapy.Field() + image_urls = scrapy.Field() + images = scrapy.Field() rating = scrapy.Field() upc = scrapy.Field() @@ -22,4 +23,8 @@ class BookItem(scrapy.Item): price_incl_tax = scrapy.Field() tax = scrapy.Field() in_stock = scrapy.Field() - count_reviews = scrapy.Field() \ No newline at end of file + count_reviews = scrapy.Field() + + category = scrapy.Field() + + currency_type = scrapy.Field() diff --git a/src/spiders/books_spider.py b/src/spiders/books_spider.py index 6c10a13..d8c4c13 100644 --- a/src/spiders/books_spider.py +++ b/src/spiders/books_spider.py @@ -5,20 +5,14 @@ from database.connectors.BookConnector import BookConnector import logging -class BooksSpider(scrapy.Spider): class BooksSpider(BookConnector, scrapy.Spider): name = 'books' - start_urls = [ - 'http://books.toscrape.com/catalogue/page-1.html' - ] - ITEM_PIPELINES = { - 'tutorial.pipelines.PricePipeline': 1, - } + start_urls = ['http://books.toscrape.com/'] def parse(self, response): # follow links to book pages - for href in response.css('div.image_container a::attr(href)'): # TODO - yield response.follow(href, self.parse_book) + for idx, href in enumerate(response.css('div.image_container a::attr(href)')): # TODO delete enumerate + yield response.follow(href, self.parse_book) # pagination next_page = response.css('li.next a::attr(href)').get() @@ -30,18 +24,19 @@ class BooksSpider(BookConnector, scrapy.Spider): table_data = response.css('table td::text').getall() book.update({ - 'url': response.url, 'title': response.css('div.product_main h1::text').get(), 'description': response.xpath("//*[@id='product_description']/following::p/text()").get(), - 'image': response.urljoin(response.css('div.active img::attr(src)').get()), + 'image_urls': [response.urljoin(response.css('div.active img::attr(src)').get())], 'rating': self._get_rating(response.css('p.star-rating::attr(class)').get().split(' ')), 'upc': table_data[0], 'product_type': table_data[1], - 'price_excl_tax': table_data[2], - 'price_incl_tax': table_data[3], - 'tax': table_data[4], + 'price_excl_tax': table_data[2][1:], + 'price_incl_tax': table_data[3][1:], + 'tax': table_data[4][1:], 'in_stock': int(re.search(r'\d+', table_data[5]).group()), 'count_reviews': int(table_data[6]), + 'currency_type': table_data[2][0], + 'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get() }) yield book -- GitLab