From a92af197308a7a4264218b6c823b09dcef81a22a Mon Sep 17 00:00:00 2001
From: Gusev Anton <gusev_aa@groupbwt.com>
Date: Wed, 12 Jun 2019 19:13:11 +0300
Subject: [PATCH] change book model

---
 src/database/README                           |  1 +
 src/database/env.py                           | 83 +++++++++++++++++++
 src/database/models/Book.py                   | 27 ++++++
 src/database/models/Model.py                  |  7 ++
 src/database/models/__init__.py               |  0
 src/database/script.py.mako                   | 25 ++++++
 .../versions/f36bd8e8d8c1_add_books_table.py  | 46 ++++++++++
 src/items/BookItem.py                         |  9 +-
 src/spiders/books_spider.py                   | 23 ++---
 9 files changed, 205 insertions(+), 16 deletions(-)
 create mode 100644 src/database/README
 create mode 100644 src/database/env.py
 create mode 100644 src/database/models/Book.py
 create mode 100644 src/database/models/Model.py
 create mode 100644 src/database/models/__init__.py
 create mode 100644 src/database/script.py.mako
 create mode 100644 src/database/versions/f36bd8e8d8c1_add_books_table.py

diff --git a/src/database/README b/src/database/README
new file mode 100644
index 0000000..98e4f9c
--- /dev/null
+++ b/src/database/README
@@ -0,0 +1 @@
+Generic single-database configuration.
\ No newline at end of file
diff --git a/src/database/env.py b/src/database/env.py
new file mode 100644
index 0000000..62621fb
--- /dev/null
+++ b/src/database/env.py
@@ -0,0 +1,83 @@
+
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+import os
+import sys
+
+
+# This is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Set up Python logging from the config file; skipped when no file is in use.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+# from myapp import mymodel
+# target_metadata = mymodel.Base.metadata
+# target_metadata = None
+
+# Put the parent of this file's directory on sys.path so "database.models" imports.
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from database.models.Book import Book
+target_metadata = [Book.metadata]
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline():
+    """Emit migrations as SQL without creating an Engine.
+
+    Only the URL from the ini file's ``sqlalchemy.url`` option is handed
+    to the context, so no Engine (and no DBAPI) has to be available.
+
+    Statements passed to context.execute() are written to the
+    script output.
+
+    """
+    offline_options = dict(
+        url=config.get_main_option("sqlalchemy.url"),
+        target_metadata=target_metadata,
+        literal_binds=True,
+    )
+    context.configure(**offline_options)
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online():
+    """Apply migrations over a live database connection.
+
+    An Engine is built from the ini section's ``sqlalchemy.*`` settings
+    and one of its connections is bound to the migration context.
+
+    """
+    ini_section = config.get_section(config.config_ini_section)
+    engine = engine_from_config(
+        ini_section, prefix="sqlalchemy.", poolclass=pool.NullPool
+    )
+
+    with engine.connect() as db_connection:
+        context.configure(
+            connection=db_connection,
+            target_metadata=target_metadata,
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if not context.is_offline_mode():  # online mode: run against a connection
+    run_migrations_online()
+else:  # offline mode: configure from the URL only
+    run_migrations_offline()
diff --git a/src/database/models/Book.py b/src/database/models/Book.py
new file mode 100644
index 0000000..8110e56
--- /dev/null
+++ b/src/database/models/Book.py
@@ -0,0 +1,27 @@
+from sqlalchemy import Column, Integer, String, Text, SmallInteger, Numeric
+from sqlalchemy.ext.declarative import declarative_base
+from .Model import Model
+
+Base = declarative_base()
+
+
+class Book(Base, Model):
+    __tablename__ = 'books'
+    id = Column(Integer, primary_key=True)
+    # Descriptive fields; title is required, unique, and indexed.
+    title = Column(String(255), unique=True, nullable=False, index=True)
+    description = Column(Text)
+    image_path = Column(String(255))
+    rating = Column(SmallInteger)
+    # Values the spider reads from the product page's table cells.
+    upc = Column(String(32))
+    product_type = Column(String(32))  # expected value: "Books" (TODO confirm)
+    price_excl_tax = Column(Numeric(6, 2))
+    price_incl_tax = Column(Numeric(6, 2))
+    tax = Column(Numeric(6, 2))
+    in_stock = Column(Integer)
+    count_reviews = Column(Integer)
+    # Category name (the spider takes it from the breadcrumb).
+    category = Column(String(32))
+    # Currency symbol: the first character of the scraped price string.
+    currency_type = Column(String(4))
diff --git a/src/database/models/Model.py b/src/database/models/Model.py
new file mode 100644
index 0000000..5cdcba2
--- /dev/null
+++ b/src/database/models/Model.py
@@ -0,0 +1,7 @@
+class Model:
+    """Mixin that lets an object be populated from a mapping."""
+    def update(self, dct):
+        """Assign each value in dct whose key is already an attribute."""
+        present = filter(lambda pair: hasattr(self, pair[0]), dct.items())
+        for attr_name, new_value in present:
+            setattr(self, attr_name, new_value)
\ No newline at end of file
diff --git a/src/database/models/__init__.py b/src/database/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/database/script.py.mako b/src/database/script.py.mako
new file mode 100644
index 0000000..916ce7f
--- /dev/null
+++ b/src/database/script.py.mako
@@ -0,0 +1,25 @@
+# -*- coding: UTF-8 -*-
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade():
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade():
+    ${downgrades if downgrades else "pass"}
diff --git a/src/database/versions/f36bd8e8d8c1_add_books_table.py b/src/database/versions/f36bd8e8d8c1_add_books_table.py
new file mode 100644
index 0000000..42df59c
--- /dev/null
+++ b/src/database/versions/f36bd8e8d8c1_add_books_table.py
@@ -0,0 +1,46 @@
+"""add books table
+
+Revision ID: f36bd8e8d8c1
+Revises: 
+Create Date: 2019-06-12 18:58:58.292063
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'f36bd8e8d8c1'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Create the ``books`` table and a unique index on ``title``."""
+    op.create_table(
+        'books',
+        sa.Column('id', sa.Integer(), nullable=False),
+        sa.Column('title', sa.String(255), nullable=False),
+        sa.Column('description', sa.Text(), nullable=True),
+        sa.Column('image_path', sa.String(255), nullable=True),
+        sa.Column('rating', sa.SmallInteger(), nullable=True),
+        sa.Column('upc', sa.String(32), nullable=True),
+        sa.Column('product_type', sa.String(32), nullable=True),
+        sa.Column('price_excl_tax', sa.Numeric(6, 2), nullable=True),
+        sa.Column('price_incl_tax', sa.Numeric(6, 2), nullable=True),
+        sa.Column('tax', sa.Numeric(6, 2), nullable=True),
+        sa.Column('in_stock', sa.Integer(), nullable=True),
+        sa.Column('count_reviews', sa.Integer(), nullable=True),
+        sa.Column('category', sa.String(32), nullable=True),
+        sa.Column('currency_type', sa.String(4), nullable=True),
+        sa.PrimaryKeyConstraint('id'),
+    )
+    op.create_index(op.f('ix_books_title'), 'books', ['title'], unique=True)
+
+
+def downgrade():
+    """Drop the ``title`` index and then the ``books`` table."""
+    # Mirrors upgrade() in reverse order: index first, table last.
+    op.drop_index(op.f('ix_books_title'), table_name='books')
+    op.drop_table('books')
diff --git a/src/items/BookItem.py b/src/items/BookItem.py
index 205efbc..2387741 100644
--- a/src/items/BookItem.py
+++ b/src/items/BookItem.py
@@ -13,7 +13,8 @@ class BookItem(scrapy.Item):
 
     title = scrapy.Field()
     description = scrapy.Field()
-    image = scrapy.Field()
+    image_urls = scrapy.Field()
+    images = scrapy.Field()
     rating = scrapy.Field()
 
     upc = scrapy.Field()
@@ -22,4 +23,8 @@ class BookItem(scrapy.Item):
     price_incl_tax = scrapy.Field()
     tax = scrapy.Field()
     in_stock = scrapy.Field()
-    count_reviews = scrapy.Field()
\ No newline at end of file
+    count_reviews = scrapy.Field()
+
+    category = scrapy.Field()
+
+    currency_type = scrapy.Field()
diff --git a/src/spiders/books_spider.py b/src/spiders/books_spider.py
index 6c10a13..d8c4c13 100644
--- a/src/spiders/books_spider.py
+++ b/src/spiders/books_spider.py
@@ -5,20 +5,14 @@ from database.connectors.BookConnector import BookConnector
 import logging
 
 
-class BooksSpider(scrapy.Spider):
 class BooksSpider(BookConnector, scrapy.Spider):
     name = 'books'
-    start_urls = [
-        'http://books.toscrape.com/catalogue/page-1.html'
-    ]
-    ITEM_PIPELINES = {
-        'tutorial.pipelines.PricePipeline': 1,
-    }
+    start_urls = ['http://books.toscrape.com/']
 
     def parse(self, response):
         # follow links to book pages
-        for href in response.css('div.image_container a::attr(href)'):  # TODO
-            yield response.follow(href, self.parse_book)
+        for href in response.css('div.image_container a::attr(href)'):
+            yield response.follow(href, self.parse_book)
 
         # pagination
         next_page = response.css('li.next a::attr(href)').get()
@@ -30,18 +24,19 @@ class BooksSpider(BookConnector, scrapy.Spider):
 
         table_data = response.css('table td::text').getall()
         book.update({
-            'url': response.url,
             'title': response.css('div.product_main h1::text').get(),
             'description': response.xpath("//*[@id='product_description']/following::p/text()").get(),
-            'image': response.urljoin(response.css('div.active img::attr(src)').get()),
+            'image_urls': [response.urljoin(response.css('div.active img::attr(src)').get())],
             'rating': self._get_rating(response.css('p.star-rating::attr(class)').get().split(' ')),
             'upc': table_data[0],
             'product_type': table_data[1],
-            'price_excl_tax': table_data[2],
-            'price_incl_tax': table_data[3],
-            'tax': table_data[4],
+            'price_excl_tax': table_data[2][1:],
+            'price_incl_tax': table_data[3][1:],
+            'tax': table_data[4][1:],
             'in_stock': int(re.search(r'\d+', table_data[5]).group()),
             'count_reviews': int(table_data[6]),
+            'currency_type': table_data[2][0],
+            'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get()
         })
         yield book
 
-- 
GitLab