Skip to content
Snippets Groups Projects
Commit a92af197 authored by Anton Gusev's avatar Anton Gusev
Browse files

change book model

parent c70deaf5
No related branches found
No related tags found
5 merge requests!5Develop,!4Develop,!3Develop,!2Develop,!1Develop
Generic single-database configuration.
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
import os
import sys
# This is the Alembic Config object, which provides access to the
# values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging; this sets up loggers.
fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
# target_metadata = None
# added: put the project root (the parent of this alembic/ directory) on
# sys.path so the model modules below can be imported no matter where
# alembic is invoked from.
# BUG FIX: the original built the parent path with '\\'.join(...split('\\'))
# which only works with Windows path separators; os.path.dirname is
# portable across platforms.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from database.models.Book import Book

# MetaData collection handed to Alembic for 'autogenerate' support.
target_metadata = [Book.metadata]
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    Configures the context with just a database URL rather than an
    Engine, so no DBAPI needs to be installed; calls to
    ``context.execute()`` emit the SQL as text to the script output.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
    )
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online():
    """Run migrations in 'online' mode.

    Builds an Engine from the [sqlalchemy.*] config section and binds a
    live connection to the migration context before running.
    """
    engine = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    with engine.connect() as conn:
        context.configure(connection=conn, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Module entry point: Alembic imports env.py with the offline/online mode
# already set on the context, so just dispatch to the matching runner.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
from sqlalchemy import Column, Integer, String, Text, SmallInteger, Numeric
from sqlalchemy.ext.declarative import declarative_base
from .Model import Model
# Declarative base class shared by the ORM models defined in this module.
Base = declarative_base()
class Book(Base, Model):
    """ORM mapping for the ``books`` table.

    Mixes :class:`Model` into the declarative base to inherit the
    dict-driven ``update`` helper.
    """
    __tablename__ = 'books'

    id = Column(Integer, primary_key=True)
    # Title acts as a natural key: unique and indexed for fast lookups.
    title = Column(String(255), unique=True, nullable=False, index=True)
    description = Column(Text)
    # NOTE(review): presumably where the downloaded cover image is stored
    # — confirm against the scraper pipeline.
    image_path = Column(String(255))
    rating = Column(SmallInteger)
    upc = Column(String(32))
    product_type = Column(String(32))  # -> Books
    price_excl_tax = Column(Numeric(6, 2))
    price_incl_tax = Column(Numeric(6, 2))
    tax = Column(Numeric(6, 2))
    in_stock = Column(Integer)
    count_reviews = Column(Integer)
    category = Column(String(32))
    # Short currency marker (the spider stores the price's leading
    # character, e.g. a currency symbol).
    currency_type = Column(String(4))
class Model:
    """Mixin that adds dict-driven mass assignment to ORM models."""

    def update(self, dct):
        """Copy each value from *dct* onto the matching attribute of
        ``self``; keys with no corresponding existing attribute are
        silently ignored.
        """
        for key in dct:
            if not hasattr(self, key):
                continue
            setattr(self, key, dct[key])
\ No newline at end of file
## Mako template rendered by ``alembic revision`` to generate new
## migration scripts; the ${...} placeholders are filled in by Alembic.
# -*- coding: UTF-8 -*-
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade():
    ${upgrades if upgrades else "pass"}


def downgrade():
    ${downgrades if downgrades else "pass"}
"""add books table
Revision ID: f36bd8e8d8c1
Revises:
Create Date: 2019-06-12 18:58:58.292063
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic's version graph.
revision = 'f36bd8e8d8c1'
down_revision = None  # first migration in the chain: no parent revision
branch_labels = None
depends_on = None
def upgrade():
    """Forward step: create the ``books`` table and its title index.

    Columns mirror the ``Book`` ORM model.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('books',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('title', sa.String(length=255), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('image_path', sa.String(length=255), nullable=True),
        sa.Column('rating', sa.SmallInteger(), nullable=True),
        sa.Column('upc', sa.String(length=32), nullable=True),
        sa.Column('product_type', sa.String(length=32), nullable=True),
        sa.Column('price_excl_tax', sa.Numeric(precision=6, scale=2), nullable=True),
        sa.Column('price_incl_tax', sa.Numeric(precision=6, scale=2), nullable=True),
        sa.Column('tax', sa.Numeric(precision=6, scale=2), nullable=True),
        sa.Column('in_stock', sa.Integer(), nullable=True),
        sa.Column('count_reviews', sa.Integer(), nullable=True),
        sa.Column('category', sa.String(length=32), nullable=True),
        sa.Column('currency_type', sa.String(length=4), nullable=True),
        sa.PrimaryKeyConstraint('id')
    )
    # Unique index backs the model's unique/indexed ``title`` column.
    op.create_index(op.f('ix_books_title'), 'books', ['title'], unique=True)
    # ### end Alembic commands ###
def downgrade():
    """Reverse step: drop the title index, then the ``books`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(op.f('ix_books_title'), table_name='books')
    op.drop_table('books')
    # ### end Alembic commands ###
......@@ -13,7 +13,8 @@ class BookItem(scrapy.Item):
title = scrapy.Field()
description = scrapy.Field()
image = scrapy.Field()
image_urls = scrapy.Field()
images = scrapy.Field()
rating = scrapy.Field()
upc = scrapy.Field()
......@@ -22,4 +23,8 @@ class BookItem(scrapy.Item):
price_incl_tax = scrapy.Field()
tax = scrapy.Field()
in_stock = scrapy.Field()
count_reviews = scrapy.Field()
\ No newline at end of file
count_reviews = scrapy.Field()
category = scrapy.Field()
currency_type = scrapy.Field()
......@@ -5,20 +5,14 @@ from database.connectors.BookConnector import BookConnector
import logging
class BooksSpider(scrapy.Spider):
class BooksSpider(BookConnector, scrapy.Spider):
name = 'books'
start_urls = [
'http://books.toscrape.com/catalogue/page-1.html'
]
ITEM_PIPELINES = {
'tutorial.pipelines.PricePipeline': 1,
}
start_urls = ['http://books.toscrape.com/']
def parse(self, response):
# follow links to book pages
for href in response.css('div.image_container a::attr(href)'): # TODO
yield response.follow(href, self.parse_book)
for idx, href in enumerate(response.css('div.image_container a::attr(href)')): # TODO delete enumerate
yield response.follow(href, self.parse_book)
# pagination
next_page = response.css('li.next a::attr(href)').get()
......@@ -30,18 +24,19 @@ class BooksSpider(BookConnector, scrapy.Spider):
table_data = response.css('table td::text').getall()
book.update({
'url': response.url,
'title': response.css('div.product_main h1::text').get(),
'description': response.xpath("//*[@id='product_description']/following::p/text()").get(),
'image': response.urljoin(response.css('div.active img::attr(src)').get()),
'image_urls': [response.urljoin(response.css('div.active img::attr(src)').get())],
'rating': self._get_rating(response.css('p.star-rating::attr(class)').get().split(' ')),
'upc': table_data[0],
'product_type': table_data[1],
'price_excl_tax': table_data[2],
'price_incl_tax': table_data[3],
'tax': table_data[4],
'price_excl_tax': table_data[2][1:],
'price_incl_tax': table_data[3][1:],
'tax': table_data[4][1:],
'in_stock': int(re.search(r'\d+', table_data[5]).group()),
'count_reviews': int(table_data[6]),
'currency_type': table_data[2][0],
'category': response.css('ul.breadcrumb li:nth-child(3) a::text').get()
})
yield book
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment