Commit 7d375471 authored by Kyryll Parolis's avatar Kyryll Parolis
Browse files

Finished extension.

parent fa0a3c49
import functools
import logging

from scrapy.crawler import CrawlerProcess
from scrapy.exceptions import CloseSpider
from scrapy.utils.project import get_project_settings
from twisted.internet import task
from twisted.internet import reactor

from extensions.extensions import SpiderOpenCloseLogging
# Module-level logger for this crawl script.
logger = logging.getLogger(__name__)
# Crawler process configured from the Scrapy project's settings file.
# NOTE(review): constructing CrawlerProcess at import time has side effects
# (reads project settings); keep it before any spider scheduling below.
process = CrawlerProcess(get_project_settings())
class Counter:
    """Mutable tally shared between the item-count decorator and the liveness check."""

    def __init__(self):
        # Items scraped since the previous check_scraped() tick.
        self.last_items = 0
        # Number of times check_scraped() has run so far.
        self.times_called = 0


# Module-level singleton read by count_items() and check_scraped().
counter = Counter()


def count_items(func):
    """Decorator that bumps ``counter.last_items`` on every *call* to ``func``.

    Bug fix vs. the original: the increment happened at decoration time
    (exactly once, at import), the wrapper accepted no arguments (so it broke
    any decorated function taking parameters, e.g. ``item_scraped(self, item,
    spider)``), and the wrapped function's return value was discarded. The
    counter must grow once per scraped item, i.e. once per call.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        counter.last_items += 1
        return func(*args, **kwargs)
    return wrapper
def check_scraped():
    """Periodic liveness probe: abort the crawl if nothing was scraped lately.

    Runs from a twisted LoopingCall. The very first invocation (fired
    immediately when the loop starts) only primes the counter and performs
    no check; every later tick raises CloseSpider when no new items arrived
    since the previous tick, otherwise resets the window counter.
    """
    is_first_tick = counter.times_called == 0
    counter.times_called += 1
    if is_first_tick:
        # Nothing to compare against yet — skip the check this time.
        return
    logger.info("Checking if any new items were scraped.")
    if counter.last_items == 0:
        logger.critical("No new items were scraped.")
        raise CloseSpider("No new items were scraped, during the last 5 minutes.")
    # Items did arrive: reset the window and announce the next check.
    counter.last_items = 0
    logger.info("New items, were scraped. Next check in 5 minutes.")
def cbLoopDone(result):
"""
Called when loop was stopped with success.
......@@ -22,7 +58,7 @@ def ebLoopFailed(failure):
reactor.stop()
# Schedule the liveness probe every 5 minutes (300 s). LoopingCall fires the
# first tick immediately; check_scraped() itself skips that priming tick.
# The earlier dead binding `task.LoopingCall(SpiderOpenCloseLogging.check_scraped)`
# was removed: it was overwritten on the next line and referenced the
# extension-class method as an unbound attribute.
loop = task.LoopingCall(check_scraped)
loopDeferred = loop.start(300.0)
loopDeferred.addCallback(cbLoopDone)
......
......@@ -3,6 +3,8 @@ from scrapy import signals
import datetime
from scrapy.exceptions import NotConfigured, CloseSpider
from crawl import count_items
# Module-level logger for the extension; assumes `logging` is imported above — TODO confirm.
logger = logging.getLogger(__name__)
......@@ -11,8 +13,6 @@ class SpiderOpenCloseLogging:
def __init__(self, item_count):
    """Store the configured item threshold and zero both counters."""
    self.item_count = item_count
    # Lifetime total and per-window count both start from zero.
    self.items_scraped = self.last_items = 0
    # Remember when the extension instance was created.
    self.time = datetime.datetime.now()
@classmethod
def from_crawler(cls, crawler):
......@@ -41,15 +41,6 @@ class SpiderOpenCloseLogging:
def spider_closed(self, spider):
    """Log the spider's name when it shuts down."""
    spider_name = spider.name
    logger.info("closed spider %s", spider_name)
@count_items
def item_scraped(self, item, spider):
    """Signal handler invoked for each scraped item; updates both tallies."""
    # Lifetime total for this spider.
    self.items_scraped = self.items_scraped + 1
    # Window counter consumed by the periodic liveness check.
    self.last_items = self.last_items + 1
def check_scraped(self):
    """Abort the crawl when no items arrived since the previous check.

    Raises:
        CloseSpider: if ``self.last_items`` is still zero, i.e. nothing
            was scraped during the elapsed window.
    """
    logger.info("Checking if any new items were scraped.")
    if self.last_items == 0:
        logger.critical("No new items were scraped.")
        raise CloseSpider("No new items were scraped, during the last 5 minutes.")
    # Items did arrive: announce and reset the window counter.
    logger.info("New items, were scraped. Next check in 5 minutes.")
    self.last_items = 0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment