Commit dd898dfa authored by Kyryll Parolis

Rewritten extension.

parent 7d375471
import logging

from scrapy.crawler import CrawlerProcess
from scrapy.exceptions import CloseSpider
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor, task

logger = logging.getLogger(__name__)

process = CrawlerProcess(get_project_settings())


class Counter:
    def __init__(self):
        self.last_items = 0
        self.times_called = 0


counter = Counter()


def count_items(func):
    # Count on every call, not at decoration time, and forward the wrapped
    # signature so this can decorate item_scraped(self, item, spider).
    def wrapper(*args, **kwargs):
        counter.last_items += 1
        return func(*args, **kwargs)
    return wrapper


def check_scraped():
    if counter.times_called > 0:
        logger.info("Checking if any new items were scraped.")
        counter.times_called += 1
        if counter.last_items == 0:
            logger.critical("No new items were scraped.")
            raise CloseSpider("No new items were scraped during the last 5 minutes.")
        logger.info("New items were scraped. Next check in 5 minutes.")
        counter.last_items = 0
    else:
        # LoopingCall fires once immediately on start(); skip that first tick.
        counter.times_called += 1


def cbLoopDone(result):
    """Called when the loop was stopped with success."""
    print("Loop done.")
    reactor.stop()


def ebLoopFailed(failure):
    """Called when loop execution failed, e.g. via the CloseSpider above."""
    print(failure.getBriefTraceback())
    reactor.stop()


loop = task.LoopingCall(check_scraped)
loopDeferred = loop.start(300.0)
loopDeferred.addCallback(cbLoopDone)
loopDeferred.addErrback(ebLoopFailed)

process.crawl("sitemap")
# process.start() runs the Twisted reactor itself; a separate reactor.run()
# afterwards would raise ReactorNotRestartable.
process.start()
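The script crawls a spider registered under the name "sitemap", which is not part of this commit. A minimal sketch of what such a spider could look like, assuming it is sitemap-based (the class name and URL are placeholders, not taken from the project):

from scrapy.spiders import SitemapSpider

class MySitemapSpider(SitemapSpider):
    name = "sitemap"
    sitemap_urls = ["https://example.com/sitemap.xml"]  # placeholder URL

    def parse(self, response):
        # SitemapSpider routes every sitemap URL to parse() by default.
        yield {"url": response.url}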
import logging
from scrapy import signals
import datetime
from scrapy.exceptions import NotConfigured, CloseSpider
from crawl import count_items
from twisted.internet import task
logger = logging.getLogger(__name__)
@@ -13,6 +13,8 @@ class SpiderOpenCloseLogging:
    def __init__(self, item_count):
        self.item_count = item_count
        self.items_scraped = 0
        self.last_items = 0
        self.task = None

    @classmethod
    def from_crawler(cls, crawler):
@@ -37,10 +39,23 @@ class SpiderOpenCloseLogging:
    def spider_opened(self, spider):
        logger.info("opened spider %s", spider.name)
        self.task = task.LoopingCall(self.check_scraped, spider)
        # now=False: otherwise LoopingCall fires immediately, before anything
        # could have been scraped, and closes the spider on the first tick.
        self.task.start(300.0, now=False)

    def spider_closed(self, spider):
        logger.info("closed spider %s", spider.name)
        if self.task and self.task.running:
            self.task.stop()

    @count_items
    def item_scraped(self, item, spider):
        self.items_scraped += 1
        self.last_items += 1

    def check_scraped(self, spider):
        # LoopingCall passes the spider along, so accept it here.
        logger.info("Checking if any new items were scraped.")
        if self.last_items == 0:
            logger.critical("No new items were scraped.")
            raise CloseSpider("No new items were scraped during the last 5 minutes.")
        logger.info("New items were scraped. Next check in 5 minutes.")
        self.last_items = 0
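For the extension to take effect it has to be enabled in the project settings. A minimal sketch, assuming the class lives in myproject/extensions.py; the MYEXT_* setting names are assumptions, since the from_crawler body that would read them (and raise NotConfigured) is elided from this diff:

# settings.py
EXTENSIONS = {
    "myproject.extensions.SpiderOpenCloseLogging": 500,
}
MYEXT_ENABLED = True     # assumed on/off flag checked in from_crawler
MYEXT_ITEMCOUNT = 1000   # assumed source of the item_count passed to __init__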