class ClosingSpiderPipeline(object):
    """Item pipeline that reports crawl statistics when a spider closes.

    When Scrapy calls ``close_spider`` the pipeline e-mails the spider's
    collected stats to the address configured as ``MAIL_FROM`` and writes
    the same ``key: value`` lines to ``STATS_FILE``.

    Enabled through ``ITEM_PIPELINES`` in ``src/settings.py`` as
    ``'pipelines.ClosingSpiderPipeline.ClosingSpiderPipeline': 1000``.
    """

    # Destination of the stats dump. The path is relative, so it is resolved
    # against the working directory of the process running the crawl.
    STATS_FILE = '../statistics.txt'

    def close_spider(self, spider):
        """Send the stats e-mail and persist the stats file.

        :param spider: the spider being closed; its ``crawler`` supplies
            both the settings and the stats collector.
        """
        try:
            self._send_email(spider)
        finally:
            # Persist the stats even when building/sending the e-mail raised;
            # the original exception still propagates to Scrapy afterwards.
            self._save_statistics(spider)

    def _send_email(self, spider):
        """E-mail the spider's stats to the configured ``MAIL_FROM`` address.

        :param spider: the spider whose stats are reported.
        """
        # Imported here so this block carries its own dependency.
        from scrapy.mail import MailSender

        spider.logger.info('Sending email...')

        # Use the running crawler's settings rather than the deprecated
        # global ``scrapy.conf.settings`` singleton.
        settings = spider.crawler.settings
        mailer = MailSender.from_settings(settings)

        subject = 'Parser name:{} finished'.format(spider.name)
        stat_lines = self._format_stats(spider.crawler.stats.get_stats())
        body = subject + ':\n\n' + '\n'.join(stat_lines)

        # The report is addressed to the sender address itself.
        mailer.send(to=[settings.get('MAIL_FROM')], subject=subject, body=body)

    def _save_statistics(self, spider):
        """Overwrite ``STATS_FILE`` with one ``key: value`` line per stat.

        :param spider: the spider whose stats are written.
        """
        stat_lines = self._format_stats(spider.crawler.stats.get_stats())
        with open(self.STATS_FILE, 'w') as stats_file:
            stats_file.writelines(line + '\n' for line in stat_lines)

    @staticmethod
    def _format_stats(stats):
        """Return the stats mapping rendered as a list of ``key: value`` strings.

        :param stats: mapping of stat name to value.
        :returns: one formatted string per entry, in the mapping's iteration
            order, without trailing newlines.
        """
        return ['{}: {}'.format(key, value) for key, value in stats.items()]
+robotstxt/request_count: 1 +downloader/response_count: 2 +downloader/response_status_count/404: 1 +downloader/response_bytes: 6204 +log_count/DEBUG: 2 +response_received_count: 2 +robotstxt/response_count: 1 +robotstxt/response_status_count/404: 1 +downloader/response_status_count/200: 1