diff --git a/src/pipelines/ClosingSpiderPipeline.py b/src/pipelines/ClosingSpiderPipeline.py
deleted file mode 100644
index 283c128f72b44868d2cd2e1f986a2206f507b104..0000000000000000000000000000000000000000
--- a/src/pipelines/ClosingSpiderPipeline.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from scrapy.mail import MailSender
-from scrapy.conf import settings
-
-
-class ClosingSpiderPipeline(object):
-
-    def close_spider(self, spider):
-        self._send_email(spider)
-        self._save_statistics(spider)
-
-    def _send_email(self, spider):
-        spider.logger.info('Sending email...')
-
-        mailer = MailSender.from_settings(settings)
-        to = [settings.get('MAIL_FROM')]
-        subject = 'Parser name:{} finished'.format(spider.name)
-        body = subject + ':\n\n'
-        body += '\n'.join(['{}: {}'.format(k, v) for k, v in spider.crawler.stats.get_stats().items()])
-
-        mailer.send(to=to, subject=subject, body=body)
-
-    def _save_statistics(self, spider):
-        stats = spider.crawler.stats.get_stats()
-        with open('../statistics.txt', 'w') as f:
-            for k, v in stats.items():
-                f.writelines('{}: {}\n'.format(k, v))
diff --git a/src/pipelines/StatsMailer.py b/src/pipelines/StatsMailer.py
new file mode 100644
index 0000000000000000000000000000000000000000..1eec9e476d54a7176d816f5bc62c727b42e93cd1
--- /dev/null
+++ b/src/pipelines/StatsMailer.py
@@ -0,0 +1,25 @@
+from scrapy.mail import MailSender
+
+
+class StatsMailer(object):
+    """Item pipeline that e-mails the crawl stats when a spider closes."""
+
+    name = 'StatsMailer'
+
+    def close_spider(self, spider):
+        """Mail the spider's collected stats to the ``MAIL_FROM`` address.
+
+        Settings are read from ``spider.crawler.settings`` rather than the
+        deprecated global ``scrapy.conf.settings``.
+
+        Returns whatever ``MailSender.send`` returns.
+        """
+        settings = spider.crawler.settings
+        mailer = MailSender.from_settings(settings)
+        # The report is sent back to the sender's own address.
+        to = [settings.get('MAIL_FROM')]
+        subject = 'Parser name:{} finished'.format(spider.name)
+        stats = spider.crawler.stats.get_stats()
+        body = subject + ':\n\n'
+        body += '\n'.join('{}: {}'.format(k, v) for k, v in stats.items())
+        return mailer.send(to=to, subject=subject, body=body)
diff --git a/src/pipelines/StatsWriter.py b/src/pipelines/StatsWriter.py
new file mode 100644
index 0000000000000000000000000000000000000000..da2042e549ff7ce174a39cb02794d172881496d5
--- /dev/null
+++ b/src/pipelines/StatsWriter.py
@@ -0,0 +1,16 @@
+class StatsWriter(object):
+    """Item pipeline that writes the crawl stats to a file on close."""
+
+    name = 'StatsWriter'
+
+    def close_spider(self, spider):
+        """Write every ``key: value`` stat to ``../statistics.txt``.
+
+        The path is relative to the process working directory and the file
+        is overwritten on every run.
+        """
+        stats = spider.crawler.stats.get_stats()
+        with open('../statistics.txt', 'w') as f:
+            # writelines() expects an iterable of lines; handing it a single
+            # string made it iterate character by character.
+            f.writelines('{}: {}\n'.format(k, v) for k, v in stats.items())
diff --git a/src/settings.py b/src/settings.py
index d96ddc1a3c84d848d9b4e7e98b60785c784f44a6..75f89fa9a0fd29524c47788c3a785435ff2e1f83 100644
--- a/src/settings.py
+++ b/src/settings.py
@@ -71,7 +71,9 @@ ROBOTSTXT_OBEY = True
 IMAGES_STORE = '../image'
 ITEM_PIPELINES = {
     'pipelines.DatabasePipeline.DatabasePipeline': 101,
-    'pipelines.ClosingSpiderPipeline.ClosingSpiderPipeline': 1000,
+    'pipelines.DatabaseWriter.DatabaseWriter': 101,
+    'pipelines.StatsMailer.StatsMailer': 999,
+    'pipelines.StatsWriter.StatsWriter': 1000,
     'scrapy.pipelines.images.ImagesPipeline': 1,
 }
 