From 6b39c55f7817750e635985d5eeca85377a778797 Mon Sep 17 00:00:00 2001
From: Gusev Anton <gusev_aa@groupbwt.com>
Date: Thu, 13 Jun 2019 17:18:17 +0300
Subject: [PATCH] add pipeline ClosingSpiderPipeline

---
 src/pipelines/ClosingSpiderPipeline.py | 26 ++++++++++++++++++++++++++
 src/settings.py                        |  1 +
 statistics.txt                         | 18 ++++++++++++++++++
 3 files changed, 45 insertions(+)
 create mode 100644 src/pipelines/ClosingSpiderPipeline.py
 create mode 100644 statistics.txt

diff --git a/src/pipelines/ClosingSpiderPipeline.py b/src/pipelines/ClosingSpiderPipeline.py
new file mode 100644
index 0000000..283c128
--- /dev/null
+++ b/src/pipelines/ClosingSpiderPipeline.py
@@ -0,0 +1,26 @@
+from scrapy.mail import MailSender
+from scrapy.conf import settings
+
+
+class ClosingSpiderPipeline(object):
+    """On spider close, dump the crawl stats to a file and e-mail them."""
+
+    def close_spider(self, spider):
+        """Save the stats file, then return the mail result so shutdown can wait on it."""
+        self._save_statistics(spider)  # local write first: a mail failure must not lose it
+        return self._send_email(spider)
+
+    def _send_email(self, spider):
+        """Mail the stats to the MAIL_FROM address; return whatever ``send`` returns."""
+        spider.logger.info('Sending email...')
+        crawler_settings = spider.crawler.settings  # effective settings of this crawl
+        subject = 'Parser name:{} finished'.format(spider.name)
+        stats = spider.crawler.stats.get_stats()
+        body = subject + ':\n\n' + '\n'.join('{}: {}'.format(k, v) for k, v in stats.items())
+        mailer = MailSender.from_settings(crawler_settings)
+        return mailer.send(to=[crawler_settings.get('MAIL_FROM')], subject=subject, body=body)
+
+    def _save_statistics(self, spider):
+        """Write one ``key: value`` line per stat to ../statistics.txt (relative to the CWD)."""
+        with open('../statistics.txt', 'w') as f:
+            f.writelines('{}: {}\n'.format(k, v) for k, v in spider.crawler.stats.get_stats().items())
diff --git a/src/settings.py b/src/settings.py
index f199e00..cd2081a 100644
--- a/src/settings.py
+++ b/src/settings.py
@@ -71,6 +71,7 @@ ROBOTSTXT_OBEY = True
 IMAGES_STORE = '../image'
 ITEM_PIPELINES = {
     'pipelines.DatabasePipeline.DatabasePipeline': 101,
+    'pipelines.ClosingSpiderPipeline.ClosingSpiderPipeline': 1000,  # largest order value of the three listed
     'scrapy.pipelines.images.ImagesPipeline': 1,
 }
 
diff --git a/statistics.txt b/statistics.txt
new file mode 100644
index 0000000..47c79f9
--- /dev/null
+++ b/statistics.txt
@@ -0,0 +1,18 @@
+log_count/INFO: 10
+start_time: 2019-06-13 14:08:50.725327
+scheduler/enqueued/memory: 1
+scheduler/enqueued: 1
+scheduler/dequeued/memory: 1
+scheduler/dequeued: 1
+downloader/request_count: 2
+downloader/request_method_count/GET: 2
+downloader/request_bytes: 444
+robotstxt/request_count: 1
+downloader/response_count: 2
+downloader/response_status_count/404: 1
+downloader/response_bytes: 6204
+log_count/DEBUG: 2
+response_received_count: 2
+robotstxt/response_count: 1
+robotstxt/response_status_count/404: 1
+downloader/response_status_count/200: 1
-- 
GitLab