Skip to content
Snippets Groups Projects
Commit 6b39c55f authored by Anton Gusev
Browse files

add pipeline ClosingSpiderPipeline

parent 26281d89
No related branches found
No related tags found
5 merge requests: !5 Develop, !4 Develop, !3 Develop, !2 Develop, !1 Develop
from scrapy.mail import MailSender
from scrapy.conf import settings
class ClosingSpiderPipeline(object):
    """Item pipeline that reports crawl statistics when a spider closes.

    When the spider is closed, the crawler's collected stats are e-mailed
    and also written to ``../statistics.txt`` (relative to the current
    working directory).
    """

    def close_spider(self, spider):
        """Send the stats e-mail, then dump the stats to disk.

        :param spider: the spider being closed; must expose ``name``,
            ``logger`` and ``crawler``.
        :return: whatever ``MailSender.send`` returned, so that the caller
            can wait for delivery if that value is a Deferred.
        """
        mail_result = self._send_email(spider)
        self._save_statistics(spider)
        # NOTE(review): returning the send result is meant to let Scrapy
        # wait for the mail to go out before shutting down -- confirm
        # against the Scrapy version in use.
        return mail_result

    def _send_email(self, spider):
        """E-mail the crawl stats to the address configured as MAIL_FROM.

        :param spider: the spider whose crawler stats are reported.
        :return: the result of ``MailSender.send``.
        """
        spider.logger.info('Sending email...')
        # Use the running crawler's settings rather than the module-level
        # ``scrapy.conf.settings`` singleton, which is deprecated; this
        # class no longer depends on that import.
        crawler_settings = spider.crawler.settings
        mailer = MailSender.from_settings(crawler_settings)
        # The report is sent back to the sender address itself.
        to = [crawler_settings.get('MAIL_FROM')]
        subject = 'Parser name:{} finished'.format(spider.name)
        stats = spider.crawler.stats.get_stats()
        stat_lines = ['{}: {}'.format(k, v) for k, v in stats.items()]
        body = subject + ':\n\n' + '\n'.join(stat_lines)
        return mailer.send(to=to, subject=subject, body=body)

    def _save_statistics(self, spider):
        """Write the crawl stats to ``../statistics.txt``, one per line.

        The file is overwritten on every call; each line has the form
        ``key: value``.

        :param spider: the spider whose crawler stats are saved.
        """
        stats = spider.crawler.stats.get_stats()
        with open('../statistics.txt', 'w') as f:
            # writelines() expects an iterable of lines; passing it a single
            # string would make it iterate character by character.
            f.writelines('{}: {}\n'.format(k, v) for k, v in stats.items())
......@@ -71,6 +71,7 @@ ROBOTSTXT_OBEY = True
# Presumably the directory used by the images pipeline for downloaded
# files -- verify against the Scrapy documentation.
IMAGES_STORE = '../image'

# Enabled item pipelines mapped to their integer order values, listed here
# in ascending order of that value.
# NOTE(review): Scrapy is expected to run lower values first -- confirm.
ITEM_PIPELINES = {
    'scrapy.pipelines.images.ImagesPipeline': 1,
    'pipelines.DatabasePipeline.DatabasePipeline': 101,
    'pipelines.ClosingSpiderPipeline.ClosingSpiderPipeline': 1000,
}
......
log_count/INFO: 10
start_time: 2019-06-13 14:08:50.725327
scheduler/enqueued/memory: 1
scheduler/enqueued: 1
scheduler/dequeued/memory: 1
scheduler/dequeued: 1
downloader/request_count: 2
downloader/request_method_count/GET: 2
downloader/request_bytes: 444
robotstxt/request_count: 1
downloader/response_count: 2
downloader/response_status_count/404: 1
downloader/response_bytes: 6204
log_count/DEBUG: 2
response_received_count: 2
robotstxt/response_count: 1
robotstxt/response_status_count/404: 1
downloader/response_status_count/200: 1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment