Skip to content

Commit

Permalink
BITMAKER-723: add kafka-producer
Browse files Browse the repository at this point in the history
  • Loading branch information
renato262 committed May 27, 2021
1 parent 25bae36 commit 19272bc
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ dist
.eggs/
.tox
/.coverage
.idea/
venv/
19 changes: 19 additions & 0 deletions bm_scrapy/extensions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
import os
from json import dumps
from scrapy import signals
from scrapy.exporters import PythonItemExporter

from bm_scrapy.writer import pipe_writer
from kafka import KafkaProducer


def connect_kafka_producer():
_producer = None
bootstrap_server = [
'{}:{}'.format(os.getenv('KAFKA_ADVERTISED_HOST_NAME', '127.0.0.1'),
os.getenv('KAFKA_ADVERTISED_PORT', '9092'))
]
_producer = KafkaProducer(bootstrap_servers=bootstrap_server, api_version=(0, 10),
value_serializer=lambda x: dumps(x).encode('utf-8'))
return _producer


producer = connect_kafka_producer()


class ItemStorageExtension:
Expand All @@ -19,6 +36,8 @@ def from_crawler(cls, crawler):

def item_scraped(self, item):
item = self.exporter.export_item(item)
producer.send('spider-items', value=dict(item))
producer.flush()
self.writer.write_item(item)

def spider_closed(self, spider, reason):
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
packages=find_packages(),
install_requires=[
'Scrapy>=1.0',
'kafka-python'
],
entry_points={
'console_scripts': [
Expand Down

0 comments on commit 19272bc

Please sign in to comment.