From 35cf0d663fa9cefdaaf8a2c96e79aeadcb552ef6 Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Wed, 18 Dec 2024 16:00:16 +0200 Subject: [PATCH 01/18] remove fake GPS, add newsflash GPS --- anyway/request_params.py | 13 +++---------- tests/test_request_params.py | 3 +-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/anyway/request_params.py b/anyway/request_params.py index 2e35ca9f..e07dc52d 100644 --- a/anyway/request_params.py +++ b/anyway/request_params.py @@ -95,7 +95,7 @@ def get_request_params_from_request_values(vals: dict) -> Optional[RequestParams years_ago = vals.get("years_ago", BE_CONST.DEFAULT_NUMBER_OF_YEARS_AGO) lang = vals.get("lang", "he") location_text = location["text"] - gps = location["gps"] + gps = {"lat": news_flash_obj.lat, "lon": news_flash_obj.lon} location_info = location["data"] if location_info is None: @@ -220,9 +220,7 @@ def extract_road_segment_location(road_segment_id): data["road_segment_name"] = road_segment_name data["road_segment_id"] = int(road_segment_id) text = get_road_segment_location_text(road1, road_segment_name) - # fake gps - todo: fix - gps = {"lat": 32.825610, "lon": 35.165395} - return {"name": "location", "data": data, "gps": gps, "text": text} + return {"name": "location", "data": data, "text": text} # todo: fill both codes and names into location @@ -233,9 +231,7 @@ def extract_street_location(input_vals: dict): for k in ["yishuv_name", "yishuv_symbol", "street1", "street1_hebrew"]: data[k] = vals[k] text = get_street_location_text(vals["yishuv_name"], vals["street1_hebrew"]) - # fake gps - todo: fix - gps = {"lat": 32.825610, "lon": 35.165395} - return {"name": "location", "data": data, "gps": gps, "text": text} + return {"name": "location", "data": data, "text": text} def extract_street_location_suggestion_version(input_vals: dict): @@ -271,12 +267,9 @@ def extract_non_urban_intersection_location(input_vals: dict): data = {"resolution": BE_CONST.ResolutionCategories.SUBURBAN_JUNCTION} for k in ["non_urban_intersection", "non_urban_intersection_hebrew", "road1", "road2"]: data[k] = vals[k] - # fake gps - todo: fix - gps = {"lat": 32.825610, "lon": 35.165395} return { "name": "location", "data": data, - "gps": gps, "text": vals["non_urban_intersection_hebrew"], } diff --git a/tests/test_request_params.py b/tests/test_request_params.py index ec0b35a6..4f7ac957 100644 --- a/tests/test_request_params.py +++ b/tests/test_request_params.py @@ -30,7 +30,6 @@ class TestRequestParams(unittest.TestCase): 'resolution': BE_CONST.ResolutionCategories.SUBURBAN_JUNCTION, 'road1': 669, 'road2': 71}, - 'gps': {'lat': 32.82561, 'lon': 35.165395}, 'name': 'location', 'text': 'צומת השיטה'} nf = NewsFlash() @@ -46,7 +45,7 @@ class TestRequestParams(unittest.TestCase): lang='he', news_flash_description=nf.description, news_flash_title=nf.title, - gps={"lat": 32.825610, "lon": 35.165395} + gps={"lat": None, "lon": None} ) @patch("anyway.request_params.fill_missing_non_urban_intersection_values") From 3eb7c1c53b030642dca7e49db7e2d0fa905aa859 Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Fri, 20 Dec 2024 15:52:52 +0200 Subject: [PATCH 02/18] add is_included, use get instead of [] --- anyway/request_params.py | 2 +- anyway/widgets/road_segment_widgets/street_view_widget.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/anyway/request_params.py b/anyway/request_params.py index e07dc52d..4a6e394c 100644 --- a/anyway/request_params.py +++ b/anyway/request_params.py @@ -95,7 +95,7 @@ def 
get_request_params_from_request_values(vals: dict) -> Optional[RequestParams years_ago = vals.get("years_ago", BE_CONST.DEFAULT_NUMBER_OF_YEARS_AGO) lang = vals.get("lang", "he") location_text = location["text"] - gps = {"lat": news_flash_obj.lat, "lon": news_flash_obj.lon} + gps = location.get("gps") location_info = location["data"] if location_info is None: diff --git a/anyway/widgets/road_segment_widgets/street_view_widget.py b/anyway/widgets/road_segment_widgets/street_view_widget.py index 17da2739..72939e72 100644 --- a/anyway/widgets/road_segment_widgets/street_view_widget.py +++ b/anyway/widgets/road_segment_widgets/street_view_widget.py @@ -16,10 +16,13 @@ def __init__(self, request_params: RequestParams): def generate_items(self) -> None: self.items = { - "longitude": self.request_params.gps["lon"], - "latitude": self.request_params.gps["lat"], + "longitude": self.request_params.gps.get("lon"), + "latitude": self.request_params.gps.get("lat"), } + def is_included(self): + return self.request_params.gps is not None + @staticmethod def localize_items(request_params: RequestParams, items: Dict) -> Dict: items["data"]["text"] = {"title": _("Street view widget")} From 8f2bbb5380bf11047a96be5002a6c82600f135b9 Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Fri, 20 Dec 2024 16:06:47 +0200 Subject: [PATCH 03/18] fix is_included --- anyway/widgets/road_segment_widgets/street_view_widget.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anyway/widgets/road_segment_widgets/street_view_widget.py b/anyway/widgets/road_segment_widgets/street_view_widget.py index 72939e72..fc89e709 100644 --- a/anyway/widgets/road_segment_widgets/street_view_widget.py +++ b/anyway/widgets/road_segment_widgets/street_view_widget.py @@ -21,7 +21,7 @@ def generate_items(self) -> None: } def is_included(self): - return self.request_params.gps is not None + return self.request_params.gps @staticmethod def localize_items(request_params: RequestParams, items: Dict) -> Dict: From 402cb0f383b5637fdf20374e4b211555cc06d73c Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Fri, 20 Dec 2024 16:08:20 +0200 Subject: [PATCH 04/18] fix text --- tests/test_request_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_request_params.py b/tests/test_request_params.py index 4f7ac957..aa7f7a78 100644 --- a/tests/test_request_params.py +++ b/tests/test_request_params.py @@ -45,7 +45,7 @@ class TestRequestParams(unittest.TestCase): lang='he', news_flash_description=nf.description, news_flash_title=nf.title, - gps={"lat": None, "lon": None} + gps={} ) @patch("anyway.request_params.fill_missing_non_urban_intersection_values") From 9ee40d8adbf07daf4b27a2a9ac779c9b58266009 Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Sat, 28 Dec 2024 16:25:59 +0200 Subject: [PATCH 05/18] add update_result to street_view --- .../road_segment_widgets/street_view_widget.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/anyway/widgets/road_segment_widgets/street_view_widget.py b/anyway/widgets/road_segment_widgets/street_view_widget.py index 68d75a15..13c7571b 100644 --- a/anyway/widgets/road_segment_widgets/street_view_widget.py +++ b/anyway/widgets/road_segment_widgets/street_view_widget.py @@ -1,7 +1,8 @@ +import logging from anyway.request_params import RequestParams from anyway.widgets.widget import register from anyway.widgets.road_segment_widgets.road_segment_widget import RoadSegmentWidget -from typing import Dict +from typing import Dict, Optional from 
flask_babel import _ @@ -23,8 +24,18 @@ def generate_items(self) -> None: def is_included(self): return self.request_params.gps and self.request_params.gps.get("lon") and self.request_params.gps.get("lat") - @staticmethod def localize_items(request_params: RequestParams, items: Dict) -> Dict: items["data"]["text"] = {"title": _("Street view widget")} - return items \ No newline at end of file + return items + + @classmethod + def update_result(cls, request_params: RequestParams, cached_items: Dict) -> Optional[Dict]: + if cls.is_relevant(request_params): + w = cls(request_params) # pylint: disable=E1120 + try: + w.generate_items() + updated_widget_data = w.serialize() + return updated_widget_data if w.is_included() else None + except Exception as e: + logging.debug(f"Encountered error when generating items for {w.name} : {e}") \ No newline at end of file From 682f4b5e583e1bcac5d6758d7ffdc7bd69ebc988 Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Sat, 28 Dec 2024 16:26:56 +0200 Subject: [PATCH 06/18] update_output before filtering empty widgets --- anyway/infographics_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anyway/infographics_utils.py b/anyway/infographics_utils.py index 6eb57f53..10c2c04a 100755 --- a/anyway/infographics_utils.py +++ b/anyway/infographics_utils.py @@ -212,8 +212,8 @@ def get_infographics_data_for_location(request_params: RequestParams) -> Dict: elif WIDGETS not in output: logging.error(f"get_infographics_data: 'widgets' key missing from output:{output}") else: - non_empty = list(filter(lambda x: x[DATA][ITEMS], output[WIDGETS])) - output[WIDGETS] = update_cache_results(request_params, non_empty) + updated_output = update_cache_results(request_params, output[WIDGETS]) + output[WIDGETS] = list(filter(lambda x: x[DATA][ITEMS], updated_output)) return output From 5b0afb80507e3be6c722fa46d5431748f413f189 Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Sat, 28 Dec 2024 16:31:39 +0200 Subject: [PATCH 07/18] use generate_widget_data in update_result --- .../road_segment_widgets/street_view_widget.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/anyway/widgets/road_segment_widgets/street_view_widget.py b/anyway/widgets/road_segment_widgets/street_view_widget.py index 13c7571b..bb6ff27a 100644 --- a/anyway/widgets/road_segment_widgets/street_view_widget.py +++ b/anyway/widgets/road_segment_widgets/street_view_widget.py @@ -31,11 +31,7 @@ def localize_items(request_params: RequestParams, items: Dict) -> Dict: @classmethod def update_result(cls, request_params: RequestParams, cached_items: Dict) -> Optional[Dict]: - if cls.is_relevant(request_params): - w = cls(request_params) # pylint: disable=E1120 - try: - w.generate_items() - updated_widget_data = w.serialize() - return updated_widget_data if w.is_included() else None - except Exception as e: - logging.debug(f"Encountered error when generating items for {w.name} : {e}") \ No newline at end of file + try: + return cls.generate_widget_data(request_params) + except Exception as e: + logging.debug(f"Encountered error when generating items for widget class {cls} : {e}") From a0b67e9b23a6fc0a61ff0ad353aa2502081514b6 Mon Sep 17 00:00:00 2001 From: tkalir Date: Thu, 2 Jan 2025 18:23:12 +0200 Subject: [PATCH 08/18] added comment to publish_notification --- anyway/telegram_accident_notifications.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/anyway/telegram_accident_notifications.py b/anyway/telegram_accident_notifications.py index abecd469..b5040b53 100644 
--- a/anyway/telegram_accident_notifications.py +++ b/anyway/telegram_accident_notifications.py @@ -65,6 +65,8 @@ def send_after_infographics_message(bot, message_id_in_group, newsflash_id, link return bot.send_message(linked_group, message, reply_to_message_id=message_id_in_group) +#this function sends the "root" message for the newsflash in telegram. +#the flow continues when the telegram server sends a request to our /api/telegram/webhook def publish_notification(newsflash_id, chat_id=TELEGRAM_CHANNEL_CHAT_ID): accident_text = create_accident_text(newsflash_id) bot = telebot.TeleBot(secrets.get("BOT_TOKEN")) From 7d5bff28b52cc4eafc53f144f150387495ecd7c6 Mon Sep 17 00:00:00 2001 From: tkalir Date: Thu, 2 Jan 2025 18:24:41 +0200 Subject: [PATCH 09/18] added delay in generate_infographics_in_selenium_container after scrolling to infographic, to prevent map infographics from being blurry --- anyway/infographic_image_generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/anyway/infographic_image_generator.py b/anyway/infographic_image_generator.py index 31ce6ce0..36f4e72d 100644 --- a/anyway/infographic_image_generator.py +++ b/anyway/infographic_image_generator.py @@ -18,7 +18,7 @@ selenium_hub_url = f"https://{selenium_hub_url}/wd/hub" selenium_remote_results_url = f"https://{selenium_url}/tempdata" CHROME_PARTIALLY_DOWNLOADED_FILE_EXTENSION = "crdownload" - +SLEEP_DURATION_FOR_MAP_TO_HAVE_FOCUS = 5 def create_chrome_browser_session(newsflash_id): options = webdriver.ChromeOptions() @@ -94,7 +94,9 @@ def generate_infographics_in_selenium_container(browser, newsflash_id): logging.debug(f"found {buttons_found} buttons") if buttons_found > 0: for element in elements: - ActionChains(browser).move_to_element(element).click().perform() + ActionChains(browser).move_to_element(element).perform() + time.sleep(SLEEP_DURATION_FOR_MAP_TO_HAVE_FOCUS) #without sleep map infographic may be blurry + element.click() time.sleep(1) #prevents click arriving before the last finished is_download_done, generated_images_names = wait_for_folder_to_contain_all_files(newsflash_id, buttons_found, timeout=60) From c3b466df5b617db678111ea15301d0b11d6bf810 Mon Sep 17 00:00:00 2001 From: tkalir Date: Thu, 2 Jan 2025 23:30:55 +0200 Subject: [PATCH 10/18] extracted get_items_for_send from send_infographics_to_telegram --- anyway/telegram_accident_notifications.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/anyway/telegram_accident_notifications.py b/anyway/telegram_accident_notifications.py index b5040b53..bc285f1a 100644 --- a/anyway/telegram_accident_notifications.py +++ b/anyway/telegram_accident_notifications.py @@ -79,20 +79,28 @@ def publish_notification(newsflash_id, chat_id=TELEGRAM_CHANNEL_CHAT_ID): db.session.commit() +def get_items_for_send(newsflash_id): + items = [] + transcription_by_widget_name = fetch_transcription_by_widget_name(newsflash_id) + urls_by_infographic_name = create_public_urls_for_infographics_images(str(newsflash_id)) + for infographic_name, url in urls_by_infographic_name.items(): + text = transcription_by_widget_name[infographic_name] \ + if infographic_name in transcription_by_widget_name else None + items.append((url, text)) + return items + + def send_infographics_to_telegram(root_message_id, newsflash_id, channel_of_initial_message): #every message in the channel is automatically forwarded to the linked discussion group. 
#to create a comment on the channel message, we need to send a reply to the #forwareded message in the discussion group. bot = telebot.TeleBot(secrets.get("BOT_TOKEN")) - transcription_by_widget_name = fetch_transcription_by_widget_name(newsflash_id) - urls_by_infographic_name = create_public_urls_for_infographics_images(str(newsflash_id)) - linked_group = telegram_linked_group_by_channel[channel_of_initial_message] - for infographic_name, url in urls_by_infographic_name.items(): - text = transcription_by_widget_name[infographic_name] \ - if infographic_name in transcription_by_widget_name else None + items_for_send = get_items_for_send(newsflash_id) + for url, text in items_for_send: bot.send_photo(linked_group, url, reply_to_message_id=root_message_id, caption=text) + send_after_infographics_message(bot, root_message_id, newsflash_id, linked_group) logging.info("notification send done") From 214cf20b15e638196d5df1ad8af68726cb9bb3d7 Mon Sep 17 00:00:00 2001 From: tkalir Date: Sun, 5 Jan 2025 16:27:39 +0200 Subject: [PATCH 11/18] send newsflashes to telegeram in the order of appearance on website, added retries to widget fetching --- anyway/telegram_accident_notifications.py | 46 ++++++++++++++++++----- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/anyway/telegram_accident_notifications.py b/anyway/telegram_accident_notifications.py index bc285f1a..7376c731 100644 --- a/anyway/telegram_accident_notifications.py +++ b/anyway/telegram_accident_notifications.py @@ -50,11 +50,9 @@ def create_accident_text(newsflash_id): return f"{first_line}\n\n{newsflash['description']}" -def fetch_transcription_by_widget_name(newsflash_id): - widgets_url = f"{ANYWAY_BASE_API_URL}/infographics-data?lang=he&news_flash_id={newsflash_id}&years_ago=5" - widgets_json = requests.get(widgets_url).json() +def get_transcription_by_widget_name(widgets): transcription_by_widget_name = {widget["name"]: widget["data"]["text"]["transcription"] - for widget in widgets_json["widgets"] + for widget in widgets if "transcription" in widget["data"]["text"]} return transcription_by_widget_name @@ -79,14 +77,41 @@ def publish_notification(newsflash_id, chat_id=TELEGRAM_CHANNEL_CHAT_ID): db.session.commit() +def fetch_widgets_with_retries(newsflash_id, wait_times): + url = f"https://www.anyway.co.il/api/infographics-data?news_flash_id={newsflash_id}" + for attempt, wait_time in enumerate(wait_times): + try: + logging.debug(f"Attempt {attempt + 1}: Fetching data widgets for newsflash {newsflash_id}") + response = requests.get(url) + if response.ok: + response_json = response.json() + widgets = response_json.get("widgets", []) + if len(widgets) > 0: + return widgets + except requests.exceptions.RequestException as e: + logging.debug(e) + time.sleep(wait_time) + raise RuntimeError(f"Failed to fetch data from {url}") + + +def fetch_widgets(newsflash_id): + retry_timeouts = [10, 20, 30, 60] + widgets = fetch_widgets_with_retries(newsflash_id, retry_timeouts) + return [widget.get("name") for widget in widgets] + + def get_items_for_send(newsflash_id): items = [] - transcription_by_widget_name = fetch_transcription_by_widget_name(newsflash_id) + retry_timeouts = [10, 20, 30, 60] + widgets = fetch_widgets_with_retries(newsflash_id, retry_timeouts) + transcription_by_widget_name = get_transcription_by_widget_name(widgets) urls_by_infographic_name = create_public_urls_for_infographics_images(str(newsflash_id)) - for infographic_name, url in urls_by_infographic_name.items(): - text = 
transcription_by_widget_name[infographic_name] \ - if infographic_name in transcription_by_widget_name else None - items.append((url, text)) + for widget in widgets: + name = widget.get("name") + if name in urls_by_infographic_name: + url = urls_by_infographic_name.get(name) + text = transcription_by_widget_name.get(name) + items.append((url, text)) return items @@ -104,6 +129,7 @@ def send_infographics_to_telegram(root_message_id, newsflash_id, channel_of_init send_after_infographics_message(bot, root_message_id, newsflash_id, linked_group) logging.info("notification send done") + def extract_infographic_name_from_s3_object(s3_object_name): left = s3_object_name.rindex("/") right = s3_object_name.rindex(".") @@ -131,4 +157,4 @@ def trigger_generate_infographics_and_send_to_telegram(newsflash_id, pre_verific dag_conf = {"news_flash_id": newsflash_id} dag_conf["chat_id"] = TELEGRAM_CHANNEL_CHAT_ID if pre_verification_chat \ else TELEGRAM_POST_VERIFICATION_CHANNEL_CHAT_ID - trigger_airflow_dag("generate-and-send-infographics-images", dag_conf) + trigger_airflow_dag("generate-and-send-infographics-images", dag_conf) \ No newline at end of file From 26323b4427998f37caf10045653145b5c5e6f954 Mon Sep 17 00:00:00 2001 From: tkalir Date: Sat, 11 Jan 2025 17:20:18 +0200 Subject: [PATCH 12/18] add get_infographics_data_by_newsflash to infographics_utils.py --- anyway/infographic_image_generator.py | 75 ++++++++++++++++++++++ anyway/infographics_utils.py | 6 ++ docker-compose.yml | 90 +++++++++++++++++++++++++++ 3 files changed, 171 insertions(+) diff --git a/anyway/infographic_image_generator.py b/anyway/infographic_image_generator.py index 36f4e72d..a9829585 100644 --- a/anyway/infographic_image_generator.py +++ b/anyway/infographic_image_generator.py @@ -29,6 +29,7 @@ def create_chrome_browser_session(newsflash_id): "profile.content_settings.exceptions.automatic_downloads.*.setting": 1, } options.add_experimental_option("prefs", prefs) + options.add_argument("--incognito") browser = webdriver.Remote( command_executor=selenium_hub_url, @@ -146,3 +147,77 @@ def upload_to_s3(self, local_file_path, newsflash_id): local_filename = os.path.basename(local_file_path) s3_filename = f"{newsflash_id}/{local_filename}" self.s3_bucket.upload_file(local_file_path, s3_filename) + + +def stuff(): + options = webdriver.ChromeOptions() + driver = webdriver.Chrome(options=options) + driver.execute_cdp_cmd("Network.enable", {}) + print("miao") + # Create a listener to capture requests and responses + def capture_infographics_data(request_id, response_body): + try: + # Parse the JSON response + response_json = json.loads(response_body) + + # Check if the response contains "meta" + if "meta" in response_json: + meta_data = response_json["meta"] + print(f"Meta Section: {meta_data}") + + # Save to a JSON file + with open("meta_section.json", "w") as f: + json.dump(meta_data, f, indent=4) + print("Meta section saved to meta_section.json") + except Exception as e: + print(f"Error processing response: {e}") + + # Start listening for responses + saved_requests = {} + + def on_response_received(event): + request_id = event["requestId"] + if "response" in event and "/api/infographics-data" in event["response"]["url"]: + # Save the request ID to fetch its body later + saved_requests[request_id] = event["response"]["url"] + + def on_loading_finished(event): + request_id = event["requestId"] + if request_id in saved_requests: + # Get the response body + response_body = driver.execute_cdp_cmd("Network.getResponseBody", 
{"requestId": request_id}) + capture_infographics_data(request_id, response_body.get("body", "{}")) + + # Register CDP event listeners + driver.execute_script( + "const onNetworkEvent = arguments[0]; window.addEventListener('message', e => onNetworkEvent(e.data));", + on_response_received + ) + + # Navigate to the page + driver.get("https://media.anyway.co.il/newsflash/233429") + + # Wait for some time to ensure the API request is captured + driver.implicitly_wait(10) + + # Cleanup + driver.quit() + + +import selenium.webdriver.common.devtools.v111 as devtools +import trio + + +def execute_cdp(driver: webdriver.Remote, cmd): + async def execute_cdp_async(): + async with driver.bidi_connection() as session: + cdp_session = session.session + return await cdp_session.execute(cmd) + # It will have error if we use asyncio.run + # https://github.com/SeleniumHQ/selenium/issues/11244 + return trio.run(execute_cdp_async) + +driver = create_chrome_browser_session(233429) +# Use it this way: +execute_cdp(driver, devtools.network.enable()) +mhtml = execute_cdp(driver, devtools.page.capture_snapshot()) \ No newline at end of file diff --git a/anyway/infographics_utils.py b/anyway/infographics_utils.py index 6eb57f53..cf73a7eb 100755 --- a/anyway/infographics_utils.py +++ b/anyway/infographics_utils.py @@ -265,3 +265,9 @@ def get_infographics_mock_data(): mock_data[WIDGETS].append(widget) mock_data[WIDGETS] = sorted(mock_data[WIDGETS], key=lambda widget: widget[META]["rank"]) return mock_data + + +def get_infographics_data_by_newsflash(newsflash_id): + request_params = get_request_params_from_request_values({"news_flash_id": newsflash_id}) + return get_infographics_data_for_location(request_params) + diff --git a/docker-compose.yml b/docker-compose.yml index c7a29f24..ec9c6a1c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -99,8 +99,98 @@ services: - airflow-webserver - db + filebeat: + image: docker.elastic.co/beats/filebeat:7.17.3 + container_name: filebeat + command: [ "/bin/bash", "-c", "chmod +x /usr/share/filebeat/filebeat && filebeat -e" ] + user: root + volumes: + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + - ./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro + +# mongo: +# image: mongo:6 +# container_name: graylog-mongo +# networks: +# - graylog +# restart: always +# + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.16.1 + container_name: graylog-elasticsearch + environment: + - discovery.type=single-node + - xpack.security.enabled=false + ports: + - "9200:9200" + volumes: + - elasticsearch-data:/usr/share/elasticsearch/data + restart: always + + kibana: + image: docker.elastic.co/kibana/kibana:8.16.1 + container_name: kibana + environment: + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + ports: + - "5601:5601" + volumes: + - ./kibana.yml:/etc/kibana/kibana.yml + depends_on: + - elasticsearch +# graylog: +# image: graylog/graylog:5.1 +# container_name: graylog +# environment: +# - GRAYLOG_HTTP_EXTERNAL_URI=http://127.0.0.1:9000/ +# - GRAYLOG_ROOT_PASSWORD_SHA2=e3c652f0ba0b4801205814f8b6bc49672c4c74e25b497770bb89b22cdeb4e951 +# - GRAYLOG_MONGODB_URI=mongodb://mongo:27017/graylog +# - GRAYLOG_ELASTICSEARCH_HOSTS=http://elasticsearch:9200 +# - GRAYLOG_PASSWORD_SECRET=3e76c85763b24e4f9f4b2c3e9a0b5d19 +# entrypoint: /usr/bin/tini -- wait-for-it elasticsearch:9200 -- wait-for-it mongo:27017 -- /docker-entrypoint.sh +# networks: +# - graylog +# ports: +# - "9000:9000" # Graylog web interface +# - 
"12201:12201/udp" # GELF UDP input +# restart: always +# depends_on: +# - mongo +# - elasticsearch + +# loki: +# image: grafana/loki:latest +# container_name: loki +# ports: +# - "3100:3100" +# volumes: +# - ./loki-config.yml:/etc/loki/local-config.yaml +# - /var/lib/docker/containers:/var/lib/docker/containers:ro +# - /var/log:/var/log:ro +# command: -config.file=/etc/loki/local-config.yaml +# +# grafana: +# image: grafana/grafana:latest +# container_name: grafana +# ports: +# - "3002:3000" +# environment: +# - GF_SECURITY_ADMIN_USER=admin +# - GF_SECURITY_ADMIN_PASSWORD=admin +# volumes: +# - grafana-data:/var/lib/grafana +# depends_on: +# - loki + volumes: db_data: anyway_etl_data: airflow-db: airflow-home: + elasticsearch-data: + driver: local + +#networks: +# graylog: +# driver: bridge \ No newline at end of file From 3ee26f7c990aaf8033a29cd8a7fdf444c49e9399 Mon Sep 17 00:00:00 2001 From: tkalir Date: Sat, 11 Jan 2025 17:26:57 +0200 Subject: [PATCH 13/18] Revert "add get_infographics_data_by_newsflash to infographics_utils.py" This reverts commit 26323b4427998f37caf10045653145b5c5e6f954. --- anyway/infographic_image_generator.py | 75 ---------------------- anyway/infographics_utils.py | 6 -- docker-compose.yml | 90 --------------------------- 3 files changed, 171 deletions(-) diff --git a/anyway/infographic_image_generator.py b/anyway/infographic_image_generator.py index a9829585..36f4e72d 100644 --- a/anyway/infographic_image_generator.py +++ b/anyway/infographic_image_generator.py @@ -29,7 +29,6 @@ def create_chrome_browser_session(newsflash_id): "profile.content_settings.exceptions.automatic_downloads.*.setting": 1, } options.add_experimental_option("prefs", prefs) - options.add_argument("--incognito") browser = webdriver.Remote( command_executor=selenium_hub_url, @@ -147,77 +146,3 @@ def upload_to_s3(self, local_file_path, newsflash_id): local_filename = os.path.basename(local_file_path) s3_filename = f"{newsflash_id}/{local_filename}" self.s3_bucket.upload_file(local_file_path, s3_filename) - - -def stuff(): - options = webdriver.ChromeOptions() - driver = webdriver.Chrome(options=options) - driver.execute_cdp_cmd("Network.enable", {}) - print("miao") - # Create a listener to capture requests and responses - def capture_infographics_data(request_id, response_body): - try: - # Parse the JSON response - response_json = json.loads(response_body) - - # Check if the response contains "meta" - if "meta" in response_json: - meta_data = response_json["meta"] - print(f"Meta Section: {meta_data}") - - # Save to a JSON file - with open("meta_section.json", "w") as f: - json.dump(meta_data, f, indent=4) - print("Meta section saved to meta_section.json") - except Exception as e: - print(f"Error processing response: {e}") - - # Start listening for responses - saved_requests = {} - - def on_response_received(event): - request_id = event["requestId"] - if "response" in event and "/api/infographics-data" in event["response"]["url"]: - # Save the request ID to fetch its body later - saved_requests[request_id] = event["response"]["url"] - - def on_loading_finished(event): - request_id = event["requestId"] - if request_id in saved_requests: - # Get the response body - response_body = driver.execute_cdp_cmd("Network.getResponseBody", {"requestId": request_id}) - capture_infographics_data(request_id, response_body.get("body", "{}")) - - # Register CDP event listeners - driver.execute_script( - "const onNetworkEvent = arguments[0]; window.addEventListener('message', e => onNetworkEvent(e.data));", - 
on_response_received - ) - - # Navigate to the page - driver.get("https://media.anyway.co.il/newsflash/233429") - - # Wait for some time to ensure the API request is captured - driver.implicitly_wait(10) - - # Cleanup - driver.quit() - - -import selenium.webdriver.common.devtools.v111 as devtools -import trio - - -def execute_cdp(driver: webdriver.Remote, cmd): - async def execute_cdp_async(): - async with driver.bidi_connection() as session: - cdp_session = session.session - return await cdp_session.execute(cmd) - # It will have error if we use asyncio.run - # https://github.com/SeleniumHQ/selenium/issues/11244 - return trio.run(execute_cdp_async) - -driver = create_chrome_browser_session(233429) -# Use it this way: -execute_cdp(driver, devtools.network.enable()) -mhtml = execute_cdp(driver, devtools.page.capture_snapshot()) \ No newline at end of file diff --git a/anyway/infographics_utils.py b/anyway/infographics_utils.py index cf73a7eb..6eb57f53 100755 --- a/anyway/infographics_utils.py +++ b/anyway/infographics_utils.py @@ -265,9 +265,3 @@ def get_infographics_mock_data(): mock_data[WIDGETS].append(widget) mock_data[WIDGETS] = sorted(mock_data[WIDGETS], key=lambda widget: widget[META]["rank"]) return mock_data - - -def get_infographics_data_by_newsflash(newsflash_id): - request_params = get_request_params_from_request_values({"news_flash_id": newsflash_id}) - return get_infographics_data_for_location(request_params) - diff --git a/docker-compose.yml b/docker-compose.yml index ec9c6a1c..c7a29f24 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -99,98 +99,8 @@ services: - airflow-webserver - db - filebeat: - image: docker.elastic.co/beats/filebeat:7.17.3 - container_name: filebeat - command: [ "/bin/bash", "-c", "chmod +x /usr/share/filebeat/filebeat && filebeat -e" ] - user: root - volumes: - - /var/lib/docker/containers:/var/lib/docker/containers:ro - - /var/run/docker.sock:/var/run/docker.sock:ro - - ./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro - -# mongo: -# image: mongo:6 -# container_name: graylog-mongo -# networks: -# - graylog -# restart: always -# - elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.16.1 - container_name: graylog-elasticsearch - environment: - - discovery.type=single-node - - xpack.security.enabled=false - ports: - - "9200:9200" - volumes: - - elasticsearch-data:/usr/share/elasticsearch/data - restart: always - - kibana: - image: docker.elastic.co/kibana/kibana:8.16.1 - container_name: kibana - environment: - - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 - ports: - - "5601:5601" - volumes: - - ./kibana.yml:/etc/kibana/kibana.yml - depends_on: - - elasticsearch -# graylog: -# image: graylog/graylog:5.1 -# container_name: graylog -# environment: -# - GRAYLOG_HTTP_EXTERNAL_URI=http://127.0.0.1:9000/ -# - GRAYLOG_ROOT_PASSWORD_SHA2=e3c652f0ba0b4801205814f8b6bc49672c4c74e25b497770bb89b22cdeb4e951 -# - GRAYLOG_MONGODB_URI=mongodb://mongo:27017/graylog -# - GRAYLOG_ELASTICSEARCH_HOSTS=http://elasticsearch:9200 -# - GRAYLOG_PASSWORD_SECRET=3e76c85763b24e4f9f4b2c3e9a0b5d19 -# entrypoint: /usr/bin/tini -- wait-for-it elasticsearch:9200 -- wait-for-it mongo:27017 -- /docker-entrypoint.sh -# networks: -# - graylog -# ports: -# - "9000:9000" # Graylog web interface -# - "12201:12201/udp" # GELF UDP input -# restart: always -# depends_on: -# - mongo -# - elasticsearch - -# loki: -# image: grafana/loki:latest -# container_name: loki -# ports: -# - "3100:3100" -# volumes: -# - ./loki-config.yml:/etc/loki/local-config.yaml -# - 
/var/lib/docker/containers:/var/lib/docker/containers:ro -# - /var/log:/var/log:ro -# command: -config.file=/etc/loki/local-config.yaml -# -# grafana: -# image: grafana/grafana:latest -# container_name: grafana -# ports: -# - "3002:3000" -# environment: -# - GF_SECURITY_ADMIN_USER=admin -# - GF_SECURITY_ADMIN_PASSWORD=admin -# volumes: -# - grafana-data:/var/lib/grafana -# depends_on: -# - loki - volumes: db_data: anyway_etl_data: airflow-db: airflow-home: - elasticsearch-data: - driver: local - -#networks: -# graylog: -# driver: bridge \ No newline at end of file From 243cd6d20bd58d4defc4413b0acad311ecbf1911 Mon Sep 17 00:00:00 2001 From: tkalir Date: Sat, 11 Jan 2025 17:33:42 +0200 Subject: [PATCH 14/18] add get_infographics_data_by_newsflash to infographics_utils.py --- anyway/infographic_image_generator.py | 1 + anyway/infographics_utils.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/anyway/infographic_image_generator.py b/anyway/infographic_image_generator.py index 36f4e72d..6878a871 100644 --- a/anyway/infographic_image_generator.py +++ b/anyway/infographic_image_generator.py @@ -29,6 +29,7 @@ def create_chrome_browser_session(newsflash_id): "profile.content_settings.exceptions.automatic_downloads.*.setting": 1, } options.add_experimental_option("prefs", prefs) + options.add_argument("--incognito") browser = webdriver.Remote( command_executor=selenium_hub_url, diff --git a/anyway/infographics_utils.py b/anyway/infographics_utils.py index 6eb57f53..b48514eb 100755 --- a/anyway/infographics_utils.py +++ b/anyway/infographics_utils.py @@ -265,3 +265,8 @@ def get_infographics_mock_data(): mock_data[WIDGETS].append(widget) mock_data[WIDGETS] = sorted(mock_data[WIDGETS], key=lambda widget: widget[META]["rank"]) return mock_data + + +def get_infographics_data_by_newsflash(newsflash_id): + request_params = get_request_params_from_request_values({"news_flash_id": newsflash_id}) + return get_infographics_data_for_location(request_params) \ No newline at end of file From f91d50d0db4ad5b4085907c649aef9239d40141b Mon Sep 17 00:00:00 2001 From: tkalir Date: Sat, 11 Jan 2025 18:00:53 +0200 Subject: [PATCH 15/18] get_items_for_send calls functions instead of calling Anyway's API --- anyway/telegram_accident_notifications.py | 29 +++-------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/anyway/telegram_accident_notifications.py b/anyway/telegram_accident_notifications.py index 7376c731..aac2d24a 100644 --- a/anyway/telegram_accident_notifications.py +++ b/anyway/telegram_accident_notifications.py @@ -4,6 +4,7 @@ from anyway.models import TelegramForwardedMessages from anyway.utilities import trigger_airflow_dag from anyway.app_and_db import db +from anyway.infographics_utils import get_infographics_data_by_newsflash import telebot import boto3 import time @@ -77,33 +78,9 @@ def publish_notification(newsflash_id, chat_id=TELEGRAM_CHANNEL_CHAT_ID): db.session.commit() -def fetch_widgets_with_retries(newsflash_id, wait_times): - url = f"https://www.anyway.co.il/api/infographics-data?news_flash_id={newsflash_id}" - for attempt, wait_time in enumerate(wait_times): - try: - logging.debug(f"Attempt {attempt + 1}: Fetching data widgets for newsflash {newsflash_id}") - response = requests.get(url) - if response.ok: - response_json = response.json() - widgets = response_json.get("widgets", []) - if len(widgets) > 0: - return widgets - except requests.exceptions.RequestException as e: - logging.debug(e) - time.sleep(wait_time) - raise RuntimeError(f"Failed to fetch data 
from {url}") - - -def fetch_widgets(newsflash_id): - retry_timeouts = [10, 20, 30, 60] - widgets = fetch_widgets_with_retries(newsflash_id, retry_timeouts) - return [widget.get("name") for widget in widgets] - - def get_items_for_send(newsflash_id): items = [] - retry_timeouts = [10, 20, 30, 60] - widgets = fetch_widgets_with_retries(newsflash_id, retry_timeouts) + widgets = get_infographics_data_by_newsflash(newsflash_id)["widgets"] transcription_by_widget_name = get_transcription_by_widget_name(widgets) urls_by_infographic_name = create_public_urls_for_infographics_images(str(newsflash_id)) for widget in widgets: @@ -157,4 +134,4 @@ def trigger_generate_infographics_and_send_to_telegram(newsflash_id, pre_verific dag_conf = {"news_flash_id": newsflash_id} dag_conf["chat_id"] = TELEGRAM_CHANNEL_CHAT_ID if pre_verification_chat \ else TELEGRAM_POST_VERIFICATION_CHANNEL_CHAT_ID - trigger_airflow_dag("generate-and-send-infographics-images", dag_conf) \ No newline at end of file + trigger_airflow_dag("generate-and-send-infographics-images", dag_conf) From 0061316f7f5ac208ee10527640792c6722652bc3 Mon Sep 17 00:00:00 2001 From: tkalir Date: Sat, 11 Jan 2025 19:27:21 +0200 Subject: [PATCH 16/18] added send to telegram in update_news_flash_qualifying when qualification is "verified" --- anyway/views/news_flash/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/anyway/views/news_flash/api.py b/anyway/views/news_flash/api.py index 67bdb45b..a67781d3 100644 --- a/anyway/views/news_flash/api.py +++ b/anyway/views/news_flash/api.py @@ -375,8 +375,10 @@ def update_news_flash_qualifying(id): new_location=new_location, new_qualification=new_location_qualifiction, ) + VERIFIED_QUALIFICATIONS = [NewsflashLocationQualification.MANUAL.value, + NewsflashLocationQualification.VERIFIED.value] if os.environ.get("FLASK_ENV") == "production" and \ - new_location_qualifiction == NewsflashLocationQualification.MANUAL.value and \ + new_location_qualifiction in VERIFIED_QUALIFICATIONS and \ old_location_qualifiction != NewsflashLocationQualification.MANUAL.value: trigger_generate_infographics_and_send_to_telegram(id, False) return Response(status=HTTPStatus.OK) From 07a333b1f04564f921c71d687e93c18495a476c2 Mon Sep 17 00:00:00 2001 From: Tamar Kalir Date: Tue, 14 Jan 2025 18:46:41 +0200 Subject: [PATCH 17/18] prevent is_included from throwing KeyError exceptions in accident_count_by_severity_widget.py and injured_count_by_severity_widget.py --- .../all_locations_widgets/accident_count_by_severity_widget.py | 2 +- .../all_locations_widgets/injured_count_by_severity_widget.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/anyway/widgets/all_locations_widgets/accident_count_by_severity_widget.py b/anyway/widgets/all_locations_widgets/accident_count_by_severity_widget.py index 2a199e05..2ea17c10 100644 --- a/anyway/widgets/all_locations_widgets/accident_count_by_severity_widget.py +++ b/anyway/widgets/all_locations_widgets/accident_count_by_severity_widget.py @@ -27,7 +27,7 @@ def generate_items(self) -> None: ) def is_included(self) -> bool: - return self.items["total_accidents_count"] > 0 + return self.items.get("total_accidents_count", 0) > 0 @staticmethod def get_accident_count_by_severity(location_info, start_time, end_time, resolution): diff --git a/anyway/widgets/all_locations_widgets/injured_count_by_severity_widget.py b/anyway/widgets/all_locations_widgets/injured_count_by_severity_widget.py index 3b0fe487..4a6ab0a1 100644 --- 
a/anyway/widgets/all_locations_widgets/injured_count_by_severity_widget.py +++ b/anyway/widgets/all_locations_widgets/injured_count_by_severity_widget.py @@ -33,7 +33,7 @@ def generate_items(self) -> None: ) def is_included(self) -> bool: - return self.items["total_injured_count"] > 0 + return self.items.get("total_injured_count", 0) > 0 @staticmethod def get_injured_count_by_severity( From 20dcd5be8499034c6bf3de2faddc5f0e262ca5f3 Mon Sep 17 00:00:00 2001 From: Atalya Alon Date: Sun, 19 Jan 2025 16:24:26 +0200 Subject: [PATCH 18/18] ensure data consistency, commit only after delete and insert, rollback otherwise --- anyway/parsers/cbs/executor.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/anyway/parsers/cbs/executor.py b/anyway/parsers/cbs/executor.py index 2594846f..408e7969 100644 --- a/anyway/parsers/cbs/executor.py +++ b/anyway/parsers/cbs/executor.py @@ -900,7 +900,6 @@ def fill_dictionary_tables(cbs_dictionary, provider_code, year): + str(inner_k) ) db.session.execute(sql_delete) - db.session.commit() sql_insert = ( "INSERT INTO " + curr_table @@ -918,8 +917,12 @@ def fill_dictionary_tables(cbs_dictionary, provider_code, year): + " ON CONFLICT DO NOTHING" ) db.session.execute(sql_insert) - db.session.commit() - logging.debug("Inserted/Updated dictionary values into table " + curr_table) + try: + db.session.commit() + except Exception as e: + logging.error(f"Error updating Dictionary tables: {e}") + db.session.rollback() + logging.debug("Inserted/Updated dictionary values into table " + curr_table) create_provider_code_table() @@ -951,7 +954,11 @@ def create_provider_code_table(): "INSERT INTO " + provider_code_table + " VALUES (" + str(k) + "," + "'" + v + "'" + ")" ) db.session.execute(sql_insert) + try: db.session.commit() + except Exception as e: + logging.error(f"Error updating table {provider_code_table}: {e}") + db.session.rollback() def receive_rollback(conn, **kwargs):
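
Editor's example — the series closes with PATCH 18, which wraps the paired DELETE and INSERT in anyway/parsers/cbs/executor.py so the commit happens only after both statements succeed and the session is rolled back otherwise. The sketch below restates that pattern as a reusable helper purely for illustration: it assumes the Flask-SQLAlchemy-style `db.session` used in the patch, and the `atomic_session` name and the usage shown in the trailing comment are hypothetical, not part of the patch series.

import logging
from contextlib import contextmanager

from sqlalchemy.exc import SQLAlchemyError


@contextmanager
def atomic_session(session, description):
    """Commit only if every statement in the block succeeded; otherwise roll back."""
    try:
        yield session
        session.commit()
    except SQLAlchemyError as e:
        # Mirrors the patched behavior: log the failure and undo the partial change
        # so the paired DELETE and INSERT never land separately.
        logging.error(f"Error updating {description}: {e}")
        session.rollback()


# Hypothetical usage mirroring the patched loop in fill_dictionary_tables:
# with atomic_session(db.session, curr_table) as s:
#     s.execute(sql_delete)
#     s.execute(sql_insert)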