diff --git a/keep/api/core/demo_mode_runner.py b/keep/api/core/demo_mode_runner.py
index 374397159..3f4b4f74e 100644
--- a/keep/api/core/demo_mode_runner.py
+++ b/keep/api/core/demo_mode_runner.py
@@ -10,6 +10,8 @@
 from keep.api.core.db import get_session_sync
 from keep.api.core.dependencies import SINGLE_TENANT_UUID
+from keep.api.models.db.topology import TopologyServiceInDto
+from keep.api.tasks.process_topology_task import process_topology
 from keep.api.utils.tenant_utils import get_or_create_api_key
 from keep.providers.providers_factory import ProvidersFactory
@@ -60,6 +62,211 @@
     },
 ]
 
+services_to_create = [
+    TopologyServiceInDto(
+        source_provider_id="Prod-Datadog",
+        repository="keephq/keep",
+        tags=[],
+        service="api",
+        display_name="API Service",
+        environment="prod",
+        description="The main API service",
+        team="keep",
+        email="support@keephq.dev",
+        slack="https://slack.keephq.dev",
+        ip_address="10.0.0.1",
+        mac_address="",
+        category="Python",
+        manufacturer="",
+        dependencies={
+            "db": "SQL",
+            "queue": "AMQP",
+        },
+        application_ids=[],
+        updated_at="2024-11-18T09:23:46"
+    ),
+    TopologyServiceInDto(
+        source_provider_id="Prod-Datadog",
+        repository="keephq/keep",
+        tags=[],
+        service="ui",
+        display_name="Platform",
+        environment="prod",
+        description="The user interface (aka Platform)",
+        team="keep",
+        email="support@keephq.dev",
+        slack="https://slack.keephq.dev",
+        ip_address="10.0.0.2",
+        mac_address="",
+        category="nextjs",
+        manufacturer="",
+        dependencies={
+            "api": "HTTP/S",
+        },
+        application_ids=[],
+        updated_at="2024-11-18T09:29:25"
+    ),
+    TopologyServiceInDto(
+        source_provider_id="Prod-Datadog",
+        repository="keephq/keep",
+        tags=[],
+        service="db",
+        display_name="DB",
+        environment="prod",
+        description="Production Database",
+        team="keep",
+        email="support@keephq.dev",
+        slack="https://slack.keephq.dev",
+        ip_address="10.0.0.3",
+        mac_address="",
+        category="postgres",
+        manufacturer="",
+        dependencies={},
+        application_ids=[],
+        updated_at="2024-11-18T09:30:44"
+    ),
+    TopologyServiceInDto(
+        source_provider_id="Prod-Datadog",
+        repository="keephq/keep",
+        tags=[],
+        service="queue",
+        display_name="Kafka",
+        environment="prod",
+        description="Production Queue",
+        team="keep",
+        email="support@keephq.dev",
+        slack="https://slack.keephq.dev",
+        ip_address="10.0.0.4",
+        mac_address="",
+        category="Kafka",
+        manufacturer="",
+        dependencies={
+            "processor": "AMQP",
+        },
+        application_ids=[],
+        updated_at="2024-11-18T09:31:31"
+    ),
+    TopologyServiceInDto(
+        source_provider_id="Prod-Datadog",
+        repository="keephq/keep",
+        tags=[],
+        service="processor",
+        display_name="Processor",
+        environment="prod",
+        description="Processing Service",
+        team="keep",
+        email="support@keephq.dev",
+        slack="https://slack.keephq.dev",
+        ip_address="10.0.0.5",
+        mac_address="",
+        category="go",
+        manufacturer="",
+        dependencies={
+            "storage": "HTTP/S",
+        },
+        application_ids=[],
+        updated_at="2024-11-18T10:02:20"
+    ),
+    TopologyServiceInDto(
+        source_provider_id="Prod-Datadog",
+        repository="keephq/keep",
+        tags=[],
+        service="backoffice",
+        display_name="Backoffice",
+        environment="prod",
+        description="Backoffice UI to control configuration",
+        team="keep",
+        email="support@keephq.dev",
+        slack="https://slack.keephq.dev",
+        ip_address="172.1.1.0",
+        mac_address="",
+        category="nextjs",
+        manufacturer="",
+        dependencies={
+            "api": "HTTP/S",
+        },
+        application_ids=[],
+        updated_at="2024-11-18T10:11:31"
+    ),
+    TopologyServiceInDto(
+        source_provider_id="Prod-Datadog",
+        repository="keephq/keep",
+        tags=[],
+        service="storage",
+        display_name="Storage",
+        environment="prod",
+        description="Storage Service",
+        team="keep",
+        email="support@keephq.dev",
+        slack="https://slack.keephq.dev",
+        ip_address="10.0.0.8",
+        mac_address="",
+        category="python",
+        manufacturer="",
+        dependencies={},
+        application_ids=[],
+        updated_at="2024-11-18T10:13:56"
+    )
+]
+
+application_to_create = {
+    "name": "Main App",
+    "description": "It is the most critical business process ever imaginable.",
+    "services": [
+        {"name": "API Service", "service": "api"},
+        {"name": "DB", "service": "db"},
+        {"name": "Kafka", "service": "queue"},
+        {"name": "Processor", "service": "processor"},
+        {"name": "Storage", "service": "storage"}
+    ]
+}
+
+
+def get_or_create_topology(keep_api_key, keep_api_url):
+    services_existing = requests.get(
+        f"{keep_api_url}/topology",
+        headers={"x-api-key": keep_api_key},
+    )
+    services_existing.raise_for_status()
+    services_existing = services_existing.json()
+
+    # Create the demo services only if the topology is still empty
+    if len(services_existing) == 0:
+        process_topology(
+            SINGLE_TENANT_UUID,
+            services_to_create,
+            "Prod-Datadog",
+            "datadog"
+        )
+
+    # Create application
+    applications_existing = requests.get(
+        f"{keep_api_url}/topology/applications",
+        headers={"x-api-key": keep_api_key},
+    )
+    applications_existing.raise_for_status()
+    applications_existing = applications_existing.json()
+
+    if len(applications_existing) == 0:
+        # Pull services again to get their ids
+        services_existing = requests.get(
+            f"{keep_api_url}/topology",
+            headers={"x-api-key": keep_api_key},
+        )
+        services_existing.raise_for_status()
+        services_existing = services_existing.json()
+
+        # Update application_to_create with existing services ids
+        for service in application_to_create["services"]:
+            for existing_service in services_existing:
+                if service["name"] == existing_service["display_name"]:
+                    service["id"] = existing_service["id"]
+
+        response = requests.post(
+            f"{keep_api_url}/topology/applications",
+            headers={"x-api-key": keep_api_key},
+            json=application_to_create,
+        )
+        response.raise_for_status()
+
 
 def get_or_create_correlation_rules(keep_api_key, keep_api_url):
     correlation_rules_existing = requests.get(
@@ -100,7 +307,13 @@ def remove_old_incidents(keep_api_key, keep_api_url):
     response.raise_for_status()
 
 
-async def simulate_alerts(keep_api_url=None, keep_api_key=None, sleep_interval=5, demo_correlation_rules=False):
+async def simulate_alerts(
+    keep_api_url=None,
+    keep_api_key=None,
+    sleep_interval=5,
+    demo_correlation_rules=False,
+    demo_topology=False
+):
     GENERATE_DEDUPLICATIONS = True
 
     providers = ["prometheus", "grafana"]
@@ -113,7 +326,11 @@ async def simulate_alerts(keep_api_url=None, keep_api_key=None, sleep_interval=5
     # Wait in the beginning because server may not be ready yet.
     await asyncio.sleep(sleep_interval * 2)
 
-    get_or_create_correlation_rules(keep_api_key, keep_api_url)
+    if demo_correlation_rules:
+        get_or_create_correlation_rules(keep_api_key, keep_api_url)
+
+    if demo_topology:
+        get_or_create_topology(keep_api_key, keep_api_url)
 
     while True:
         await asyncio.sleep(sleep_interval)
@@ -170,7 +387,8 @@ def launch_demo_mode():
             keep_api_url,
             keep_api_key,
             sleep_interval=5,
-            demo_correlation_rules=True
+            demo_correlation_rules=True,
+            demo_topology=True
         ),
     ))
     thread.start()
     logger.info("Simulate Alert launched.")
diff --git a/keep/providers/grafana_provider/alerts_mock.py b/keep/providers/grafana_provider/alerts_mock.py
index bcb940077..4d9d0357a 100644
--- a/keep/providers/grafana_provider/alerts_mock.py
+++ b/keep/providers/grafana_provider/alerts_mock.py
@@ -1,6 +1,7 @@
 ALERTS = {
     "database_connection_failure": {
         "severity": "critical",
+        "service": "api",
         "title": "Database Connection Failure",
         "alerts": [
             {
@@ -48,6 +49,7 @@
         ],
     },
     "high_memory_usage": {
+        "service": "api",
         "payload": {
             "condition": "B",
             "data": [
@@ -92,6 +94,7 @@
         },
     },
     "network_latency_high": {
+        "service": "db",
         "payload": {
             "condition": "C",
             "data": [
diff --git a/keep/providers/prometheus_provider/alerts_mock.py b/keep/providers/prometheus_provider/alerts_mock.py
index 1287f1a68..3c03bd675 100644
--- a/keep/providers/prometheus_provider/alerts_mock.py
+++ b/keep/providers/prometheus_provider/alerts_mock.py
@@ -11,7 +11,7 @@
         },
         "parameters": {
             "labels.host": ["host1", "host2", "host3"],
-            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka"],
+            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka", "api", "queue", "db"],
             "labels.instance": ["instance1", "instance2", "instance3"],
         },
     },
@@ -20,11 +20,12 @@
             "summary": "Message queue is over 33% capacity",
             "labels": {
                 "severity": "warning",
+                "customer_id": "acme"
             },
         },
         "parameters": {
             "labels.queue": ["queue1", "queue2", "queue3"],
-            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka"],
+            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka", "queue"],
             "labels.mq_manager": ["mq_manager1", "mq_manager2", "mq_manager3"],
         },
     },
@@ -37,7 +38,7 @@
         },
         "parameters": {
             "labels.host": ["host1", "host2", "host3"],
-            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka"],
+            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka", "api", "queue", "db"],
             "labels.instance": ["instance1", "instance2", "instance3"],
         },
     },
@@ -50,7 +51,7 @@
         },
         "parameters": {
             "labels.host": ["host1", "host2", "host3"],
-            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka"],
+            "labels.service": ["calendar-producer-java-otel-api-dd", "kafka", "api", "queue", "db"],
             "labels.instance": ["instance1", "instance2", "instance3"],
         },
     },
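
For reference, a minimal sketch of how the extended entry point could be driven directly (outside launch_demo_mode()), matching the new simulate_alerts signature above; the URL and API key values are placeholders and not part of this change:

import asyncio

from keep.api.core.demo_mode_runner import simulate_alerts

# Placeholder values for illustration only; supply your own Keep API URL and key.
KEEP_API_URL = "http://localhost:8080"
KEEP_API_KEY = "<demo-api-key>"

# Seeds the demo correlation rules and topology once, then keeps pushing
# mock Prometheus/Grafana alerts every 5 seconds.
asyncio.run(
    simulate_alerts(
        keep_api_url=KEEP_API_URL,
        keep_api_key=KEEP_API_KEY,
        sleep_interval=5,
        demo_correlation_rules=True,
        demo_topology=True,
    )
)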