Skip to content

Commit

Permalink
Merge branch 'master' into feature/prometheus_apis
Browse files Browse the repository at this point in the history
  • Loading branch information
ganeshrvel committed Nov 21, 2023
2 parents 88dc052 + eb2bddb commit 39f704f
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 15 deletions.
2 changes: 1 addition & 1 deletion docs/configuration/defining-playbooks/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ Learn how to define Robusta playbooks.
List of All Triggers and Actions
---------------------------------

Refer to :ref:`Playbook Reference<playbook-reference>`
Refer to :ref:`Triggers Reference` or :ref:`Actions Reference`
8 changes: 4 additions & 4 deletions docs/configuration/sinks/slack.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,10 @@ If you can't use the `official Slack app <https://slack.com/apps/A0214S5PHB4-rob
your own. This is not recommended for most companies due to the added complexity.

1. `Create a new Slack app. <https://api.slack.com/apps?new_app=1>`_
2. Enable Socket mode in your Slack App and copy the websocket token into the Robusta deployment yaml.
3. Under "OAuth and Permissions" add the following scopes: chat:write, files:write, incoming-webhook, and channels:history
4. Under "Event Subscriptions" add bot user events for message.channels and press "Save Changes"
5. Click "Install into Workspace"
2. Enable Socket mode in your Slack App.
3. Under "OAuth and Permissions" add the following scopes: chat:write, chat:write.public, files:write, incoming-webhook, and channels:history.
4. Under "Event Subscriptions" add bot user events for message.channels and press "Save Changes".
5. Click "Install into Workspace".
6. Copy the ``Bot User OAuth Token`` from "OAuth and Permissions".
7. Add the token to SinksConfig in your ``generated_values.yaml`` file.

Expand Down
9 changes: 8 additions & 1 deletion playbooks/robusta_playbooks/k8s_resource_enrichments.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class RelatedContainer(BaseModel):
status: Optional[str] = None
created: Optional[str] = None
ports: List[Any] = []
statusMessage: Optional[str] = None
statusReason: Optional[str] = None


class RelatedPod(BaseModel):
Expand All @@ -64,6 +66,7 @@ class RelatedPod(BaseModel):
addresses: str
containers: List[RelatedContainer]
status: Optional[str] = None
statusReason: Optional[str] = None


supported_resources = ["Deployment", "DaemonSet", "ReplicaSet", "Pod", "StatefulSet", "Job", "Node"]
Expand All @@ -87,6 +90,7 @@ def to_pod_row(pod: Pod, cluster_name: str) -> List:
addresses,
len(pod.spec.containers),
pod.status.phase,
pod.status.reason,
]


Expand All @@ -95,7 +99,6 @@ def get_related_pods(resource) -> list[Pod]:
if kind not in supported_resources:
raise ActionException(ErrorCodes.RESOURCE_NOT_SUPPORTED, f"Related pods is not supported for resource {kind}")

pods = []
if kind == "Job":
job_pods = get_job_all_pods(resource)
pods = job_pods if job_pods else []
Expand Down Expand Up @@ -128,6 +131,7 @@ def to_pod_obj(pod: Pod, cluster: str) -> RelatedPod:
addresses=addresses,
containers=get_pod_containers(pod),
status=pod.status.phase,
statusReason=pod.status.reason,
)


Expand Down Expand Up @@ -156,6 +160,8 @@ def get_pod_containers(pod: Pod) -> List[RelatedContainer]:
memoryRequest=requests.memory,
restarts=getattr(containerStatus, "restartCount", 0),
status=stateStr,
statusMessage=getattr(state, "messsage", None) if state else None,
statusReason=getattr(state, "reason", None) if state else None,
created=getattr(state, "startedAt", None),
ports=[port.to_dict() for port in container.ports] if container.ports else [],
)
Expand Down Expand Up @@ -197,6 +203,7 @@ def related_pods(event: KubernetesResourceEvent, params: RelatedPodParams):
"addresses",
"containers",
"status",
"status reason",
],
rows=rows,
)
Expand Down
8 changes: 4 additions & 4 deletions playbooks/robusta_playbooks/krr.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,15 +352,15 @@ def krr_scan(event: ExecutionBaseEvent, params: KRRParams):
except json.JSONDecodeError:
logging.error(f"*KRR scan job failed. Expecting json result.*\n\n Result:\n{logs}")
return
except ValidationError as e:
logging.error(f"*KRR scan job failed. Result format issue.*\n\n {e}")
except ValidationError:
logging.error("*KRR scan job failed. Result format issue.*\n\n", exc_info=True)
logging.error(f"\n {logs}")
return
except Exception as e:
if str(e) == "Failed to reach wait condition":
logging.error(f"*KRR scan job failed. The job wait condition timed out ({params.timeout}s)*")
logging.error(f"*KRR scan job failed. The job wait condition timed out ({params.timeout}s)*", exc_info=True)
else:
logging.error(f"*KRR scan job unexpected error.*\n {e}")
logging.error(f"*KRR scan job unexpected error.*\n {e}", exc_info=True)
return

scan_block = ScanReportBlock(
Expand Down
29 changes: 25 additions & 4 deletions playbooks/robusta_playbooks/pod_investigator_enricher.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import logging
from enum import Enum
from typing import List, Optional
from typing import List, Optional, Tuple

from hikaru.model.rel_1_26 import Pod, PodList
from robusta.api import (
Expand Down Expand Up @@ -59,8 +59,10 @@ def pod_issue_investigator(event: KubernetesResourceEvent):
logging.info(f"No pod issues discovered for {resource.kind} {resource.metadata.name}")
return
# Investigate first issue found
pod_issue = detect_pod_issue(pods_with_issues[0])
report_pod_issue(event, pods_with_issues, pod_issue)
first_pod = pods_with_issues[0]
pod_issue = detect_pod_issue(first_pod)
message, reason = get_pod_issue_message_and_reason(first_pod)
report_pod_issue(event, pods_with_issues, pod_issue, message, reason)


def detect_pod_issue(pod: Pod) -> PodIssue:
Expand All @@ -75,6 +77,18 @@ def detect_pod_issue(pod: Pod) -> PodIssue:
return PodIssue.NoneDetected


def get_pod_issue_message_and_reason(pod: Pod) -> Tuple[Optional[str], Optional[str]]:
    """Return the waiting ``(message, reason)`` of the pod's first container.

    Only the first entry of ``pod.status.containerStatuses`` is inspected, and
    only its ``waiting`` state is read.

    Returns:
        A ``(message, reason)`` tuple taken from the first container's waiting
        state, or ``(None, None)`` when the pod has no container statuses, the
        first container has no state, or that container is not waiting. A
        tuple is always returned so callers can unpack the result safely.
    """
    # Works/should work only for KubeContainerWaiting and KubePodNotReady
    # Note: in line with the old code in pod_issue_investigator, we only get the message for
    # the first of possibly many misbehaving containers.
    container_statuses = pod.status.containerStatuses
    if not container_statuses:
        return None, None

    state = container_statuses[0].state
    # Guard against a container status that carries no state at all.
    waiting = state.waiting if state else None
    if not waiting:
        return None, None

    return waiting.message, waiting.reason


def is_pod_pending(pod: Pod) -> bool:
    """Return True when the pod's phase is "pending" (case-insensitive).

    A pod whose status has no phase set is reported as not pending instead of
    raising ``AttributeError`` on ``None.lower()``.
    """
    phase = pod.status.phase
    return phase is not None and phase.lower() == "pending"

Expand Down Expand Up @@ -124,7 +138,9 @@ def has_image_pull_issue(pod: Pod) -> bool:
return len(image_pull_statuses) > 0


def report_pod_issue(event: KubernetesResourceEvent, pods: List[Pod], issue: PodIssue):
def report_pod_issue(
event: KubernetesResourceEvent, pods: List[Pod], issue: PodIssue, message: Optional[str], reason: Optional[str]
):
# find pods with issues
pods_with_issue = [pod for pod in pods if detect_pod_issue(pod) == issue]
pod_names = [pod.metadata.name for pod in pods_with_issue]
Expand All @@ -145,6 +161,11 @@ def report_pod_issue(event: KubernetesResourceEvent, pods: List[Pod], issue: Pod
blocks.extend(additional_blocks)
event.add_enrichment(blocks)

if reason:
if message is None:
message = "unknown"
event.add_enrichment([MarkdownBlock(f"\n\n{reason}: {message}")])


def get_expected_replicas(event: KubernetesResourceEvent) -> int:
resource = event.get_resource()
Expand Down
2 changes: 1 addition & 1 deletion src/robusta/integrations/prometheus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def get_prometheus_query(self) -> str:
parsed_url = urlparse(url)
query_params = parse_qs(parsed_url.query)

q_expr = query_params.get('g0.expr', [])
q_expr = query_params.get("g0.expr", [])
if len(q_expr) < 1 or not q_expr[0]:
return ""

Expand Down

0 comments on commit 39f704f

Please sign in to comment.