Skip to content

Commit

Permalink
Multi stack trace support (#471)
Browse files Browse the repository at this point in the history
* adding multi thread support to stack trace

* fixing stack trace output

* adding tabs

* adding stack trace errors to robusta log, and cleaner findings

* changing default stack trace duration

* Updating for new version

* new version

* potential exception if no trace

* refactoring json object

* Updating stack trace object
  • Loading branch information
Avi-Robusta authored Aug 14, 2022
1 parent 4f140eb commit ff0815b
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 11 deletions.
83 changes: 73 additions & 10 deletions playbooks/robusta_playbooks/pod_troubleshooting.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
# TODO: move the python playbooks into their own subpackage and put each playbook in its own file
import logging

import humanize
from robusta.api import *
from typing import List, Optional


class StackTraceObject(BaseModel):
    """
    A single stack trace sample, built from one element of the JSON list printed by
    the ``debug-toolkit stack-trace`` command.

    :var time: timestamp of trace.
    :var status: "success" if succeeded in getting stack traces
    :var error: the exception reported by the debugger, as a string
    :var trace: on success the stack traces of all threads, on error the stack trace of the exception
    """

    # Every field defaults to None, so each one is declared Optional explicitly
    # instead of relying on the validator to silently widen a bare `float`/`str`
    # annotation; this also lets a JSON `null` value be accepted for any field.
    time: Optional[float] = None
    status: Optional[str] = None
    error: Optional[str] = None
    trace: Optional[str] = None


class StartProfilingParams(ActionParams):
"""
:var seconds: Profiling duration.
Expand Down Expand Up @@ -198,6 +213,15 @@ class DebuggerParams(ProcessParams):
port: int = 5678


class StackTraceParams(DebuggerParams):
    """
    :var traces_amount: the number of stack traces to take. debugger_stack_trace rejects values below 1.
    :var sleep_duration_s: the sleep time in between traces (presumably in seconds, per the ``_s`` suffix). debugger_stack_trace rejects negative values.
    """

    traces_amount: int = 1
    sleep_duration_s: int = 1

def get_example_launch_json(params: DebuggerParams):
return {
"version": "0.2.0",
Expand Down Expand Up @@ -251,7 +275,7 @@ def get_debugger_warnings(data):


@action
def debugger_stack_trace(event: PodEvent, params: DebuggerParams):
def debugger_stack_trace(event: PodEvent, params: StackTraceParams):
"""
Prints a stack trace of a python process and child threads
Expand All @@ -266,7 +290,13 @@ def debugger_stack_trace(event: PodEvent, params: DebuggerParams):
pid = process_finder.get_lowest_relevant_pid()

if not pid:
logging.info(f"debugger_stack_trace - no relevant pids")
logging.error(f"debugger_stack_trace - no relevant pids")
return

if params.traces_amount < 1 or params.sleep_duration_s < 0:
logging.error(f"debugger_stack_trace - invalid params, "
f"traces_amount must be greater than 1 and sleep_duration_s must be greater than 0")
return

# if params pid is set, this will be returned, if not we return the parent process
finding = Finding(
Expand All @@ -277,20 +307,53 @@ def debugger_stack_trace(event: PodEvent, params: DebuggerParams):
finding_type=FindingType.REPORT,
failure=False,
)
cmd = f"debug-toolkit stack-trace {pid}"
cmd = f"debug-toolkit stack-trace {pid} --amount={params.traces_amount} --sleep-duration-s={params.sleep_duration_s}"
output = RobustaPod.exec_in_debugger_pod(
pod.metadata.name,
pod.spec.nodeName,
cmd,
)
blocks = []
for thread_output in output.split("\n\n"):
if thread_output.startswith("Current thread"):
# this is the thread we are getting the stack trace from, not relevant for debugging
continue
if thread_output:
blocks.append(MarkdownBlock(f"```\n{thread_output}\n```"))
finding.add_enrichment(blocks, annotations={SlackAnnotations.ATTACHMENT: True})
try:
output_json = json.loads(output)
SUCCESS_STATUS = "success"
first_stack_trace_obj = StackTraceObject(**output_json[0]) if len(output_json) >= 1 else None
if len(output_json) == 0 or (len(output_json) == 1 and
first_stack_trace_obj.status != SUCCESS_STATUS):
# no stack traces returned or only one with error
error_message = 'Failed to get python stack trace'
if len(output_json) == 1:
error_message += f', debugger error {first_stack_trace_obj.error} at ' \
f'{first_stack_trace_obj.trace}'
logging.error(error_message)
blocks.append(MarkdownBlock(f"Error while getting python stack trace."))
elif len(output_json) == 1 and first_stack_trace_obj.status == SUCCESS_STATUS:
# print single stack trace directly to finding
for thread_output in first_stack_trace_obj.trace.split("\n\n"):
if thread_output.startswith("Current thread"):
# this is the thread we are getting the stack trace from, not relevant for debugging
continue
# not printing extra blank lines
if thread_output:
blocks.append(MarkdownBlock(f"```\n{thread_output}\n```"))
else:
# print multiple stack traces to file
clean_output = []
for trace_object_json in output_json:
trace_object = StackTraceObject(**trace_object_json)
if trace_object.status != SUCCESS_STATUS:
# the full python stack trace of the error will appear here
logging.error(f'Failed to get stack trace, debugger error {trace_object.error} at {trace_object.trace}')
clean_output.append({"time": trace_object.time, "status": "Error: Failed to get stack trace."})
else:
clean_output.append(trace_object_json)
clean_file_output = json.dumps(clean_output, indent=4, sort_keys=True).replace('\\n', '\n')
blocks.append(FileBlock(f"debugger_stack_trace_{pid}.txt", clean_file_output.encode()))
except ValueError: # includes simplejson.decoder.JSONDecodeError
logging.error(f"failed to decode output")
blocks.append(MarkdownBlock(f"Failed to processess stack trace(s)"))

finding.add_enrichment(blocks)
event.add_finding(finding)


Expand Down
2 changes: 1 addition & 1 deletion src/robusta/integrations/kubernetes/custom_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
S = TypeVar("S")
T = TypeVar("T")
PYTHON_DEBUGGER_IMAGE = (
"us-central1-docker.pkg.dev/genuine-flight-317411/devel/debug-toolkit:v4.3"
"us-central1-docker.pkg.dev/genuine-flight-317411/devel/debug-toolkit:v4.4"
)
JAVA_DEBUGGER_IMAGE = (
"us-central1-docker.pkg.dev/genuine-flight-317411/devel/java-toolkit-11:v1"
Expand Down

0 comments on commit ff0815b

Please sign in to comment.