diff --git a/ddtrace/internal/datadog/profiling/ddup/__init__.py b/ddtrace/internal/datadog/profiling/ddup/__init__.py index 32bd273c5a4..1a1c9ebe7a4 100644 --- a/ddtrace/internal/datadog/profiling/ddup/__init__.py +++ b/ddtrace/internal/datadog/profiling/ddup/__init__.py @@ -1,4 +1,9 @@ -from .utils import sanitize_string # noqa: F401 +# This module supports an optional feature. It may not even load on all platforms or configurations. +# In ddtrace/settings/profiling.py, this module is imported and the is_available attribute is checked to determine +# whether the feature is available. If not, then the feature is disabled and all downstream consumption is +# suppressed. +is_available = False +failure_msg = "" try: @@ -7,89 +12,4 @@ is_available = True except Exception as e: - from typing import Dict # noqa:F401 - from typing import Optional # noqa:F401 - - from ddtrace.internal.logger import get_logger - - LOG = get_logger(__name__) - LOG.debug("Failed to import _ddup: %s", e) - - is_available = False - - # Decorator for not-implemented - def not_implemented(func): - def wrapper(*args, **kwargs): - raise NotImplementedError("{} is not implemented on this platform".format(func.__name__)) - - @not_implemented - def init( - env, # type: Optional[str] - service, # type: Optional[str] - version, # type: Optional[str] - tags, # type: Optional[Dict[str, str]] - max_nframes, # type: Optional[int] - url, # type: Optional[str] - ): - pass - - @not_implemented - def upload(): # type: () -> None - pass - - class SampleHandle: - @not_implemented - def push_cputime(self, value, count): # type: (int, int) -> None - pass - - @not_implemented - def push_walltime(self, value, count): # type: (int, int) -> None - pass - - @not_implemented - def push_acquire(self, value, count): # type: (int, int) -> None - pass - - @not_implemented - def push_release(self, value, count): # type: (int, int) -> None - pass - - @not_implemented - def push_alloc(self, value, count): # type: (int, int) -> None - pass - - @not_implemented - def push_heap(self, value): # type: (int) -> None - pass - - @not_implemented - def push_lock_name(self, lock_name): # type: (str) -> None - pass - - @not_implemented - def push_frame(self, name, filename, address, line): # type: (str, str, int, int) -> None - pass - - @not_implemented - def push_threadinfo(self, thread_id, thread_native_id, thread_name): # type: (int, int, Optional[str]) -> None - pass - - @not_implemented - def push_taskinfo(self, task_id, task_name): # type: (int, str) -> None - pass - - @not_implemented - def push_exceptioninfo(self, exc_type, count): # type: (type, int) -> None - pass - - @not_implemented - def push_class_name(self, class_name): # type: (str) -> None - pass - - @not_implemented - def push_span(self, span, endpoint_collection_enabled): # type: (Optional[Span], bool) -> None - pass - - @not_implemented - def flush_sample(self): # type: () -> None - pass + failure_msg = str(e) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/__init__.py b/ddtrace/internal/datadog/profiling/stack_v2/__init__.py index 8a8484e6950..399906e115d 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/__init__.py +++ b/ddtrace/internal/datadog/profiling/stack_v2/__init__.py @@ -1,34 +1,13 @@ +# See ../ddup/__init__.py for some discussion on the is_available attribute. +# This component is also loaded in ddtrace/settings/profiling.py is_available = False - - -# Decorator for not-implemented -def not_implemented(func): - def wrapper(*args, **kwargs): - raise NotImplementedError("{} is not implemented on this platform".format(func.__name__)) - - -@not_implemented -def start(*args, **kwargs): - pass - - -@not_implemented -def stop(*args, **kwargs): - pass - - -@not_implemented -def set_interval(*args, **kwargs): - pass +failure_msg = "" try: - from ._stack_v2 import * # noqa: F401, F403 + from ._stack_v2 import * # noqa: F403, F401 is_available = True -except Exception as e: - from ddtrace.internal.logger import get_logger - LOG = get_logger(__name__) - - LOG.debug("Failed to import _stack_v2: %s", e) +except Exception as e: + failure_msg = str(e) diff --git a/ddtrace/profiling/collector/stack.pyx b/ddtrace/profiling/collector/stack.pyx index 6164f477191..9a3f1f32838 100644 --- a/ddtrace/profiling/collector/stack.pyx +++ b/ddtrace/profiling/collector/stack.pyx @@ -478,7 +478,7 @@ class StackCollector(collector.PeriodicCollector): _thread_time = attr.ib(init=False, repr=False, eq=False) _last_wall_time = attr.ib(init=False, repr=False, eq=False, type=int) _thread_span_links = attr.ib(default=None, init=False, repr=False, eq=False) - _stack_collector_v2_enabled = attr.ib(type=bool, default=config.stack.v2.enabled) + _stack_collector_v2_enabled = attr.ib(type=bool, default=config.stack.v2_enabled) @max_time_usage_pct.validator def _check_max_time_usage(self, attribute, value): @@ -497,7 +497,7 @@ class StackCollector(collector.PeriodicCollector): if config.export.libdd_enabled: set_use_libdd(True) - # If at the end of things, stack v2 is still enabled, then start the native thread running the v2 sampler + # If stack v2 is enabled, then use the v2 sampler if self._stack_collector_v2_enabled: LOG.debug("Starting the stack v2 sampler") stack_v2.start() diff --git a/ddtrace/profiling/profiler.py b/ddtrace/profiling/profiler.py index acd16b68469..7341976e012 100644 --- a/ddtrace/profiling/profiler.py +++ b/ddtrace/profiling/profiler.py @@ -116,6 +116,7 @@ class _ProfilerInstance(service.Service): agentless = attr.ib(type=bool, default=config.agentless) _memory_collector_enabled = attr.ib(type=bool, default=config.memory.enabled) _stack_collector_enabled = attr.ib(type=bool, default=config.stack.enabled) + _stack_v2_enabled = attr.ib(type=bool, default=config.stack.v2_enabled) _lock_collector_enabled = attr.ib(type=bool, default=config.lock.enabled) enable_code_provenance = attr.ib(type=bool, default=config.code_provenance) endpoint_collection_enabled = attr.ib(type=bool, default=config.endpoint_collection) @@ -128,7 +129,6 @@ class _ProfilerInstance(service.Service): init=False, factory=lambda: os.environ.get("AWS_LAMBDA_FUNCTION_NAME"), type=Optional[str] ) _export_libdd_enabled = attr.ib(type=bool, default=config.export.libdd_enabled) - _export_libdd_required = attr.ib(type=bool, default=config.export.libdd_required) ENDPOINT_TEMPLATE = "https://intake.profile.{}" @@ -171,16 +171,10 @@ def _build_default_exporters(self): if self._lambda_function_name is not None: self.tags.update({"functionname": self._lambda_function_name}) - # Did the user request the libdd collector? Better log it. - if self._export_libdd_enabled: - LOG.debug("The libdd collector is enabled") - if self._export_libdd_required: - LOG.debug("The libdd collector is required") - # Build the list of enabled Profiling features and send along as a tag configured_features = [] if self._stack_collector_enabled: - if config.stack.v2.enabled: + if self._stack_v2_enabled: configured_features.append("stack_v2") else: configured_features.append("stack") @@ -195,8 +189,6 @@ def _build_default_exporters(self): configured_features.append("exp_dd") else: configured_features.append("exp_py") - if self._export_libdd_required: - configured_features.append("req_dd") configured_features.append("CAP" + str(config.capture_pct)) configured_features.append("MAXF" + str(config.max_frames)) self.tags.update({"profiler_config": "_".join(configured_features)}) @@ -207,7 +199,6 @@ def _build_default_exporters(self): # If libdd is enabled, then # * If initialization fails, disable the libdd collector and fall back to the legacy exporter - # * If initialization fails and libdd is required, disable everything and return (error) if self._export_libdd_enabled: try: ddup.init( @@ -225,16 +216,11 @@ def _build_default_exporters(self): self._export_libdd_enabled = False config.export.libdd_enabled = False - # If we're here and libdd was required, then there's nothing else to do. We don't have a - # collector. - if self._export_libdd_required: - LOG.error("libdd collector is required but could not be initialized. Disabling profiling.") - config.enabled = False - config.export.libdd_required = False - config.lock.enabled = False - config.memory.enabled = False - config.stack.enabled = False - return [] + # also disable other features that might be enabled + if self._stack_v2_enabled: + LOG.error("Disabling stack_v2 as libdd collector failed to initialize") + self._stack_v2_enabled = False + config.stack.v2_enabled = False # DEV: Import this only if needed to avoid importing protobuf # unnecessarily diff --git a/ddtrace/settings/profiling.py b/ddtrace/settings/profiling.py index fe8781fd3b1..c2a65b17931 100644 --- a/ddtrace/settings/profiling.py +++ b/ddtrace/settings/profiling.py @@ -10,8 +10,15 @@ logger = get_logger(__name__) +# Stash the reason why a transitive dependency failed to load; since we try to load things safely in order to guide +# configuration, these errors won't bubble up naturally. All of these components should use the same pattern +# in order to guarantee uniformity. +ddup_failure_msg = "" +stack_v2_failure_msg = "" + + def _derive_default_heap_sample_size(heap_config, default_heap_sample_size=1024 * 1024): - # type: (ProfilingConfig.Heap, int) -> int + # type: (ProfilingConfigHeap, int) -> int heap_sample_size = heap_config._sample_size if heap_sample_size is not None: return heap_sample_size @@ -38,18 +45,24 @@ def _derive_default_heap_sample_size(heap_config, default_heap_sample_size=1024 def _check_for_ddup_available(): + global ddup_failure_msg ddup_is_available = False try: from ddtrace.internal.datadog.profiling import ddup ddup_is_available = ddup.is_available + ddup_failure_msg = ddup.failure_msg except Exception: pass # nosec return ddup_is_available def _check_for_stack_v2_available(): + global stack_v2_failure_msg stack_v2_is_available = False + + # stack_v2 will use libdd; in order to prevent two separate collectors from running, it then needs to force + # libdd to be enabled as well; that means it depends on the libdd interface (ddup) if not _check_for_ddup_available(): return False @@ -57,15 +70,14 @@ def _check_for_stack_v2_available(): from ddtrace.internal.datadog.profiling import stack_v2 stack_v2_is_available = stack_v2.is_available + stack_v2_failure_msg = stack_v2.failure_msg except Exception: pass # nosec return stack_v2_is_available -# We don't check for the availability of the ddup module when determining whether libdd is _required_, -# since it's up to the application code to determine what happens in that failure case. def _is_libdd_required(config): - return config.stack.v2.enabled or config._libdd_required + return config.stack.v2_enabled or config.export._libdd_enabled class ProfilingConfig(En): @@ -220,98 +232,125 @@ class V2(En): enabled = En.d(bool, lambda c: _check_for_stack_v2_available() and c._enabled) - class Lock(En): - __item__ = __prefix__ = "lock" - enabled = En.v( - bool, - "enabled", - default=True, - help_type="Boolean", - help="Whether to enable the lock profiler", - ) +class ProfilingConfigStack(En): + __item__ = __prefix__ = "stack" - name_inspect_dir = En.v( - bool, - "name_inspect_dir", - default=True, - help_type="Boolean", - help="Whether to inspect the ``dir()`` of local and global variables to find the name of the lock. " - "With this enabled, the profiler finds the name of locks that are attributes of an object.", - ) + enabled = En.v( + bool, + "enabled", + default=True, + help_type="Boolean", + help="Whether to enable the stack profiler", + ) - class Memory(En): - __item__ = __prefix__ = "memory" + _v2_enabled = En.v( + bool, + "v2_enabled", + default=False, + help_type="Boolean", + help="Whether to enable the v2 stack profiler. Also enables the libdatadog collector.", + ) - enabled = En.v( - bool, - "enabled", - default=True, - help_type="Boolean", - help="Whether to enable the memory profiler", - ) + # V2 can't be enabled if stack collection is disabled or if pre-requisites are not met + v2_enabled = En.d(bool, lambda c: _check_for_stack_v2_available() and c._v2_enabled and c.enabled) - events_buffer = En.v( - int, - "events_buffer", - default=16, - help_type="Integer", - help="", - ) - class Heap(En): - __item__ = __prefix__ = "heap" +class ProfilingConfigLock(En): + __item__ = __prefix__ = "lock" - enabled = En.v( - bool, - "enabled", - default=True, - help_type="Boolean", - help="Whether to enable the heap memory profiler", - ) + enabled = En.v( + bool, + "enabled", + default=True, + help_type="Boolean", + help="Whether to enable the lock profiler", + ) - _sample_size = En.v( - t.Optional[int], - "sample_size", - default=None, - help_type="Integer", - help="", - ) - sample_size = En.d(int, _derive_default_heap_sample_size) + name_inspect_dir = En.v( + bool, + "name_inspect_dir", + default=True, + help_type="Boolean", + help="Whether to inspect the ``dir()`` of local and global variables to find the name of the lock. " + "With this enabled, the profiler finds the name of locks that are attributes of an object.", + ) - class Export(En): - __item__ = __prefix__ = "export" - _libdd_required = En.v( - bool, - "libdd_required", - default=False, - help_type="Boolean", - help="Requires the native exporter to be enabled", - ) +class ProfilingConfigMemory(En): + __item__ = __prefix__ = "memory" - libdd_required = En.d( - bool, - _is_libdd_required, - ) + enabled = En.v( + bool, + "enabled", + default=True, + help_type="Boolean", + help="Whether to enable the memory profiler", + ) - _libdd_enabled = En.v( - bool, - "libdd_enabled", - default=False, - help_type="Boolean", - help="Enables collection and export using a native exporter. Can fallback to the pure-Python exporter.", - ) + events_buffer = En.v( + int, + "events_buffer", + default=16, + help_type="Integer", + help="", + ) - libdd_enabled = En.d( - bool, lambda c: (_is_libdd_required(c) or c._libdd_enabled) and _check_for_ddup_available() - ) - Export.include(Stack, namespace="stack") +class ProfilingConfigHeap(En): + __item__ = __prefix__ = "heap" + + enabled = En.v( + bool, + "enabled", + default=True, + help_type="Boolean", + help="Whether to enable the heap memory profiler", + ) + + _sample_size = En.v( + t.Optional[int], + "sample_size", + default=None, + help_type="Integer", + help="", + ) + sample_size = En.d(int, _derive_default_heap_sample_size) + + +class ProfilingConfigExport(En): + __item__ = __prefix__ = "export" + + _libdd_enabled = En.v( + bool, + "libdd_enabled", + default=False, + help_type="Boolean", + help="Enables collection and export using a native exporter. Can fallback to the pure-Python exporter.", + ) + +# Include all the sub-configs +ProfilingConfig.include(ProfilingConfigStack, namespace="stack") +ProfilingConfig.include(ProfilingConfigLock, namespace="lock") +ProfilingConfig.include(ProfilingConfigMemory, namespace="memory") +ProfilingConfig.include(ProfilingConfigHeap, namespace="heap") +ProfilingConfig.include(ProfilingConfigExport, namespace="export") config = ProfilingConfig() -if config.export.libdd_required and not config.export.libdd_enabled: - logger.warning("The native exporter is required, but not enabled. Disabling profiling.") - config.enabled = False +# Force the enablement of libdd if the user requested a feature which requires it; otherwise the user has to manage +# configuration too intentionally and we'll need to change the API too much over time. +config.export.libdd_enabled = _is_libdd_required(config) + +# Certain features depend on libdd being available. If it isn't for some reason, those features cannot be enabled. +if config.stack.v2_enabled and not config.export.libdd_enabled: + msg = ddup_failure_msg or "libdd not available" + logger.warning("The v2 stack profiler cannot be used (%s)", msg) + config.stack.v2_enabled = False + +# Loading stack_v2 can fail for similar reasons +if config.stack.v2_enabled and not _check_for_stack_v2_available(): + msg = stack_v2_failure_msg or "stack_v2 not available" + logger.warning("The v2 stack profiler cannot be used (%s)", msg) + config.stack.v2_enabled = False