diff --git a/tensorflow/core/profiler/utils/derived_timeline.cc b/tensorflow/core/profiler/utils/derived_timeline.cc index a2ceb683f97bd7..6db5f1a51980e0 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.cc +++ b/tensorflow/core/profiler/utils/derived_timeline.cc @@ -56,7 +56,9 @@ namespace tensorflow { namespace profiler { namespace { -using tsl::profiler::FindMutableTensorCorePlanes; +using ::tsl::profiler::DeviceType; +using ::tsl::profiler::FindMutableTensorCorePlanes; +using ::tsl::profiler::GetDeviceType; inline std::string HloModuleEventName(const GpuEventStats& stats) { return stats.program_id ? tsl::profiler::HloModuleNameWithProgramId( @@ -321,6 +323,7 @@ DerivedXLineBuilder::DerivedXLineBuilder( dependent_lines_(std::move(dependent_lines)) { line_.SetName(name); line_.SetTimestampNs(timestamp_ns); + is_gpu_plane_ = GetDeviceType(plane->Name()) == DeviceType::kGpu; } void DerivedXLineBuilder::ExpandOrAddEvent( @@ -351,8 +354,16 @@ void DerivedXLineBuilder::ExpandOrAddLevelEvent( std::optional group_id, std::optional scope_range_id, int level) { auto& last_event = last_event_by_level_[level]; - if (last_event && - last_event->ShouldExpand(event_metadata, group_id, scope_range_id)) { + // If group_id is not set and we still choose to expand, put an extra check: + // Expand only if the gap between the last event and the new event is less + // than 2 * duration of the last event. + // TODO: b/373944719 - add the extra node_id check for GPU profiles. + if (last_event.has_value() && + last_event->ShouldExpand(event_metadata, group_id, scope_range_id) && + (is_gpu_plane_ || group_id.has_value() || + (last_event->GetTimespan().end_ps() + + 2 * last_event->GetTimespan().duration_ps()) >= + event_span.begin_ps())) { // Expand the last event to cover the given event. last_event->Expand(event_span); } else { diff --git a/tensorflow/core/profiler/utils/derived_timeline.h b/tensorflow/core/profiler/utils/derived_timeline.h index edb0a071326bd8..6d2b5e5b243917 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.h +++ b/tensorflow/core/profiler/utils/derived_timeline.h @@ -142,6 +142,7 @@ class DerivedXLineBuilder { absl::flat_hash_map> last_event_by_level_; std::vector dependent_lines_; + bool is_gpu_plane_ = false; }; struct Symbol { diff --git a/tensorflow/core/profiler/utils/derived_timeline_test.cc b/tensorflow/core/profiler/utils/derived_timeline_test.cc index c10b560ce176f1..162e58ed17199b 100644 --- a/tensorflow/core/profiler/utils/derived_timeline_test.cc +++ b/tensorflow/core/profiler/utils/derived_timeline_test.cc @@ -503,6 +503,40 @@ TEST(DerivedTimelineTest, DeriveLinesForXlaCpuOps) { }); } +TEST(DerivedTimelineTest, MergeAndNoMerge) { + constexpr absl::string_view kHloModuleName = "Framework Ops"; + static constexpr absl::string_view kTfOpName = "abc:model/layer/MatMul_1"; + XSpace space; + tsl::profiler::GroupMetadataMap group_metadata_map; + XPlane* plane = + GetOrCreateTpuXPlane(&space, /*device_ordinal=*/0, "DummyTPU", 1.0, 1.0); + XPlaneBuilder plane_builder(plane); + auto line_builder = plane_builder.GetOrCreateLine(0); + CreateXEvent( + &plane_builder, &line_builder, "op1", 0, 100, + {{StatType::kHloModule, kHloModuleName}, {StatType::kTfOp, kTfOpName}}); + CreateXEvent( + &plane_builder, &line_builder, "op2", 200, 300, + {{StatType::kHloModule, kHloModuleName}, {StatType::kTfOp, kTfOpName}}); + // The above two events are merged into one. This event will not be merged + // because the gap is > 2x(0..200+300) = 1000. + CreateXEvent( + &plane_builder, &line_builder, "op3", 1501, 300, + {{StatType::kHloModule, kHloModuleName}, {StatType::kTfOp, kTfOpName}}); + GenerateDerivedTimeLines(group_metadata_map, &space); + XPlaneVisitor plane_visitor = tsl::profiler::CreateTfXPlaneVisitor(plane); + // Only the hlo module line is added and other empty lines are removed at the + // end. + EXPECT_EQ(plane_visitor.NumLines(), 2); + plane_visitor.ForEachLine([](const XLineVisitor& line_visitor) { + if (line_visitor.Id() == 0) return; + EXPECT_EQ(line_visitor.NumEvents(), 2); + line_visitor.ForEachEvent([](const XEventVisitor& event_visitor) { + EXPECT_EQ(event_visitor.Name(), kTfOpName); + }); + }); +} + } // namespace } // namespace profiler } // namespace tensorflow diff --git a/third_party/xla/xla/tsl/profiler/utils/device_utils.cc b/third_party/xla/xla/tsl/profiler/utils/device_utils.cc index 945a157ec9c456..c9b15a51507f00 100644 --- a/third_party/xla/xla/tsl/profiler/utils/device_utils.cc +++ b/third_party/xla/xla/tsl/profiler/utils/device_utils.cc @@ -16,22 +16,26 @@ limitations under the License. #include "xla/tsl/profiler/utils/device_utils.h" #include "absl/strings/match.h" +#include "absl/strings/string_view.h" #include "xla/tsl/profiler/utils/xplane_schema.h" #include "tsl/profiler/protobuf/xplane.pb.h" namespace tsl { namespace profiler { -DeviceType GetDeviceType(const tensorflow::profiler::XPlane& plane) { - if (plane.name() == kHostThreadsPlaneName) { +DeviceType GetDeviceType(absl::string_view plane_name) { + if (plane_name == kHostThreadsPlaneName) { return DeviceType::kCpu; - } else if (absl::StartsWith(plane.name(), kTpuPlanePrefix)) { + } else if (absl::StartsWith(plane_name, kTpuPlanePrefix)) { return DeviceType::kTpu; - } else if (absl::StartsWith(plane.name(), kGpuPlanePrefix)) { + } else if (absl::StartsWith(plane_name, kGpuPlanePrefix)) { return DeviceType::kGpu; } else { return DeviceType::kUnknown; } } +DeviceType GetDeviceType(const tensorflow::profiler::XPlane& plane) { + return GetDeviceType(plane.name()); +} } // namespace profiler } // namespace tsl diff --git a/third_party/xla/xla/tsl/profiler/utils/device_utils.h b/third_party/xla/xla/tsl/profiler/utils/device_utils.h index 825a9fe975437d..e07f5679392f85 100644 --- a/third_party/xla/xla/tsl/profiler/utils/device_utils.h +++ b/third_party/xla/xla/tsl/profiler/utils/device_utils.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef XLA_TSL_PROFILER_UTILS_DEVICE_UTILS_H_ #define XLA_TSL_PROFILER_UTILS_DEVICE_UTILS_H_ +#include "absl/strings/string_view.h" #include "tsl/profiler/protobuf/xplane.pb.h" namespace tsl { @@ -28,8 +29,10 @@ enum class DeviceType { kGpu, }; -// Get DeviceType from XPlane. +// Gets DeviceType from XPlane. DeviceType GetDeviceType(const tensorflow::profiler::XPlane& plane); +// Gets DeviceType from XPlane name. +DeviceType GetDeviceType(absl::string_view plane_name); } // namespace profiler } // namespace tsl diff --git a/third_party/xla/xla/tsl/profiler/utils/device_utils_test.cc b/third_party/xla/xla/tsl/profiler/utils/device_utils_test.cc index 6f872dc5713bed..1698357e36b330 100644 --- a/third_party/xla/xla/tsl/profiler/utils/device_utils_test.cc +++ b/third_party/xla/xla/tsl/profiler/utils/device_utils_test.cc @@ -40,6 +40,13 @@ TEST(DeviceUtilsTest, GetDeviceType) { EXPECT_EQ(GetDeviceType(CreateXPlane("unknown")), DeviceType::kUnknown); } +TEST(DeviceUtilsTest, GetDeviceTypeFromName) { + EXPECT_EQ(GetDeviceType(kHostThreadsPlaneName), DeviceType::kCpu); + EXPECT_EQ(GetDeviceType(absl::StrCat(kTpuPlanePrefix, 0)), DeviceType::kTpu); + EXPECT_EQ(GetDeviceType(absl::StrCat(kGpuPlanePrefix, 0)), DeviceType::kGpu); + EXPECT_EQ(GetDeviceType("unknown"), DeviceType::kUnknown); +} + } // namespace } // namespace profiler } // namespace tsl