Skip to content

Commit

Permalink
Fix the heuristic for extending events in derived timeline to have a …
Browse files Browse the repository at this point in the history
…2x(previous event's duration) threshold for the gap for TPU sessions.

PiperOrigin-RevId: 703687641
  • Loading branch information
tensorflower-gardener committed Dec 7, 2024
1 parent 3ab9024 commit 4c65cd1
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 8 deletions.
17 changes: 14 additions & 3 deletions tensorflow/core/profiler/utils/derived_timeline.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ namespace tensorflow {
namespace profiler {
namespace {

using tsl::profiler::FindMutableTensorCorePlanes;
using ::tsl::profiler::DeviceType;
using ::tsl::profiler::FindMutableTensorCorePlanes;
using ::tsl::profiler::GetDeviceType;

inline std::string HloModuleEventName(const GpuEventStats& stats) {
return stats.program_id ? tsl::profiler::HloModuleNameWithProgramId(
Expand Down Expand Up @@ -321,6 +323,7 @@ DerivedXLineBuilder::DerivedXLineBuilder(
dependent_lines_(std::move(dependent_lines)) {
line_.SetName(name);
line_.SetTimestampNs(timestamp_ns);
is_gpu_plane_ = GetDeviceType(plane->Name()) == DeviceType::kGpu;
}

void DerivedXLineBuilder::ExpandOrAddEvent(
Expand Down Expand Up @@ -351,8 +354,16 @@ void DerivedXLineBuilder::ExpandOrAddLevelEvent(
std::optional<int64_t> group_id, std::optional<int64_t> scope_range_id,
int level) {
auto& last_event = last_event_by_level_[level];
if (last_event &&
last_event->ShouldExpand(event_metadata, group_id, scope_range_id)) {
// If group_id is not set and we still choose to expand, put an extra check:
// Expand only if the gap between the last event and the new event is less
// than 2 * duration of the last event.
// TODO: b/373944719 - add the extra node_id check for GPU profiles.
if (last_event.has_value() &&
last_event->ShouldExpand(event_metadata, group_id, scope_range_id) &&
(is_gpu_plane_ || group_id.has_value() ||
(last_event->GetTimespan().end_ps() +
2 * last_event->GetTimespan().duration_ps()) >=
event_span.begin_ps())) {
// Expand the last event to cover the given event.
last_event->Expand(event_span);
} else {
Expand Down
1 change: 1 addition & 0 deletions tensorflow/core/profiler/utils/derived_timeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ class DerivedXLineBuilder {
absl::flat_hash_map<int, std::optional<DerivedXEventBuilder>>
last_event_by_level_;
std::vector<DerivedXLineBuilder*> dependent_lines_;
bool is_gpu_plane_ = false;
};

struct Symbol {
Expand Down
34 changes: 34 additions & 0 deletions tensorflow/core/profiler/utils/derived_timeline_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,40 @@ TEST(DerivedTimelineTest, DeriveLinesForXlaCpuOps) {
});
}

TEST(DerivedTimelineTest, MergeAndNoMerge) {
constexpr absl::string_view kHloModuleName = "Framework Ops";
static constexpr absl::string_view kTfOpName = "abc:model/layer/MatMul_1";
XSpace space;
tsl::profiler::GroupMetadataMap group_metadata_map;
XPlane* plane =
GetOrCreateTpuXPlane(&space, /*device_ordinal=*/0, "DummyTPU", 1.0, 1.0);
XPlaneBuilder plane_builder(plane);
auto line_builder = plane_builder.GetOrCreateLine(0);
CreateXEvent(
&plane_builder, &line_builder, "op1", 0, 100,
{{StatType::kHloModule, kHloModuleName}, {StatType::kTfOp, kTfOpName}});
CreateXEvent(
&plane_builder, &line_builder, "op2", 200, 300,
{{StatType::kHloModule, kHloModuleName}, {StatType::kTfOp, kTfOpName}});
// The above two events are merged into one. This event will not be merged
// because the gap is > 2x(0..200+300) = 1000.
CreateXEvent(
&plane_builder, &line_builder, "op3", 1501, 300,
{{StatType::kHloModule, kHloModuleName}, {StatType::kTfOp, kTfOpName}});
GenerateDerivedTimeLines(group_metadata_map, &space);
XPlaneVisitor plane_visitor = tsl::profiler::CreateTfXPlaneVisitor(plane);
// Only the hlo module line is added and other empty lines are removed at the
// end.
EXPECT_EQ(plane_visitor.NumLines(), 2);
plane_visitor.ForEachLine([](const XLineVisitor& line_visitor) {
if (line_visitor.Id() == 0) return;
EXPECT_EQ(line_visitor.NumEvents(), 2);
line_visitor.ForEachEvent([](const XEventVisitor& event_visitor) {
EXPECT_EQ(event_visitor.Name(), kTfOpName);
});
});
}

} // namespace
} // namespace profiler
} // namespace tensorflow
12 changes: 8 additions & 4 deletions third_party/xla/xla/tsl/profiler/utils/device_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,26 @@ limitations under the License.
#include "xla/tsl/profiler/utils/device_utils.h"

#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
#include "xla/tsl/profiler/utils/xplane_schema.h"
#include "tsl/profiler/protobuf/xplane.pb.h"

namespace tsl {
namespace profiler {

DeviceType GetDeviceType(const tensorflow::profiler::XPlane& plane) {
if (plane.name() == kHostThreadsPlaneName) {
DeviceType GetDeviceType(absl::string_view plane_name) {
if (plane_name == kHostThreadsPlaneName) {
return DeviceType::kCpu;
} else if (absl::StartsWith(plane.name(), kTpuPlanePrefix)) {
} else if (absl::StartsWith(plane_name, kTpuPlanePrefix)) {
return DeviceType::kTpu;
} else if (absl::StartsWith(plane.name(), kGpuPlanePrefix)) {
} else if (absl::StartsWith(plane_name, kGpuPlanePrefix)) {
return DeviceType::kGpu;
} else {
return DeviceType::kUnknown;
}
}
DeviceType GetDeviceType(const tensorflow::profiler::XPlane& plane) {
return GetDeviceType(plane.name());
}
} // namespace profiler
} // namespace tsl
5 changes: 4 additions & 1 deletion third_party/xla/xla/tsl/profiler/utils/device_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.
#ifndef XLA_TSL_PROFILER_UTILS_DEVICE_UTILS_H_
#define XLA_TSL_PROFILER_UTILS_DEVICE_UTILS_H_

#include "absl/strings/string_view.h"
#include "tsl/profiler/protobuf/xplane.pb.h"

namespace tsl {
Expand All @@ -28,8 +29,10 @@ enum class DeviceType {
kGpu,
};

// Get DeviceType from XPlane.
// Gets DeviceType from XPlane.
DeviceType GetDeviceType(const tensorflow::profiler::XPlane& plane);
// Gets DeviceType from XPlane name.
DeviceType GetDeviceType(absl::string_view plane_name);

} // namespace profiler
} // namespace tsl
Expand Down
7 changes: 7 additions & 0 deletions third_party/xla/xla/tsl/profiler/utils/device_utils_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ TEST(DeviceUtilsTest, GetDeviceType) {
EXPECT_EQ(GetDeviceType(CreateXPlane("unknown")), DeviceType::kUnknown);
}

TEST(DeviceUtilsTest, GetDeviceTypeFromName) {
EXPECT_EQ(GetDeviceType(kHostThreadsPlaneName), DeviceType::kCpu);
EXPECT_EQ(GetDeviceType(absl::StrCat(kTpuPlanePrefix, 0)), DeviceType::kTpu);
EXPECT_EQ(GetDeviceType(absl::StrCat(kGpuPlanePrefix, 0)), DeviceType::kGpu);
EXPECT_EQ(GetDeviceType("unknown"), DeviceType::kUnknown);
}

} // namespace
} // namespace profiler
} // namespace tsl

0 comments on commit 4c65cd1

Please sign in to comment.