Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TorchAO speedup metric vs eager #6178

Merged
merged 8 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ WITH benchmarks AS (
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
tupleElement(o.runners [ 1 ], 'type')
) AS arch,
IF(
tupleElement(o.benchmark, 'extra_info') [ 'compile' ] = '',
'true', -- Default to true
tupleElement(o.benchmark, 'extra_info') [ 'compile' ]
) AS use_torch_compile,
DATE_TRUNC(
{granularity: String },
fromUnixTimestamp(o.timestamp)
Expand Down Expand Up @@ -71,6 +76,7 @@ SELECT
dtype,
device,
arch,
toBool(use_torch_compile) AS use_torch_compile,
granularity_bucket
FROM
benchmarks
Expand Down
21 changes: 14 additions & 7 deletions torchci/components/benchmark/llms/ModelGraphPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import {
import { TIME_FIELD_NAME } from "components/benchmark/common";
import {
DEFAULT_DEVICE_NAME,
DEFAULT_DTYPE_NAME,
DEFAULT_MODEL_NAME,
LLMsBenchmarkData,
METRIC_DISPLAY_HEADERS,
Expand All @@ -18,7 +17,10 @@ import {
TimeSeriesPanelWithData,
} from "components/metrics/panels/TimeSeriesPanel";
import dayjs from "dayjs";
import { computeSpeedup } from "lib/benchmark/aoUtils";
import {
computeSpeedup,
TORCHAO_SPEEDUP_METRIC_NAMES,
} from "lib/benchmark/aoUtils";
import { computeGeomean, useBenchmark } from "lib/benchmark/llmUtils";
import { BranchAndCommit } from "lib/types";

Expand Down Expand Up @@ -64,7 +66,12 @@ export function GraphPanel({
);
}

const dataWithSpeedup = computeSpeedup(repoName, data);
const dataWithSpeedup = computeSpeedup(
repoName,
computeSpeedup(repoName, data, false, true),
true,
false
);

// Clamp to the nearest granularity (e.g. nearest hour) so that the times will
// align with the data we get from the database
Expand All @@ -80,8 +87,10 @@ export function GraphPanel({
const chartData: { [k: string]: any } = {};
const graphSeries: { [k: string]: any } = {};
metricNames.forEach((metric: string) => {
// TODO (huydhn): Only display aggregated speedup metric for now
if (modelName === DEFAULT_MODEL_NAME && metric !== "speedup") {
if (
modelName === DEFAULT_MODEL_NAME &&
!TORCHAO_SPEEDUP_METRIC_NAMES.includes(metric)
) {
chartData[metric] = [];
return;
}
Expand Down Expand Up @@ -115,8 +124,6 @@ export function GraphPanel({
.filter((record: LLMsBenchmarkData) => {
return (
record.model === modelName &&
(record.dtype === dtypeName ||
dtypeName === DEFAULT_DTYPE_NAME) &&
(`${record.device} (${record.arch})` === deviceName ||
deviceName === DEFAULT_DEVICE_NAME) &&
record.metric === metric
Expand Down
5 changes: 4 additions & 1 deletion torchci/components/benchmark/llms/common.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
token_per_sec: "Token per second",
flops_utilization: "FLOPs utilization",
"compilation_time(s)": "Compilation Time (s)",
speedup: "Speedup",
compile_vs_eager_speedup: "Compile vs eager speedup",
autoquant_vs_compile_speedup: "Autoquant vs compile speedup",
eager_speedup: "Eager speedup",
};
// The variable name is a bit dumb, but it tells if a higher metric value
// is good or bad so that we can highlight it on the dashboard accordingly.
Expand Down Expand Up @@ -53,6 +55,7 @@ export interface LLMsBenchmarkData {
device: string;
arch: string;
display?: string;
use_torch_compile?: boolean;
}

export interface BranchAndCommitPerfData extends BranchAndCommit {
Expand Down
104 changes: 89 additions & 15 deletions torchci/lib/benchmark/aoUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,17 @@ export const TORCHAO_BASELINE = "noquant";
// here on the dashboard
const SPEEDUP_METRICS = ["tok/s", "time_ms(avg)", "time_s(avg)", "img_s(avg)"];

export const TORCHAO_SPEEDUP_METRIC_NAMES = [
"compile_vs_eager_speedup",
"autoquant_vs_compile_speedup",
"eager_speedup",
];
// Maps a "<dtype>-<useTorchCompile>" key to the corresponding speedup metric name
export const TORCHAO_SPEEDUP_METRIC_NAMES_MAPPING: { [key: string]: string } = {
"noquant-false": "compile_vs_eager_speedup",
"-true": "autoquant_vs_compile_speedup",
};

// TODO (huydhn): Use this function to convert the generic benchmark data to the old
// CompilerPerformanceData format. This is needed until the TorchInductor dashboard
// is migrated to the new format
Expand Down Expand Up @@ -54,48 +65,111 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) {
return Object.values(convertData);
}

export function computeSpeedup(repoName: string, data: LLMsBenchmarkData[]) {
export function computeSpeedup(
repoName: string,
data: LLMsBenchmarkData[],
useTorchCompile: boolean,
usebaseCommitbaseline: boolean
) {
if (repoName !== TORCHAO_REPO) {
return data;
}

const baselineMetrics: { [key: string]: LLMsBenchmarkData } = {};
// https://github.com/pytorch/test-infra/pull/6178#issuecomment-2596338457, we want
// to show 3 different speedups in AO:
// - Current eager perf vs base commit eager
const baseCommitBaseline: { [key: string]: LLMsBenchmarkData } = {};
// - Current compile perf vs current eager
// - Current autoquant perf vs current compile
const currentCommitBaseline: { [key: string]: LLMsBenchmarkData } = {};

data.forEach((r: LLMsBenchmarkData) => {
if (r.dtype !== TORCHAO_BASELINE) {
if (
r.dtype !== TORCHAO_BASELINE ||
r.use_torch_compile !== useTorchCompile
) {
return;
}

const k = `${r.workflow_id} ${r.job_id} ${r.model} ${r.metric} ${r.device} ${r.arch}`;
baselineMetrics[k] = r;
const baseCommitKey = `${r.model} ${r.metric} ${r.device} ${r.arch}`;
const currentCommitKey = `${r.workflow_id} ${r.job_id} ${baseCommitKey}`;

// To compare against the current commit
currentCommitBaseline[currentCommitKey] = r;

// To compare against the oldest base commit
if (
!usebaseCommitbaseline ||
(baseCommitKey in baseCommitBaseline &&
baseCommitBaseline[baseCommitKey].workflow_id < r.workflow_id)
) {
return;
}
baseCommitBaseline[baseCommitKey] = r;
});

const withSpeedup: LLMsBenchmarkData[] = [];
data.forEach((r: LLMsBenchmarkData) => {
if (r.dtype === TORCHAO_BASELINE) {
return;
withSpeedup.push(r);

// Compute eager speedup vs the base commit baseline
if (r.dtype === TORCHAO_BASELINE && r.use_torch_compile === false) {
if (SPEEDUP_METRICS.includes(r.metric)) {
const k = `${r.model} ${r.metric} ${r.device} ${r.arch}`;
if (
k in baseCommitBaseline &&
baseCommitBaseline[k].actual !== 0 &&
r.actual !== 0 &&
baseCommitBaseline[k].workflow_id <= r.workflow_id
) {
const speedup = r.metric.includes("time")
? baseCommitBaseline[k].actual / r.actual
: r.actual / baseCommitBaseline[k].actual;

withSpeedup.push({
...r,
metric: "eager_speedup",
actual: Number(speedup.toFixed(2)),
target: 0,
});
}
}
}

if (SPEEDUP_METRICS.includes(r.metric)) {
const k = `${r.workflow_id} ${r.job_id} ${r.model} ${r.metric} ${r.device} ${r.arch}`;
if (
k in baselineMetrics &&
baselineMetrics[k].actual !== 0 &&
k in currentCommitBaseline &&
currentCommitBaseline[k].actual !== 0 &&
r.actual !== 0
) {
const speedup = r.metric.includes("time")
? baselineMetrics[k].actual / r.actual
: r.actual / baselineMetrics[k].actual;
? currentCommitBaseline[k].actual / r.actual
: r.actual / currentCommitBaseline[k].actual;

const speedupMetricName =
r.dtype === TORCHAO_BASELINE
? // Compile vs eager
r !== currentCommitBaseline[k]
? TORCHAO_SPEEDUP_METRIC_NAMES_MAPPING[
`${r.dtype}-${useTorchCompile}`
]
: ""
: // Autoquant vs compile or vs eager
TORCHAO_SPEEDUP_METRIC_NAMES_MAPPING[`-${useTorchCompile}`];

if (!speedupMetricName) {
return;
}

withSpeedup.push({
...r,
metric: "speedup",
actual: Number(speedup.toFixed(4)),
metric: speedupMetricName,
actual: Number(speedup.toFixed(2)),
target: 0,
});
}
}

withSpeedup.push(r);
});

return withSpeedup;
Expand Down
30 changes: 21 additions & 9 deletions torchci/pages/benchmark/llms.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ import CopyLink from "components/CopyLink";
import GranularityPicker from "components/GranularityPicker";
import { Granularity } from "components/metrics/panels/TimeSeriesPanel";
import dayjs from "dayjs";
import { computeSpeedup, TORCHAO_BASELINE } from "lib/benchmark/aoUtils";
import {
computeSpeedup,
TORCHAO_BASELINE,
TORCHAO_SPEEDUP_METRIC_NAMES,
} from "lib/benchmark/aoUtils";
import { useBenchmark } from "lib/benchmark/llmUtils";
import { fetcher } from "lib/GeneralUtils";
import { BranchAndCommit } from "lib/types";
Expand Down Expand Up @@ -82,11 +86,22 @@ function Report({
);
}

const lDataWithSpeedup = computeSpeedup(repoName, lData);
const rDataWithSpeedup = computeSpeedup(repoName, rData);
const lDataWithSpeedup = computeSpeedup(
repoName,
computeSpeedup(repoName, lData, false, true),
true,
false
);

const rDataWithSpeedup = computeSpeedup(
repoName,
computeSpeedup(repoName, rData, false, true),
true,
false
);

if (repoName === "pytorch/ao") {
metricNames = ["speedup", ...metricNames];
metricNames = [...TORCHAO_SPEEDUP_METRIC_NAMES, ...metricNames];
}

return (
Expand Down Expand Up @@ -288,10 +303,7 @@ export default function Page() {
];
const dtypeNames: string[] = _.compact([
DEFAULT_DTYPE_NAME,
..._.filter(
_.uniq(data.map((r: any) => r.dtype)) as string[],
(r: string) => r !== TORCHAO_BASELINE
),
...(_.uniq(data.map((r: any) => r.dtype)) as string[]),
]);
const metricNames: string[] = _.uniq(data.map((r: any) => r.metric));

Expand Down Expand Up @@ -367,7 +379,7 @@ export default function Page() {
commit={lCommit}
setCommit={setLCommit}
titlePrefix={"Base"}
fallbackIndex={1} // Default to previous commit
fallbackIndex={-1} // Default to oldest commit
timeRange={timeRange}
/>
<Divider orientation="vertical" flexItem>
Expand Down
Loading