Merge branch 'main' into brosko/umd_single_driver
broskoTT authored Nov 2, 2024
2 parents d41c544 + 7321dd7 commit 25af3a8
Showing 203 changed files with 5,439 additions and 899 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cpp-ttnn-project.yaml
@@ -57,11 +57,11 @@ jobs:
           -w ${{ github.workspace }}
         run: |
           set -eu # basic shell hygiene
-          ./build_metal.sh --disable-unity-builds --build-type Release
+          ./build_metal.sh --build-type Release
       # TTNN project
       - name: Checkout cpp-ttnn-project-template
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: tenstorrent/cpp-ttnn-project-template
           path: project
13 changes: 11 additions & 2 deletions .github/workflows/ttnn-run-sweeps.yaml
@@ -11,6 +11,8 @@ on:
         options:
           - ALL SWEEPS (Nightly)
           - add
+          - tilize
+          - untilize
           - ccl.line_all_gather
           - ccl.all_gather_n300
           - ccl.all_gather_n300_focused
@@ -255,10 +257,13 @@ on:
           - eltwise.ternary.lerp.lerp
           - eltwise.ternary.where.where
           - eltwise.ternary.where.where_pytorch2
+          - eltwise.ternary_backward.addcmul_bw
+          - eltwise.ternary_backward.addcdiv_bw
+          - embedding.embedding
           - reduction.topk.topk
           - reduction.argmax.argmax
-          - embedding.embedding
-          - eltwise.ternary_backward.addcmul_bw
+          - reduction.prod
+          - reduction.sum
           - matmul.full.matmul_default_block_sharded
           - matmul.full.matmul_default_height_sharded
           - matmul.full.matmul_default_interleaved
@@ -270,6 +275,8 @@
           - matmul.short.matmul_user_program_config_mcast_2d
           - matmul.short.matmul_user_program_config
           - matmul.short.matmul
+          - losses.l1_loss
+          - losses.mse_loss
           - data_movement.concat.concat_interleaved_n_tensors
           - data_movement.concat.concat_interleaved
           - data_movement.concat.concat_sharded
@@ -288,6 +295,8 @@
           - data_movement.index_select.index_select_pytorch2
           - data_movement.split.split_with_sizes_pytorch2
           - data_movement.repeat.repeat
+          - data_movement.reshape.reshape
+          - data_movement.repeat_interleave.repeat_interleave
           - data_movement.nonzero.nonzero
           - conv2d.full.conv2d_misc
           - conv2d.full.conv2d_sharding
6 changes: 3 additions & 3 deletions CMakeLists.txt
@@ -55,6 +55,9 @@ if(ENABLE_LIBCXX)
 # $<$<LINK_LANG_AND_ID:CXX,Clang>:-lc++>
 # $<$<LINK_LANG_AND_ID:CXX,Clang>:-lc++abi>
 #)
+else()
+    # required when linking with libstdc++ with clang and gcc
+    add_compile_options(-fsized-deallocation)
 endif()
 
 # Using below until we can move to CMake >= 3.18 for LINK_LANG_AND_ID
@@ -63,8 +66,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND ENABLE_LIBCXX)
     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -lc++ -lc++abi")
 endif()
 
-add_compile_options($<$<COMPILE_LANG_AND_ID:CXX,GNU>:-fsized-deallocation>)
-
 include(CTest)
 
 get_property(isMultiConfig GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
@@ -263,7 +264,6 @@ target_compile_options(compiler_flags INTERFACE -DARCH_${ARCH_NAME_DEF})

 add_library(metal_header_directories INTERFACE)
 target_include_directories(metal_header_directories INTERFACE ${PROJECT_SOURCE_DIR}/tt_metal/hw/inc)
-target_include_directories(metal_header_directories SYSTEM INTERFACE ${reflect_SOURCE_DIR})
 foreach(lib ${BoostPackages})
     target_include_directories(metal_header_directories INTERFACE ${Boost${lib}_SOURCE_DIR}/include)
 endforeach()
1 change: 1 addition & 0 deletions CODEOWNERS
@@ -74,6 +74,7 @@ tt_metal/hw/firmware/src/*erisc* @aliuTT @ubcheema
 tt_metal/hw/inc/ethernet/ @aliuTT @ubcheema
 tt_metal/hw/inc/wormhole/eth_l1_address_map.h @aliuTT @ubcheema
 tt_metal/third_party/tt_llk_* @rtawfik01 @ttmtrajkovic @rdjogoTT
+tt_metal/tt_stl/ @patrickroberts @yan-zaretskiy @eyonland @ayerofieiev-tt @dmakoviichuk-tt @sminakov-tt
 
 sfpi/ @pgkeller
 
3 changes: 2 additions & 1 deletion INSTALLING.md
@@ -37,7 +37,8 @@ chmod u+x llvm.sh
 sudo ./llvm.sh 17
 sudo apt install libc++-17-dev libc++abi-17-dev
 ```
-
+- Note: `CMake 3.16` is the targeted required version of `CMake` as it aligns with the default from `Ubuntu 20.04`. Some advanced build configurations like unity builds require `CMake 3.20`.
+- To install `CMake 3.20` see: https://github.com/tenstorrent/tt-metal/blob/4d7730d3e2d22c51d62baa1bfed861b557d9a3c0/dockerfile/ubuntu-20.04-amd64.Dockerfile#L9-L14
 ---
 
 ### Step 3. Hugepages
5 changes: 5 additions & 0 deletions cmake/dependencies.cmake
@@ -58,6 +58,11 @@ endif()
 ############################################################################################################################
 
 CPMAddPackage(NAME reflect GITHUB_REPOSITORY boost-ext/reflect GIT_TAG v1.1.1)
+if(reflect_ADDED)
+    add_library(reflect INTERFACE)
+    add_library(Reflect::Reflect ALIAS reflect)
+    target_include_directories(reflect SYSTEM INTERFACE ${reflect_SOURCE_DIR})
+endif()
 
 ############################################################################################################################
 # magic_enum : https://github.com/Neargye/magic_enum
3 changes: 2 additions & 1 deletion tests/scripts/run_cpp_unit_tests.sh
@@ -19,7 +19,8 @@ if [[ ! -z "$TT_METAL_SLOW_DISPATCH_MODE" ]]; then
 else
     ./build/test/tt_metal/unit_tests_fast_dispatch
     TT_METAL_GTEST_NUM_HW_CQS=2 ./build/test/tt_metal/unit_tests_fast_dispatch_single_chip_multi_queue --gtest_filter=MultiCommandQueueSingleDeviceFixture.*
-    if [[ "$ARCH_NAME" == "wormhole_b0" || "$ARCH_NAME" == "blackhole" ]]; then
+    # Enable this on BH after #14613
+    if [[ "$ARCH_NAME" == "wormhole_b0" ]]; then
         TT_METAL_GTEST_ETH_DISPATCH=1 ./build/test/tt_metal/unit_tests_fast_dispatch
     fi
     env python tests/scripts/run_tt_eager.py --dispatch-mode fast
2 changes: 1 addition & 1 deletion tests/scripts/tt_bisect.sh
@@ -56,7 +56,7 @@ while [[ "$found" = "false" ]]; do
     build_code=0
     echo "at commit `git rev-parse HEAD`"
     echo "building Metal"
-    . build_metal.sh; build_code+=$?
+    ./build_metal.sh --build-tests; build_code+=$?
 
     if [[ $build_code -ne 0 ]]; then
         echo "Build failed"
@@ -0,0 +1,150 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple
from functools import partial

import torch
import random
import ttnn

from tests.sweep_framework.sweep_utils.utils import gen_shapes
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Override the default timeout in seconds for hang detection.
TIMEOUT = 360
random.seed(0)


# Does not have memory_config parameter
parameters = {
    "nightly": {
        "input_shape": gen_shapes([1, 1, 1, 1], [6, 6, 256, 256], [1, 1, 1, 1], 8)
        + gen_shapes([1, 1, 1], [6, 256, 256], [1, 1, 1], 8)
        + gen_shapes([1, 1], [256, 256], [1, 1], 8),
        "repeats": [1, 2, 4, 8],
        "dim": [0, 1, 2, 3],
        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
        "input_a_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
    },
}
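
# The sweep framework expands each suite above into the cross product of its
# parameter lists (the commented-out local-run example at the bottom of this
# file does the same via framework.permutations), so each combination of
# shape, repeats, dim, dtype, layout and memory config becomes one test vector.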


def align_to_32(x):
    if x % 32 == 0:
        return x

    return ((x // 32) + 1) * 32
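
# For illustration, the rounding above behaves as follows:
#   align_to_32(1)  == 32
#   align_to_32(32) == 32
#   align_to_32(33) == 64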


def max_volume(reshape_shape):
    vol = align_to_32(reshape_shape[-1]) * align_to_32(reshape_shape[-2])

    if len(reshape_shape) >= 3:
        vol *= reshape_shape[-3]

    if len(reshape_shape) == 4:
        vol *= reshape_shape[-4]

    return vol
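
# Worked example for a hypothetical shape [2, 3, 100, 50]: the last two dims
# are padded to tile boundaries (100 -> 128, 50 -> 64), so the volume is
# 2 * 3 * 128 * 64 = 49152 elements; with repeats == 8 that totals 393216,
# which fits the 1024 * 1024 element L1 budget checked in invalidate_vector.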


# Invalidate vector is called during the generation phase where each vector will be passed in.
# If invalidated, the vector will still be stored but will be skipped.
# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
    input_shape = test_vector["input_shape"]

    if test_vector["dim"] >= len(input_shape):
        return True, "dim must be < len(input_shape)"

    if (
        test_vector["input_a_memory_config"] == ttnn.L1_MEMORY_CONFIG
        or test_vector["output_memory_config"] == ttnn.L1_MEMORY_CONFIG
    ):
        if max_volume(input_shape) * test_vector["repeats"] > 1024 * 1024:
            return True, "Too large output tensor size for L1 memory config"

    if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
        return True, "bfloat8_b/bfloat4_b requires TILE_LAYOUT!"

    return False, None
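
# For example, a vector with input_shape [6, 256, 256], repeats == 8 and an
# L1 output memory config is skipped: 6 * 256 * 256 * 8 == 3145728 elements,
# which exceeds the 1024 * 1024 budget above.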


def run(
    input_shape,
    repeats,
    dim,
    input_a_dtype,
    input_a_layout,
    input_a_memory_config,
    output_memory_config,
    *,
    device,
) -> list:
    data_seed = random.randint(0, 20000000)
    torch.manual_seed(data_seed)

    # Fix shape for row major
    if input_a_layout == ttnn.ROW_MAJOR_LAYOUT and input_shape[-1] % 2 == 1:
        input_shape[-1] += 1

    torch_input_tensor_a = gen_func_with_cast_tt(
        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
    )(input_shape)

    # print(f"input_shape {input_shape} repeats {repeats} dim {dim} input_a_dtype {input_a_dtype} input_a_layout {input_a_layout}")

    golden_function = ttnn.get_golden_function(ttnn.repeat_interleave)
    torch_output_tensor = golden_function(torch_input_tensor_a, repeats=repeats, dim=dim)

    input_tensor_a = ttnn.from_torch(
        torch_input_tensor_a,
        dtype=input_a_dtype,
        layout=input_a_layout,
        device=device,
        memory_config=input_a_memory_config,
    )

    start_time = start_measuring_time()
    result = ttnn.repeat_interleave(input_tensor_a, repeats=repeats, dim=dim, memory_config=output_memory_config)
    output_tensor = ttnn.to_torch(result)
    e2e_perf = stop_measuring_time(start_time)

    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
    # print(pcc)
    return [pcc, e2e_perf]
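
# As used here, check_with_pcc returns a (passed, message) pair, which is why
# the local-run example below inspects passed[0].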


# # Run sweeps locally
# from tests.sweep_framework.framework.permutations import *

# start_time = start_measuring_time()
# for suite in parameters.keys():
#     device_id = 0
#     device = ttnn.open_device(device_id=device_id)
#     suite_vectors = list(permutations(parameters[suite]))
#     print(len(suite_vectors))
#     for vector in suite_vectors:
#         invalidate_res = invalidate_vector(vector)
#         if invalidate_res[0]:
#             print(f"Invalidated: {invalidate_res[1]}")
#             continue
#         try:
#             passed, _ = run(**vector, device=device)
#             if passed[0] != True:
#                 print(passed)
#         except Exception as e:
#             print(e)

#     ttnn.close_device(device)

# e2e_perf = stop_measuring_time(start_time)
# print(f"time {e2e_perf / 1000000000}s")