[fbgemm_gpu] Break down CMake module further, pt 2
Break down `fbgemm_gpu_tbe_training_backward` module further, pt 2
q10 committed Feb 12, 2025
1 parent d8e07ce commit 682966f
Showing 4 changed files with 68 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/utils_cuda.bash
@@ -192,7 +192,7 @@ install_cuda () {
   # in the future, we will be using conda-forge for installing all CUDA versions
   # (except for versions 11.8 and below, which are only available through
   # nvidia/label/cuda-*)
-  if [[ "$BUILD_CUDA_VERSION" =~ ^12.6.*$ ]]; then
+  if [[ "$cuda_version" =~ ^12.6.*$ ]]; then
     # shellcheck disable=SC2086
     (exec_with_retries 3 conda install --force-reinstall ${env_prefix} -c conda-forge --override-channels -y \
       cuda=${cuda_version}) || return 1
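The one-line fix above swaps the outer $BUILD_CUDA_VERSION environment variable for the function-local cuda_version, so the conda-forge branch now keys off the version argument actually passed to install_cuda. On what the pattern accepts: in a bash ERE the unescaped dot matches any character. A minimal Python sketch of the same check (the sample version strings are hypothetical):

    import re

    # Same pattern as the bash test [[ "$cuda_version" =~ ^12.6.*$ ]].
    # The unescaped "." matches any character, so "12x6" also passes.
    pattern = re.compile(r"^12.6.*$")

    for version in ["12.6", "12.6.3", "12x6", "11.8"]:
        print(version, bool(pattern.match(version)))
    # 12.6 True, 12.6.3 True, 12x6 True, 11.8 False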
16 changes: 16 additions & 0 deletions fbgemm_gpu/cmake/TbeTraining.cmake
@@ -28,10 +28,12 @@ get_tbe_sources_list(gen_cpu_files_training)
 get_tbe_sources_list(gen_gpu_files_training)
 get_tbe_sources_list(gen_gpu_files_training_pt2)
 get_tbe_sources_list(gen_gpu_files_training_dense)
+get_tbe_sources_list(gen_gpu_files_training_split_host)
 handle_genfiles_rocm(gen_cpu_files_training)
 handle_genfiles_rocm(gen_gpu_files_training)
 handle_genfiles_rocm(gen_gpu_files_training_pt2)
 handle_genfiles_rocm(gen_gpu_files_training_dense)
+handle_genfiles_rocm(gen_gpu_files_training_split_host)
 
 # Index Select
 get_tbe_sources_list(static_cpu_files_index_select)
@@ -119,6 +121,7 @@ gpu_cpp_library(
     ${TORCH_CUDA_OPTIONS}
   DEPS
     fbgemm
+    fbgemm_gpu_config
   DESTINATION
     fbgemm_gpu)
 
@@ -215,6 +218,19 @@
   DESTINATION
     fbgemm_gpu)
 
+gpu_cpp_library(
+  PREFIX
+    fbgemm_gpu_tbe_training_backward_split_host
+  TYPE
+    SHARED
+  INCLUDE_DIRS
+    ${fbgemm_sources_include_directories}
+  GPU_SRCS
+    ${gen_gpu_files_training_split_host}
+  NVCC_FLAGS
+    ${TORCH_CUDA_OPTIONS}
+  DESTINATION
+    fbgemm_gpu)
 
 gpu_cpp_library(
   PREFIX
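The new gpu_cpp_library target builds the generated split-host backward sources into their own shared library, fbgemm_gpu_tbe_training_backward_split_host, rather than folding them into the monolithic fbgemm_gpu_tbe_training_backward module. A hypothetical post-install check, assuming DESTINATION fbgemm_gpu places the artifact in the Python package directory (the exact file suffix is platform-dependent):

    # Hypothetical check: list the TBE backward shared libraries that landed
    # in the installed fbgemm_gpu package directory. Assumes the .so files sit
    # next to __init__.py, which the DESTINATION above suggests but does not
    # guarantee on every platform.
    import os
    import fbgemm_gpu

    pkg_dir = os.path.dirname(fbgemm_gpu.__file__)
    for name in sorted(os.listdir(pkg_dir)):
        if "tbe_training_backward" in name:
            print(name)  # expect ..._split_host.so among the results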
78 changes: 50 additions & 28 deletions fbgemm_gpu/cmake/tbe_sources.py
@@ -364,19 +364,41 @@
     ]
 )
 
-gen_gpu_files_training_dense = [
-    # Dense host and kernel, and forward-quantized host src files
-    fstring.format(wdesc)
-    for wdesc in WEIGHT_OPTIONS
-    for fstring in [
-        "gen_embedding_backward_dense_split_{}_cuda.cu",
-        "gen_embedding_backward_dense_split_{}_meta.cpp",
-        "gen_embedding_backward_dense_split_{}_kernel_cta.cu",
-        "gen_embedding_backward_dense_split_{}_kernel_warp.cu",
-    ]
-] + [
-    "gen_embedding_backward_split_dense.cpp",
-]
+gen_gpu_files_training_dense = (
+    [
+        # Dense host and kernel, and forward-quantized host src files
+        fstring.format(wdesc)
+        for wdesc in WEIGHT_OPTIONS
+        for fstring in [
+            "gen_embedding_backward_dense_split_{}_cuda.cu",
+            "gen_embedding_backward_dense_split_{}_meta.cpp",
+            "gen_embedding_backward_dense_split_{}_kernel_cta.cu",
+            "gen_embedding_backward_dense_split_{}_kernel_warp.cu",
+        ]
+    ]
+
[GitHub Actions / run-lint (3.13): W291 trailing whitespace (line 378)]
+    + [
+        "gen_embedding_backward_split_dense.cpp",
+    ]
+)
 
+gen_gpu_files_training_split_host = (
+    [
+        "gen_embedding_backward_split_{}.cpp".format(optimizer)
+        for optimizer in ALL_OPTIMIZERS
+    ]
+    + [
+        "gen_embedding_backward_ssd_{}.cpp".format(optimizer)
+        for optimizer in SSD_OPTIMIZERS
+    ]
+    + [
+        "gen_embedding_backward_{}_split_{}_meta.cpp".format(optimizer, wdesc)
+        for optimizer in GPU_OPTIMIZERS
+        for wdesc in [
+            "weighted",
+            "unweighted",
+        ]
+    ]
+
[GitHub Actions / run-lint (3.13): W291 trailing whitespace (line 400)]
+)
 
 gen_gpu_files_training = (
     [
@@ -461,22 +483,22 @@
             else []
         )
     ]
-    + [
-        "gen_embedding_backward_split_{}.cpp".format(optimizer)
-        for optimizer in ALL_OPTIMIZERS
-    ]
-    + [
-        "gen_embedding_backward_ssd_{}.cpp".format(optimizer)
-        for optimizer in SSD_OPTIMIZERS
-    ]
-    + [
-        "gen_embedding_backward_{}_split_{}_meta.cpp".format(optimizer, wdesc)
-        for optimizer in GPU_OPTIMIZERS
-        for wdesc in [
-            "weighted",
-            "unweighted",
-        ]
-    ]
+    # + [
+    #     "gen_embedding_backward_split_{}.cpp".format(optimizer)
+    #     for optimizer in ALL_OPTIMIZERS
+    # ]
+    # + [
+    #     "gen_embedding_backward_ssd_{}.cpp".format(optimizer)
+    #     for optimizer in SSD_OPTIMIZERS
+    # ]
+    # + [
+    #     "gen_embedding_backward_{}_split_{}_meta.cpp".format(optimizer, wdesc)
+    #     for optimizer in GPU_OPTIMIZERS
+    #     for wdesc in [
+    #         "weighted",
+    #         "unweighted",
+    #     ]
+    # ]
 )
 
 gen_hip_files_training = [
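In effect, the split-embedding host sources are lifted out of gen_gpu_files_training (left behind only as a commented-out reference) and into the new gen_gpu_files_training_split_host list. A minimal sketch of how those comprehensions expand, using hypothetical shortened optimizer tables in place of the much longer ALL_OPTIMIZERS / SSD_OPTIMIZERS / GPU_OPTIMIZERS defined in tbe_sources.py:

    # Hypothetical, shortened optimizer tables; the real lists in
    # tbe_sources.py are much longer.
    ALL_OPTIMIZERS = ["sgd", "adagrad"]
    SSD_OPTIMIZERS = ["rowwise_adagrad"]
    GPU_OPTIMIZERS = ["adagrad"]

    gen_gpu_files_training_split_host = (
        [
            "gen_embedding_backward_split_{}.cpp".format(optimizer)
            for optimizer in ALL_OPTIMIZERS
        ]
        + [
            "gen_embedding_backward_ssd_{}.cpp".format(optimizer)
            for optimizer in SSD_OPTIMIZERS
        ]
        + [
            "gen_embedding_backward_{}_split_{}_meta.cpp".format(optimizer, wdesc)
            for optimizer in GPU_OPTIMIZERS
            for wdesc in ["weighted", "unweighted"]
        ]
    )

    print(gen_gpu_files_training_split_host)
    # yields:
    # ['gen_embedding_backward_split_sgd.cpp',
    #  'gen_embedding_backward_split_adagrad.cpp',
    #  'gen_embedding_backward_ssd_rowwise_adagrad.cpp',
    #  'gen_embedding_backward_adagrad_split_weighted_meta.cpp',
    #  'gen_embedding_backward_adagrad_split_unweighted_meta.cpp']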
1 change: 1 addition & 0 deletions fbgemm_gpu/fbgemm_gpu/__init__.py
@@ -45,6 +45,7 @@ def _load_library(filename: str) -> None:
     "fbgemm_gpu_tbe_training_backward",
     "fbgemm_gpu_tbe_training_backward_pt2",
     "fbgemm_gpu_tbe_training_backward_dense",
+    "fbgemm_gpu_tbe_training_backward_split_host",
     "fbgemm_gpu_py",
 ]
 
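With the new library name registered, the package loads the split-host module alongside the other TBE backward pieces at import time. A minimal sketch of what that loading might look like, assuming _load_library wraps torch.ops.load_library (the real implementation in fbgemm_gpu/__init__.py may add error handling and platform-specific suffixes):

    import os
    import torch

    def _load_library(filename: str) -> None:
        # Assumption: operators are registered by dlopen-ing the shared
        # library through torch.ops.load_library; the path layout here is
        # illustrative.
        torch.ops.load_library(
            os.path.join(os.path.dirname(__file__), filename)
        )

    for library in [
        "fbgemm_gpu_tbe_training_backward",
        "fbgemm_gpu_tbe_training_backward_pt2",
        "fbgemm_gpu_tbe_training_backward_dense",
        "fbgemm_gpu_tbe_training_backward_split_host",  # added by this commit
        "fbgemm_gpu_py",
    ]:
        _load_library(library + ".so")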
