Skip to content

Commit

Permalink
Break down CMake module further (#3673)
Browse files Browse the repository at this point in the history
Summary:
- Break down `fbgemm_gpu_tbe_training_backward` module further


Differential Revision: D69443056

Pulled By: q10
  • Loading branch information
q10 authored and facebook-github-bot committed Feb 11, 2025
1 parent 718ea47 commit 984dd76
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 30 deletions.
42 changes: 42 additions & 0 deletions fbgemm_gpu/cmake/TbeTraining.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,12 @@ handle_genfiles_rocm(gen_gpu_files_forward_split)
# Training source lists.
# gen_gpu_files_training, gen_gpu_files_training_pt2 and
# gen_gpu_files_training_dense are lists defined in tbe_sources.py; the _pt2 and
# _dense lists are expanded further down as the GPU_SRCS of their own
# gpu_cpp_library() calls.
# NOTE(review): get_tbe_sources_list() and handle_genfiles_rocm() are defined
# outside this view. Presumably the former exposes the named tbe_sources.py
# list as a CMake variable and the latter post-processes generated file lists
# for ROCm builds - confirm against their definitions.
get_tbe_sources_list(static_cpu_files_training)
get_tbe_sources_list(gen_cpu_files_training)
get_tbe_sources_list(gen_gpu_files_training)
get_tbe_sources_list(gen_gpu_files_training_pt2)
get_tbe_sources_list(gen_gpu_files_training_dense)
# static_cpu_files_training is not passed to handle_genfiles_rocm(); only the
# gen_* lists are.
handle_genfiles_rocm(gen_cpu_files_training)
handle_genfiles_rocm(gen_gpu_files_training)
handle_genfiles_rocm(gen_gpu_files_training_pt2)
handle_genfiles_rocm(gen_gpu_files_training_dense)

# Index Select
get_tbe_sources_list(static_cpu_files_index_select)
Expand Down Expand Up @@ -148,6 +152,27 @@ gpu_cpp_library(
DESTINATION
fbgemm_gpu)

# PT2 portion of the TBE training backward sources, declared as its own SHARED
# library instead of being part of fbgemm_gpu_tbe_training_backward.
# Sources come from ${gen_gpu_files_training_pt2}, which tbe_sources.py builds
# from the generated *_pt2_autograd.cpp and *_pt2_cuda_wrapper.cpp file names.
# The same library name is listed in fbgemm_gpu/__init__.py next to the other
# TBE library names, so the two must be kept in sync.
# NOTE(review): every entry of gen_gpu_files_training_pt2 is a .cpp file, yet
# the list is passed as GPU_SRCS together with NVCC_FLAGS - presumably
# gpu_cpp_library() handles that as intended; confirm.
gpu_cpp_library(
PREFIX
fbgemm_gpu_tbe_training_backward_pt2
TYPE
SHARED
INCLUDE_DIRS
${fbgemm_sources_include_directories}
GPU_SRCS
${gen_gpu_files_training_pt2}
NVCC_FLAGS
${TORCH_CUDA_OPTIONS}
DEPS
fbgemm
fbgemm_gpu_config
fbgemm_gpu_tbe_cache
fbgemm_gpu_tbe_common
fbgemm_gpu_tbe_utils
fbgemm_gpu_sparse_async_cumsum
DESTINATION
fbgemm_gpu)

gpu_cpp_library(
PREFIX
fbgemm_gpu_tbe_training_backward
Expand All @@ -174,6 +199,23 @@ gpu_cpp_library(
DESTINATION
fbgemm_gpu)

# Dense portion of the TBE training backward sources, declared as its own
# SHARED library. Sources come from ${gen_gpu_files_training_dense}, which
# tbe_sources.py builds from the gen_embedding_backward_dense_split_* files
# plus gen_embedding_backward_split_dense.cpp.
# The same library name is listed in fbgemm_gpu/__init__.py next to the other
# TBE library names, so the two must be kept in sync.
# NOTE(review): the only declared dependency is
# fbgemm_gpu_tbe_training_backward, whereas
# fbgemm_gpu_tbe_training_backward_pt2 lists fbgemm, fbgemm_gpu_config and the
# tbe_* libraries explicitly. Presumably those reach this target transitively
# through fbgemm_gpu_tbe_training_backward - confirm.
gpu_cpp_library(
PREFIX
fbgemm_gpu_tbe_training_backward_dense
TYPE
SHARED
INCLUDE_DIRS
${fbgemm_sources_include_directories}
GPU_SRCS
${gen_gpu_files_training_dense}
NVCC_FLAGS
${TORCH_CUDA_OPTIONS}
DEPS
fbgemm_gpu_tbe_training_backward
DESTINATION
fbgemm_gpu)


gpu_cpp_library(
PREFIX
fbgemm_gpu_tbe_index_select
Expand Down
63 changes: 33 additions & 30 deletions fbgemm_gpu/cmake/tbe_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,20 +345,42 @@
]
)

gen_gpu_files_training = (
gen_gpu_files_training_pt2 = (
[
"gen_embedding_backward_split_grad_embedding_ops.cu",
"gen_embedding_split_{}_pt2_autograd.cpp".format(optimizer)
for optimizer in ALL_OPTIMIZERS
]
+ [
# Dense host and kernel, and forward-quantized host src files
fstring.format(wdesc)
for wdesc in WEIGHT_OPTIONS
for fstring in [
"gen_embedding_backward_dense_split_{}_cuda.cu",
"gen_embedding_backward_dense_split_{}_meta.cpp",
"gen_embedding_backward_dense_split_{}_kernel_cta.cu",
"gen_embedding_backward_dense_split_{}_kernel_warp.cu",
]
"gen_embedding_ssd_{}_pt2_autograd.cpp".format(optimizer)
for optimizer in SSD_OPTIMIZERS
]
+ [
"gen_embedding_backward_split_{}_pt2_cuda_wrapper.cpp".format(optimizer)
for optimizer in ALL_OPTIMIZERS
]
+ [
"gen_embedding_backward_ssd_{}_pt2_cuda_wrapper.cpp".format(optimizer)
for optimizer in SSD_OPTIMIZERS
]
)

# Generated sources for the dense backward pass. For every entry of
# WEIGHT_OPTIONS (outer loop) the four per-weight file names are emitted in the
# order cuda, meta, kernel_cta, kernel_warp; the single weight-independent
# gen_embedding_backward_split_dense.cpp is appended last.
gen_gpu_files_training_dense = [
    template.format(weight_kind)
    for weight_kind in WEIGHT_OPTIONS
    for template in (
        "gen_embedding_backward_dense_split_{}_cuda.cu",
        "gen_embedding_backward_dense_split_{}_meta.cpp",
        "gen_embedding_backward_dense_split_{}_kernel_cta.cu",
        "gen_embedding_backward_dense_split_{}_kernel_warp.cu",
    )
]
gen_gpu_files_training_dense.append("gen_embedding_backward_split_dense.cpp")

gen_gpu_files_training = (
[
"gen_embedding_backward_split_grad_embedding_ops.cu",
]
+ [
# Backward-split positional weights and forward src files
Expand Down Expand Up @@ -447,14 +469,6 @@
"gen_embedding_backward_ssd_{}.cpp".format(optimizer)
for optimizer in SSD_OPTIMIZERS
]
+ [
"gen_embedding_split_{}_pt2_autograd.cpp".format(optimizer)
for optimizer in ALL_OPTIMIZERS
]
+ [
"gen_embedding_ssd_{}_pt2_autograd.cpp".format(optimizer)
for optimizer in SSD_OPTIMIZERS
]
+ [
"gen_embedding_backward_{}_split_{}_meta.cpp".format(optimizer, wdesc)
for optimizer in GPU_OPTIMIZERS
Expand All @@ -463,17 +477,6 @@
"unweighted",
]
]
+ [
"gen_embedding_backward_split_{}_pt2_cuda_wrapper.cpp".format(optimizer)
for optimizer in ALL_OPTIMIZERS
]
+ [
"gen_embedding_backward_ssd_{}_pt2_cuda_wrapper.cpp".format(optimizer)
for optimizer in SSD_OPTIMIZERS
]
+ [
"gen_embedding_backward_split_dense.cpp",
]
)

gen_hip_files_training = [
Expand Down
2 changes: 2 additions & 0 deletions fbgemm_gpu/fbgemm_gpu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def _load_library(filename: str) -> None:
"fbgemm_gpu_tbe_inference",
"fbgemm_gpu_tbe_training_forward",
"fbgemm_gpu_tbe_training_backward",
"fbgemm_gpu_tbe_training_backward_pt2",
"fbgemm_gpu_tbe_training_backward_dense",
"fbgemm_gpu_py",
]

Expand Down

0 comments on commit 984dd76

Please sign in to comment.