From 7eedb19f60eeb19e3c7d3d6038e8e691b8ba9b9a Mon Sep 17 00:00:00 2001
From: Chris Cai
Date: Wed, 4 Dec 2024 00:52:14 -0800
Subject: [PATCH] Fix VBE+SSD path with StagedPipeline

Summary:
When SSD offloading is configured and VBE is used, as in the Jupiter XL case, we
call into

```
if sparse_features.variable_stride_per_key() and len(embeddings) > 1:
    embeddings = self._merge_variable_batch_embeddings(embeddings, vbe_splits)
```

and hit the error:

```
  File "/mnt/xarfuse/uid-179947/2baf53ce-seed-nspid4026531836_cgpid5102251-ns-4026531841/torchrec/distributed/embedding_lookup.py", line 530, in
    split_embs = [e.split(s) for e, s in zip(embeddings, splits)]
  File "/mnt/xarfuse/uid-179947/2baf53ce-seed-nspid4026531836_cgpid5102251-ns-4026531841/torch/_tensor.py", line 1028, in split
    return torch._VF.split_with_sizes(self, split_size, dim)
RuntimeError: split_with_sizes expects split_sizes to sum exactly to 16 (input tensor's size at dimension 0), but got split_sizes=[4096]
```

We need to make sure we pass batch_size_per_feature_per_rank for the
SSDTableBatchedEmbeddingBags case as well; otherwise the embedding output does
not have the expected flattened 1-D shape. (An illustrative sketch of this
failure mode follows the patch.)

Reviewed By: dstaay-fb, sryap

Differential Revision: D66647146
---
 torchrec/distributed/batched_embedding_kernel.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchrec/distributed/batched_embedding_kernel.py b/torchrec/distributed/batched_embedding_kernel.py
index e0b54aed5..c9b44581a 100644
--- a/torchrec/distributed/batched_embedding_kernel.py
+++ b/torchrec/distributed/batched_embedding_kernel.py
@@ -1166,6 +1166,7 @@ def forward(self, features: KeyedJaggedTensor) -> torch.Tensor:
             (
                 SplitTableBatchedEmbeddingBagsCodegen,
                 DenseTableBatchedEmbeddingBagsCodegen,
+                SSDTableBatchedEmbeddingBags,
             ),
         ):
             return self.emb_module(
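
Below is a minimal, self-contained sketch of the failure mode described in the
summary, not the actual torchrec code path. The embedding dimension, per-rank
batch sizes, and the `vbe_splits` value are illustrative assumptions chosen so
the shapes match the error above: the VBE merge path splits along dim 0 using
flattened element counts, which only works when the kernel output is 1-D.

```python
# Illustrative sketch (assumed numbers, not from torchrec): why split_with_sizes
# fails on a 2-D kernel output but succeeds on the flattened 1-D VBE layout.
import torch

emb_dim = 256
# Hypothetical per-rank variable batch sizes for one feature: 6 + 10 = 16 rows.
batch_size_per_feature_per_rank = [[6], [10]]

# Without batch_size_per_feature_per_rank, the kernel returns a [16, 256] tensor.
output_2d = torch.randn(16, emb_dim)
# The VBE merge path splits by flattened element counts: 6*256 + 10*256 = 4096.
vbe_splits = [6 * emb_dim + 10 * emb_dim]

try:
    output_2d.split(vbe_splits, dim=0)
except RuntimeError as e:
    # split_with_sizes expects the sizes to sum to 16 (dim 0), not 4096.
    print(f"2-D output fails: {e}")

# With batch_size_per_feature_per_rank forwarded to the kernel, the output is
# the flattened 1-D VBE layout, and the same splits succeed.
output_1d = output_2d.flatten()
(chunk,) = output_1d.split(vbe_splits, dim=0)
print(chunk.shape)  # torch.Size([4096])
```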