Re-organize SLL ops, pt 1 (#3642)
Summary:
Pull Request resolved: #3642

X-link: facebookresearch/FBGEMM#718

- Re-organize SLL ops, pt 1

Differential Revision: D68915217
q10 authored and facebook-github-bot committed Jan 31, 2025
1 parent 8bf568b commit 40574c0
Showing 6 changed files with 795 additions and 743 deletions.
10 changes: 5 additions & 5 deletions fbgemm_gpu/fbgemm_gpu/sll/__init__.py
@@ -46,7 +46,6 @@
     jagged_jagged_bmm,
     jagged_jagged_bmm_jagged_out,
     jagged_softmax,
-    multi_head_jagged_flash_attention,
     triton_jagged_self_substraction_jagged_out,
 )

@@ -326,15 +325,16 @@
         "CUDA": jagged_dense_flash_attention,
         "AutogradCUDA": jagged_dense_flash_attention,
     },
-    "sll_multi_head_jagged_flash_attention": {
-        "CUDA": multi_head_jagged_flash_attention,
-        "AutogradCUDA": multi_head_jagged_flash_attention,
-    },
 }

 for op_name, dispatches in sll_cpu_registrations.items():
     lib.register(op_name, dispatches)

 if torch.cuda.is_available():
+    from fbgemm_gpu.sll.triton import op_registrations
+
+    for op_name, dispatches in op_registrations.items():
+        lib.register(op_name, dispatches)
+
     for op_name, dispatches in sll_gpu_registrations.items():
         lib.register(op_name, dispatches)
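For context, each dispatch dict above maps PyTorch dispatch keys ("CUDA", "AutogradCUDA", ...) to an implementation, and `lib.register` fans the entries out to the dispatcher. A minimal sketch of that pattern, assuming `lib` wraps `torch.library.Library` — the `OpLibrary` class below is illustrative, not the actual fbgemm_gpu helper:

import torch

class OpLibrary:
    """Illustrative stand-in for the registration helper used above."""

    def __init__(self, namespace: str) -> None:
        # "FRAGMENT" appends implementations to an op namespace that may
        # already be defined elsewhere.
        self.lib = torch.library.Library(namespace, "FRAGMENT")

    def register(self, op_name: str, dispatches: dict) -> None:
        # Route each dispatch key ("CPU", "CUDA", "AutogradCUDA", ...)
        # to its implementation for this operator.
        for dispatch_key, fn in dispatches.items():
            self.lib.impl(op_name, fn, dispatch_key)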
21 changes: 21 additions & 0 deletions fbgemm_gpu/fbgemm_gpu/sll/triton/__init__.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+
+from fbgemm_gpu.sll.triton.multi_head_jagged_flash_attention import (  # noqa F401
+    multi_head_jagged_flash_attention,
+    MultiHeadJaggedFlashAttention,
+)
+
+op_registrations = {
+    "sll_multi_head_jagged_flash_attention": {
+        "CUDA": multi_head_jagged_flash_attention,
+        "AutogradCUDA": multi_head_jagged_flash_attention,
+    },
+}
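A hedged usage sketch of what the new subpackage exposes: once `fbgemm_gpu.sll` is imported on a CUDA machine, the op should be reachable both as a plain Python function and through the dispatcher. The `fbgemm` namespace in `torch.ops` below is an assumption based on the package name, not confirmed by this diff:

import torch
import fbgemm_gpu.sll  # runs the registrations shown above at import time

# Direct Python entry point, re-exported by the new subpackage:
from fbgemm_gpu.sll.triton import multi_head_jagged_flash_attention

# Dispatcher entry point -- the "fbgemm" namespace is an assumption:
op = torch.ops.fbgemm.sll_multi_head_jagged_flash_attention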
16 changes: 16 additions & 0 deletions fbgemm_gpu/fbgemm_gpu/sll/triton/common.py
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+
+
+def expect_contiguous(x: torch.Tensor) -> torch.Tensor:
+    if not x.is_contiguous():
+        return x.contiguous()
+    else:
+        return x
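`expect_contiguous` is a small guard for Triton kernels, which generally assume contiguous memory: it copies only when needed, so an already-contiguous tensor passes through without a new allocation. A minimal usage sketch:

import torch
from fbgemm_gpu.sll.triton.common import expect_contiguous

x = torch.randn(4, 8)
t = x.t()  # transposed view -> non-contiguous strides

assert expect_contiguous(x) is x              # already contiguous: no copy
assert expect_contiguous(t).is_contiguous()   # copied into a dense layout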
[Diffs for the remaining 3 changed files are not shown.]
