forked from tensorflow/mlir
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This tool allows executing MLIR IR snippets written in the GPU dialect on a CUDA-capable GPU. For this to work, a working CUDA install is required and the build has to be configured with MLIR_CUDA_RUNNER_ENABLED set to 1. PiperOrigin-RevId: 256551415
- Loading branch information
Showing
15 changed files
with
428 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// RUN: mlir-cuda-runner %s --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext | FileCheck %s | ||
|
||
// Launches a GPU kernel with a single block whose x-dimension matches the
// memref's size, so each thread stores %arg0 into one element of %arg1.
func @other_func(%arg0 : f32, %arg1 : memref<?xf32>) {
  %one = constant 1 : index
  %size = dim %arg1, 0 : memref<?xf32>
  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
             threads(%tx, %ty, %tz) in (%block_x = %size, %block_y = %one, %block_z = %one)
             args(%val = %arg0, %buf = %arg1) : f32, memref<?xf32> {
    store %val, %buf[%tx] : memref<?xf32>
    gpu.return
  }
  return
}
|
||
// CHECK: [1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00]
// Entry point: allocates a 5-element buffer, page-locks it for the CUDA
// driver, then fills it with 1.0 on the GPU and prints it before and after.
func @main() {
  %buffer = alloc() : memref<5xf32>
  %zero = constant 0 : i32
  %five = constant 5 : i32
  %dynamic = memref_cast %buffer : memref<5xf32> to memref<?xf32>
  call @mcuMemHostRegister(%dynamic, %zero) : (memref<?xf32>, i32) -> ()
  call @mcuPrintFloat(%dynamic) : (memref<?xf32>) -> ()
  %value = constant 1.0 : f32
  call @other_func(%value, %dynamic) : (f32, memref<?xf32>) -> ()
  call @mcuPrintFloat(%dynamic) : (memref<?xf32>) -> ()
  return
}
|
||
// Declarations of runtime helpers implemented in C++ (cuda-runtime-wrappers)
// and resolved at execution time via the --shared-libs flag on the RUN line.
func @mcuMemHostRegister(%ptr : memref<?xf32>, %flags : i32)
func @mcuPrintFloat(%ptr : memref<?xf32>)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Mark every test in this directory as unsupported unless the build was
# configured with the CUDA runner enabled (MLIR_CUDA_RUNNER_ENABLED).
if not config.enable_cuda_runner:
    config.unsupported = True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# Sources that are only built when the CUDA runner is enabled.
set(LLVM_OPTIONAL_SOURCES
  cuda-runtime-wrappers.cpp
  mlir-cuda-runner.cpp
)

if(MLIR_CUDA_RUNNER_ENABLED)
  # The runner JIT-compiles PTX, so the NVPTX backend must be built.
  if (NOT ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD))
    message(SEND_ERROR
      "Building the mlir cuda runner requires the NVPTX backend")
  endif()

  # Configure CUDA runner support. Using check_language first allows us to give
  # a custom error message.
  include(CheckLanguage)
  check_language(CUDA)
  if (CMAKE_CUDA_COMPILER)
    enable_language(CUDA)
  else()
    message(SEND_ERROR
      "Building the mlir cuda runner requires a working CUDA install")
  endif()

  # We need the libcuda.so library.
  find_library(CUDA_RUNTIME_LIBRARY cuda)

  add_llvm_library(cuda-runtime-wrappers SHARED
    cuda-runtime-wrappers.cpp
  )
  # NOTE(review): the original listed LLVMSupport here as well, but
  # target_include_directories() takes include paths, not library targets;
  # LLVMSupport is (correctly) linked below via target_link_libraries().
  target_include_directories(cuda-runtime-wrappers
    PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
  )
  target_link_libraries(cuda-runtime-wrappers
    LLVMSupport
    ${CUDA_RUNTIME_LIBRARY}
  )

  # Libraries that must be linked whole-archive so their pass/dialect
  # registrations are not dropped by the linker.
  set(FULL_LINK_LIBS
    MLIRAffineOps
    MLIRGPU
    MLIRGPUtoCUDATransforms
    MLIRGPUtoNVVMTransforms
    MLIRLLVMIR
    MLIRStandardOps
    MLIRStandardToLLVM
    MLIRTargetLLVMIR
    MLIRTransforms
    MLIRTranslation
  )
  set(LIBS
    MLIRIR
    MLIRParser
    MLIREDSC
    MLIRAnalysis
    MLIRCPURunnerLib
    MLIRExecutionEngine
    MLIRSupport
    LLVMCore
    LLVMSupport
    ${CUDA_RUNTIME_LIBRARY}
  )
  add_llvm_executable(mlir-cuda-runner
    mlir-cuda-runner.cpp
  )
  # The runner loads the wrapper library at test time, so make sure it exists.
  add_dependencies(mlir-cuda-runner cuda-runtime-wrappers)
  target_include_directories(mlir-cuda-runner
    PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
  )
  llvm_update_compile_flags(mlir-cuda-runner)
  whole_archive_link(mlir-cuda-runner ${FULL_LINK_LIBS})
  target_link_libraries(mlir-cuda-runner PRIVATE ${FULL_LINK_LIBS} ${LIBS})

endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
//===- cuda-runtime-wrappers.cpp - MLIR CUDA runner wrapper library -------===// | ||
// | ||
// Copyright 2019 The MLIR Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
// ============================================================================= | ||
// | ||
// Implements C wrappers around the CUDA library for easy linking in ORC jit. | ||
// Also adds some debugging helpers that are helpful when writing MLIR code to | ||
// run on GPUs. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <assert.h> | ||
#include <memory.h> | ||
|
||
#include "llvm/Support/raw_ostream.h" | ||
|
||
#include "cuda.h" | ||
|
||
namespace { | ||
int32_t reportErrorIfAny(CUresult result, const char *where) { | ||
if (result != CUDA_SUCCESS) { | ||
llvm::errs() << "CUDA failed with " << result << " in " << where << "\n"; | ||
} | ||
return result; | ||
} | ||
} // anonymous namespace | ||
|
||
extern "C" int32_t mcuModuleLoad(void **module, void *data) { | ||
int32_t err = reportErrorIfAny( | ||
cuModuleLoadData(reinterpret_cast<CUmodule *>(module), data), | ||
"ModuleLoad"); | ||
return err; | ||
} | ||
|
||
extern "C" int32_t mcuModuleGetFunction(void **function, void *module, | ||
const char *name) { | ||
return reportErrorIfAny( | ||
cuModuleGetFunction(reinterpret_cast<CUfunction *>(function), | ||
reinterpret_cast<CUmodule>(module), name), | ||
"GetFunction"); | ||
} | ||
|
||
// The wrapper uses intptr_t instead of CUDA's unsigned int to match | ||
// the type of MLIR's index type. This avoids the need for casts in the | ||
// generated MLIR code. | ||
extern "C" int32_t mcuLaunchKernel(void *function, intptr_t gridX, | ||
intptr_t gridY, intptr_t gridZ, | ||
intptr_t blockX, intptr_t blockY, | ||
intptr_t blockZ, int32_t smem, void *stream, | ||
void **params, void **extra) { | ||
return reportErrorIfAny( | ||
cuLaunchKernel(reinterpret_cast<CUfunction>(function), gridX, gridY, | ||
gridZ, blockX, blockY, blockZ, smem, | ||
reinterpret_cast<CUstream>(stream), params, extra), | ||
"LaunchKernel"); | ||
} | ||
|
||
/// Creates a CUDA stream and returns it as an opaque pointer. On failure the
/// error is reported to stderr and nullptr is returned.
extern "C" void *mcuGetStreamHelper() {
  // Initialize so a failed cuStreamCreate cannot hand an uninitialized
  // pointer back to the caller (the original left `stream` indeterminate).
  CUstream stream = nullptr;
  reportErrorIfAny(cuStreamCreate(&stream, CU_STREAM_DEFAULT), "StreamCreate");
  return stream;
}
|
||
extern "C" int32_t mcuStreamSynchronize(void *stream) { | ||
return reportErrorIfAny( | ||
cuStreamSynchronize(reinterpret_cast<CUstream>(stream)), "StreamSync"); | ||
} | ||
|
||
/// Helper functions for writing mlir example code | ||
|
||
// A struct that corresponds to how MLIR represents unknown-length 1d memrefs.
// NOTE(review): this must stay layout-compatible with the descriptor the MLIR
// lowering passes across the C ABI — do not reorder or add members.
struct memref_t {
  float *values;   // pointer to the first element
  intptr_t length; // number of float elements (not bytes; see mcuPrintFloat)
};
|
||
// Allows to register a pointer with the CUDA runtime. Helpful until | ||
// we have transfer functions implemented. | ||
extern "C" void mcuMemHostRegister(const memref_t arg, int32_t flags) { | ||
reportErrorIfAny(cuMemHostRegister(arg.values, arg.length, flags), | ||
"MemHostRegister"); | ||
} | ||
|
||
/// Prints the given float array to stdout as "[v0, v1, ...]" (or "[]" when
/// empty). The original comment claimed stderr, but llvm::outs() is stdout —
/// which is what the FileCheck-based tests capture.
extern "C" void mcuPrintFloat(const memref_t arg) {
  if (arg.length == 0) {
    llvm::outs() << "[]\n";
    return;
  }
  llvm::outs() << "[" << arg.values[0];
  // Index with intptr_t to match arg.length's type; the original `int` index
  // mixed signed widths and would wrap on very large memrefs.
  for (intptr_t pos = 1; pos < arg.length; pos++) {
    llvm::outs() << ", " << arg.values[pos];
  }
  llvm::outs() << "]\n";
}
Oops, something went wrong.