Update ci build script #25

Merged (13 commits) on Dec 11, 2024

Changes from all commits
286 changes: 214 additions & 72 deletions .github/workflows/build-ci.sh

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion .github/workflows/build_and_test.yml
@@ -3,6 +3,8 @@ run-name: 'Build and Test: ${{ github.event.head_commit.message }}'
 on:
   workflow_dispatch:
   push:
+  pull_request:
+    types: [opened, reopened]
 jobs:
   main:
     name: Build and test
@@ -32,7 +34,7 @@ jobs:
           key: cinnamon-dependencies-${{ runner.os }}

       - name: Build
-        run: .github/workflows/build-ci.sh
+        run: .github/workflows/build-ci.sh -reconfigure

       - name: Test
         working-directory: cinnamon/build
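With these changes the workflow also runs when a pull request is opened or reopened, in addition to manual dispatch and pushes, and the build step now passes a -reconfigure flag to the rewritten build-ci.sh. The flag's behaviour is defined in that script, whose large diff is not rendered above.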
6 changes: 3 additions & 3 deletions .gitignore
@@ -2,6 +2,6 @@
 .vscode
 .directory
 .venv
-llvm
-torch-mlir
-upmem
+/llvm
+/torch-mlir
+/upmem
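The leading slash anchors each pattern to the repository root, so only the top-level llvm, torch-mlir, and upmem directories (presumably the locally checked-out dependencies) are ignored, rather than any directory of that name anywhere in the tree.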
43 changes: 0 additions & 43 deletions build.sh

This file was deleted.

@@ -84,7 +84,7 @@ Value createArithIntOrFloatOp(OpBuilder &builder, Location loc, Value a,
                              Value b) {
  assert(a.getType() == b.getType() && "Mismatched type");
  assert(a.getType().isIntOrIndexOrFloat() && "Expected scalar type");
-  if (a.getType().isa<IntegerType>()) {
+  if (isa<IntegerType>(a.getType())) {
    return builder.create<IntOp>(loc, a, b);
  } else {
    return builder.create<FloatOp>(loc, a, b);
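Most of the C++ changes in this PR are the same mechanical migration: the member-style casting helpers on MLIR types and values (x.isa<T>(), x.cast<T>(), x.dyn_cast<T>()) are deprecated upstream in favour of the free functions isa<T>(x), cast<T>(x), and dyn_cast<T>(x). A minimal sketch of the two spellings; the helper below is hypothetical and only illustrates the pattern, it is not part of this repository:

#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Value.h"

using namespace mlir;

// Hypothetical helper: return the element type of shaped values,
// otherwise the value's own type.
static Type getScalarType(Value v) {
  // Old, deprecated member form:
  //   if (v.getType().isa<ShapedType>())
  //     return v.getType().cast<ShapedType>().getElementType();
  // New free-function form, as used throughout this PR:
  if (auto shaped = dyn_cast<ShapedType>(v.getType()))
    return shaped.getElementType();
  return v.getType();
}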
4 changes: 2 additions & 2 deletions cinnamon/include/cinm-mlir/Dialect/UPMEM/IR/UPMEMOps.td
@@ -33,7 +33,7 @@ include "mlir/IR/RegionKindInterface.td"

class UPMEM_IndexOp<string mnemonic, list<Trait> traits = []> :
    UPMEM_Op<mnemonic, !listconcat(traits, [
-      Pure, DeclareOpInterfaceMethods<InferIntRangeInterface>])>,
+      Pure, DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>])>,
    Results<(outs Index)> {
  let assemblyFormat = "attr-dict";
}
@@ -128,7 +128,7 @@ def UPMEM_PrivateWRAMAllocOp : UPMEM_Op<"pwram_alloc", [
def UPMEM_LaunchOp : UPMEM_Op<"launch", [
    AutomaticAllocationScope, AttrSizedOperandSegments, UPMEM_AsyncOpInterface,
    IsolatedFromAbove,
-    DeclareOpInterfaceMethods<InferIntRangeInterface>]> {
+    DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>]> {
  let summary = "UPMEM kernel launch operation";

  let arguments = (ins
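The extra template argument appears to reflect a change in recent MLIR: InferIntRangeInterface also has an inferResultRangesFromOptional hook with a default implementation, so DeclareOpInterfaceMethods only re-declares inferResultRanges when it is listed explicitly. The ops keep providing that method in C++, roughly along these lines (a sketch assuming the upstream signature; the op name and the concrete range are placeholders):

void MyIndexOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges,
                                  SetIntRangeFn setResultRange) {
  // Placeholder: report that the produced index lies in [0, 15].
  setResultRange(getResult(),
                 ConstantIntRanges::fromUnsigned(APInt(64, 0), APInt(64, 15)));
}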
4 changes: 2 additions & 2 deletions cinnamon/lib/Conversion/CimToMemristor/CimToMemristor.cpp
@@ -40,7 +40,7 @@ struct ConvertCimOpToMemristor : OpConversionPattern<CimOp> {
                  ConversionPatternRewriter &rewriter) const override {

    auto tileId = op.getOperand(0);
-    auto resultShape = op.getResult().getType().template cast<ShapedType>();
+    auto resultShape = cast<ShapedType>(op.getResult().getType());

    auto resultAllocOp = rewriter.create<bufferization::AllocTensorOp>(
        op.getLoc(),
@@ -49,7 +49,7 @@ struct ConvertCimOpToMemristor : OpConversionPattern<CimOp> {
        ValueRange{});

    auto createBufferizeOp = [&](Value value) {
-      auto shapedType = value.getType().cast<ShapedType>();
+      auto shapedType = cast<ShapedType>(value.getType());
      return rewriter.create<bufferization::ToMemrefOp>(
          op.getLoc(),
          MemRefType::get(shapedType.getShape(), shapedType.getElementType()),
2 changes: 1 addition & 1 deletion cinnamon/lib/Conversion/CinmToCim/CinmToCim.cpp
@@ -32,7 +32,7 @@ namespace {
// Creates the specified type for a value with correct shape and element type
// Condition: The value must be shaped type
template <typename T> static T getShapedType(Value value) {
-  auto shapedType = value.getType().cast<ShapedType>();
+  auto shapedType = cast<ShapedType>(value.getType());
  return T::get(shapedType.getShape(), shapedType.getElementType());
}
28 changes: 13 additions & 15 deletions cinnamon/lib/Conversion/CinmToCnm/CinmToCnm.cpp
@@ -298,14 +298,14 @@ LogicalResult convertInputIntoAlloc(Location loc, Value &inputBuf,
  // For each input of the reduce, we need to

  // convert single element to tensor<1xelementTy>
-  if (!inputBuf.getType().dyn_cast<RankedTensorType>()) {
+  if (!isa<RankedTensorType>(inputBuf.getType())) {
    inputBuf = rewriter.create<tensor::FromElementsOp>(
        RankedTensorType::get(SmallVector<int64_t>(wgTy.getShape().size(), 1),
                              inputBuf.getType()),
        ValueRange{inputBuf});
  }

-  auto inputType = inputBuf.getType().cast<RankedTensorType>();
+  auto inputType = cast<RankedTensorType>(inputBuf.getType());

  llvm::SmallVector<int64_t, 1> shapeOfBuffer;
  std::optional<SmallVector<int64_t>> reshapeInto;
@@ -318,9 +318,9 @@
    return failure();

  if (reshapeInto) {
-    inputBuf =
-        cinm::reshapeStatic(rewriter, rewriter.getLoc(), inputBuf,
-                            inputType.cast<RankedTensorType>(), *reshapeInto);
+    inputBuf = cinm::reshapeStatic(rewriter, rewriter.getLoc(), inputBuf,
+                                   cast<RankedTensorType>(inputType),
+                                   *reshapeInto);
  }

  // Allocate a cinm buffer
@@ -350,7 +350,7 @@ cnm::LaunchOp createLaunchOp(
  auto &launchBlock = launchOp.getBody().emplaceBlock();
  // arguments are memrefs with same shape as inputs
  for (auto input : launchOp.getParams()) {
-    if (auto inputTy = input.getType().dyn_cast<cnm::BufferType>()) {
+    if (auto inputTy = dyn_cast<cnm::BufferType>(input.getType())) {
      auto mappedTy =
          MemRefType::get(inputTy.getShape(), inputTy.getElementType());
      launchBlock.addArgument(mappedTy, input.getLoc());
@@ -428,8 +428,8 @@ LogicalResult convertCinmToCnm(
    auto res = builder.create<cnm::GatherOp>(alloc, workgroup, map, outBuf);
    auto shapedBack = cinm::reshapeStatic(
        builder, builder.getLoc(),
-        res.getOutput().cast<TypedValue<RankedTensorType>>(),
-        result.getType().cast<RankedTensorType>().getShape());
+        cast<TypedValue<RankedTensorType>>(res.getOutput()),
+        cast<RankedTensorType>(result.getType()).getShape());

    resultValues.push_back(shapedBack);
  }
@@ -514,7 +514,7 @@ struct ConvertElementWiseToCnm : public OpConversionPattern<CinmOp> {
            ValueRange outputs) {
          SmallVector<AffineMap> affineMaps;
          for (const auto &i : inputs) {
-            MemRefType t = i.getType().cast<MemRefType>();
+            MemRefType t = cast<MemRefType>(i.getType());
            affineMaps.push_back(AffineMap::getMultiDimIdentityMap(
                t.getRank(), op.getContext()));

@@ -541,7 +541,7 @@ struct ConvertElementWiseToCnm : public OpConversionPattern<CinmOp> {
          Value rhs = IsScalarOp ? inputs[1u] : args[1u];
          if constexpr (IsScalarOp) {
            if (const auto memrefType =
-                    rhs.getType().dyn_cast<MemRefType>()) {
+                    dyn_cast<MemRefType>(rhs.getType())) {
              const Value zero =
                  builder.create<arith::ConstantIndexOp>(loc, 0);
              rhs = builder.create<memref::LoadOp>(
@@ -622,7 +622,7 @@ struct ConvertCinmGemmToCnm : public OpConversionPattern<cinm::GemmOp> {
  using OpConversionPattern<cinm::GemmOp>::OpConversionPattern;

  static Value transpose(ImplicitLocOpBuilder &builder, Value tensor) {
-    auto inTy = tensor.getType().cast<RankedTensorType>();
+    auto inTy = cast<RankedTensorType>(tensor.getType());
    auto shape = inTy.getShape();
    SmallVector<int64_t, 2> newShape{shape[1], shape[0]};
    SmallVector<int64_t, 2> perms{1, 0};
@@ -785,10 +785,8 @@ struct ConvertCinmReduceToCnm : public OpConversionPattern<cinm::ReduceOp> {
        op.getResult().getType(),
        builder.getZeroAttr(op.getResult().getType()));

-    const bool isFloatOp = op.getType()
-                               .cast<ShapedType>()
-                               .getElementType()
-                               .dyn_cast<FloatType>() != nullptr;
+    const bool isFloatOp = isa<FloatType>(
+        cast<ShapedType>(op.getType()).getElementType());

    llvm::SmallVector<Value, 1> newResults;
    if (convertCinmToCnm(
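The isFloatOp change is a small readability win on top of the API migration: a dyn_cast followed by a null check collapses into a single isa<FloatType> query, since only the yes/no answer is used.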
15 changes: 7 additions & 8 deletions cinnamon/lib/Conversion/CnmToGPU/CnmToGPU.cpp
@@ -51,11 +51,11 @@ MemRefType convertCnmBufferToMemRefType(cnm::BufferType bufferType) {
void convertLaunchParameter(ConversionPatternRewriter &rewriter, Location loc,
                            Value buffer, ValueRange threadIds,
                            BlockArgument arg) {
-  if (!buffer.getType().dyn_cast<cnm::BufferType>()) {
+  const auto bufferType = dyn_cast<cnm::BufferType>(buffer.getType());
+
+  if (!bufferType)
    return;
-  }

-  const BufferType bufferType = buffer.getType().dyn_cast<cnm::BufferType>();
  const MemRefType memrefType = convertCnmBufferToMemRefType(bufferType);

  const Value source = createOrFoldUnrealizedConversionCast(
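Beyond the free-function spelling, convertLaunchParameter now performs the dyn_cast once, keeps the result, and turns the type check into an early return on a null bufferType, instead of testing with dyn_cast and then casting the same value a second time.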
@@ -122,8 +122,8 @@ struct ConvertCnmScatterToGPU : public OpConversionPattern<cnm::ScatterOp> {
                  ConversionPatternRewriter &rewriter) const override {
    const WorkgroupType workgroupType = op.getWg().getType();
    const ArrayRef<int64_t> workgroupShape = workgroupType.getShape();
-    const cnm::BufferType bufferType =
-        op.getOperandTypes()[1].dyn_cast<cnm::BufferType>();
+    const auto bufferType =
+        dyn_cast<cnm::BufferType>(op.getOperand(1).getType());

    Value src = rewriter.getRemappedValue(op.getOperand(0));
    Value dst = rewriter.getRemappedValue(op.getOperand(1));
@@ -155,8 +155,8 @@ struct ConvertCnmGatherToGPU : public OpConversionPattern<cnm::GatherOp> {
                  ConversionPatternRewriter &rewriter) const override {
    const WorkgroupType workgroupType = op.getWg().getType();
    const ArrayRef<int64_t> workgroupShape = workgroupType.getShape();
-    const cnm::BufferType bufferType =
-        op.getOperandTypes()[0].dyn_cast<cnm::BufferType>();
+    const auto bufferType =
+        dyn_cast<cnm::BufferType>(op.getOperand(0).getType());

    Value src = rewriter.getRemappedValue(op.getOperand(0));
    src = createOrFoldUnrealizedConversionCast(
@@ -282,7 +282,6 @@ struct ConvertCnmToGPUPass

    RewritePatternSet patterns(&getContext());
    populateCnmToGPUConversionPatterns(patterns, &getContext());
-    populateReconcileUnrealizedCastsPatterns(patterns);

    ConversionTarget target(getContext());
    target.addIllegalDialect<cnm::CnmDialect>();
12 changes: 6 additions & 6 deletions cinnamon/lib/Conversion/CnmToUPMEM/CnmToUPMEM.cpp
@@ -24,6 +24,7 @@
#include <mlir/IR/ValueRange.h>
#include <mlir/Support/LLVM.h>
#include <mlir/Transforms/DialectConversion.h>
+#include <mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h>

#define GEN_PASS_DEF_CONVERTCNMTOUPMEMPASS
#include "cinm-mlir/Conversion/CnmPasses.h.inc"
@@ -40,8 +41,8 @@ template <typename T> T reduceMul(ArrayRef<T> arr) {
}

MemRefType convertTensorToMemref(ShapedType ty) {
-  if (ty.isa<MemRefType>())
-    return ty.cast<MemRefType>();
+  if (isa<MemRefType>(ty))
+    return cast<MemRefType>(ty);

  return MemRefType::get(ty.getShape(), ty.getElementType());
}
@@ -126,7 +127,7 @@ struct ConvertCnmGatherToUPMEM : public OpConversionPattern<cnm::GatherOp> {
                  ConversionPatternRewriter &rewriter) const override {

    Value outputBuf = adaptor.getOutputBuf();
-    bool isBufferized = op.getOutputBuf().getType().isa<BaseMemRefType>();
+    bool isBufferized = isa<BaseMemRefType>(op.getOutputBuf().getType());
    if (!isBufferized) {
      outputBuf = rewriter.create<memref::AllocOp>(
          op->getLoc(), convertTensorToMemref(op.getOutputBuf().getType()));
@@ -165,7 +166,7 @@ struct ConvertCnmLaunchToUPMEM : public OpConversionPattern<cnm::LaunchOp> {
    const size_t availableWRAM = 32 * 1024;
    size_t requiredWRAM = 0;
    for (Value buffer : op.getParams()) {
-      const BufferType bufferType = buffer.getType().cast<BufferType>();
+      const BufferType bufferType = cast<BufferType>(buffer.getType());
      const size_t elementSize =
          bufferType.getElementType().getIntOrFloatBitWidth() / 8;
      requiredWRAM += reduceMul(bufferType.getShape()) * elementSize;
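For context on the WRAM check in this hunk: each kernel parameter contributes the product of its buffer shape times its element width in bytes, and the sum is compared against the 32 * 1024 = 32768 bytes the pass budgets as available WRAM. For example, a hypothetical buffer of shape 64x16 with f32 elements would account for 64 * 16 * 4 = 4096 bytes of that budget.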
@@ -207,7 +208,7 @@ struct ConvertCnmLaunchToUPMEM : public OpConversionPattern<cnm::LaunchOp> {
        continue;
      }

-      const BufferType bufferType = buffer.getType().cast<BufferType>();
+      const BufferType bufferType = cast<BufferType>(buffer.getType());
      const size_t chunkSize = reduceMul(bufferType.getShape());
      const size_t memoryPerTasklet = chunksPerTasklet * chunkSize;
      const size_t memoryPerDPU = wgShape[2] * memoryPerTasklet;
@@ -355,7 +356,6 @@ struct ConvertCnmToUPMEMPass

    RewritePatternSet patterns(&getContext());
    populateCnmToUPMEMConversionPatterns(converter, patterns);
-    populateReconcileUnrealizedCastsPatterns(patterns);
    populateFinalBufferizationPatterns(patterns);

    ConversionTarget target(getContext());
6 changes: 3 additions & 3 deletions cinnamon/lib/Conversion/CommonPatterns.cpp
@@ -94,7 +94,7 @@ LogicalResult ConvertCnmSetZeroToAffine::matchAndRewrite(
    cnm::SetZeroOp op, OpAdaptor, ConversionPatternRewriter &rewriter) const {
  const Value dst = rewriter.getRemappedValue(op.getOperand());

-  const MemRefType type = dst.getType().cast<MemRefType>();
+  const MemRefType type = cast<MemRefType>(dst.getType());
  const SmallVector<int64_t> loopSizes{type.getShape()};
  const SmallVector<int64_t> loopSteps(loopSizes.size(), 1);

@@ -125,8 +125,8 @@ SmallVector<Value> createAffineApply(OpBuilder &builder, Location loc,
void createMemrefSubviewCopy(OpBuilder &builder, Location loc, Value src,
                             Value dst, ArrayRef<int64_t> sliceShape,
                             ValueRange srcOffsets, ValueRange dstOffsets) {
-  MemRefType srcType = src.getType().cast<MemRefType>();
-  MemRefType dstType = dst.getType().cast<MemRefType>();
+  MemRefType srcType = cast<MemRefType>(src.getType());
+  MemRefType dstType = cast<MemRefType>(dst.getType());

  SmallVector<int64_t> srcStaticOffsets(srcType.getRank(), 0);
  SmallVector<int64_t> srcStaticSizes{srcType.getShape()};
6 changes: 3 additions & 3 deletions cinnamon/lib/Conversion/TorchToCinm/TorchToCinm.cpp
@@ -49,20 +49,20 @@ struct ConvertTorchTensorOpToCinm : OpConversionPattern<SourceOp> {
                  ConversionPatternRewriter &rewriter) const override {

    auto lhs = op.getOperand(0);
-    auto lhsType = lhs.getType().template cast<torch::Torch::ValueTensorType>();
+    auto lhsType = cast<torch::Torch::ValueTensorType>(lhs.getType());
    auto lhsConversionOp =
        rewriter.create<torch::TorchConversion::ToBuiltinTensorOp>(
            op.getLoc(), lhsType.toBuiltinTensor(), lhs);

    auto rhs = op.getOperand(1);
-    auto rhsType = rhs.getType().template cast<torch::Torch::ValueTensorType>();
+    auto rhsType = cast<torch::Torch::ValueTensorType>(rhs.getType());
    auto rhsConversionOp =
        rewriter.create<torch::TorchConversion::ToBuiltinTensorOp>(
            op.getLoc(), rhsType.toBuiltinTensor(), rhs);

    auto result = op.getResult();
    auto resultType =
-        result.getType().template cast<torch::Torch::ValueTensorType>();
+        cast<torch::Torch::ValueTensorType>(result.getType());

    auto cinmComputeOp = rewriter.create<cinm::ComputeOp>(
        op.getLoc(), resultType.toBuiltinTensor());
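A side benefit visible in this file: because ConvertTorchTensorOpToCinm is itself a template, the member form needed the `.template cast<...>()` disambiguator on type-dependent expressions, as in `lhs.getType().template cast<torch::Torch::ValueTensorType>()`. The free-function spelling `cast<torch::Torch::ValueTensorType>(lhs.getType())` needs no such disambiguation, which is part of why the new form reads more cleanly in templated conversion patterns.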