diff --git a/enzyme/Enzyme/FunctionUtils.cpp b/enzyme/Enzyme/FunctionUtils.cpp index 0e14c20d9ef2..4c3370cf4cf9 100644 --- a/enzyme/Enzyme/FunctionUtils.cpp +++ b/enzyme/Enzyme/FunctionUtils.cpp @@ -7063,6 +7063,7 @@ Constraints::allSolutions(SCEVExpander &Exp, llvm::Type *T, Instruction *IP, return {}; } +constexpr bool SparseDebug = false; std::shared_ptr getSparseConditions(bool &legal, Value *val, std::shared_ptr defaultFloat, @@ -7077,11 +7078,13 @@ getSparseConditions(bool &legal, Value *val, auto res = lhs->andB(rhs, ctx); assert(res); assert(ctx.seen.size() == 0); - llvm::errs() << " getSparse(and, " << *I << "), lhs(" << *I->getOperand(0) - << ") = " << *lhs << "\n"; - llvm::errs() << " getSparse(and, " << *I << "), rhs(" << *I->getOperand(1) - << ") = " << *rhs << "\n"; - llvm::errs() << " getSparse(and, " << *I << ") = " << *res << "\n"; + if (SparseDebug) { + llvm::errs() << " getSparse(and, " << *I << "), lhs(" + << *I->getOperand(0) << ") = " << *lhs << "\n"; + llvm::errs() << " getSparse(and, " << *I << "), rhs(" + << *I->getOperand(1) << ") = " << *rhs << "\n"; + llvm::errs() << " getSparse(and, " << *I << ") = " << *res << "\n"; + } return res; } @@ -7092,11 +7095,13 @@ getSparseConditions(bool &legal, Value *val, auto rhs = getSparseConditions(legal, I->getOperand(1), Constraints::none(), I, ctx); auto res = lhs->orB(rhs, ctx); - llvm::errs() << " getSparse(or, " << *I << "), lhs(" << *I->getOperand(0) - << ") = " << *lhs << "\n"; - llvm::errs() << " getSparse(or, " << *I << "), rhs(" << *I->getOperand(1) - << ") = " << *rhs << "\n"; - llvm::errs() << " getSparse(or, " << *I << ") = " << *res << "\n"; + if (SparseDebug) { + llvm::errs() << " getSparse(or, " << *I << "), lhs(" + << *I->getOperand(0) << ") = " << *lhs << "\n"; + llvm::errs() << " getSparse(or, " << *I << "), rhs(" + << *I->getOperand(1) << ") = " << *rhs << "\n"; + llvm::errs() << " getSparse(or, " << *I << ") = " << *res << "\n"; + } return res; } @@ -7108,9 +7113,12 @@ getSparseConditions(bool &legal, Value *val, getSparseConditions(legal, I->getOperand(1 - i), defaultFloat->notB(ctx), scope, ctx); auto res = pres->notB(ctx); - llvm::errs() << " getSparse(not, " << *I << "), prev (" - << *I->getOperand(0) << ") = " << *pres << "\n"; - llvm::errs() << " getSparse(not, " << *I << ") = " << *res << "\n"; + if (SparseDebug) { + llvm::errs() << " getSparse(not, " << *I << "), prev (" + << *I->getOperand(0) << ") = " << *pres << "\n"; + llvm::errs() << " getSparse(not, " << *I << ") = " << *res + << "\n"; + } return res; } } @@ -7120,8 +7128,10 @@ getSparseConditions(bool &legal, Value *val, auto L = ctx.loopToSolve; auto lhs = ctx.SE.getSCEVAtScope(icmp->getOperand(0), L); auto rhs = ctx.SE.getSCEVAtScope(icmp->getOperand(1), L); - llvm::errs() << " lhs: " << *lhs << "\n"; - llvm::errs() << " rhs: " << *rhs << "\n"; + if (SparseDebug) { + llvm::errs() << " lhs: " << *lhs << "\n"; + llvm::errs() << " rhs: " << *rhs << "\n"; + } auto sub1 = ctx.SE.getMinusSCEV(lhs, rhs); @@ -7145,8 +7155,10 @@ getSparseConditions(bool &legal, Value *val, auto res = Constraints::make_compare( div, icmp->getPredicate() == ICmpInst::ICMP_EQ, add->getLoop(), ctx); - llvm::errs() - << " getSparse(icmp, " << *I << ") = " << *res << "\n"; + if (SparseDebug) { + llvm::errs() + << " getSparse(icmp, " << *I << ") = " << *res << "\n"; + } return res; } } @@ -7172,7 +7184,9 @@ getSparseConditions(bool &legal, Value *val, // cmp x, 1.0 -> false/true if (auto fcmp = dyn_cast(I)) { auto res = defaultFloat; - llvm::errs() << " getSparse(fcmp, " << *I << ") = " << *res << "\n"; + if (SparseDebug) { + llvm::errs() << " getSparse(fcmp, " << *I << ") = " << *res << "\n"; + } return res; if (fcmp->getPredicate() == CmpInst::FCMP_OEQ || @@ -7263,13 +7277,16 @@ void fixSparseIndices(llvm::Function &F, llvm::FunctionAnalysisManager &FAM, // Full simplification while (!Q.empty()) { auto cur = Q.pop_back_val(); + /* std::set prev; for (auto v : Q) prev.insert(v); // llvm::errs() << "\n\n\n\n" << F << "\n"; llvm::errs() << "cur: " << *cur << "\n"; + */ auto changed = fixSparse_inner(cur, F, Q, DT, SE, LI, DL); (void)changed; + /* if (changed) { llvm::errs() << "changed: " << *changed << "\n"; @@ -7278,6 +7295,7 @@ void fixSparseIndices(llvm::Function &F, llvm::FunctionAnalysisManager &FAM, llvm::errs() << " + " << *I << "\n"; // llvm::errs() << F << "\n\n"; } + */ } // llvm::errs() << " post fix inner " << F << "\n"; @@ -7872,6 +7890,7 @@ void replaceToDense(llvm::CallBase *CI, bool replaceAll, llvm::Function *F, args.push_back(diff); for (size_t i = argstart; i < num_args; i++) args.push_back(CI->getArgOperand(i)); + if (load_fn->getFunctionType()->getNumParams() != args.size()) { auto fnName = load_fn->getName(); auto found_numargs = load_fn->getFunctionType()->getNumParams(); @@ -7893,7 +7912,7 @@ void replaceToDense(llvm::CallBase *CI, bool replaceAll, llvm::Function *F, *args[i]->getType(), " found ", load_fn->getFunctionType()->params()[i]); tocontinue = true; - break; + args[i] = UndefValue::get(args[i]->getType()); } } if (tocontinue) @@ -7902,8 +7921,18 @@ void replaceToDense(llvm::CallBase *CI, bool replaceAll, llvm::Function *F, CallInst *call = B.CreateCall(load_fn, args); call->setDebugLoc(LI->getDebugLoc()); Value *tmp = call; - if (tmp->getType() != LI->getType()) - tmp = B.CreateBitCast(tmp, LI->getType()); + if (tmp->getType() != LI->getType()) { + if (CastInst::castIsValid(Instruction::BitCast, tmp, LI->getType())) + tmp = B.CreateBitCast(tmp, LI->getType()); + else { + auto fnName = load_fn->getName(); + EmitFailure("IllegalSparse", CI->getDebugLoc(), CI, + " incorrect return type of loader function ", fnName, + " expected ", *LI->getType(), " found ", + *call->getType()); + tmp = UndefValue::get(LI->getType()); + } + } LI->replaceAllUsesWith(tmp); if (load_fn->hasFnAttribute(Attribute::AlwaysInline)) { @@ -7927,15 +7956,44 @@ void replaceToDense(llvm::CallBase *CI, bool replaceAll, llvm::Function *F, EmitFailure("IllegalSparse", CI->getDebugLoc(), CI, " first argument of store function must be the type of " "the store found fn arg type ", - sty, " expected ", args0ty); + *sty, " expected ", *args0ty); + args[0] = UndefValue::get(sty); } } args.push_back(diff); for (size_t i = argstart; i < num_args; i++) args.push_back(CI->getArgOperand(i)); + + if (store_fn->getFunctionType()->getNumParams() != args.size()) { + auto fnName = store_fn->getName(); + auto found_numargs = store_fn->getFunctionType()->getNumParams(); + auto expected_numargs = args.size(); + EmitFailure("IllegalSparse", CI->getDebugLoc(), CI, + " incorrect number of arguments to store function ", fnName, + " expected ", expected_numargs, " found ", found_numargs, + " - ", *store_fn->getFunctionType()); + continue; + } else { + bool tocontinue = false; + for (size_t i = 0; i < args.size(); i++) { + if (store_fn->getFunctionType()->getParamType(i) != + args[i]->getType()) { + auto fnName = store_fn->getName(); + EmitFailure("IllegalSparse", CI->getDebugLoc(), CI, + " incorrect type of argument ", i, + " to storeer function ", fnName, " expected ", + *args[i]->getType(), " found ", + store_fn->getFunctionType()->params()[i]); + tocontinue = true; + args[i] = UndefValue::get(args[i]->getType()); + } + } + if (tocontinue) + continue; + } auto call = B.CreateCall(store_fn, args); call->setDebugLoc(SI->getDebugLoc()); - if (load_fn->hasFnAttribute(Attribute::AlwaysInline)) { + if (store_fn->hasFnAttribute(Attribute::AlwaysInline)) { InlineFunctionInfo IFI; InlineFunction(*call, IFI); } diff --git a/enzyme/test/Integration/Sparse/eigen_analysis.cpp b/enzyme/test/Integration/Sparse/eigen_analysis.cpp index d8dc311f957a..434ebcf48ddd 100644 --- a/enzyme/test/Integration/Sparse/eigen_analysis.cpp +++ b/enzyme/test/Integration/Sparse/eigen_analysis.cpp @@ -150,41 +150,6 @@ static void gradient_ip(const T *__restrict__ pos0, const size_t num_faces, cons enzyme_dup, x, out); } - -template -__attribute__((always_inline)) -static T ident_load(unsigned long long offset, size_t i) { - return (offset / sizeof(T) == i) ? T(1) : T(0); -} - - -template -__attribute__((always_inline)) -static void err_store(T val, unsigned long long offset, size_t i) { - assert(0 && "store is not legal"); -} - - -template -__attribute__((always_inline)) -static T zero_load(unsigned long long offset, size_t i, std::vector> &hess) { - return T(0); -} - - -__attribute__((enzyme_sparse_accumulate)) -void inner_store(size_t offset, size_t i, float val, std::vector> &hess) { - hess.push_back(Triple(offset, i, val)); -} - -template -__attribute__((always_inline)) -static void csr_store(T val, unsigned long long offset, size_t i, std::vector> &hess) { - if (val == 0.0) return; - offset /= sizeof(T); - inner_store(offset, i, val, hess); -} - template __attribute__((noinline)) std::vector> hessian(const T*__restrict__ pos0, size_t num_faces, const int* faces, const T*__restrict__ x, size_t x_pts) @@ -217,13 +182,20 @@ std::vector> hessian(const T*__restrict__ pos0, size_t num_faces, cons enzyme_const, pos02, enzyme_const, num_faces, enzyme_const, faces, - enzyme_dup, x2, __enzyme_todense(ident_load, err_store, i), - enzyme_dupnoneed, nullptr, __enzyme_todense(zero_load, csr_store, i, &hess)); + enzyme_dup, x2, __enzyme_todense(ident_load, ident_store, i), + enzyme_dupnoneed, nullptr, __enzyme_todense(sparse_load, sparse_store, i, &hess)); return hess; } -int main() { - const size_t x_pts = 1; +int main(int argc, char** argv) { + size_t x_pts = 8; + + if (argc >= 2) { + x_pts = atoi(argv[1]); + } + + // TODO generate data for more inputs + assert(x_pts == 8); const float x[] = {0.0, 1.0, 0.0}; @@ -233,25 +205,37 @@ int main() { const float pos0[] = {1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 3.0, 1.0, 3.0}; // Call eigenstuffM_simple + struct timeval start, end; + gettimeofday(&start, NULL); const float resultM = eigenstuffM(pos0, num_faces, faces, x); - printf("Result for eigenstuffM_simple: %f\n", resultM); + gettimeofday(&end, NULL); + printf("Result for eigenstuffM_simple: %f, runtime:%f\n", resultM, tdiff(&start, &end)); // Call eigenstuffL_simple + gettimeofday(&start, NULL); const float resultL = eigenstuffL(pos0, num_faces, faces, x); - printf("Result for eigenstuffL_simple: %f\n", resultL); + gettimeofday(&end, NULL); + printf("Result for eigenstuffL_simple: %f, runtime:%f\n", resultL, tdiff(&start, &end)); float dx[sizeof(x)/sizeof(x[0])]; for (size_t i=0; i #include +#include +float tdiff(struct timeval *start, struct timeval *end) { + return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec); +} + template struct Triple { size_t row; @@ -10,6 +15,56 @@ struct Triple { Triple(size_t row, size_t col, T val) : row(row), col(col), val(val) {} }; +__attribute__((enzyme_sparse_accumulate)) +static void inner_storeflt(int64_t row, int64_t col, float val, std::vector> &triplets) { +#ifdef BENCHMARK + if (val == 0.0) return; +#else +#warning "Compiling for debug/verfication, performance may be slowed" +#endif + triplets.emplace_back(row, col, val); +} + +__attribute__((enzyme_sparse_accumulate)) +static void inner_storedbl(int64_t row, int64_t col, double val, std::vector> &triplets) { +#ifdef BENCHMARK + if (val == 0.0) return; +#else +#warning "Compiling for debug/verfication, performance may be slowed" +#endif + triplets.emplace_back(row, col, val); +} + +template +__attribute__((always_inline)) +static void sparse_store(T val, int64_t idx, size_t i, std::vector> &triplets) { + if (val == 0.0) return; + idx /= sizeof(T); + if constexpr (sizeof(T) == 4) + inner_storeflt(i, idx, val, triplets); + else + inner_storedbl(i, idx, val, triplets); +} + +template +__attribute__((always_inline)) +static T sparse_load(int64_t idx, size_t i, std::vector> &triplets) { + return 0.0; +} + +template +__attribute__((always_inline)) +static void ident_store(T, int64_t idx, size_t i) { + assert(0 && "should never load"); +} + +template +__attribute__((always_inline)) +static T ident_load(int64_t idx, size_t i) { + idx /= sizeof(T); + return (T)(idx == i);// ? 1.0 : 0.0; +} + extern int enzyme_width; extern int enzyme_dup; extern int enzyme_dupv; @@ -17,16 +72,16 @@ extern int enzyme_const; extern int enzyme_dupnoneed; template -extern T __enzyme_autodiff(void*, Tys...); +extern T __enzyme_autodiff(void*, Tys...) noexcept; template -extern T __enzyme_fwddiff(void *, Tys...); +extern T __enzyme_fwddiff(void *, Tys...) noexcept; template -extern T __enzyme_todense(Tys...); +extern T __enzyme_todense(Tys...) noexcept; template -extern T __enzyme_post_sparse_todense(Tys...); +extern T __enzyme_post_sparse_todense(Tys...) noexcept; template __attribute__((always_inline)) @@ -200,4 +255,4 @@ static T area(const T *__restrict__ u, const T *__restrict__ v, const T *__restr T cross_product[3]; cross(cross_product, diff1, diff2); return 0.5 * norm(cross_product); -} \ No newline at end of file +} diff --git a/enzyme/test/Integration/Sparse/ringspring.cpp b/enzyme/test/Integration/Sparse/ringspring.cpp index 0ecae72bef5e..dd4242a1bcc5 100644 --- a/enzyme/test/Integration/Sparse/ringspring.cpp +++ b/enzyme/test/Integration/Sparse/ringspring.cpp @@ -17,123 +17,97 @@ #include -struct triple { - size_t row; - size_t col; - double val; - triple(triple&&) = default; - triple(size_t row, size_t col, double val) : row(row), col(col), val(val) {} -}; - - -size_t N = 8; - -extern int enzyme_dup; -extern int enzyme_dupnoneed; -extern int enzyme_out; -extern int enzyme_const; - -extern void __enzyme_autodiff(void *, ...); - -extern void __enzyme_fwddiff(void *, ...); - -extern double* __enzyme_todense(void *, ...) noexcept; - +#include "matrix.h" +template __attribute__((always_inline)) -static double f(size_t N, double* input) { +static T f(size_t N, T* input) { double out = 0; // __builtin_assume(!((N-1) == 0)); for (size_t i=0; i __attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); -} - -__attribute__((always_inline)) -static void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((always_inline)) -double ident_load(int64_t idx, size_t i, size_t N) { +double ringident_load(int64_t idx, size_t i, size_t N) { idx /= sizeof(double); // return (double)( ( (idx == N) ? 0 : idx) == i); return (double)((idx != N && idx == i) || (idx == N && 0 == i)); // return (double)( idx % N == i); } - -__attribute__((enzyme_sparse_accumulate)) -void inner_store(int64_t row, int64_t col, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col % N, val); - // assert(abs(val) > 0.00001); - triplets.emplace_back(row % N, col % N, val); -} - -__attribute__((always_inline)) -void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, val, triplets); -} - +template __attribute__((always_inline)) -double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; -} - -__attribute__((always_inline)) -void never_store(double val, int64_t idx, double* input, size_t N) { +void never_store(T val, int64_t idx, T* input, size_t N) { assert(0 && "this is a read only input, why are you storing here..."); } +template __attribute__((always_inline)) double mod_load(int64_t idx, double* input, size_t N) { idx /= sizeof(double); return input[idx % N]; } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; - input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; + input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); __builtin_assume(N > 0); __builtin_assume(N != 1); for (size_t i=0; i((void*)ringident_load, (void*)never_store, i, N); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; } -int main() { - // size_t N = 8; - double x[N]; - for (int i=0; i= 2) { + N = atoi(argv[1]); + } + + double *x = (double*)malloc(sizeof(double) * N); + for (int i=0; i -struct triple { - size_t row; - size_t col; - double val; - triple(triple&&) = default; - triple(size_t row, size_t col, double val) : row(row), col(col), val(val) {} -}; - - -extern int enzyme_dup; -extern int enzyme_dupnoneed; -extern int enzyme_out; -extern int enzyme_const; - -extern void __enzyme_autodiff(void *, ...); - -extern void __enzyme_fwddiff(void *, ...); - -extern double* __enzyme_todense(void *, ...) noexcept; - +#include "matrix.h" +template __attribute__((always_inline)) static double f(size_t N, double* pos) { double e = 0.; @@ -52,94 +34,52 @@ static double f(size_t N, double* pos) { return e; } - +template __attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((always_inline)) -void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); -} - -__attribute__((always_inline)) -double ident_load(size_t idx, size_t i, size_t N) { - idx /= sizeof(double); - return (double)(idx == i);// ? 1.0 : 0.0; -} - -__attribute__((enzyme_sparse_accumulate)) -void inner_store(int64_t row, int64_t col, size_t N, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col % N, val); - // assert(abs(val) > 0.00001); - triplets.emplace_back(row % N, col % N, val); -} - -__attribute__((always_inline)) -void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, N, val, triplets); -} - -__attribute__((always_inline)) -double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; -} - -__attribute__((always_inline)) -void never_store(double val, int64_t idx, double* input, size_t N) { +static void never_store(T val, int64_t idx, T* input, size_t N) { assert(0 && "this is a read only input, why are you storing here..."); } __attribute__((always_inline)) -double mod_load(int64_t idx, double* input, size_t N) { +static double mod_load(int64_t idx, double* input, size_t N) { idx /= sizeof(double); return input[idx % N]; } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; // input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); __builtin_assume(N > 0); for (size_t i=0; i((void*)ident_load, (void*)ident_store, i); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; } -/* -__attribute__((noinline)) -std::vector hess_f2(size_t N, double* input) { - std::vector triplets; - input = - ((void*)mod_load, (void*)never_store, input, N); - hess_f(N, input); -} -*/ -// int argc, char** argv -int __attribute__((always_inline)) main() { - - - // if (argc != 2) { - // printf("Usage: %s \n", argv[0]); - // return 1; - // } +int main(int argc, char** argv) { + size_t N = 30; - // size_t N = atoi(argv[1]); - size_t N = 16; + if (argc >= 2) { + N = atoi(argv[1]); + } - double x[2 * N + 2]; + double *x = (double*)malloc(sizeof(double) * (2 * N + 2)); for (int i = 0; i < N; ++i) { double angle = 2 * M_PI * i / N; x[2 * i] = cos(angle) ;//+ normal(generator); @@ -147,13 +87,23 @@ int __attribute__((always_inline)) main() { } x[2 * N] = x[0]; x[2 * N + 1] = x[1]; - auto res = hess_f(N, &x[0]); - printf("%ld\n", res.size()); + + struct timeval start, end; + gettimeofday(&start, NULL); - for (auto & tup : res) - printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val); + auto res = hess_f(N, x); + + gettimeofday(&end, NULL); + + printf("Number of elements %ld\n", res.size()); - return 0; -} + printf("Runtime %0.6f\n", tdiff(&start, &end)); + if (N <= 30) { + for (auto & tup : res) + printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val); + } + + return 0; +} diff --git a/enzyme/test/Integration/Sparse/ringspring3Dextenddata.cpp b/enzyme/test/Integration/Sparse/ringspring3Dextenddata.cpp index 72408a73df27..6b59d27bca64 100644 --- a/enzyme/test/Integration/Sparse/ringspring3Dextenddata.cpp +++ b/enzyme/test/Integration/Sparse/ringspring3Dextenddata.cpp @@ -38,132 +38,114 @@ extern void __enzyme_fwddiff(void *, ...); extern double* __enzyme_todense(void *, ...) noexcept; +// This should work on LLVM 7, 8, 9, however in CI the version of clang installed on Ubuntu 18.04 cannot load +// a clang plugin properly without segfaulting on exit. This is fine on Ubuntu 20.04 or later LLVM versions... +// RUN: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O1 %s -S -emit-llvm -o - %loadClangEnzyme -mllvm -enzyme-auto-sparsity=1 | %lli - ; fi +// RUN: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O2 %s -S -emit-llvm -o - %loadClangEnzyme -mllvm -enzyme-auto-sparsity=1 | %lli - ; fi +// RUN: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O3 %s -S -emit-llvm -o - %loadClangEnzyme -mllvm -enzyme-auto-sparsity=1 | %lli - ; fi +// TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O1 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi +// TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O2 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi +// TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O3 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi + +#include +#include +#include +#include + +#include + +#include "matrix.h" + +template __attribute__((always_inline)) -static double f(size_t N, double* pos) { - double e = 0.; +static T f(size_t N, T* pos) { + T e = 0.; __builtin_assume(N != 0); for (size_t i = 0; i < N; i+=3) { - __builtin_assume(i < 1000000000); - double vx = pos[i]; - double vy = pos[i + 1]; - double vz = pos[i + 2]; - - double wx = pos[i + 3]; - double wy = pos[i + 4]; - double wz = pos[i + 5]; + T vx = pos[i]; + T vy = pos[i + 1]; + T vz = pos[i + 2]; + + T wx = pos[i + 3]; + T wy = pos[i + 4]; + T wz = pos[i + 5]; e += (wx - vx) * (wx - vx) + (wy - vy) * (wy - vy) + (wz - vz) * (wz - vz); } return e; } - -__attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); -} - -__attribute__((always_inline)) -static void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); -} - -__attribute__((always_inline)) -static double ident_load(int64_t idx, size_t i, size_t N) { - idx /= sizeof(double); - return (double)(idx == i);// ? 1.0 : 0.0; -} - -__attribute__((enzyme_sparse_accumulate)) -static void inner_store(int64_t row, int64_t col, size_t N, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col % N, val); - // assert(abs(val) > 0.00001); - triplets.emplace_back(row % N, col % N, val); -} - +template __attribute__((always_inline)) -static void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, N, val, triplets); +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((always_inline)) -double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; -} - -__attribute__((always_inline)) -void never_store(double val, int64_t idx, double* input, size_t N) { +static void never_store(T val, int64_t idx, T* input, size_t N) { assert(0 && "this is a read only input, why are you storing here..."); } __attribute__((always_inline)) -double mod_load(int64_t idx, double* input, size_t N) { +static double mod_load(int64_t idx, double* input, size_t N) { idx /= sizeof(double); return input[idx % N]; } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; // input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); __builtin_assume(N > 0); - __builtin_assume(N < 10000000000); for (size_t i=0; i((void*)ident_load, (void*)ident_store, i); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; } -/* -__attribute__((noinline)) -std::vector hess_f2(size_t N, double* input) { - std::vector triplets; - input = - ((void*)mod_load, (void*)never_store, input, N); - hess_f(N, input); -} -*/ -// int argc, char** argv -int __attribute__((always_inline)) main() { - std::mt19937 generator(0); // Seed the random number generator - std::uniform_real_distribution normal(0, 0.05); - - - // if (argc != 2) { - // printf("Usage: %s \n", argv[0]); - // return 1; - // } - - // size_t N = atoi(argv[1]); +int main(int argc, char** argv) { size_t N = 30; - double x[3 * N + 3]; + if (argc >= 2) { + N = atoi(argv[1]); + } + + double *x = (double*)malloc(sizeof(double) * (3 * N + 3)); for (int i = 0; i < N; ++i) { double angle = 2 * M_PI * i / N; - x[3 * i] = cos(angle) + normal(generator); - x[3 * i + 1] = sin(angle) + normal(generator); - x[3 * i + 2] = normal(generator); + x[3 * i] = cos(angle) ;//+ normal(generator); + x[3 * i + 1] = sin(angle) ;//+ normal(generator); + x[3 * i + 2] = 0;//normal(generator); } x[3 * N] = x[0]; x[3 * N + 1] = x[1]; x[3 * N + 2] = x[2]; - auto res = hess_f(N, &x[0]); - - printf("%ld\n", res.size()); + struct timeval start, end; + gettimeofday(&start, NULL); + + auto res = hess_f(N, x); + + gettimeofday(&end, NULL); + + printf("Number of elements %ld\n", res.size()); + + printf("Runtime %0.6f\n", tdiff(&start, &end)); + if (N <= 30) { for (auto & tup : res) printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val); + } return 0; } diff --git a/enzyme/test/Integration/Sparse/ringspring3Dextenddatarestlengthone.cpp b/enzyme/test/Integration/Sparse/ringspring3Dextenddatarestlengthone.cpp index b5bb2f259135..54f9b0fbd8c1 100644 --- a/enzyme/test/Integration/Sparse/ringspring3Dextenddatarestlengthone.cpp +++ b/enzyme/test/Integration/Sparse/ringspring3Dextenddatarestlengthone.cpp @@ -1,7 +1,5 @@ // This should work on LLVM 7, 8, 9, however in CI the version of clang installed on Ubuntu 18.04 cannot load // a clang plugin properly without segfaulting on exit. This is fine on Ubuntu 20.04 or later LLVM versions... -// This should work on LLVM 7, 8, 9, however in CI the version of clang installed on Ubuntu 18.04 cannot load -// a clang plugin properly without segfaulting on exit. This is fine on Ubuntu 20.04 or later LLVM versions... // RUN: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O1 %s -S -emit-llvm -o - %loadClangEnzyme -mllvm -enzyme-auto-sparsity=1 | %lli - ; fi // RUN: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O2 %s -S -emit-llvm -o - %loadClangEnzyme -mllvm -enzyme-auto-sparsity=1 | %lli - ; fi // RUN: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O3 %s -S -emit-llvm -o - %loadClangEnzyme -mllvm -enzyme-auto-sparsity=1 | %lli - ; fi @@ -9,94 +7,45 @@ // TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O2 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi // TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O3 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi -// everything should be always inline - #include #include #include #include - #include -struct triple { - size_t row; - size_t col; - double val; - triple(triple&&) = default; - triple(size_t row, size_t col, double val) : row(row), col(col), val(val) {} -}; - - -extern int enzyme_dup; -extern int enzyme_dupnoneed; -extern int enzyme_out; -extern int enzyme_const; - -extern void __enzyme_autodiff(void *, ...); - -extern void __enzyme_fwddiff(void *, ...); - -extern double* __enzyme_todense(void *, ...) noexcept; - +#include "matrix.h" +template __attribute__((always_inline)) -static double f(size_t N, double* pos) { +static double f(size_t N, T* __restrict__ pos) { double e = 0.; - for (size_t i = 0; i < N; i += 3) { - double vx = pos[i]; - double vy = pos[i + 1]; - double vz = pos[i + 2]; - - double wx = pos[i + 3]; - double wy = pos[i + 4]; - double wz = pos[i + 5]; - double distance = (wx - vx) * (wx - vx) + (wy - vy) * (wy - vy) + (wz - vz) * (wz - vz); - double rest_len_one_dist = (sqrt(distance) - 1) * (sqrt(distance) - 1); + __builtin_assume(N != 0); + for (size_t j = 0; j < N/3; j ++) { + size_t i = 3 * j; + T vx = pos[i]; + T vy = pos[i + 1]; + T vz = pos[i + 2]; + + T wx = pos[i + 3]; + T wy = pos[i + 4]; + T wz = pos[i + 5]; + T distance = (wx - vx) * (wx - vx) + (wy - vy) * (wy - vy) + (wz - vz) * (wz - vz); + T rest_len_one_dist = (sqrt(distance) - 1) * (sqrt(distance) - 1); e += rest_len_one_dist; } return e; } - -__attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); -} - - +template __attribute__((always_inline)) -static void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((always_inline)) -static double ident_load(int64_t idx, size_t i, size_t N) { - idx /= sizeof(double); - return (double)(idx == i);// ? 1.0 : 0.0; -} - -__attribute__((enzyme_sparse_accumulate)) -static void inner_store(int64_t row, int64_t col, size_t N, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col % N, val); - // assert(abs(val) > 0.00001); - triplets.emplace_back(row % N, col % N, val); -} - -__attribute__((always_inline)) -static void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, N, val, triplets); -} - -__attribute__((always_inline)) -static double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; -} - -__attribute__((always_inline)) -static void never_store(double val, int64_t idx, double* input, size_t N) { +static void never_store(T val, int64_t idx, T* input, size_t N) { assert(0 && "this is a read only input, why are you storing here..."); } @@ -106,50 +55,34 @@ static double mod_load(int64_t idx, double* input, size_t N) { return input[idx % N]; } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; // input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); __builtin_assume(N > 0); for (size_t i=0; i((void*)ident_load, (void*)ident_store, i); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; } -/* -__attribute__((noinline)) -std::vector hess_f2(size_t N, double* input) { - std::vector triplets; - input = - ((void*)mod_load, (void*)never_store, input, N); - hess_f(N, input); -} -*/ - -// int argc, char** argv -int __attribute__((always_inline)) main() { - //std::mt19937 generator(0); // Seed the random number generator - //std::uniform_real_distribution normal(0, 0.05); - - - // if (argc != 2) { - // printf("Usage: %s \n", argv[0]); - // return 1; - // } - - // size_t N = atoi(argv[1]); +int main(int argc, char** argv) { size_t N = 30; - double x[3 * N + 3]; + if (argc >= 2) { + N = atoi(argv[1]); + } + + double *x = (double*)malloc(sizeof(double) * (3 * N + 3)); for (int i = 0; i < N; ++i) { double angle = 2 * M_PI * i / N; x[3 * i] = cos(angle) ;//+ normal(generator); @@ -159,14 +92,23 @@ int __attribute__((always_inline)) main() { x[3 * N] = x[0]; x[3 * N + 1] = x[1]; x[3 * N + 2] = x[2]; - auto res = hess_f(N, &x[0]); - - printf("%ld\n", res.size()); + struct timeval start, end; + gettimeofday(&start, NULL); + + auto res = hess_f(N, x); + + gettimeofday(&end, NULL); + + printf("Number of elements %ld\n", res.size()); + + printf("Runtime %0.6f\n", tdiff(&start, &end)); + if (N <= 30) { for (auto & tup : res) printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val); + } return 0; } diff --git a/enzyme/test/Integration/Sparse/ringspring3Drestlengthone.cpp b/enzyme/test/Integration/Sparse/ringspring3Drestlengthone.cpp index 49896b2cbc62..cae8bdad5708 100644 --- a/enzyme/test/Integration/Sparse/ringspring3Drestlengthone.cpp +++ b/enzyme/test/Integration/Sparse/ringspring3Drestlengthone.cpp @@ -7,94 +7,45 @@ // TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O2 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi // TODO: if [ %llvmver -ge 12 ]; then %clang++ -fno-exceptions -ffast-math -mllvm -enable-load-pre=0 -std=c++11 -O3 %s -S -emit-llvm -o - %newLoadClangEnzyme -mllvm -enzyme-auto-sparsity=1 -S | %lli - ; fi -// everything should be always inline - #include #include #include #include - #include -struct triple { - size_t row; - size_t col; - double val; - triple(triple&&) = default; - triple(size_t row, size_t col, double val) : row(row), col(col), val(val) {} -}; - - -extern int enzyme_dup; -extern int enzyme_dupnoneed; -extern int enzyme_out; -extern int enzyme_const; - -extern void __enzyme_autodiff(void *, ...); - -extern void __enzyme_fwddiff(void *, ...); - -extern double* __enzyme_todense(void *, ...) noexcept; - +#include "matrix.h" +template __attribute__((always_inline)) -static double f(size_t N, double* pos) { +static double f(size_t N, T* __restrict__ pos) { double e = 0.; - for (size_t i = 0; i < N; i += 3) { - double vx = pos[i]; - double vy = pos[i + 1]; - double vz = pos[i + 2]; + __builtin_assume(N != 0); + for (size_t j = 0; j < N/3; j ++) { + size_t i = 3 * j; + T vx = pos[i]; + T vy = pos[i + 1]; + T vz = pos[i + 2]; - double wx = pos[i + 3]; - double wy = pos[i + 4]; - double wz = pos[i + 5]; - double distance = (wx - vx) * (wx - vx) + (wy - vy) * (wy - vy) + (wz - vz) * (wz - vz); - double rest_len_one_dist = (sqrt(distance) - 1) * (sqrt(distance) - 1); + T wx = pos[i + 3]; + T wy = pos[i + 4]; + T wz = pos[i + 5]; + T distance = (wx - vx) * (wx - vx) + (wy - vy) * (wy - vy) + (wz - vz) * (wz - vz); + T rest_len_one_dist = (sqrt(distance) - 1) * (sqrt(distance) - 1); e += rest_len_one_dist; } return e; } - -__attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); -} - - +template __attribute__((always_inline)) -static void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((always_inline)) -static double ident_load(int64_t idx, size_t i, size_t N) { - idx /= sizeof(double); - return (double)(idx == i);// ? 1.0 : 0.0; -} - -__attribute__((enzyme_sparse_accumulate)) -static void inner_store(int64_t row, int64_t col, size_t N, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col % N, val); - // assert(abs(val) > 0.00001); - triplets.emplace_back(row % N, col % N, val); -} - -__attribute__((always_inline)) -static void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, N, val, triplets); -} - -__attribute__((always_inline)) -static double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; -} - -__attribute__((always_inline)) -static void never_store(double val, int64_t idx, double* input, size_t N) { +static void never_store(T val, int64_t idx, T* input, size_t N) { assert(0 && "this is a read only input, why are you storing here..."); } @@ -104,20 +55,21 @@ static double mod_load(int64_t idx, double* input, size_t N) { return input[idx % N]; } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; // input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); __builtin_assume(N > 0); for (size_t i=0; i((void*)ident_load, (void*)ident_store, i); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; @@ -134,34 +86,37 @@ std::vector hess_f2(size_t N, double* input) { */ // int argc, char** argv -int __attribute__((always_inline)) main() { - std::mt19937 generator(0); // Seed the random number generator - std::uniform_real_distribution normal(0, 0.05); - - - // if (argc != 2) { - // printf("Usage: %s \n", argv[0]); - // return 1; - // } - - // size_t N = atoi(argv[1]); +int main(int argc, char** argv) { size_t N = 30; - double x[3 * N]; + if (argc >= 2) { + N = atoi(argv[1]); + } + + double *x = (double*)malloc(sizeof(double) * 3 * N); for (int i = 0; i < N; ++i) { double angle = 2 * M_PI * i / N; x[3 * i] = cos(angle) ;//+ normal(generator); x[3 * i + 1] = sin(angle) ;//+ normal(generator); x[3 * i + 2] = 0;//normal(generator); } - auto res = hess_f(N, &x[0]); - - printf("%ld\n", res.size()); + struct timeval start, end; + gettimeofday(&start, NULL); + + auto res = hess_f(N, x); + + gettimeofday(&end, NULL); + + printf("Number of elements %ld\n", res.size()); + + printf("Runtime %0.6f\n", tdiff(&start, &end)); + if (N <= 30) { for (auto & tup : res) printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val); + } return 0; } diff --git a/enzyme/test/Integration/Sparse/sqrtspring.cpp b/enzyme/test/Integration/Sparse/sqrtspring.cpp index a9750409b37d..9645593e8fe9 100644 --- a/enzyme/test/Integration/Sparse/sqrtspring.cpp +++ b/enzyme/test/Integration/Sparse/sqrtspring.cpp @@ -15,105 +15,73 @@ #include -struct triple { - size_t row; - size_t col; - double val; - triple(triple&&) = default; - triple(size_t row, size_t col, double val) : row(row), col(col), val(val) {} -}; - -extern int enzyme_dup; -extern int enzyme_dupnoneed; -extern int enzyme_out; -extern int enzyme_const; - -extern void __enzyme_autodiff(void *, ...); - -extern void __enzyme_fwddiff(void *, ...); - -extern double* __enzyme_todense(void *, ...) noexcept; - +#include "matrix.h" +template __attribute__((always_inline)) -static double f(size_t N, double* input) { - double out = 0; +static T f(size_t N, T* input) { + T out = 0; __builtin_assume(!((N-1) == 0)); for (size_t i=0; i __attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); -} - - -__attribute__((always_inline)) -static void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); -} - -__attribute__((always_inline)) -static double ident_load(int64_t idx, size_t i, size_t N) { - idx /= sizeof(double); - return (double)(idx == i);// ? 1.0 : 0.0; -} - -__attribute__((enzyme_sparse_accumulate)) -static void inner_store(int64_t row, int64_t col, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col, val); - assert(abs(val) > 0.00001); - triplets.emplace_back(row, col, val); -} - -__attribute__((always_inline)) -static void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, val, triplets); -} - -__attribute__((always_inline)) -static double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; __builtin_assume(N > 0); for (size_t i=0; i((void*)ident_load, (void*)ident_store, i); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); - + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; } -int main() { - size_t N = 8; - double x[N]; - for (int i=0; i= 2) { + N = atoi(argv[1]); + } + + double *x = (double*)malloc(sizeof(double) * N); + for (int i=0; i