diff --git a/Src/ILGPU.Algorithms.Tests/XMathTests.Sqrt.tt b/Src/ILGPU.Algorithms.Tests/XMathTests.Sqrt.tt index 4b0eb3a903..9d2ed98a54 100644 --- a/Src/ILGPU.Algorithms.Tests/XMathTests.Sqrt.tt +++ b/Src/ILGPU.Algorithms.Tests/XMathTests.Sqrt.tt @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU Algorithms -// Copyright (c) 2020-2023 ILGPU Project +// Copyright (c) 2020-2024 ILGPU Project // www.ilgpu.net // // File: XMathTests.Sqrt.tt/XMathTests.Sqrt.cs @@ -32,7 +32,7 @@ using Xunit; var rsqrtFunctions = new [] { - new XMathFunction("Rsqrt" , "float" , new Precision(15, 15, 7)), + new XMathFunction("Rsqrt" , "float" , new Precision(15, 6, 7)), new XMathFunction("Rsqrt" , "double", new Precision(15, 15, 15)), }; #> diff --git a/Src/ILGPU.Algorithms/PTX/PTXContext.Generated.tt b/Src/ILGPU.Algorithms/PTX/PTXContext.Generated.tt index 1763f736fd..8b6984fc2f 100644 --- a/Src/ILGPU.Algorithms/PTX/PTXContext.Generated.tt +++ b/Src/ILGPU.Algorithms/PTX/PTXContext.Generated.tt @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU Algorithms -// Copyright (c) 2019-2021 ILGPU Project +// Copyright (c) 2019-2024 ILGPU Project // www.ilgpu.net // // File: PTXContext.Generated.tt/PTXContext.Generated.cs @@ -18,34 +18,34 @@ <#@ output extension=".cs" #> <# var hardwareMathFunctions = - new ValueTuple, string>[] + new ValueTuple[] { - ( UnaryMathFunctions[10], null ), // IsInfinity - ( UnaryMathFunctions[11], null ), // IsInfinity - ( UnaryMathFunctions[12], null ), // IsNaN - ( UnaryMathFunctions[13], null ), // IsNaN - - ( UnaryMathFunctions[18], null ), // Rcp - ( UnaryMathFunctions[19], null ), // Rcp - - ( UnaryMathFunctions[20], null ), // Sqrt - ( UnaryMathFunctions[21], null ), // Sqrt - - ( UnaryMathFunctions[24], null ), // Sin - ( UnaryMathFunctions[30], null ), // Cos - - ( UnaryMathFunctions[16], null ), // Exp2 - - ( UnaryMathFunctions[8], null ), // Log2 - - ( UnaryMathFunctions[40], "SM_75" ), // TanH + UnaryMathFunctions[10], // IsInfinity + UnaryMathFunctions[11], // IsInfinity + UnaryMathFunctions[12], // IsNaN + UnaryMathFunctions[13], // IsNaN + + UnaryMathFunctions[18], // Rcp + UnaryMathFunctions[19], // Rcp + + UnaryMathFunctions[20], // Sqrt + UnaryMathFunctions[21], // Sqrt + + UnaryMathFunctions[24], // Sin + UnaryMathFunctions[30], // Cos + + UnaryMathFunctions[16], // Exp2 + + UnaryMathFunctions[8], // Log2 + + UnaryMathFunctions[40], // TanH }; var unaryMathFunctions = UnaryMathFunctions.Where(t => !hardwareMathFunctions.Any(t2 => { var functionName = t.Item1; var dataType = t.Item2; - var hardwareFunctionName = t2.Item1.Item1; - var hardwareDataType = t2.Item1.Item2; + var hardwareFunctionName = t2.Item1; + var hardwareDataType = t2.Item2; return functionName == hardwareFunctionName && dataType == hardwareDataType; })); var binaryMathFunctions = BinaryMathFunctions; @@ -87,29 +87,11 @@ namespace ILGPU.Algorithms.PTX typeof(<#= type #>))); <# } #> -<# foreach (var ((name, type, kind, basicType), sm) in hardwareMathFunctions) { #> -<# - if (string.IsNullOrWhiteSpace(sm)) { - // Register hardware intrinsic -#> +<# foreach (var (name, type, kind, basicType) in hardwareMathFunctions) { #> manager.RegisterUnaryArithmetic( UnaryArithmeticKind.<#= kind #>, BasicValueType.<#= basicType #>, MathCodeGeneratorIntrinsic); -<# - } else { - // Register software fallback first, so that it gets replaced - // by the specialized hardware intrinsic. -#> - manager.RegisterUnaryArithmetic( - UnaryArithmeticKind.<#= kind #>, - BasicValueType.<#= basicType #>, - GetMathIntrinsic("<#= name #>", typeof(<#= type #>))); - manager.RegisterUnaryArithmetic( - UnaryArithmeticKind.<#= kind #>, - BasicValueType.<#= basicType #>, - GetMathCodeGeneratorIntrinsic(CudaArchitecture.<#= sm #>)); -<# } #> <# } #> <# foreach (var functionName in xmathUnaryRedirects) { #> diff --git a/Src/ILGPU.Algorithms/PTX/PTXContext.cs b/Src/ILGPU.Algorithms/PTX/PTXContext.cs index 61a2685080..b4bc9fea39 100644 --- a/Src/ILGPU.Algorithms/PTX/PTXContext.cs +++ b/Src/ILGPU.Algorithms/PTX/PTXContext.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU Algorithms -// Copyright (c) 2019-2023 ILGPU Project +// Copyright (c) 2019-2024 ILGPU Project // www.ilgpu.net // // File: PTXContext.cs @@ -46,7 +46,9 @@ static partial class PTXContext private static readonly PTXIntrinsic MathCodeGeneratorIntrinsic = new PTXIntrinsic( MathCodeGenerator, - IntrinsicImplementationMode.GenerateCode) + IntrinsicImplementationMode.GenerateCode, + null, + maxArchitecture: PTXLibDevicePtx.MinArchtecture) .ThrowIfNull(); /// @@ -59,19 +61,6 @@ static partial class PTXContext /// internal static readonly Type PTXWarpExtensionsType = typeof(PTXWarpExtensions); - /// - /// Resolves a PTX code generator for the given math-function configuration. - /// - /// The target/minimum architecture. - /// The resolved intrinsic representation. - private static PTXIntrinsic GetMathCodeGeneratorIntrinsic( - CudaArchitecture minArchitecture) => - new PTXIntrinsic( - PTXMathType, - nameof(PTXMath.GenerateMathIntrinsic), - IntrinsicImplementationMode.GenerateCode, - minArchitecture); - /// /// Resolves a PTX intrinsic for the given math-function configuration. /// @@ -87,7 +76,11 @@ private static PTXIntrinsic GetMathIntrinsic(string name, params Type[] types) types, null) .ThrowIfNull(); - return new PTXIntrinsic(targetMethod, IntrinsicImplementationMode.Redirect); + return new PTXIntrinsic( + targetMethod, + IntrinsicImplementationMode.Redirect, + null, + maxArchitecture: PTXLibDevicePtx.MinArchtecture); } /// diff --git a/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs b/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs index 6c6f322415..e814412d59 100644 --- a/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs +++ b/Src/ILGPU/Backends/PTX/PTXIntrinsic.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2019-2021 ILGPU Project +// Copyright (c) 2019-2024 ILGPU Project // www.ilgpu.net // // File: PTXIntrinsic.cs @@ -52,6 +52,46 @@ public PTXIntrinsic(MethodInfo targetMethod, IntrinsicImplementationMode mode) mode) { } + /// + /// Constructs a new PTX intrinsic that can handle all architectures + /// newer or equal to . + /// + /// The associated target method. + /// The code-generation mode. + /// The target/minimum architecture. + public PTXIntrinsic( + MethodInfo targetMethod, + IntrinsicImplementationMode mode, + CudaArchitecture minArchitecture) + : base( + BackendType.PTX, + targetMethod, + mode) + { + MinArchitecture = minArchitecture; + } + + /// + /// Constructs a new PTX intrinsic. + /// + /// The associated target method. + /// The code-generation mode. + /// The target/minimum architecture. + /// The max architecture (exclusive). + public PTXIntrinsic( + MethodInfo targetMethod, + IntrinsicImplementationMode mode, + CudaArchitecture? minArchitecture, + CudaArchitecture? maxArchitecture) + : base( + BackendType.PTX, + targetMethod, + mode) + { + MinArchitecture = minArchitecture; + MaxArchitecture = maxArchitecture; + } + /// /// Constructs a new PTX intrinsic that can handle all architectures. /// diff --git a/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt b/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt index 800dacce24..019cd4f186 100644 --- a/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt +++ b/Src/ILGPU/Backends/PTX/PTXIntrinsics.Generated.tt @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2016-2021 ILGPU Project +// Copyright (c) 2016-2024 ILGPU Project // www.ilgpu.net // // File: PTXIntrinsics.Generated.tt/PTXIntrinsics.Generated.cs @@ -35,6 +35,61 @@ var fp16Ops = new (string, string, string, string)[] ("Ternary", "MultiplyAdd", "FmaFP32", "SM_53"), }; + +var unaryMathFunctions = new (string, string, TypeInformation)[] +{ + ("AcosF", "Acos", FloatTypes[2]), + ("AsinF", "Asin", FloatTypes[2]), + ("AtanF", "Atan", FloatTypes[2]), + ("CeilingF", "Ceil", FloatTypes[2]), + ("CosF", "Cos", FloatTypes[2]), + ("CoshF", "Cosh", FloatTypes[2]), + ("ExpF", "Exp", FloatTypes[2]), + ("Exp2F", "Exp2", FloatTypes[2]), + ("FloorF", "Floor", FloatTypes[2]), + ("LogF", "Log", FloatTypes[2]), + ("Log2F", "Log2", FloatTypes[2]), + ("Log10F", "Log10", FloatTypes[2]), + ("RsqrtF", "Rsqrt", FloatTypes[2]), + ("SinF", "Sin", FloatTypes[2]), + ("SinhF", "Sinh", FloatTypes[2]), + ("SqrtF", "Sqrt", FloatTypes[2]), + ("TanF", "Tan", FloatTypes[2]), + ("TanhF", "Tanh", FloatTypes[2]), + + ("AcosF", "Acos", FloatTypes[1]), + ("AsinF", "Asin", FloatTypes[1]), + ("AtanF", "Atan", FloatTypes[1]), + ("CeilingF", "Ceil", FloatTypes[1]), + ("CosF", "Cos", FloatTypes[1]), + ("CoshF", "Cosh", FloatTypes[1]), + ("ExpF", "Exp", FloatTypes[1]), + ("Exp2F", "Exp2", FloatTypes[1]), + ("FloorF", "Floor", FloatTypes[1]), + ("LogF", "Log", FloatTypes[1]), + ("Log2F", "Log2", FloatTypes[1]), + ("Log10F", "Log10", FloatTypes[1]), + ("RsqrtF", "Rsqrt", FloatTypes[1]), + ("SinF", "Sin", FloatTypes[1]), + ("SinhF", "Sinh", FloatTypes[1]), + ("SqrtF", "Sqrt", FloatTypes[1]), + ("TanF", "Tan", FloatTypes[1]), + ("TanhF", "Tanh", FloatTypes[1]), +}; + +var binaryMathFunctions = new (string, string, string, TypeInformation)[] +{ + ("Atan2F", "Atan", null, FloatTypes[2]), + ("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[2]), + ("PowF", "Pow", null, FloatTypes[2]), + ("Rem", "Fmod", null, FloatTypes[2]), + + ("Atan2F", "Atan", null, FloatTypes[1]), + ("BinaryLogF", "Log", "IntrinsicMath.BinaryLog", FloatTypes[1]), + ("PowF", "Pow", null, FloatTypes[1]), + ("Rem", "Fmod", null, FloatTypes[1]), +}; + #> using ILGPU.IR.Intrinsics; using ILGPU.IR.Values; @@ -157,5 +212,43 @@ namespace ILGPU.Backends.PTX } #endregion + + #region Math + + /// + /// Registers all Math intrinsics with the given manager. + /// + /// The target implementation manager. + private static void RegisterMathFunctions(IntrinsicImplementationManager manager) + { +<# foreach (var (kind, methodName, type) in unaryMathFunctions) { #> + manager.RegisterUnaryArithmetic( + UnaryArithmeticKind.<#= kind #>, + BasicValueType.<#= type.GetBasicValueType() #>, + CreateMathIntrinsic( + nameof(LibDevice.<#= methodName #>), + typeof(<#= type.Type #>))); +<# } #> + +<# foreach (var (kind, methodName, baseClass, type) in binaryMathFunctions) { #> + manager.RegisterBinaryArithmetic( + BinaryArithmeticKind.<#= kind #>, + BasicValueType.<#= type.GetBasicValueType() #>, + <# if (baseClass == null) { #> + CreateMathIntrinsic( + nameof(LibDevice.<#= methodName #>), + typeof(<#= type.Type #>), + typeof(<#= type.Type #>))); + <# } else { #> + CreateMathIntrinsic( + typeof(<#= baseClass #>), + nameof(<#= baseClass #>.<#= methodName #>), + typeof(<#= type.Type #>), + typeof(<#= type.Type #>))); + <# } #> +<# } #> + } + + #endregion } } \ No newline at end of file diff --git a/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs b/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs index 564d6e16a2..49bf8654a2 100644 --- a/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs +++ b/Src/ILGPU/Backends/PTX/PTXIntrinsics.cs @@ -1,6 +1,6 @@ // --------------------------------------------------------------------------------------- // ILGPU -// Copyright (c) 2019-2023 ILGPU Project +// Copyright (c) 2019-2024 ILGPU Project // www.ilgpu.net // // File: PTXIntrinsics.cs @@ -13,7 +13,9 @@ using ILGPU.IR.Intrinsics; using ILGPU.IR.Values; using ILGPU.Runtime.Cuda; +using ILGPU.Util; using System; +using System.Reflection; using System.Runtime.CompilerServices; namespace ILGPU.Backends.PTX @@ -84,6 +86,49 @@ private static PTXIntrinsic CreateFP16Intrinsic( maxArchitecture.Value) : new PTXIntrinsic(HalfType, name, IntrinsicImplementationMode.Redirect); + /// + /// Creates a PTX intrinsic for the given math function. + /// + /// The intrinsic name. + /// The parameter types. + /// The resolved intrinsic representation. + private static PTXIntrinsic CreateMathIntrinsic(string name, params Type[] types) + { + var targetMethod = typeof(LibDevice).GetMethod( + name, + BindingFlags.Public | BindingFlags.Static, + null, + types, + null) + .ThrowIfNull(); + return new PTXIntrinsic( + targetMethod, + IntrinsicImplementationMode.Redirect, + PTXLibDevicePtx.MinArchtecture); + } + + /// + /// Creates a PTX intrinsic for the given math function. + /// + /// The source type containing the intrinsic. + /// The intrinsic name. + /// The parameter types. + /// The resolved intrinsic representation. + private static PTXIntrinsic CreateMathIntrinsic( + Type baseType, + string name, + params Type[] types) + { + var targetMethod = baseType.GetMethod( + name, + BindingFlags.Public | BindingFlags.Static, + null, + types, + null) + .ThrowIfNull(); + return new PTXIntrinsic(targetMethod, IntrinsicImplementationMode.Redirect); + } + /// /// Registers all PTX intrinsics with the given manager. /// @@ -95,6 +140,7 @@ public static void Register(IntrinsicImplementationManager manager) RegisterWarpShuffles(manager); RegisterFP16(manager); RegisterBitFunctions(manager); + RegisterMathFunctions(manager); } #endregion diff --git a/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt b/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt index 67d5b20379..a1ae887077 100644 --- a/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt +++ b/Src/ILGPU/Backends/PTX/PTXLibDevicePtx.tt @@ -35,7 +35,7 @@ namespace ILGPU.Backends.PTX /// /// Contains the pre-generated PTX for Cuda LibDevice functions. /// - internal static class PTXLibDevicePtx + public static class PTXLibDevicePtx { /// /// Minimum architecture required to use the pre-generated PTX. @@ -97,7 +97,7 @@ namespace ILGPU.Backends.PTX /// Filled in with the PTX modules. /// Filled in with the PTX declarations. /// The PTX modules. - public static void GetPtx( + internal static void GetPtx( IEnumerable methods, ref InlineList ptxModules, out string ptxDeclarations)