From 2fee5ee08b35347f16b97fbf809c223cd64ab8fb Mon Sep 17 00:00:00 2001 From: hooke007 Date: Sun, 19 Jan 2025 12:07:42 +0800 Subject: [PATCH] =?UTF-8?q?=E7=9D=80=E8=89=B2=E5=99=A8=EF=BC=9A=E5=B0=86?= =?UTF-8?q?=E5=89=A9=E4=BD=99AMD=E6=97=8F=E8=BD=AC=E6=8D=A2=E4=B8=BART?= =?UTF-8?q?=E5=8F=98=E4=BD=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 移除 AMD_CAS_RT 的预缩放限制; `lite` 变体没有明显优势,已移除; `RCAS` 变体由FSR简单裁切得来; `EASU_chroma` 的原始变体由 deus0ww 裁切。 --- .../shaders/{AMD_CAS.glsl => AMD_CAS_RT.glsl} | 63 +++- .../shaders/AMD_CAS_lite2_rgb.glsl | 154 -------- .../shaders/AMD_CAS_lite_luma.glsl | 85 ----- portable_config/shaders/AMD_CAS_lite_rgb.glsl | 85 ----- .../{AMD_CAS_rgb.glsl => AMD_CAS_rgb_RT.glsl} | 71 +++- ...CAS_scaled.glsl => AMD_CAS_scaled_RT.glsl} | 61 ++- ...ed_rgb.glsl => AMD_CAS_scaled_rgb_RT.glsl} | 68 +++- .../shaders/AMD_FSR_EASU_chroma_RT.glsl | 347 ++++++++++++++++++ .../shaders/AMD_FSR_RCAS_luma_RT.glsl | 141 +++++++ .../shaders/AMD_FSR_RCAS_rgb_RT.glsl | 152 ++++++++ 10 files changed, 860 insertions(+), 367 deletions(-) rename portable_config/shaders/{AMD_CAS.glsl => AMD_CAS_RT.glsl} (83%) delete mode 100644 portable_config/shaders/AMD_CAS_lite2_rgb.glsl delete mode 100644 portable_config/shaders/AMD_CAS_lite_luma.glsl delete mode 100644 portable_config/shaders/AMD_CAS_lite_rgb.glsl rename portable_config/shaders/{AMD_CAS_rgb.glsl => AMD_CAS_rgb_RT.glsl} (80%) rename portable_config/shaders/{AMD_CAS_scaled.glsl => AMD_CAS_scaled_RT.glsl} (87%) rename portable_config/shaders/{AMD_CAS_scaled_rgb.glsl => AMD_CAS_scaled_rgb_RT.glsl} (88%) create mode 100644 portable_config/shaders/AMD_FSR_EASU_chroma_RT.glsl create mode 100644 portable_config/shaders/AMD_FSR_RCAS_luma_RT.glsl create mode 100644 portable_config/shaders/AMD_FSR_RCAS_rgb_RT.glsl diff --git a/portable_config/shaders/AMD_CAS.glsl b/portable_config/shaders/AMD_CAS_RT.glsl similarity index 83% rename from portable_config/shaders/AMD_CAS.glsl rename to 
portable_config/shaders/AMD_CAS_RT.glsl index 05ed697e..cd61e0c2 100644 --- a/portable_config/shaders/AMD_CAS.glsl +++ b/portable_config/shaders/AMD_CAS_RT.glsl @@ -38,15 +38,54 @@ // scaled you should probably use CAS-scaled.glsl instead. However this behavior can be overriden by changing the WHEN // directives with "OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 < !" which allows it to be used as a pre-upscale sharpener. +//!PARAM TRC +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 6 +4 + +//!PARAM GAMMA +//!TYPE float +//!MINIMUM 0.0 +2.2 + +//!PARAM SHARP +//!TYPE float +//!MINIMUM 0.0 +//!MAXIMUM 1.0 +0.0 + +//!PARAM SLOW +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +1 + +//!PARAM SLOW2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM TRC2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 6 +4 + +//!PARAM GAMMA2 +//!TYPE float +//!MINIMUM 0.0 +2.2 + //!HOOK LUMA //!BIND HOOKED -//!DESC [AMD_CAS] FidelityFX Sharpening (Relinearization) -//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > ! OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 < ! * +//!DESC [AMD_CAS_RT] FidelityFX Sharpening (Relinearization) // User variables - Relinearization // Compatibility -#define SOURCE_TRC 4 // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom -#define CUSTOM_GAMMA 2.2 // Custom power gamma curve to use if and when SOURCE_TRC is 6. +#define SOURCE_TRC TRC // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom +#define CUSTOM_GAMMA GAMMA // Custom power gamma curve to use if and when SOURCE_TRC is 6. // Shader code @@ -99,20 +138,19 @@ vec4 hook() { //!HOOK LUMA //!BIND HOOKED -//!DESC [AMD_CAS] FidelityFX Sharpening -//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > ! OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 < ! 
* +//!DESC [AMD_CAS_RT] FidelityFX Sharpening // User variables // Intensity -#define SHARPENING 0.0 // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. +#define SHARPENING SHARP // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. // Performance -#define CAS_BETTER_DIAGONALS 1 // If set to 0, drops certain math and texture lookup operations for better performance. 0 or 1. -#define CAS_GO_SLOWER 0 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. +#define CAS_BETTER_DIAGONALS SLOW // If set to 0, drops certain math and texture lookup operations for better performance. 0 or 1. +#define CAS_GO_SLOWER SLOW2 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. // Compatibility -#define TARGET_TRC 4 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom -#define CUSTOM_GAMMA 2.2 // Custom power gamma curve to use if and when TARGET_TRC is 6. +#define TARGET_TRC TRC2 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom +#define CUSTOM_GAMMA GAMMA2 // Custom power gamma curve to use if and when TARGET_TRC is 6. 
// Shader code @@ -267,4 +305,5 @@ vec4 hook() #endif return pix; -} \ No newline at end of file +} + diff --git a/portable_config/shaders/AMD_CAS_lite2_rgb.glsl b/portable_config/shaders/AMD_CAS_lite2_rgb.glsl deleted file mode 100644 index 9dab720a..00000000 --- a/portable_config/shaders/AMD_CAS_lite2_rgb.glsl +++ /dev/null @@ -1,154 +0,0 @@ -// LICENSE -// ======= -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. -// ------- -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// ------- -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the -// Software. -// ------- -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE - -//Initial port to ReShade: SLSNe https://gist.github.com/SLSNe/bbaf2d77db0b2a2a0755df581b3cf00c - -//Optimizations by Marty McFly: -// vectorized math, even with scalar gcn hardware this should work -// out the same, order of operations has not changed -// For some reason, it went from 64 to 48 instructions, a lot of MOV gone -// Also modified the way the final window is calculated -// -// reordered min() and max() operations, from 11 down to 9 registers -// -// restructured final weighting, 49 -> 48 instructions -// -// delayed RCP to replace SQRT with RSQRT -// -// removed the saturate() from the control var as it is clamped -// by UI manager already, 48 -> 47 instructions -// -// replaced tex2D with tex2Doffset intrinsic (address offset by immediate integer) -// 47 -> 43 instructions -// 9 -> 8 registers - -//Further modified by OopyDoopy and Lord of Lunacy: -// Changed wording in the UI for the existing variable and added a new variable and relevant code to adjust sharpening strength. - -//Fix by Lord of Lunacy: -// Made the shader use a linear colorspace rather than sRGB, as recommended by the original AMD documentation from FidelityFX. - -//Modified by CeeJay.dk: -// Included a label and tooltip description. I followed AMDs official naming guidelines for FidelityFX. -// -// Used gather trick to reduce the number of texture operations by one (9 -> 8). It's now 42 -> 51 instructions but still faster -// because of the texture operation that was optimized away. 
- -//Fix by CeeJay.dk -// Fixed precision issues with the gather at super high resolutions -// Also tried to refactor the samples so more work can be done while they are being sampled, but it's not so easy and the gains -// I'm seeing are so small they might be statistical noise. So it MIGHT be faster - no promises. - -//Modified by agyild (JPulowski) for mpv port -// Source version: https://github.com/CeeJayDK/SweetFX/blob/4f1692abdc49fbd582b6ac88dff1833beae2eb38/Shaders/CAS.fx -// Added back clamp mechanism to Saturation since it is no longer clamped by ReShade UI -// Changed lerp to mix -// Changed rsqrt to inversesqrt -// Changed rcp(x) to "1.0 / x" -// Changed saturate(x) to clamp(x, 0.0, 1.0) -// Reimplemented linear colorspace processing as an in-shader operation -// Removed gather performance trick because unlike in ReShade, mpv has transparent content with alpha channels, and the gather trick cannot be adapted to capture the alpha channel wihout making an additional texture lookup which makes the whole trick pointless -// Fragments marked as transparent by their alpha channel is no longer processed as a potential performance gain -// Reverted y-coordinate multiplication since mpv uses DX-like coordinate system -// Changed hooked texture to OUTPUT from SCALED, since apparently the gamma curve is applied at this stage - -// Shader code -// Relinearization pass - -//!HOOK OUTPUT -//!BIND HOOKED -//!DESC [AMD_CAS_lite2_rgb] Relinearization Pass - -vec3 srgb_to_linear(vec3 col) { - return mix(col * 1.0 / 12.92, pow((col + 0.055) / 1.055, vec3(2.4)), ivec3(lessThan(vec3(0.04045), col))); -} - -vec4 hook() -{ - vec4 col = HOOKED_tex(HOOKED_pos); - return vec4(srgb_to_linear(col.rgb), col.a); -} - -// CAS - -//!HOOK OUTPUT -//!BIND HOOKED -//!DESC [AMD_CAS_lite2_rgb] Sharpening + Delinearization Pass - -// User variables -#define SHARPENING 1.0 // Sharpening intensity: Adjusts sharpening intensity by averaging the original pixels to the sharpened result. 
1.0 is the unmodified default. 0.0 to 1.0. -#define CONTRAST 0.0 // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. - -vec3 linear_to_srgb(vec3 col) { - return mix(col * 12.92, 1.055 * pow(col, vec3(1.0 / 2.4)) - 0.055, ivec3(lessThanEqual(vec3(0.0031308), col))); -} - -vec4 hook() -{ - // fetch a 3x3 neighborhood around the pixel 'e', - // a b c - // d(e)f - // g h i - vec4 e = HOOKED_tex(HOOKED_pos); - - // If the current fragment is transparent, skip further processing - if (e.a == 0.0) - return vec4(linear_to_srgb(e.rgb), e.a); - - vec3 a = HOOKED_texOff(vec2(-1.0, -1.0)).rgb; - vec3 b = HOOKED_texOff(vec2( 0.0, -1.0)).rgb; - vec3 c = HOOKED_texOff(vec2( 1.0, -1.0)).rgb; - vec3 f = HOOKED_texOff(vec2( 1.0, 0.0)).rgb; - vec3 g = HOOKED_texOff(vec2(-1.0, 1.0)).rgb; - vec3 h = HOOKED_texOff(vec2( 0.0, 1.0)).rgb; - vec3 d = HOOKED_texOff(vec2(-1.0, 0.0)).rgb; - vec3 i = HOOKED_texOff(vec2( 1.0, 1.0)).rgb; - - // Soft min and max. - // a b c b - // d e f * 0.5 + d e f * 0.5 - // g h i h - // These are 2.0x bigger (factored out the extra multiply). - vec3 mnRGB = min(min(min(d, e.rgb), min(f, b)), h); - vec3 mnRGB2 = min(mnRGB, min(min(a, c), min(g, i))); - mnRGB += mnRGB2; - - vec3 mxRGB = max(max(max(d, e.rgb), max(f, b)), h); - vec3 mxRGB2 = max(mxRGB, max(max(a, c), max(g, i))); - mxRGB += mxRGB2; - - // Smooth minimum distance to signal limit divided by smooth max. - vec3 rcpMRGB = 1.0 / mxRGB; - vec3 ampRGB = clamp(min(mnRGB, 2.0 - mxRGB) * rcpMRGB, 0.0, 1.0); - - // Shaping amount of sharpening. 
- ampRGB = inversesqrt(ampRGB); - - float peak = -3.0 * clamp(CONTRAST, 0.0, 1.0) + 8.0; - vec3 wRGB = -(1.0 / (ampRGB * peak)); - - vec3 rcpWeightRGB = 1.0 / (4.0 * wRGB + 1.0); - - // 0 w 0 - // Filter shape: w 1 w - // 0 w 0 - vec3 window = (b + d) + (f + h); - vec3 outColor = clamp((window * wRGB + e.rgb) * rcpWeightRGB, 0.0, 1.0); - - // Delinearize - return vec4(linear_to_srgb(mix(e.rgb, outColor, SHARPENING)), e.a); -} \ No newline at end of file diff --git a/portable_config/shaders/AMD_CAS_lite_luma.glsl b/portable_config/shaders/AMD_CAS_lite_luma.glsl deleted file mode 100644 index cb6a3415..00000000 --- a/portable_config/shaders/AMD_CAS_lite_luma.glsl +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- - -// ============================================================================ -// Contrast Adaptive Sharpening - deus0ww - 2022-01-28 -// -// Orginal: https://github.com/GPUOpen-Effects/FidelityFX-CAS -// Reshade: https://gist.github.com/SLSNe/bbaf2d77db0b2a2a0755df581b3cf00c -// Reshade: https://gist.github.com/martymcmodding/30304c4bffa6e2bd2eb59ff8bb09d135 -// ============================================================================ - - -//!HOOK LUMA -//!BIND HOOKED -//!DESC [AMD_CAS_lite_luma] - -#define CONTRAST 0.4 // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. -#define SHARPENING 1.0 // Adjusts sharpening intensity by averaging the original pixels to the sharpened result. - -const float peak = -1.0 / (-3.0 * clamp(CONTRAST, 0.0, 1.0) + 8.0); -const float sharpening = clamp(SHARPENING, 0.0, 1.0); - -vec4 hook() { - // fetch a 3x3 neighborhood around the pixel 'e', - // a b c - // d(e)f - // g h i - vec3 a = HOOKED_texOff(ivec2(-1, -1)).rgb; - vec3 b = HOOKED_texOff(ivec2( 0, -1)).rgb; - vec3 c = HOOKED_texOff(ivec2( 1, -1)).rgb; - vec3 d = HOOKED_texOff(ivec2(-1, 0)).rgb; - vec4 e = HOOKED_tex(HOOKED_pos); - vec3 f = HOOKED_texOff(ivec2( 1, 0)).rgb; - vec3 g = HOOKED_texOff(ivec2(-1, 1)).rgb; - vec3 h = HOOKED_texOff(ivec2( 0, 1)).rgb; - vec3 i = HOOKED_texOff(ivec2( 1, 1)).rgb; - - // Soft min and max. - // a b c b - // d e f * 0.5 + d e f * 0.5 - // g h i h - // These are 2.0x bigger (factored out the extra multiply). - vec3 mnRGB = min(min(min(d, e.rgb), min(f, b)), h); - vec3 mnRGB2 = min(mnRGB, min(min(a, c), min(g, i))); - mnRGB += mnRGB2; - - vec3 mxRGB = max(max(max(d, e.rgb), max(f, b)), h); - vec3 mxRGB2 = max(mxRGB, max(max(a, c), max(g, i))); - mxRGB += mxRGB2; - - // Smooth minimum distance to signal limit divided by smooth max. - vec3 ampRGB = clamp(min(mnRGB, 2.0 - mxRGB) / mxRGB, 0.0, 1.0); - - // Shaping amount of sharpening. 
- vec3 wRGB = sqrt(ampRGB) * peak; - vec3 weightRGB = 4.0 * wRGB + 1.0; - - // Filter shape. - // 0 w 0 - // w 1 w - // 0 w 0 - vec3 window = (b + d) + (f + h); - vec3 outColor = clamp((window * wRGB + e.rgb) / weightRGB, 0.0, 1.0); - - return vec4(mix(e.rgb, outColor, sharpening), e.a); -} - diff --git a/portable_config/shaders/AMD_CAS_lite_rgb.glsl b/portable_config/shaders/AMD_CAS_lite_rgb.glsl deleted file mode 100644 index 294d0745..00000000 --- a/portable_config/shaders/AMD_CAS_lite_rgb.glsl +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- - -// ============================================================================ -// Contrast Adaptive Sharpening - deus0ww - 2022-01-28 -// -// Orginal: https://github.com/GPUOpen-Effects/FidelityFX-CAS -// Reshade: https://gist.github.com/SLSNe/bbaf2d77db0b2a2a0755df581b3cf00c -// Reshade: https://gist.github.com/martymcmodding/30304c4bffa6e2bd2eb59ff8bb09d135 -// ============================================================================ - - -//!HOOK SCALED -//!BIND HOOKED -//!DESC [AMD_CAS_lite_rgb] - -#define CONTRAST 0.4 // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. -#define SHARPENING 1.0 // Adjusts sharpening intensity by averaging the original pixels to the sharpened result. - -const float peak = -1.0 / (-3.0 * clamp(CONTRAST, 0.0, 1.0) + 8.0); -const float sharpening = clamp(SHARPENING, 0.0, 1.0); - -vec4 hook() { - // fetch a 3x3 neighborhood around the pixel 'e', - // a b c - // d(e)f - // g h i - vec3 a = HOOKED_texOff(ivec2(-1, -1)).rgb; - vec3 b = HOOKED_texOff(ivec2( 0, -1)).rgb; - vec3 c = HOOKED_texOff(ivec2( 1, -1)).rgb; - vec3 d = HOOKED_texOff(ivec2(-1, 0)).rgb; - vec4 e = HOOKED_tex(HOOKED_pos); - vec3 f = HOOKED_texOff(ivec2( 1, 0)).rgb; - vec3 g = HOOKED_texOff(ivec2(-1, 1)).rgb; - vec3 h = HOOKED_texOff(ivec2( 0, 1)).rgb; - vec3 i = HOOKED_texOff(ivec2( 1, 1)).rgb; - - // Soft min and max. - // a b c b - // d e f * 0.5 + d e f * 0.5 - // g h i h - // These are 2.0x bigger (factored out the extra multiply). - vec3 mnRGB = min(min(min(d, e.rgb), min(f, b)), h); - vec3 mnRGB2 = min(mnRGB, min(min(a, c), min(g, i))); - mnRGB += mnRGB2; - - vec3 mxRGB = max(max(max(d, e.rgb), max(f, b)), h); - vec3 mxRGB2 = max(mxRGB, max(max(a, c), max(g, i))); - mxRGB += mxRGB2; - - // Smooth minimum distance to signal limit divided by smooth max. - vec3 ampRGB = clamp(min(mnRGB, 2.0 - mxRGB) / mxRGB, 0.0, 1.0); - - // Shaping amount of sharpening. 
- vec3 wRGB = sqrt(ampRGB) * peak; - vec3 weightRGB = 4.0 * wRGB + 1.0; - - // Filter shape. - // 0 w 0 - // w 1 w - // 0 w 0 - vec3 window = (b + d) + (f + h); - vec3 outColor = clamp((window * wRGB + e.rgb) / weightRGB, 0.0, 1.0); - - return vec4(mix(e.rgb, outColor, sharpening), e.a); -} - diff --git a/portable_config/shaders/AMD_CAS_rgb.glsl b/portable_config/shaders/AMD_CAS_rgb_RT.glsl similarity index 80% rename from portable_config/shaders/AMD_CAS_rgb.glsl rename to portable_config/shaders/AMD_CAS_rgb_RT.glsl index aa144bc0..ad85198c 100644 --- a/portable_config/shaders/AMD_CAS_rgb.glsl +++ b/portable_config/shaders/AMD_CAS_rgb_RT.glsl @@ -17,13 +17,61 @@ // Mod of AMD_CAS.glsl +//!PARAM TRC +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 5 +4 + +//!PARAM SHARP +//!TYPE float +//!MINIMUM 0.0 +//!MAXIMUM 1.0 +1.0 + +//!PARAM CTRS +//!TYPE float +//!MINIMUM 0.0 +//!MAXIMUM 1.0 +0.0 + +//!PARAM SLOW +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +1 + +//!PARAM SLOW2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM SLOW3 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM ALPHA +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM TRC2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 5 +4 + //!HOOK OUTPUT //!BIND HOOKED -//!DESC [AMD_CAS_rgb] (Relinearization) +//!DESC [AMD_CAS_rgb_RT] (Relinearization) // User variables - Relinearization // Compatibility -#define SOURCE_TRC 4 // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG +#define SOURCE_TRC TRC // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG // Shader code @@ -85,21 +133,21 @@ vec4 hook() { //!HOOK OUTPUT //!BIND HOOKED -//!DESC [AMD_CAS_rgb] +//!DESC [AMD_CAS_rgb_RT] // User variables // Intensity -#define SHARPENING 1.0 // Sharpening intensity: Adjusts sharpening intensity by averaging the original pixels to the sharpened result. 
1.0 is the unmodified default. 0.0 to 1.0. -#define CONTRAST 0.0 // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. +#define SHARPENING SHARP // Sharpening intensity: Adjusts sharpening intensity by averaging the original pixels to the sharpened result. 1.0 is the unmodified default. 0.0 to 1.0. +#define CONTRAST CTRS // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. // Performance -#define CAS_BETTER_DIAGONALS 1 // If set to 0, drops certain math and texture lookup operations for better performance. 0 or 1. -#define CAS_SLOW 0 // If set to 1, uses all the three RGB coefficients for calculating weights which might slightly increase quality in exchange of performance, otherwise only uses the green coefficient by default. 0 or 1. -#define CAS_GO_SLOWER 0 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. -#define SKIP_ALPHA 0 // If set to 1, skips transparency preservation for better performance on OpenGL 4.0+ renderers. 0 or 1. +#define CAS_BETTER_DIAGONALS SLOW // If set to 0, drops certain math and texture lookup operations for better performance. 0 or 1. +#define CAS_SLOW SLOW2 // If set to 1, uses all the three RGB coefficients for calculating weights which might slightly increase quality in exchange of performance, otherwise only uses the green coefficient by default. 0 or 1. +#define CAS_GO_SLOWER SLOW3 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. +#define SKIP_ALPHA ALPHA // If set to 1, skips transparency preservation for better performance on OpenGL 4.0+ renderers. 0 or 1. 
// Compatibility -#define TARGET_TRC 4 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG +#define TARGET_TRC TRC2 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG // Shader code @@ -314,4 +362,5 @@ vec4 hook() #else return vec4(pix, e.a); #endif -} \ No newline at end of file +} + diff --git a/portable_config/shaders/AMD_CAS_scaled.glsl b/portable_config/shaders/AMD_CAS_scaled_RT.glsl similarity index 87% rename from portable_config/shaders/AMD_CAS_scaled.glsl rename to portable_config/shaders/AMD_CAS_scaled_RT.glsl index 40aa11ab..1b4c05af 100644 --- a/portable_config/shaders/AMD_CAS_scaled.glsl +++ b/portable_config/shaders/AMD_CAS_scaled_RT.glsl @@ -37,15 +37,55 @@ // on what kind of content the filter is running on. You might want to create seperate versions of the file with different // colorspace values, and apply them via autoprofiles. Note that running in non-linear light will result in oversharpening. +//!PARAM TRC +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 6 +4 + +//!PARAM GAMMA +//!TYPE float +//!MINIMUM 0.0 +2.2 + +//!PARAM SHARP +//!TYPE float +//!MINIMUM 0.0 +//!MAXIMUM 1.0 +0.0 + +//!PARAM SLOW +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +1 + +//!PARAM SLOW2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM TRC2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 6 +4 + +//!PARAM GAMMA2 +//!TYPE float +//!MINIMUM 0.0 +2.2 + //!HOOK LUMA //!BIND HOOKED -//!DESC [AMD_CAS_scaled] FidelityFX Upsampling and Sharpening v1.0.2 (Relinearization) +//!DESC [AMD_CAS_scaled_RT] FidelityFX Upsampling and Sharpening v1.0.2 (Relinearization) //!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > // User variables - Relinearization // Compatibility -#define SOURCE_TRC 4 // Is needed to convert from source colorspace to linear light. 
0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom -#define CUSTOM_GAMMA 2.2 // Custom power gamma curve to use if and when SOURCE_TRC is 6. +#define SOURCE_TRC TRC // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom +#define CUSTOM_GAMMA GAMMA // Custom power gamma curve to use if and when SOURCE_TRC is 6. // Shader code @@ -98,22 +138,22 @@ vec4 hook() { //!HOOK LUMA //!BIND HOOKED -//!DESC [AMD_CAS_scaled] FidelityFX Upsampling and Sharpening v1.0.2 +//!DESC [AMD_CAS_scaled_RT] FidelityFX Upsampling and Sharpening v1.0.2 //!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > //!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * + //!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * + // User variables - Upsampling and Sharpening // Intensity -#define SHARPENING 0.0 // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. +#define SHARPENING SHARP // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. // Performance -#define CAS_BETTER_DIAGONALS 1 // If set to 0, drops certain math and texture lookup operations for better performance. This is only useful on pre-OpenGL 4.0 renderers and there is no need to disable it otherwise. 0 or 1. -#define CAS_GO_SLOWER 0 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. +#define CAS_BETTER_DIAGONALS SLOW // If set to 0, drops certain math and texture lookup operations for better performance. This is only useful on pre-OpenGL 4.0 renderers and there is no need to disable it otherwise. 0 or 1. 
+#define CAS_GO_SLOWER SLOW2 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. // Compatibility -#define TARGET_TRC 4 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom -#define CUSTOM_GAMMA 2.2 // Custom power gamma curve to use if and when TARGET_TRC is 6. +#define TARGET_TRC TRC2 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG, 6 = Custom +#define CUSTOM_GAMMA GAMMA2 // Custom power gamma curve to use if and when TARGET_TRC is 6. // Shader code @@ -412,4 +452,5 @@ vec4 hook() #endif return pix; -} \ No newline at end of file +} + diff --git a/portable_config/shaders/AMD_CAS_scaled_rgb.glsl b/portable_config/shaders/AMD_CAS_scaled_rgb_RT.glsl similarity index 88% rename from portable_config/shaders/AMD_CAS_scaled_rgb.glsl rename to portable_config/shaders/AMD_CAS_scaled_rgb_RT.glsl index e9ff1984..6f32f8cc 100644 --- a/portable_config/shaders/AMD_CAS_scaled_rgb.glsl +++ b/portable_config/shaders/AMD_CAS_scaled_rgb_RT.glsl @@ -17,14 +17,62 @@ // Mod of AMD_CAS_scaled.glsl +//!PARAM TRC +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 5 +4 + +//!PARAM SHARP +//!TYPE float +//!MINIMUM 0.0 +//!MAXIMUM 1.0 +1.0 + +//!PARAM CTRS +//!TYPE float +//!MINIMUM 0.0 +//!MAXIMUM 1.0 +0.0 + +//!PARAM SLOW +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +1 + +//!PARAM SLOW2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM SLOW3 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM ALPHA +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM TRC2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 5 +4 + //!HOOK MAIN //!BIND HOOKED -//!DESC [AMD_CAS_scaled_rgb] (Relinearization) +//!DESC [AMD_CAS_scaled_rgb_RT] (Relinearization) //!WHEN OUTPUT.w OUTPUT.h * MAIN.w MAIN.h * / 1.0 > // User 
variables - Relinearization // Compatibility -#define SOURCE_TRC 4 // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG +#define SOURCE_TRC TRC // Is needed to convert from source colorspace to linear light. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG // Shader code @@ -86,24 +134,24 @@ vec4 hook() { //!HOOK MAIN //!BIND HOOKED -//!DESC [AMD_CAS_scaled_rgb] +//!DESC [AMD_CAS_scaled_rgb_RT] //!WHEN OUTPUT.w OUTPUT.h * MAIN.w MAIN.h * / 1.0 > //!WIDTH OUTPUT.w //!HEIGHT OUTPUT.h // User variables - Upsampling and Sharpening // Intensity -#define SHARPENING 1.0 // Sharpening intensity: Adjusts sharpening intensity by averaging the original pixels to the sharpened result. 1.0 is the unmodified default. Will be ignored if SKIP_ORI_LOOKUP is 1. 0.0 to 1.0. -#define CONTRAST 0.0 // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. +#define SHARPENING SHARP // Sharpening intensity: Adjusts sharpening intensity by averaging the original pixels to the sharpened result. 1.0 is the unmodified default. Will be ignored if SKIP_ORI_LOOKUP is 1. 0.0 to 1.0. +#define CONTRAST CTRS // Adjusts the range the shader adapts to high contrast (0 is not all the way off). Higher values = more high contrast sharpening. 0.0 to 1.0. // Performance -#define CAS_BETTER_DIAGONALS 1 // If set to 0, drops certain math and texture lookup operations for better performance. 0 or 1. -#define CAS_SLOW 0 // If set to 1, uses all the three RGB coefficients for calculating weights which might slightly increase quality in exchange of performance, otherwise only uses the green coefficient by default. 0 or 1. -#define CAS_GO_SLOWER 0 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. 
-#define SKIP_ORI_LOOKUP 0 // If set to 1, skips transparency preservation and the optional blending step for better performance. 0 or 1. +#define CAS_BETTER_DIAGONALS SLOW // If set to 0, drops certain math and texture lookup operations for better performance. 0 or 1. +#define CAS_SLOW SLOW2 // If set to 1, uses all the three RGB coefficients for calculating weights which might slightly increase quality in exchange of performance, otherwise only uses the green coefficient by default. 0 or 1. +#define CAS_GO_SLOWER SLOW3 // If set to 1, disables the use of optimized approximate transcendental functions which might slightly increase accuracy in exchange of performance. 0 or 1. +#define SKIP_ORI_LOOKUP ALPHA // If set to 1, skips transparency preservation and the optional blending step for better performance. 0 or 1. // Compatibility -#define TARGET_TRC 4 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG +#define TARGET_TRC TRC2 // Is needed to convert from source colorspace to target colorspace. 0 = None (Skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886, 5 = HLG // Shader code diff --git a/portable_config/shaders/AMD_FSR_EASU_chroma_RT.glsl b/portable_config/shaders/AMD_FSR_EASU_chroma_RT.glsl new file mode 100644 index 00000000..e15b28f6 --- /dev/null +++ b/portable_config/shaders/AMD_FSR_EASU_chroma_RT.glsl @@ -0,0 +1,347 @@ +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FidelityFX FSR v1.0.2 by AMD +// ported to mpv by agyild + +// Changelog +// Made it compatible with pre-OpenGL 4.0 renderers +// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should cause a major increase in performance, especially on OpenGL 4.0+ renderers (4+2 texture lookups vs. 12+5) +// Removed transparency preservation mechanism since the alpha channel is a separate source plane than LUMA +// Added optional performance-saving lossy optimizations to EASU (Credit: atyuwen, https://atyuwen.github.io/posts/optimizing-fsr/) +// +// Notes +// Per AMD's guidelines only upscales content up to 4x (e.g., 1080p -> 2160p, 720p -> 1440p etc.) 
and everything else in between, +// that means FSR will scale up to 4x at maximum, and any further scaling will be processed by mpv's scalers + +//!PARAM AR +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +1 + +//!PARAM FAST +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!PARAM FAST2 +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!DESC [AMD_FSR_EASU_chroma_RT] +//!HOOK CHROMA +//!BIND HOOKED +//!WIDTH LUMA.w +//!HEIGHT LUMA.h +//!OFFSET ALIGN +//!WHEN HOOKED.w LUMA.w < HOOKED.h LUMA.h < * +//!COMPONENTS 1 + +// User variables - EASU +#define FSR_EASU_DERING AR // If set to 0, disables deringing for a small increase in performance. 0 or 1. +#define FSR_EASU_SIMPLE_ANALYSIS FAST // If set to 1, uses a simpler single-pass direction and length analysis for an increase in performance. 0 or 1. +#define FSR_EASU_QUIT_EARLY FAST2 // If set to 1, uses bilinear filtering for non-edge pixels and skips EASU on those regions for an increase in performance. 0 or 1. + +// Shader code + +#ifndef FSR_EASU_DIR_THRESHOLD + #if (FSR_EASU_QUIT_EARLY == 1) + #define FSR_EASU_DIR_THRESHOLD 64.0 + #elif (FSR_EASU_QUIT_EARLY == 0) + #define FSR_EASU_DIR_THRESHOLD 32768.0 + #endif +#endif + +float APrxLoRcpF1(float a) { + return uintBitsToFloat(uint(0x7ef07ebb) - floatBitsToUint(a)); +} + +float APrxLoRsqF1(float a) { + return uintBitsToFloat(uint(0x5f347d74) - (floatBitsToUint(a) >> uint(1))); +} + +float AMin3F1(float x, float y, float z) { + return min(x, min(y, z)); +} + +float AMax3F1(float x, float y, float z) { + return max(x, max(y, z)); +} + +#if (FSR_PQ == 1) + +float ToGamma2(float a) { + return pow(a, 4.0); +} + +#endif + + // Filtering for a given tap for the scalar. + void FsrEasuTap( + inout float aC, // Accumulated color, with negative lobe. + inout float aW, // Accumulated weight. + vec2 off, // Pixel offset from resolve position to tap. + vec2 dir, // Gradient direction. + vec2 len, // Length. + float lob, // Negative lobe strength. + float clp, // Clipping point. 
+ float c){ // Tap color. + // Rotate offset by direction. + vec2 v; + v.x = (off.x * ( dir.x)) + (off.y * dir.y); + v.y = (off.x * (-dir.y)) + (off.y * dir.x); + // Anisotropy. + v *= len; + // Compute distance^2. + float d2 = v.x * v.x + v.y * v.y; + // Limit to the window as at corner, 2 taps can easily be outside. + d2 = min(d2, clp); + // Approximation of lancos2 without sin() or rcp(), or sqrt() to get x. + // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 + // |_______________________________________| |_______________| + // base window + // The general form of the 'base' is, + // (a*(b*x^2-1)^2-(a-1)) + // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. + float wB = float(2.0 / 5.0) * d2 + -1.0; + float wA = lob * d2 + -1.0; + wB *= wB; + wA *= wA; + wB = float(25.0 / 16.0) * wB + float(-(25.0 / 16.0 - 1.0)); + float w = wB * wA; + // Do weighted average. + aC += c * w; + aW += w; +} + +// Accumulate direction and length. +void FsrEasuSet( + inout vec2 dir, + inout float len, + vec2 pp, +#if (FSR_EASU_SIMPLE_ANALYSIS == 1) + float b, float c, + float i, float j, float f, float e, + float k, float l, float h, float g, + float o, float n +#elif (FSR_EASU_SIMPLE_ANALYSIS == 0) + bool biS, bool biT, bool biU, bool biV, + float lA, float lB, float lC, float lD, float lE +#endif + ){ + // Compute bilinear weight, branches factor out as predicates are compiler time immediates. 
+ // s t + // u v +#if (FSR_EASU_SIMPLE_ANALYSIS == 1) + vec4 w = vec4(0.0); + w.x = (1.0 - pp.x) * (1.0 - pp.y); + w.y = pp.x * (1.0 - pp.y); + w.z = (1.0 - pp.x) * pp.y; + w.w = pp.x * pp.y; + + float lA = dot(w, vec4(b, c, f, g)); + float lB = dot(w, vec4(e, f, i, j)); + float lC = dot(w, vec4(f, g, j, k)); + float lD = dot(w, vec4(g, h, k, l)); + float lE = dot(w, vec4(j, k, n, o)); +#elif (FSR_EASU_SIMPLE_ANALYSIS == 0) + float w = 0.0; + if (biS) + w = (1.0 - pp.x) * (1.0 - pp.y); + if (biT) + w = pp.x * (1.0 - pp.y); + if (biU) + w = (1.0 - pp.x) * pp.y; + if (biV) + w = pp.x * pp.y; +#endif + // Direction is the '+' diff. + // a + // b c d + // e + // Then takes magnitude from abs average of both sides of 'c'. + // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. + float dc = lD - lC; + float cb = lC - lB; + float lenX = max(abs(dc), abs(cb)); + lenX = APrxLoRcpF1(lenX); + float dirX = lD - lB; + lenX = clamp(abs(dirX) * lenX, 0.0, 1.0); + lenX *= lenX; + // Repeat for the y axis. + float ec = lE - lC; + float ca = lC - lA; + float lenY = max(abs(ec), abs(ca)); + lenY = APrxLoRcpF1(lenY); + float dirY = lE - lA; + lenY = clamp(abs(dirY) * lenY, 0.0, 1.0); + lenY *= lenY; +#if (FSR_EASU_SIMPLE_ANALYSIS == 1) + len = lenX + lenY; + dir = vec2(dirX, dirY); +#elif (FSR_EASU_SIMPLE_ANALYSIS == 0) + dir += vec2(dirX, dirY) * w; + len += dot(vec2(w), vec2(lenX, lenY)); +#endif +} + +vec4 hook() { + // Result + vec4 pix = vec4(0.0, 0.0, 0.0, 1.0); + + //------------------------------------------------------------------------------------------------------------------------------ + // +---+---+ + // | | | + // +--(0)--+ + // | b | c | + // +---F---+---+---+ + // | e | f | g | h | + // +--(1)--+--(2)--+ + // | i | j | k | l | + // +---+---+---+---+ + // | n | o | + // +--(3)--+ + // | | | + // +---+---+ + // Get position of 'F'. 
+ vec2 pp = HOOKED_pos * HOOKED_size - vec2(0.5); + vec2 fp = floor(pp); + pp -= fp; + //------------------------------------------------------------------------------------------------------------------------------ + // 12-tap kernel. + // b c + // e f g h + // i j k l + // n o + // Gather 4 ordering. + // a b + // r g + // Allowing dead-code removal to remove the 'z's. + const ivec2 quad_idx[4] = {{ 1,-1}, { 0, 1}, { 2, 1}, { 1, 3}}; + mat2x4 bczz = mat2x4(HOOKED_gather(vec2((fp + quad_idx[0]) * HOOKED_pt), 0), + HOOKED_gather(vec2((fp + quad_idx[0]) * HOOKED_pt), 1)); + mat2x4 ijfe = mat2x4(HOOKED_gather(vec2((fp + quad_idx[1]) * HOOKED_pt), 0), + HOOKED_gather(vec2((fp + quad_idx[1]) * HOOKED_pt), 1)); + mat2x4 klhg = mat2x4(HOOKED_gather(vec2((fp + quad_idx[2]) * HOOKED_pt), 0), + HOOKED_gather(vec2((fp + quad_idx[2]) * HOOKED_pt), 1)); + mat2x4 zzon = mat2x4(HOOKED_gather(vec2((fp + quad_idx[3]) * HOOKED_pt), 0), + HOOKED_gather(vec2((fp + quad_idx[3]) * HOOKED_pt), 1)); + +for(int i = 0; i < 2; i++) +{ + //------------------------------------------------------------------------------------------------------------------------------ + // Rename. + float bL = bczz[i].x; + float cL = bczz[i].y; + float iL = ijfe[i].x; + float jL = ijfe[i].y; + float fL = ijfe[i].z; + float eL = ijfe[i].w; + float kL = klhg[i].x; + float lL = klhg[i].y; + float hL = klhg[i].z; + float gL = klhg[i].w; + float oL = zzon[i].z; + float nL = zzon[i].w; + + // Accumulate for bilinear interpolation. 
+ vec2 dir = vec2(0.0); + float len = 0.0; +#if (FSR_EASU_SIMPLE_ANALYSIS == 1) + FsrEasuSet(dir, len, pp, bL, cL, iL, jL, fL, eL, kL, lL, hL, gL, oL, nL); +#elif (FSR_EASU_SIMPLE_ANALYSIS == 0) + FsrEasuSet(dir, len, pp, true, false, false, false, bL, eL, fL, gL, jL); + FsrEasuSet(dir, len, pp, false, true, false, false, cL, fL, gL, hL, kL); + FsrEasuSet(dir, len, pp, false, false, true, false, fL, iL, jL, kL, nL); + FsrEasuSet(dir, len, pp, false, false, false, true, gL, jL, kL, lL, oL); +#endif + //------------------------------------------------------------------------------------------------------------------------------ + // Normalize with approximation, and cleanup close to zero. + vec2 dir2 = dir * dir; + float dirR = dir2.x + dir2.y; + bool zro = dirR < float(1.0 / FSR_EASU_DIR_THRESHOLD); + dirR = APrxLoRsqF1(dirR); +#if (FSR_EASU_QUIT_EARLY == 1) + if (zro) { + vec4 w = vec4(0.0); + w.x = (1.0 - pp.x) * (1.0 - pp.y); + w.y = pp.x * (1.0 - pp.y); + w.z = (1.0 - pp.x) * pp.y; + w.w = pp.x * pp.y; + + pix.r = clamp(dot(w, vec4(fL, gL, jL, kL)), 0.0, 1.0); + return pix; + } +#elif (FSR_EASU_QUIT_EARLY == 0) + dirR = zro ? 1.0 : dirR; + dir.x = zro ? 1.0 : dir.x; +#endif + dir *= vec2(dirR); + // Transform from {0 to 2} to {0 to 1} range, and shape with square. + len = len * 0.5; + len *= len; + // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}. + float stretch = (dir.x * dir.x + dir.y * dir.y) * APrxLoRcpF1(max(abs(dir.x), abs(dir.y))); + // Anisotropic length after rotation, + // x := 1.0 lerp to 'stretch' on edges + // y := 1.0 lerp to 2x on edges + vec2 len2 = vec2(1.0 + (stretch - 1.0) * len, 1.0 + -0.5 * len); + // Based on the amount of 'edge', + // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}. + float lob = 0.5 + float((1.0 / 4.0 - 0.04) - 0.5) * len; + // Set distance^2 clipping point to the end of the adjustable window. 
+ float clp = APrxLoRcpF1(lob); + //------------------------------------------------------------------------------------------------------------------------------ + // Accumulation + // b c + // e f g h + // i j k l + // n o + float aC = 0.0; + float aW = 0.0; + FsrEasuTap(aC, aW, vec2( 0.0,-1.0) - pp, dir, len2, lob, clp, bL); // b + FsrEasuTap(aC, aW, vec2( 1.0,-1.0) - pp, dir, len2, lob, clp, cL); // c + FsrEasuTap(aC, aW, vec2(-1.0, 1.0) - pp, dir, len2, lob, clp, iL); // i + FsrEasuTap(aC, aW, vec2( 0.0, 1.0) - pp, dir, len2, lob, clp, jL); // j + FsrEasuTap(aC, aW, vec2( 0.0, 0.0) - pp, dir, len2, lob, clp, fL); // f + FsrEasuTap(aC, aW, vec2(-1.0, 0.0) - pp, dir, len2, lob, clp, eL); // e + FsrEasuTap(aC, aW, vec2( 1.0, 1.0) - pp, dir, len2, lob, clp, kL); // k + FsrEasuTap(aC, aW, vec2( 2.0, 1.0) - pp, dir, len2, lob, clp, lL); // l + FsrEasuTap(aC, aW, vec2( 2.0, 0.0) - pp, dir, len2, lob, clp, hL); // h + FsrEasuTap(aC, aW, vec2( 1.0, 0.0) - pp, dir, len2, lob, clp, gL); // g + FsrEasuTap(aC, aW, vec2( 1.0, 2.0) - pp, dir, len2, lob, clp, oL); // o + FsrEasuTap(aC, aW, vec2( 0.0, 2.0) - pp, dir, len2, lob, clp, nL); // n + //------------------------------------------------------------------------------------------------------------------------------ + // Normalize and dering. + pix[i] = aC / aW; +#if (FSR_EASU_DERING == 1) + float min1 = min(AMin3F1(fL, gL, jL), kL); + float max1 = max(AMax3F1(fL, gL, jL), kL); + pix[i] = clamp(pix[i], min1, max1); +#endif + pix[i] = clamp(pix[i], 0.0, 1.0); +} + return pix; +} + diff --git a/portable_config/shaders/AMD_FSR_RCAS_luma_RT.glsl b/portable_config/shaders/AMD_FSR_RCAS_luma_RT.glsl new file mode 100644 index 00000000..ce42104c --- /dev/null +++ b/portable_config/shaders/AMD_FSR_RCAS_luma_RT.glsl @@ -0,0 +1,141 @@ +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FidelityFX FSR v1.0.2 by AMD +// ported to mpv by agyild + +// Changelog +// Made it compatible with pre-OpenGL 4.0 renderers +// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should cause a major increase in performance, especially on OpenGL 4.0+ renderers (4+2 texture lookups vs. 12+5) +// Removed transparency preservation mechanism since the alpha channel is a separate source plane than LUMA +// Added optional performance-saving lossy optimizations to EASU (Credit: atyuwen, https://atyuwen.github.io/posts/optimizing-fsr/) +// +// Notes +// Per AMD's guidelines only upscales content up to 4x (e.g., 1080p -> 2160p, 720p -> 1440p etc.) 
and everything else in between, +// that means FSR will scale up to 4x at maximum, and any further scaling will be processed by mpv's scalers + +//!PARAM SHARP +//!TYPE float +//!MINIMUM 0.0 +//!MAXIMUM 2.0 +0.2 + +//!PARAM NR +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +1 + +//!PARAM PQ +//!TYPE int +//!MINIMUM 0 +//!MAXIMUM 1 +0 + +//!HOOK LUMA +//!BIND HOOKED +//!DESC [AMD_FSR_RCAS_luma_RT] +//!COMPONENTS 1 + +// User variables - RCAS +#define SHARPNESS SHARP // Controls the amount of sharpening. The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. 0.0 to 2.0. +#define FSR_RCAS_DENOISE NR // If set to 1, lessens the sharpening on noisy areas. Can be disabled for better performance. 0 or 1. +#define FSR_PQ PQ // Whether the source content has PQ gamma or not. + +// Shader code + +#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0)) // This is set at the limit of providing unnatural results for sharpening. + +float APrxMedRcpF1(float a) { + float b = uintBitsToFloat(uint(0x7ef19fff) - floatBitsToUint(a)); + return b * (-b * a + 2.0); +} + +float AMax3F1(float x, float y, float z) { + return max(x, max(y, z)); +} + +float AMin3F1(float x, float y, float z) { + return min(x, min(y, z)); +} + +#if (FSR_PQ == 1) + +float FromGamma2(float a) { + return sqrt(sqrt(a)); +} + +#endif + +vec4 hook() { + // Algorithm uses minimal 3x3 pixel neighborhood. 
+ // b + // d e f + // h +#if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310))) + vec3 bde = HOOKED_gather(HOOKED_pos + HOOKED_pt * vec2(-0.5), 0).xyz; + float b = bde.z; + float d = bde.x; + float e = bde.y; + + vec2 fh = HOOKED_gather(HOOKED_pos + HOOKED_pt * vec2(0.5), 0).zx; + float f = fh.x; + float h = fh.y; +#else + float b = HOOKED_texOff(vec2( 0.0, -1.0)).r; + float d = HOOKED_texOff(vec2(-1.0, 0.0)).r; + float e = HOOKED_tex(HOOKED_pos).r; + float f = HOOKED_texOff(vec2(1.0, 0.0)).r; + float h = HOOKED_texOff(vec2(0.0, 1.0)).r; +#endif + + // Min and max of ring. + float mn1L = min(AMin3F1(b, d, f), h); + float mx1L = max(AMax3F1(b, d, f), h); + + // Immediate constants for peak range. + vec2 peakC = vec2(1.0, -1.0 * 4.0); + + // Limiters, these need to be high precision RCPs. + float hitMinL = min(mn1L, e) / (4.0 * mx1L); + float hitMaxL = (peakC.x - max(mx1L, e)) / (4.0 * mn1L + peakC.y); + float lobeL = max(-hitMinL, hitMaxL); + float lobe = max(float(-FSR_RCAS_LIMIT), min(lobeL, 0.0)) * exp2(-clamp(float(SHARPNESS), 0.0, 2.0)); + + // Apply noise removal. +#if (FSR_RCAS_DENOISE == 1) + // Noise detection. + float nz = 0.25 * b + 0.25 * d + 0.25 * f + 0.25 * h - e; + nz = clamp(abs(nz) * APrxMedRcpF1(AMax3F1(AMax3F1(b, d, e), f, h) - AMin3F1(AMin3F1(b, d, e), f, h)), 0.0, 1.0); + nz = -0.5 * nz + 1.0; + lobe *= nz; +#endif + + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. 
+ float rcpL = APrxMedRcpF1(4.0 * lobe + 1.0); + vec4 pix = vec4(0.0, 0.0, 0.0, 1.0); + pix.r = float((lobe * b + lobe * d + lobe * h + lobe * f + e) * rcpL); +#if (FSR_PQ == 1) + pix.r = FromGamma2(pix.r); +#endif + + return pix; +} + diff --git a/portable_config/shaders/AMD_FSR_RCAS_rgb_RT.glsl b/portable_config/shaders/AMD_FSR_RCAS_rgb_RT.glsl new file mode 100644 index 00000000..e9e0410a --- /dev/null +++ b/portable_config/shaders/AMD_FSR_RCAS_rgb_RT.glsl @@ -0,0 +1,152 @@ +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+// Mod of AMD_FSR.glsl
+
+//!PARAM SHARP
+//!TYPE float
+//!MINIMUM 0.0
+//!MAXIMUM 2.0
+0.25
+
+//!PARAM NR
+//!TYPE DEFINE
+//!MINIMUM 0
+//!MAXIMUM 1
+1
+
+//!PARAM ALPHA
+//!TYPE DEFINE
+//!MINIMUM 0
+//!MAXIMUM 1
+1
+
+//!HOOK MAIN
+//!BIND HOOKED
+//!DESC [AMD_FSR_RCAS_rgb_RT]
+
+// User variables - RCAS (NR and ALPHA are TYPE DEFINE because they are only tested by the preprocessor via #if)
+#define SHARPNESS SHARP // Controls the amount of sharpening. The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. 0.0 to 2.0 (the SHARP PARAM range).
+#define FSR_RCAS_DENOISE NR // If set to 1, lessens the sharpening on noisy areas. Can be disabled for better performance. 0 or 1.
+#define FSR_RCAS_PASSTHROUGH_ALPHA ALPHA // If set to 1, preserves transparency in the image. 0 or 1.
+
+// Shader code
+
+#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0)) // This is set at the limit of providing unnatural results for sharpening.
+
+float APrxMedRcpF1(float a) {
+    float b = uintBitsToFloat(uint(0x7ef19fff) - floatBitsToUint(a));
+    return b * (-b * a + float(2.0));
+}
+
+float AMax3F1(float x, float y, float z) {
+    return max(x, max(y, z));
+}
+
+float AMin3F1(float x, float y, float z) {
+    return min(x, min(y, z));
+}
+
+vec4 hook() {
+    // Algorithm uses minimal 3x3 pixel neighborhood.
+    //    b
+    //  d e f
+    //    h
+
+    vec4 pix;
+
+    vec3 b = HOOKED_texOff(vec2( 0.0, -1.0)).rgb;
+    vec3 d = HOOKED_texOff(vec2(-1.0, 0.0)).rgb;
+#if (FSR_RCAS_PASSTHROUGH_ALPHA == 1)
+    vec4 ee = HOOKED_tex(HOOKED_pos);
+    vec3 e = ee.rgb;
+    pix.a = ee.a;
+#else
+    vec3 e = HOOKED_tex(HOOKED_pos).rgb;
+    pix.a = float(1.0);
+#endif
+    vec3 f = HOOKED_texOff(vec2(1.0, 0.0)).rgb;
+    vec3 h = HOOKED_texOff(vec2(0.0, 1.0)).rgb;
+
+    // Rename (32-bit) or regroup (16-bit).
+    float bR = b.r;
+    float bG = b.g;
+    float bB = b.b;
+    float dR = d.r;
+    float dG = d.g;
+    float dB = d.b;
+    float eR = e.r;
+    float eG = e.g;
+    float eB = e.b;
+    float fR = f.r;
+    float fG = f.g;
+    float fB = f.b;
+    float hR = h.r;
+    float hG = h.g;
+    float hB = h.b;
+
+    // Min and max of ring.
+    float mn4R = min(AMin3F1(bR, dR, fR), hR);
+    float mn4G = min(AMin3F1(bG, dG, fG), hG);
+    float mn4B = min(AMin3F1(bB, dB, fB), hB);
+    float mx4R = max(AMax3F1(bR, dR, fR), hR);
+    float mx4G = max(AMax3F1(bG, dG, fG), hG);
+    float mx4B = max(AMax3F1(bB, dB, fB), hB);
+
+    // Immediate constants for peak range.
+    vec2 peakC = vec2(1.0, -1.0 * 4.0);
+
+    // Limiters, these need to be high precision RCPs.
+    float hitMinR = min(mn4R, eR) * (float(1.0) / (float(4.0) * mx4R));
+    float hitMinG = min(mn4G, eG) * (float(1.0) / (float(4.0) * mx4G));
+    float hitMinB = min(mn4B, eB) * (float(1.0) / (float(4.0) * mx4B));
+    float hitMaxR = (peakC.x - max(mx4R, eR)) * (float(1.0) / (float(4.0) * mn4R + peakC.y));
+    float hitMaxG = (peakC.x - max(mx4G, eG)) * (float(1.0) / (float(4.0) * mn4G + peakC.y));
+    float hitMaxB = (peakC.x - max(mx4B, eB)) * (float(1.0) / (float(4.0) * mn4B + peakC.y));
+    float lobeR = max(-hitMinR, hitMaxR);
+    float lobeG = max(-hitMinG, hitMaxG);
+    float lobeB = max(-hitMinB, hitMaxB);
+    float lobe = max(float(-FSR_RCAS_LIMIT), min(AMax3F1(lobeR, lobeG, lobeB), float(0.0))) * exp2(-max(float(SHARPNESS), float(0.0)));
+
+    // Apply noise removal.
+#if (FSR_RCAS_DENOISE == 1)
+    // Luma times 2.
+    float bL = bB * float(0.5) + (bR* float(0.5) + bG);
+    float dL = dB * float(0.5) + (dR* float(0.5) + dG);
+    float eL = eB * float(0.5) + (eR* float(0.5) + eG);
+    float fL = fB * float(0.5) + (fR* float(0.5) + fG);
+    float hL = hB * float(0.5) + (hR* float(0.5) + hG);
+
+    // Noise detection.
+    float nz = float(0.25) * bL + float(0.25) * dL + float(0.25) * fL + float(0.25) * hL-eL;
+    nz = clamp(abs(nz) * APrxMedRcpF1(AMax3F1(AMax3F1(bL, dL, eL), fL, hL) - AMin3F1(AMin3F1(bL, dL, eL), fL, hL)), 0.0, 1.0);
+    nz = float(-0.5) * nz + float(1.0);
+    lobe *= nz;
+#endif
+
+    // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+    float rcpL = APrxMedRcpF1(float(4.0) * lobe + float(1.0));
+    pix.rgb = vec3((lobe * bR+ lobe * dR + lobe * hR + lobe * fR + eR) * rcpL,
+                   (lobe * bG+ lobe * dG + lobe * hG + lobe * fG + eG) * rcpL,
+                   (lobe * bB+ lobe * dB + lobe * hB + lobe * fB + eB) * rcpL);
+
+    return pix;
+}
+