Initial project commit

2026-01-08 16:50:20 +00:00
commit f0c5a8b267
29596 changed files with 4861782 additions and 0 deletions
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/ISTPEnabledRenderPipeline.cs
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/ISTPEnabledRenderPipeline.cs
@@ -0,0 +1,13 @@
+namespace UnityEngine.Rendering
+{
+    /// <summary>
+    /// By implementing this interface, a render pipeline can indicate its usage of the STP upscaler.
+    /// </summary>
+    /// <remarks>
+    /// The interface only exposes the read-only <see cref="isStpUsed"/> property.
+    /// </remarks>
+    public interface ISTPEnabledRenderPipeline
+    {
+        /// <summary>
+        /// Indicates if this render pipeline instance uses STP.
+        /// </summary>
+        /// <value><c>true</c> if this render pipeline instance uses STP; otherwise <c>false</c>.</value>
+        bool isStpUsed { get; }
+    }
+}
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/ISTPEnabledRenderPipeline.cs.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/ISTPEnabledRenderPipeline.cs.meta
@@ -0,0 +1,2 @@
+fileFormatVersion: 2
+guid: f3117c72f2a5c6d47a581d5ccc60ca55
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs.hlsl
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs.hlsl
@@ -0,0 +1,31 @@
+//
+// This file was automatically generated. Please don't edit by hand. Execute Editor command [ Edit > Rendering > Generate Shader Includes ] instead
+//
+
+#ifndef STP_CS_HLSL
+#define STP_CS_HLSL
+//
+// UnityEngine.Rendering.STP+StpSetupPerViewConstants:  static fields
+//
+#define STPSETUPPERVIEWCONSTANTS_COUNT (8)
+
+// Generated from UnityEngine.Rendering.STP+StpConstantBufferData
+// PackingRules = Exact
+CBUFFER_START(StpConstantBufferData)
+    float4 _StpCommonConstant;
+    float4 _StpSetupConstants0;
+    float4 _StpSetupConstants1;
+    float4 _StpSetupConstants2;
+    float4 _StpSetupConstants3;
+    float4 _StpSetupConstants4;
+    float4 _StpSetupConstants5;
+    float4 _StpSetupPerViewConstants[16];
+    float4 _StpDilConstants0;
+    float4 _StpTaaConstants0;
+    float4 _StpTaaConstants1;
+    float4 _StpTaaConstants2;
+    float4 _StpTaaConstants3;
+CBUFFER_END
+
+
+#endif
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs.hlsl.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 62ccbd1f548f598418c8662d9f7da3ee
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/STP.cs.meta
@@ -0,0 +1,2 @@
+fileFormatVersion: 2
+guid: 689bcf9068b54f1458c35e139c08a260
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/Stp.hlsl
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/Stp.hlsl
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/Stp.hlsl.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/Stp.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: edf9bcadab79ea842850b55b8d3a60d4
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpCommon.hlsl
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpCommon.hlsl
@@ -0,0 +1,216 @@
+// This is necessary to prevent Unity from deciding that our default config logic is actually an include guard declaration
+#ifndef STP_COMMON_UNITY_INCLUDE_GUARD
+#define STP_COMMON_UNITY_INCLUDE_GUARD
+
+///
+/// Spatial-Temporal Post-Processing (STP) Common Shader Code
+///
+/// This file provides configuration defines and other common utilities associated with STP
+/// See STP.cs for more details on how this shader code interacts with C#
+///
+/// Usage:
+/// - Add control defines
+/// - Include this file in a shader pass associated with STP
+/// - Call relevant STP function
+///
+/// By default, no shader functions are available until they are specifically requested via define.
+///
+/// The following defines can be used to enable shader functionality:
+/// - STP_PAT
+///     - Enables the "Pattern" pass
+/// - STP_DIL
+///     - Enables the "Dilation" pass
+/// - STP_SAA
+///     - Enables the "Spatial Anti-Aliasing" pass
+/// - STP_TAA
+///     - Enables the "TAA" pass
+
+// Indicate that we'll be using the HLSL implementation of STP
+#define STP_HLSL 1
+#define STP_GPU 1
+
+// Disable grain since we don't currently have a way to integrate this with the rest of Unity's post processing
+#define STP_GRAIN 0
+
+// Enable the minimum precision path when supported by the shader environment.
+// STP_MEDIUM is set when REAL_IS_HALF evaluates to non-zero or when the native 16-bit keyword is defined.
+#if REAL_IS_HALF || defined(UNITY_DEVICE_SUPPORTS_NATIVE_16BIT)
+    #define STP_MEDIUM 1
+#endif
+
+// Mobile platforms use a simplified version of STP to reduce runtime overhead.
+// Switch is grouped with the mobile targets here. STP_TAA_Q is not read anywhere in this file.
+#if defined(SHADER_API_MOBILE) || defined(SHADER_API_SWITCH)
+    #define STP_TAA_Q 0
+#endif
+
+// Switch enables the same STP_BUG_SAT_INF flag that is documented for Metal below.
+// NOTE(review): the Switch-specific reason is not stated in this file.
+#if defined(SHADER_API_SWITCH)
+    #define STP_BUG_SAT_INF 1
+#endif
+
+// Enable workarounds that help us avoid issues on Metal
+#if defined(SHADER_API_METAL)
+    // Relying on infinity behavior causes issues in the on-screen inline pass calculations
+    // We expect this option to be required on Metal because the shading language spec states that the fast-math
+    // option is on by default which disables support for proper INF handling.
+    #define STP_BUG_SAT_INF 1
+#endif
+
+// PSSL also enables the STP_BUG_SAT_INF flag.
+// NOTE(review): the PSSL-specific reason is not stated in this file.
+#if defined(SHADER_API_PSSL)
+    #define STP_BUG_SAT_INF 1
+#endif
+
+// Select the STP code path: exactly one of STP_16BIT / STP_32BIT ends up defined.
+#if defined(UNITY_DEVICE_SUPPORTS_NATIVE_16BIT)
+    #define STP_16BIT 1
+
+    // Extra flag that is only set for PSSL on the 16-bit path.
+    // NOTE(review): its effect is implemented outside this file - confirm in Stp.hlsl.
+    #if defined(SHADER_API_PSSL)
+        #define STP_BUG_PRX 1
+    #endif
+#else
+    #define STP_32BIT 1
+#endif
+
+// Map the ENABLE_DEBUG_MODE keyword onto STP_BUG.
+// NOTE(review): presumably this turns on the debug output hooks (see StpBugF below) - confirm in Stp.hlsl.
+#if defined(ENABLE_DEBUG_MODE)
+    #define STP_BUG 1
+#endif
+
+// Include the STP HLSL files
+#include "Packages/com.unity.render-pipelines.core/Runtime/STP/Stp.hlsl"
+#include "Packages/com.unity.render-pipelines.core/Runtime/STP/STP.cs.hlsl"
+
+// Include TextureXR.hlsl for XR macros
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/TextureXR.hlsl"
+
+//
+// Common
+//
+
+// Thread group layout.
+// STP_GROUP_SIZE is the number of threads in a group. The shift values are applied to the group ID
+// by ComputeGroupPos, i.e. they are log2 of the group's pixel footprint along X and Y:
+//   large kernel : 128 threads -> (1 << 3) x (1 << 4) = 8 x 16
+//   default      :  64 threads -> (1 << 3) x (1 << 3) = 8 x 8
+#if defined(ENABLE_LARGE_KERNEL)
+#define STP_GROUP_SIZE 128
+#define STP_GROUP_SIZE_SHIFT_X 3
+#define STP_GROUP_SIZE_SHIFT_Y 4
+#else
+#define STP_GROUP_SIZE 64
+#define STP_GROUP_SIZE_SHIFT_X 3
+#define STP_GROUP_SIZE_SHIFT_Y 3
+#endif
+
+// Converts a thread group ID into the position of the group's first pixel by shifting each axis
+// left by STP_GROUP_SIZE_SHIFT_X / STP_GROUP_SIZE_SHIFT_Y. One variant exists per integer width.
+#if defined(STP_16BIT)
+StpW2 ComputeGroupPos(StpW2 groupId)
+{
+    StpW1 originX = groupId.x << StpW1_(STP_GROUP_SIZE_SHIFT_X);
+    StpW1 originY = groupId.y << StpW1_(STP_GROUP_SIZE_SHIFT_Y);
+
+    return StpW2(originX, originY);
+}
+#else
+StpMU2 ComputeGroupPos(StpMU2 groupId)
+{
+    StpMU1 originX = groupId.x << StpMU1_(STP_GROUP_SIZE_SHIFT_X);
+    StpMU1 originY = groupId.y << StpMU1_(STP_GROUP_SIZE_SHIFT_Y);
+
+    return StpMU2(originX, originY);
+}
+#endif
+
+// Accessors for the packed _StpCommonConstant vector.
+// .x is reinterpreted as a uint whose fields are read by the decode helpers in this file:
+//   bits  0-7  : noise width minus one, used as a coordinate mask for _StpBlueNoiseIn
+//   bit   8    : "has valid history" flag (DecodeHasValidHistory)
+//   bits 16-23 : stencil mask             (DecodeStencilMask)
+//   bits 24-31 : debug view index         (DecodeDebugViewIndex)
+// .y and .z are exposed as the Z and W Z-buffer parameters.
+#define STP_COMMON_CONSTANT asuint(_StpCommonConstant.x)
+#define STP_ZBUFFER_PARAMS_Z _StpCommonConstant.y
+#define STP_ZBUFFER_PARAMS_W _StpCommonConstant.z
+
+TEXTURE2D(_StpBlueNoiseIn);
+
+RW_TEXTURE2D_X(float4, _StpDebugOut);
+
+SAMPLER(s_point_clamp_sampler);
+SAMPLER(s_linear_clamp_sampler);
+SAMPLER(s_linear_repeat_sampler);
+
+#if defined(STP_32BIT)
+// Bitfield extract: returns the 'numBits' wide field of 'data' that starts at bit 'offset'.
+StpMU1 StpBfeF(StpMU1 data, StpMU1 offset, StpMU1 numBits)
+{
+    StpMU1 shifted = data >> offset;
+    StpMU1 fieldMask = (StpMU1(1) << numBits) - StpMU1(1);
+
+    return shifted & fieldMask;
+}
+
+// Bitfield insert: takes the bits selected by 'mask' from 'src' and all remaining bits from 'dst'.
+StpMU1 StpBfiF(StpMU1 mask, StpMU1 src, StpMU1 dst)
+{
+    StpMU1 inserted = src & mask;
+    StpMU1 preserved = dst & ~mask;
+
+    return inserted | preserved;
+}
+
+// Remaps a linear lane index 'i' to a 2D offset inside the thread group.
+// Bit layout of the result (i[n] is bit n of the lane index):
+//   x = { i[4], i[3], i[0] }
+//   y = { i[6], i[5], i[2], i[1] }
+// Every aligned run of 4 lanes therefore covers a 2x2 quad and every aligned run of 16 lanes a 4x4 tile.
+StpMU2 StpRemapLaneTo8x16F(StpMU1 i)
+{
+    // X: bit 0 is taken from the lane index, the upper bits from i[4:2] with its lowest bit masked away
+    StpMU1 x = StpBfiF(StpMU1(1), i, StpBfeF(i, StpMU1(2), StpMU1(3)));
+
+    // Y: the low two bits are i[2:1], the upper bits come from i[6:3] with its low two bits masked away
+    StpMU1 y = StpBfiF(StpMU1(3), StpBfeF(i, StpMU1(1), StpMU1(2)), StpBfeF(i, StpMU1(3), StpMU1(4)));
+
+    return StpMU2(x, y);
+}
+
+// Returns the low 8 bits of the packed common constant (the noise width minus one).
+StpMU1 DecodeNoiseWidthMinusOneF(StpMU1 param)
+{
+    const StpMU1 lowByteMask = StpMU1(0xFF);
+
+    return param & lowByteMask;
+}
+#endif
+
+#if defined(STP_16BIT)
+// Bitfield extract (16-bit path): returns the 'numBits' wide field of 'data' that starts at bit 'offset'.
+StpW1 StpBfeH(StpW1 data, StpW1 offset, StpW1 numBits)
+{
+    StpW1 shifted = data >> offset;
+    StpW1 fieldMask = (StpW1(1) << numBits) - StpW1(1);
+
+    return shifted & fieldMask;
+}
+
+// Bitfield insert (16-bit path): takes the bits selected by 'mask' from 'src' and all remaining bits from 'dst'.
+StpW1 StpBfiH(StpW1 mask, StpW1 src, StpW1 dst)
+{
+    StpW1 inserted = src & mask;
+    StpW1 preserved = dst & ~mask;
+
+    return inserted | preserved;
+}
+
+// Remaps a linear lane index 'i' to a 2D offset inside the thread group (16-bit path).
+// Bit layout of the result (i[n] is bit n of the lane index):
+//   x = { i[4], i[3], i[0] }
+//   y = { i[6], i[5], i[2], i[1] }
+// Every aligned run of 4 lanes therefore covers a 2x2 quad and every aligned run of 16 lanes a 4x4 tile.
+StpW2 StpRemapLaneTo8x16H(StpW1 i)
+{
+    // X: bit 0 is taken from the lane index, the upper bits from i[4:2] with its lowest bit masked away
+    StpW1 x = StpBfiH(StpW1(1), i, StpBfeH(i, StpW1(2), StpW1(3)));
+
+    // Y: the low two bits are i[2:1], the upper bits come from i[6:3] with its low two bits masked away
+    StpW1 y = StpBfiH(StpW1(3), StpBfeH(i, StpW1(1), StpW1(2)), StpBfeH(i, StpW1(3), StpW1(4)));
+
+    return StpW2(x, y);
+}
+
+// Returns the low 8 bits of the packed common constant (the noise width minus one), 16-bit path.
+StpW1 DecodeNoiseWidthMinusOneH(StpW1 param)
+{
+    const StpW1 lowByteMask = StpW1(0xFF);
+
+    return param & lowByteMask;
+}
+#endif
+
+// Returns true when bit 8 of the packed common constant is set.
+bool DecodeHasValidHistory(uint param)
+{
+    const uint historyBit = (param >> 8) & 1;
+
+    return historyBit != 0;
+}
+
+// Returns bits 16-23 of the packed common constant (the stencil mask).
+uint DecodeStencilMask(uint param)
+{
+    const uint shifted = param >> 16;
+
+    return shifted & 0xFF;
+}
+
+// Returns bits 24-31 of the packed common constant (the debug view index).
+uint DecodeDebugViewIndex(uint param)
+{
+    const uint shifted = param >> 24;
+
+    return shifted & 0xFF;
+}
+
+#if defined(STP_32BIT)
+// Fetches one dither value from the alpha channel of the blue noise texture.
+// The coordinate is wrapped by masking it with (noise width - 1).
+// NOTE(review): masking only wraps correctly for power-of-two noise widths - confirm on the C# side.
+StpMF1 StpDitF1(StpMU2 o)
+{
+    StpMU1 wrapMask = DecodeNoiseWidthMinusOneF(StpMU1(STP_COMMON_CONSTANT));
+    StpMU2 noiseCoord = o & wrapMask;
+
+    return (StpMF1)LOAD_TEXTURE2D_LOD(_StpBlueNoiseIn, noiseCoord, 0).a;
+}
+// Three-component dither value: the single StpDitF1 result is replicated to all components.
+// TODO: Broadcasting one value to all three outputs is a bug that will affect 'STP_GRAIN=3' output.
+StpMF3 StpDitF3(StpMU2 o)
+{
+    StpMF1 dither = StpDitF1(o);
+
+    return (StpMF3)dither;
+}
+#endif
+
+// Debug output hook: writes color 'c' to _StpDebugOut at pixel p.xy, but only when p.z matches
+// the debug view index stored in the common constant.
+// NOTE: This function is used by both the 32-bit path, and the 16-bit path (when various workarounds are active)
+void StpBugF(StpU3 p, StpF4 c)
+{
+    uint selectedView = DecodeDebugViewIndex(STP_COMMON_CONSTANT);
+
+    // Ignore output that belongs to a debug view other than the selected one
+    if (p.z != selectedView)
+        return;
+
+    _StpDebugOut[COORD_TEXTURE2D_X(p.xy)] = c;
+}
+
+#if defined(STP_16BIT)
+// Fetches one dither value from the alpha channel of the blue noise texture (16-bit path).
+// The coordinate is wrapped by masking it with (noise width - 1).
+// NOTE(review): masking only wraps correctly for power-of-two noise widths - confirm on the C# side.
+StpH1 StpDitH1(StpW2 o)
+{
+    StpW1 wrapMask = DecodeNoiseWidthMinusOneH(StpW1(STP_COMMON_CONSTANT));
+    StpW2 noiseCoord = o & wrapMask;
+
+    return (StpH1)LOAD_TEXTURE2D_LOD(_StpBlueNoiseIn, noiseCoord, 0).a;
+}
+// Three-component dither value: the single StpDitH1 result is replicated to all components.
+// TODO: Broadcasting one value to all three outputs is a bug that will affect 'STP_GRAIN=3' output.
+StpH3 StpDitH3(StpW2 o)
+{
+    StpH1 dither = StpDitH1(o);
+
+    return (StpH3)dither;
+}
+#endif
+
+#endif // STP_COMMON_UNITY_INCLUDE_GUARD
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpCommon.hlsl.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpCommon.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 11eb6b38eb2cd494cb1ccc8742453265
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpPreTaa.compute
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpPreTaa.compute
@@ -0,0 +1,117 @@
+#pragma kernel StpPreTaa
+
+#pragma multi_compile _ ENABLE_DEBUG_MODE
+#pragma multi_compile _ ENABLE_LARGE_KERNEL
+
+#pragma multi_compile _ UNITY_DEVICE_SUPPORTS_NATIVE_16BIT
+
+#pragma multi_compile _ DISABLE_TEXTURE2D_X_ARRAY
+
+#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch glcore
+
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/UnityInstancing.hlsl"
+
+#define STP_DIL 1
+#define STP_SAA 1
+
+#include "Packages/com.unity.render-pipelines.core/Runtime/STP/StpCommon.hlsl"
+
+//
+// Input
+//
+
+TEXTURE2D_X(_StpIntermediateConvergence);
+
+//
+// Intermediate Output
+//
+
+RW_TEXTURE2D_X(float, _StpIntermediateWeights);
+
+//
+// History Input/Output
+//
+
+TEXTURE2D_X(_StpLuma);
+RW_TEXTURE2D_X(float, _StpConvergence);
+
+// DIL
+#if defined(STP_16BIT)
+// Dither value for the dilation pass at pixel 'o' (forwards to StpDitH1).
+StpH1 StpDilDitH(StpW2 o)
+{
+    return StpDitH1(o);
+}
+
+// Red channel of _StpIntermediateConvergence sampled at 'p' with s_linear_clamp_sampler, LOD 0.
+StpH1 StpDilConH(StpF2 p)
+{
+    return (StpH1)SAMPLE_TEXTURE2D_X_LOD(_StpIntermediateConvergence, s_linear_clamp_sampler, p, 0).r;
+}
+
+// Red channel gather of _StpIntermediateConvergence at 'p' using s_point_clamp_sampler.
+StpH4 StpDilCon4H(StpF2 p)
+{
+    return (StpH4)GATHER_RED_TEXTURE2D_X(_StpIntermediateConvergence, s_point_clamp_sampler, p);
+}
+#endif
+#if defined(STP_32BIT)
+// Dither value for the dilation pass at pixel 'o' (forwards to StpDitF1).
+StpMF1 StpDilDitF(StpMU2 o)
+{
+    return StpDitF1(o);
+}
+
+// Red channel of _StpIntermediateConvergence sampled at 'p' with s_linear_clamp_sampler, LOD 0.
+StpMF1 StpDilConF(StpF2 p)
+{
+    return (StpMF1)SAMPLE_TEXTURE2D_X_LOD(_StpIntermediateConvergence, s_linear_clamp_sampler, p, 0).r;
+}
+
+// Red channel gather of _StpIntermediateConvergence at 'p' using s_point_clamp_sampler.
+StpMF4 StpDilCon4F(StpF2 p)
+{
+    return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpIntermediateConvergence, s_point_clamp_sampler, p);
+}
+#endif
+
+// SAA
+#if defined(STP_16BIT)
+// Red channel gather of _StpLuma at 'p' using s_point_clamp_sampler.
+StpH4 StpSaaLum4H(StpF2 p)
+{
+    return (StpH4)GATHER_RED_TEXTURE2D_X(_StpLuma, s_point_clamp_sampler, p);
+}
+#endif
+#if defined(STP_32BIT)
+// Red channel gather of _StpLuma at 'p' using s_point_clamp_sampler.
+StpMF4 StpSaaLum4F(StpF2 p)
+{
+    return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpLuma, s_point_clamp_sampler, p);
+}
+#endif
+
+#define THREADING_BLOCK_SIZE STP_GROUP_SIZE
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Threading.hlsl"
+
+// Pre-TAA kernel.
+// Runs the dilation step for thread groups that start inside the dilation area and writes the result to
+// _StpConvergence, then runs the spatial anti-aliasing step for every thread and writes the result to
+// _StpIntermediateWeights.
+[numthreads(STP_GROUP_SIZE, 1, 1)]
+void StpPreTaa(Threading::Group group)
+{
+    UNITY_XR_ASSIGN_VIEW_INDEX(group.groupID.z);
+
+    // Resolve this thread's pixel position and the dilation extent using the integer width of the active path
+#if defined(STP_16BIT)
+    StpW1 laneIndex = StpW1_(group.groupIndex);
+    StpW2 groupOrigin = ComputeGroupPos(StpW2(group.groupID.xy));
+    StpW2 pixelPos = groupOrigin + StpRemapLaneTo8x16H(laneIndex);
+    StpW2 dilationExtent = StpW2(asuint(_StpDilConstants0.zw)); // TODO: 16-bit packed constant?
+#else
+    StpMU1 laneIndex = StpMU1_(group.groupIndex);
+    StpMU2 groupOrigin = ComputeGroupPos(StpMU2(group.groupID.xy));
+    StpMU2 pixelPos = groupOrigin + StpRemapLaneTo8x16F(laneIndex);
+    StpMU2 dilationExtent = StpMU2(asuint(_StpDilConstants0).zw);
+#endif
+
+    // The dilation logic only runs for a subset of the input image size.
+    // The test uses the group origin, so every thread of a group takes the same branch.
+    if (all(groupOrigin < dilationExtent))
+    {
+        half convergence;
+
+#if defined(STP_16BIT)
+        StpDilH(convergence, pixelPos, asuint(_StpDilConstants0));
+#else
+        StpDilF(convergence, pixelPos, asuint(_StpDilConstants0));
+#endif
+
+        _StpConvergence[COORD_TEXTURE2D_X(pixelPos)] = convergence;
+    }
+
+    half weights;
+
+    // SAA uses the same constants as the pattern matcher
+#if defined(STP_16BIT)
+    StpSaaH(weights, pixelPos, asuint(_StpSetupConstants0));
+#else
+    StpSaaF(weights, pixelPos, asuint(_StpSetupConstants0));
+#endif
+
+    _StpIntermediateWeights[COORD_TEXTURE2D_X(pixelPos)] = weights;
+}
+
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpPreTaa.compute.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpPreTaa.compute.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: a679dba8ec4d9ce45884a270b0e22dda
+ComputeShaderImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpSetup.compute
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpSetup.compute
@@ -0,0 +1,523 @@
+#pragma kernel StpSetup
+
+#pragma multi_compile _ ENABLE_DEBUG_MODE
+#pragma multi_compile _ ENABLE_STENCIL_RESPONSIVE
+#pragma multi_compile _ ENABLE_LARGE_KERNEL
+
+#pragma multi_compile _ UNITY_DEVICE_SUPPORTS_NATIVE_16BIT
+
+// TODO: Re-enable support for wave reductions (usage of UNITY_DEVICE_SUPPORTS_WAVE_ANY keyword)
+//
+// We've run into many platform specific problems when trying to use wave operations for STP's reductions so they're being
+// disabled for now. Enabling support for wave operations also causes us to use DXC on the 32-bit path on some Qualcomm Android
+// devices and this triggers visual artifacts that we have no other way to work around at the moment.
+
+#pragma multi_compile _ DISABLE_TEXTURE2D_X_ARRAY
+
+#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch glcore
+
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/UnityInstancing.hlsl"
+
+#define STP_PAT 1
+
+#include "Packages/com.unity.render-pipelines.core/Runtime/STP/StpCommon.hlsl"
+
+//
+// Input
+//
+
+TEXTURE2D_X(_StpInputColor);
+TEXTURE2D_X(_StpInputDepth);
+TEXTURE2D_X(_StpInputMotion);
+
+#if defined(ENABLE_STENCIL_RESPONSIVE)
+    TYPED_TEXTURE2D_X(uint2, _StpInputStencil);
+#endif
+
+//
+// Intermediate Output
+//
+
+RW_TEXTURE2D_X(float4, _StpIntermediateColor);
+RW_TEXTURE2D_X(float, _StpIntermediateConvergence);
+
+//
+// History Input/Output
+//
+
+TYPED_TEXTURE2D_X(uint, _StpPriorDepthMotion);
+RW_TEXTURE2D_X(uint, _StpDepthMotion);
+
+TEXTURE2D_X(_StpPriorLuma);
+RW_TEXTURE2D_X(float2, _StpLuma);
+
+TEXTURE2D_X(_StpPriorConvergence);
+
+TEXTURE2D_X(_StpPriorFeedback);
+
+// SLICE_ARRAY_INDEX scaled by the number of per-view setup constants.
+// NOTE(review): presumably the base index of the current view inside _StpSetupPerViewConstants - confirm at the use site.
+#define STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET (SLICE_ARRAY_INDEX * STPSETUPPERVIEWCONSTANTS_COUNT)
+
+#if defined(SHADER_API_PSSL) || defined(SHADER_API_SWITCH) || (defined(SHADER_API_METAL) && !defined(SHADER_API_MOBILE))
+    // Force usage of the 32-bit reduction path even in 16-bit environments
+    #define STP_FORCE_32BIT_REDUCTION
+#endif
+
+#if defined(SHADER_API_PSSL) || defined(SHADER_API_GAMECORE) || defined(SHADER_API_METAL) || (defined(SHADER_API_VULKAN) && defined(SHADER_API_MOBILE))
+    // Force usage of group shared memory instead of using wave operations
+    #define STP_FORCE_GROUPSHARED
+#endif
+
+// Enable the use of wave operations when they're supported by the current hardware and usage of groupshared hasn't been forced.
+#if defined(UNITY_HW_SUPPORTS_WAVE) && !defined(STP_FORCE_GROUPSHARED)
+    #define STP_ENABLE_WAVEOPS
+#endif
+
+// STP requires a 4x4 reduction which must be implemented by either wave operations, or group shared memory.
+// The scratch buffer is only declared when the group shared memory implementation is in use.
+#if !defined(STP_ENABLE_WAVEOPS)
+#if defined(STP_16BIT) && !defined(STP_FORCE_32BIT_REDUCTION)
+// 16-bit reduction: one uint4 per lane, each uint is written from a pair of 16-bit values via StpU1_H2
+groupshared uint4 gs_StpScratch[STP_GROUP_SIZE];
+#else
+// 32-bit reduction: two float4 per lane, the second value lives at [lane + STP_GROUP_SIZE]
+groupshared float4 gs_StpScratch[STP_GROUP_SIZE * 2];
+#endif
+#endif
+
+// In some cases, we have to expose the 32-bit reduction code in the 16-bit path
+#if defined(STP_32BIT) || defined(STP_FORCE_32BIT_REDUCTION)
+// Component-wise maximum of 'a' and 'b' across each aligned run of 16 lanes (32-bit reduction).
+// On return every lane of a run holds the maximum over all 16 lanes of that run.
+//   i : lane index inside the thread group; only used by the group shared memory implementation
+//   a : first value to reduce, replaced by the reduced result
+//   b : second value to reduce, replaced by the reduced result
+// The group shared memory implementation contains group-wide barriers, so it has to be reached
+// by every thread of the group.
+void StpPat4x4MaxF8(StpMU1 i, inout StpF4 a, inout StpF4 b)
+{
+#if defined(STP_ENABLE_WAVEOPS)
+    // Wave implementation: combine with the lane whose wave lane index differs in bit 0, then bit 1, bit 2 and bit 3.
+    // Values travel through WaveReadLaneAt as uints (StpU1_F1 / StpF1_U1 convert on the way in and out).
+    // NOTE(review): relies on the wave lane index matching the group lane layout - confirm.
+
+    // Bit 0
+    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 1)));
+    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 1)));
+    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 1)));
+    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 1)));
+    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 1)));
+    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 1)));
+    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 1)));
+    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 1)));
+
+    // Bit 1
+    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 2)));
+    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 2)));
+    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 2)));
+    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 2)));
+    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 2)));
+    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 2)));
+    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 2)));
+    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 2)));
+
+    // Bit 2
+    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 4)));
+    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 4)));
+    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 4)));
+    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 4)));
+    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 4)));
+    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 4)));
+    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 4)));
+    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 4)));
+
+    // Bit 3
+    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 8)));
+    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 8)));
+    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 8)));
+    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 8)));
+    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 8)));
+    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 8)));
+    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 8)));
+    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 8)));
+#else
+    // Group shared memory implementation: 'a' is stored at [i], 'b' at [i + STP_GROUP_SIZE]
+    gs_StpScratch[i] = a;
+    gs_StpScratch[i + STP_GROUP_SIZE] = b;
+
+    // Wait until every lane has published its values
+    GroupMemoryBarrierWithGroupSync();
+
+    // 2x2 Reduction
+    {
+        // First lane of the aligned run of 4 lanes that contains this lane
+        StpMU1 offset = (i & ~StpMU1(3));
+
+        // The three other lanes of that run (wrapping around inside the run)
+        StpMU1 a0 = offset + ((i + StpMU1(1)) & StpMU1(3));
+        StpMU1 a1 = offset + ((i + StpMU1(2)) & StpMU1(3));
+        StpMU1 a2 = offset + ((i + StpMU1(3)) & StpMU1(3));
+
+        float4 x0 = gs_StpScratch[a0];
+        float4 x1 = gs_StpScratch[a1];
+        float4 x2 = gs_StpScratch[a2];
+
+        float4 y0 = gs_StpScratch[a0 + STP_GROUP_SIZE];
+        float4 y1 = gs_StpScratch[a1 + STP_GROUP_SIZE];
+        float4 y2 = gs_StpScratch[a2 + STP_GROUP_SIZE];
+
+        // All reads have to complete before the scratch memory is overwritten below
+        GroupMemoryBarrierWithGroupSync();
+
+        a = max(max(max(a, x0), x1), x2);
+        b = max(max(max(b, y0), y1), y2);
+    }
+
+    // Publish the per-run-of-4 results as input for the second step
+    gs_StpScratch[i] = a;
+    gs_StpScratch[i + STP_GROUP_SIZE] = b;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // 4x4 Reduction
+    {
+        // First lane of the aligned run of 16 lanes that contains this lane
+        StpMU1 offset = (i & ~StpMU1(15));
+
+        // Lanes 4, 8 and 12 further inside that run (wrapping around), i.e. one lane of each other run of 4
+        StpMU1 a0 = offset + ((i + StpMU1(4)) & StpMU1(15));
+        StpMU1 a1 = offset + ((i + StpMU1(8)) & StpMU1(15));
+        StpMU1 a2 = offset + ((i + StpMU1(12)) & StpMU1(15));
+
+        float4 x0 = gs_StpScratch[a0];
+        float4 x1 = gs_StpScratch[a1];
+        float4 x2 = gs_StpScratch[a2];
+
+        float4 y0 = gs_StpScratch[a0 + STP_GROUP_SIZE];
+        float4 y1 = gs_StpScratch[a1 + STP_GROUP_SIZE];
+        float4 y2 = gs_StpScratch[a2 + STP_GROUP_SIZE];
+
+        // Keeps the scratch memory untouched until every lane has finished reading it
+        GroupMemoryBarrierWithGroupSync();
+
+        a = max(max(max(a, x0), x1), x2);
+        b = max(max(max(b, y0), y1), y2);
+    }
+#endif
+}
+// Component-wise sum of 'a' across each aligned run of 16 lanes (32-bit reduction).
+// On return every lane of a run holds the sum over all 16 lanes of that run.
+//   i : lane index inside the thread group; only used by the group shared memory implementation
+//   a : value to reduce, replaced by the reduced result
+// The group shared memory implementation contains group-wide barriers, so it has to be reached
+// by every thread of the group.
+void StpPat4x4SumF4(StpMU1 i, inout StpF4 a)
+{
+#if defined(STP_ENABLE_WAVEOPS)
+    // Wave implementation: add the value of the lane whose wave lane index differs in bit 0, then bit 1, bit 2 and bit 3.
+    // NOTE(review): relies on the wave lane index matching the group lane layout - confirm.
+
+    // Bit 0
+    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 1));
+    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 1));
+    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 1));
+    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 1));
+
+    // Bit 1
+    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 2));
+    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 2));
+    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 2));
+    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 2));
+
+    // Bit 2
+    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 4));
+    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 4));
+    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 4));
+    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 4));
+
+    // Bit 3
+    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 8));
+    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 8));
+    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 8));
+    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 8));
+#else
+    // Group shared memory implementation: only the first STP_GROUP_SIZE scratch entries are used
+    gs_StpScratch[i] = a;
+
+    // Wait until every lane has published its value
+    GroupMemoryBarrierWithGroupSync();
+
+    // 2x2 Reduction
+    {
+        // First lane of the aligned run of 4 lanes that contains this lane
+        StpMU1 offset = (i & ~StpMU1(3));
+
+        // The three other lanes of that run (wrapping around inside the run)
+        StpMU1 a0 = offset + ((i + StpMU1(1)) & StpMU1(3));
+        StpMU1 a1 = offset + ((i + StpMU1(2)) & StpMU1(3));
+        StpMU1 a2 = offset + ((i + StpMU1(3)) & StpMU1(3));
+
+        float4 x0 = gs_StpScratch[a0];
+        float4 x1 = gs_StpScratch[a1];
+        float4 x2 = gs_StpScratch[a2];
+
+        // All reads have to complete before the scratch memory is overwritten below
+        GroupMemoryBarrierWithGroupSync();
+
+        a = a + x0 + x1 + x2;
+    }
+
+    // Publish the per-run-of-4 sums as input for the second step
+    gs_StpScratch[i] = a;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // 4x4 Reduction
+    {
+        // First lane of the aligned run of 16 lanes that contains this lane
+        StpMU1 offset = (i & ~StpMU1(15));
+
+        // Lanes 4, 8 and 12 further inside that run (wrapping around), i.e. one lane of each other run of 4
+        StpMU1 a0 = offset + ((i + StpMU1(4)) & StpMU1(15));
+        StpMU1 a1 = offset + ((i + StpMU1(8)) & StpMU1(15));
+        StpMU1 a2 = offset + ((i + StpMU1(12)) & StpMU1(15));
+
+        float4 x0 = gs_StpScratch[a0];
+        float4 x1 = gs_StpScratch[a1];
+        float4 x2 = gs_StpScratch[a2];
+
+        // Keeps the scratch memory untouched until every lane has finished reading it
+        GroupMemoryBarrierWithGroupSync();
+
+        a = a + x0 + x1 + x2;
+    }
+#endif
+}
+#endif
+
+#if defined(STP_16BIT)
+// Component-wise maximum of 'a' and 'b' across each aligned run of 16 lanes (16-bit entry point).
+// On return every lane of a run holds the maximum over all 16 lanes of that run.
+//   i : lane index inside the thread group; only used by the group shared memory implementation
+//   a : first value to reduce, replaced by the reduced result
+//   b : second value to reduce, replaced by the reduced result
+// Forwards to StpPat4x4MaxF8 when STP_FORCE_32BIT_REDUCTION is defined.
+// The group shared memory implementation contains group-wide barriers, so it has to be reached
+// by every thread of the group.
+void StpPat4x4MaxH8(StpW1 i, inout StpH4 a, inout StpH4 b)
+{
+#if defined(STP_FORCE_32BIT_REDUCTION)
+    StpPat4x4MaxF8(i, a, b);
+#else
+#if defined(STP_ENABLE_WAVEOPS)
+    // Wave implementation: pairs of 16-bit values are exchanged as one uint (StpU1_H2 / StpH2_U1), combining with
+    // the lane whose wave lane index differs in bit 0, then bit 1, bit 2 and bit 3.
+    // NOTE(review): relies on the wave lane index matching the group lane layout - confirm.
+
+    // Bit 0
+    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 1)));
+    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 1)));
+    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 1)));
+    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 1)));
+
+    // Bit 1
+    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 2)));
+    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 2)));
+    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 2)));
+    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 2)));
+
+    // Bit 2
+    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 4)));
+    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 4)));
+    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 4)));
+    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 4)));
+
+    // Bit 3
+    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 8)));
+    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 8)));
+    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 8)));
+    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 8)));
+#else
+    // Group shared memory implementation: both values fit into one uint4 per lane (a.xy, a.zw, b.xy, b.zw)
+    gs_StpScratch[i] = StpU4(StpU1_H2(a.xy), StpU1_H2(a.zw), StpU1_H2(b.xy), StpU1_H2(b.zw));
+
+    // Wait until every lane has published its values
+    GroupMemoryBarrierWithGroupSync();
+
+    // 2x2 Reduction
+    {
+        // First lane of the aligned run of 4 lanes that contains this lane
+        StpW1 offset = (i & ~StpW1(3));
+
+        // The three other lanes of that run (wrapping around inside the run)
+        StpW1 a0 = offset + ((i + StpW1(1)) & StpW1(3));
+        StpW1 a1 = offset + ((i + StpW1(2)) & StpW1(3));
+        StpW1 a2 = offset + ((i + StpW1(3)) & StpW1(3));
+
+        uint4 x0 = gs_StpScratch[a0];
+        uint4 x1 = gs_StpScratch[a1];
+        uint4 x2 = gs_StpScratch[a2];
+
+        // All reads have to complete before the scratch memory is overwritten below
+        GroupMemoryBarrierWithGroupSync();
+
+        a.xy = max(max(max(a.xy, StpH2_U1(x0.x)), StpH2_U1(x1.x)), StpH2_U1(x2.x));
+        a.zw = max(max(max(a.zw, StpH2_U1(x0.y)), StpH2_U1(x1.y)), StpH2_U1(x2.y));
+        b.xy = max(max(max(b.xy, StpH2_U1(x0.z)), StpH2_U1(x1.z)), StpH2_U1(x2.z));
+        b.zw = max(max(max(b.zw, StpH2_U1(x0.w)), StpH2_U1(x1.w)), StpH2_U1(x2.w));
+    }
+
+    // Publish the per-run-of-4 results as input for the second step
+    gs_StpScratch[i] = StpU4(StpU1_H2(a.xy), StpU1_H2(a.zw), StpU1_H2(b.xy), StpU1_H2(b.zw));
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // 4x4 Reduction
+    {
+        // First lane of the aligned run of 16 lanes that contains this lane
+        StpW1 offset = (i & ~StpW1(15));
+
+        // Lanes 4, 8 and 12 further inside that run (wrapping around), i.e. one lane of each other run of 4
+        StpW1 a0 = offset + ((i + StpW1(4)) & StpW1(15));
+        StpW1 a1 = offset + ((i + StpW1(8)) & StpW1(15));
+        StpW1 a2 = offset + ((i + StpW1(12)) & StpW1(15));
+
+        uint4 x0 = gs_StpScratch[a0];
+        uint4 x1 = gs_StpScratch[a1];
+        uint4 x2 = gs_StpScratch[a2];
+
+        // Keeps the scratch memory untouched until every lane has finished reading it
+        GroupMemoryBarrierWithGroupSync();
+
+        a.xy = max(max(max(a.xy, StpH2_U1(x0.x)), StpH2_U1(x1.x)), StpH2_U1(x2.x));
+        a.zw = max(max(max(a.zw, StpH2_U1(x0.y)), StpH2_U1(x1.y)), StpH2_U1(x2.y));
+        b.xy = max(max(max(b.xy, StpH2_U1(x0.z)), StpH2_U1(x1.z)), StpH2_U1(x2.z));
+        b.zw = max(max(max(b.zw, StpH2_U1(x0.w)), StpH2_U1(x1.w)), StpH2_U1(x2.w));
+    }
+#endif
+#endif
+}
+// Component-wise sum of 'a' across each aligned run of 16 lanes (16-bit entry point).
+// On return every lane of a run holds the sum over all 16 lanes of that run.
+//   i : lane index inside the thread group; only used by the group shared memory implementation
+//   a : value to reduce, replaced by the reduced result
+// Forwards to StpPat4x4SumF4 when STP_FORCE_32BIT_REDUCTION is defined.
+// The group shared memory implementation contains group-wide barriers, so it has to be reached
+// by every thread of the group.
+void StpPat4x4SumH4(StpW1 i, inout StpH4 a)
+{
+#if defined(STP_FORCE_32BIT_REDUCTION)
+    StpPat4x4SumF4(i, a);
+#else
+#if defined(STP_ENABLE_WAVEOPS)
+    // Wave implementation: pairs of 16-bit values are exchanged as one uint (StpU1_H2 / StpH2_U1), adding the value
+    // of the lane whose wave lane index differs in bit 0, then bit 1, bit 2 and bit 3.
+    // NOTE(review): relies on the wave lane index matching the group lane layout - confirm.
+
+    // Bit 0
+    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 1));
+    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 1));
+
+    // Bit 1
+    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 2));
+    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 2));
+
+    // Bit 2
+    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 4));
+    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 4));
+
+    // Bit 3
+    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 8));
+    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 8));
+#else
+    // Group shared memory implementation: only the .xy half of each scratch entry is used (a.xy, a.zw)
+    gs_StpScratch[i].xy = StpU2(StpU1_H2(a.xy), StpU1_H2(a.zw));
+
+    // Wait until every lane has published its value
+    GroupMemoryBarrierWithGroupSync();
+
+    // 2x2 Reduction
+    {
+        // First lane of the aligned run of 4 lanes that contains this lane
+        StpW1 offset = (i & ~StpW1(3));
+
+        // The three other lanes of that run (wrapping around inside the run)
+        StpW1 a0 = offset + ((i + StpW1(1)) & StpW1(3));
+        StpW1 a1 = offset + ((i + StpW1(2)) & StpW1(3));
+        StpW1 a2 = offset + ((i + StpW1(3)) & StpW1(3));
+
+        uint2 x0 = gs_StpScratch[a0].xy;
+        uint2 x1 = gs_StpScratch[a1].xy;
+        uint2 x2 = gs_StpScratch[a2].xy;
+
+        // All reads have to complete before the scratch memory is overwritten below
+        GroupMemoryBarrierWithGroupSync();
+
+        a.xy = a.xy + StpH2_U1(x0.x) + StpH2_U1(x1.x) + StpH2_U1(x2.x);
+        a.zw = a.zw + StpH2_U1(x0.y) + StpH2_U1(x1.y) + StpH2_U1(x2.y);
+    }
+
+    // Publish the per-run-of-4 sums as input for the second step
+    gs_StpScratch[i].xy = StpU2(StpU1_H2(a.xy), StpU1_H2(a.zw));
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // 4x4 Reduction
+    {
+        // First lane of the aligned run of 16 lanes that contains this lane
+        StpW1 offset = (i & ~StpW1(15));
+
+        // Lanes 4, 8 and 12 further inside that run (wrapping around), i.e. one lane of each other run of 4
+        StpW1 a0 = offset + ((i + StpW1(4)) & StpW1(15));
+        StpW1 a1 = offset + ((i + StpW1(8)) & StpW1(15));
+        StpW1 a2 = offset + ((i + StpW1(12)) & StpW1(15));
+
+        uint2 x0 = gs_StpScratch[a0].xy;
+        uint2 x1 = gs_StpScratch[a1].xy;
+        uint2 x2 = gs_StpScratch[a2].xy;
+
+        // Keeps the scratch memory untouched until every lane has finished reading it
+        GroupMemoryBarrierWithGroupSync();
+
+        a.xy = a.xy + StpH2_U1(x0.x) + StpH2_U1(x1.x) + StpH2_U1(x2.x);
+        a.zw = a.zw + StpH2_U1(x0.y) + StpH2_U1(x1.y) + StpH2_U1(x2.y);
+    }
+#endif
+#endif
+}
+// Samples _StpPriorConvergence at 'p' with s_linear_clamp_sampler, LOD 0, and converts the result to StpH1.
+StpH1 StpPatPriConH(StpF2 p)
+{
+    return (StpH1)SAMPLE_TEXTURE2D_X_LOD(_StpPriorConvergence, s_linear_clamp_sampler, p, 0);
+}
+
+// These are separate to support inline operation (pass merged instead of loads).
+
+// Loads the .xy components of _StpInputMotion at pixel 'o'.
+StpF2 StpPatDatMotH(StpW2 o)
+{
+    return LOAD_TEXTURE2D_X_LOD(_StpInputMotion, o, 0).xy;
+}
+
+// Loads the .rgb components of _StpInputColor at pixel 'o'.
+StpH3 StpPatDatColH(StpW2 o)
+{
+    return (StpH3)LOAD_TEXTURE2D_X_LOD(_StpInputColor, o, 0).rgb;
+}
+
+// Loads the .x component of _StpInputDepth at pixel 'o'.
+StpF1 StpPatDatZH(StpW2 o)
+{
+    return LOAD_TEXTURE2D_X_LOD(_StpInputDepth, o, 0).x;
+}
+// This provides a place to convert Z from depth to linear if not inlined and actually loaded.
+// The result is 1 / (STP_ZBUFFER_PARAMS_Z * z + STP_ZBUFFER_PARAMS_W), evaluated on reversed Z.
+StpF1 StpPatFixZH(StpF1 z)
+{
+    // STP expects reversed Z input, so flip the value when the platform doesn't already provide it
+#if UNITY_REVERSED_Z
+    StpF1 reversedZ = z;
+#else
+    StpF1 reversedZ = 1.0 - z;
+#endif
+
+    return 1.0 / (STP_ZBUFFER_PARAMS_Z * reversedZ + STP_ZBUFFER_PARAMS_W);
+}
+// Loads the stencil value used by the "responsive" feature at pixel 'o'.
+// Returns 0 when ENABLE_STENCIL_RESPONSIVE is not defined (_StpInputStencil is not declared in that case).
+// The two returns are mutually exclusive so that no unreachable statement follows the stencil load.
+StpU1 StpPatDatRH(StpW2 o)
+{
+#if defined(ENABLE_STENCIL_RESPONSIVE)
+    return GetStencilValue(LOAD_TEXTURE2D_X_LOD(_StpInputStencil, o, 0).xy);
+#else
+    return StpU1_(0);
+#endif // defined(ENABLE_STENCIL_RESPONSIVE)
+}
+// Converts the stencil value 'v' into the value consumed by the "responsive" feature:
+// 1.0 when history is valid and the pixel is not excluded by the stencil mask, 0.0 otherwise.
+StpH1 StpPatFixRH(StpU1 v)
+{
+    // Activate the "responsive" feature when we don't have valid history textures.
+    bool historyMissing = !DecodeHasValidHistory(STP_COMMON_CONSTANT);
+
+    // Pixels whose stencil value overlaps the configured mask are excluded as well
+    bool stencilExcluded = false;
+#if defined(ENABLE_STENCIL_RESPONSIVE)
+    stencilExcluded = (v & DecodeStencilMask(STP_COMMON_CONSTANT)) != 0;
+#endif // defined(ENABLE_STENCIL_RESPONSIVE)
+
+    bool responsive = historyMissing || stencilExcluded;
+
+    return responsive ? StpH1_(0.0) : StpH1_(1.0);
+}
+
+// Dither value for the pattern pass at pixel 'o' (forwards to StpDitH1).
+StpH1 StpPatDitH(StpW2 o)
+{
+    return StpDitH1(o);
+}
+
+// Samples _StpPriorFeedback at 'p' with s_linear_clamp_sampler, LOD 0.
+StpH4 StpPatPriFedH(StpF2 p)
+{
+    return (StpH4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0);
+}
+
+// Red channel gather of _StpPriorFeedback at 'p' using s_point_clamp_sampler.
+StpH4 StpPatPriFedR4H(StpF2 p)
+{
+    return (StpH4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p);
+}
+
+// Green channel gather of _StpPriorFeedback at 'p' using s_point_clamp_sampler.
+StpH4 StpPatPriFedG4H(StpF2 p)
+{
+    return (StpH4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p);
+}
+
+// Blue channel gather of _StpPriorFeedback at 'p' using s_point_clamp_sampler.
+StpH4 StpPatPriFedB4H(StpF2 p)
+{
+    return (StpH4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p);
+}
+
+// Samples _StpPriorLuma at 'p' with s_linear_clamp_sampler, LOD 0, and converts the result to StpH2.
+StpH2 StpPatPriLumH(StpF2 p)
+{
+    return (StpH2)SAMPLE_TEXTURE2D_X_LOD(_StpPriorLuma, s_linear_clamp_sampler, p, 0);
+}
+
+// Red channel gather of the uint _StpPriorDepthMotion texture at 'p' using s_point_clamp_sampler.
+StpU4 StpPatPriMot4H(StpF2 p)
+{
+    return GATHER_RED_TEXTURE2D_X(_StpPriorDepthMotion, s_point_clamp_sampler, p);
+}
+
+void StpPatStMotH(StpW2 p, StpU1 v) { _StpDepthMotion[COORD_TEXTURE2D_X(p)] = v; } // Stores v into _StpDepthMotion at p.
+void StpPatStColH(StpW2 p, StpH4 v) { _StpIntermediateColor[COORD_TEXTURE2D_X(p)] = v; } // Stores v into _StpIntermediateColor at p.
+void StpPatStLumH(StpW2 p, StpH2 v) { _StpLuma[COORD_TEXTURE2D_X(p)] = v; } // Stores v into _StpLuma at p.
+void StpPatStCnvH(StpW2 p, StpH1 v) { _StpIntermediateConvergence[COORD_TEXTURE2D_X(p >> StpW1(2))] = v; } // Stores v into _StpIntermediateConvergence at p >> 2, i.e. each coordinate divided by 4.
+#endif
+
+#if defined(STP_32BIT)
+StpMF1 StpPatPriConF(StpF2 p) { return (StpMF1)SAMPLE_TEXTURE2D_X_LOD(_StpPriorConvergence, s_linear_clamp_sampler, p, 0); } // Samples _StpPriorConvergence at p (LOD 0) with s_linear_clamp_sampler; result is cast to StpMF1.
+
+// These are separate to support inline operation (pass merged instead of loads).
+StpF2 StpPatDatMotF(StpMU2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputMotion, o, 0).xy; } // Loads .xy of _StpInputMotion at o (LOD 0); no precision cast is applied.
+StpMF3 StpPatDatColF(StpMU2 o) { return (StpMF3)LOAD_TEXTURE2D_X_LOD(_StpInputColor, o, 0).rgb; } // Loads .rgb of _StpInputColor at o (LOD 0), cast to StpMF3.
+StpF1 StpPatDatZF(StpMU2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputDepth, o, 0).x; } // Loads .x of _StpInputDepth at o (LOD 0); the raw value is converted separately by StpPatFixZF.
+// Hook for converting a depth value to linear Z when Z is actually loaded rather than inlined.
+// z: raw depth value (as returned by StpPatDatZF).
+// Returns 1 / (STP_ZBUFFER_PARAMS_Z * z' + STP_ZBUFFER_PARAMS_W), where z' is z expressed in reversed-Z form.
+StpF1 StpPatFixZF(StpF1 z)
+{
+    // STP expects reversed Z input, so the value is flipped when the platform does not use reversed Z.
+#if UNITY_REVERSED_Z
+    StpF1 reversedZ = z;
+#else
+    StpF1 reversedZ = 1.0 - z;
+#endif
+    return 1.0 / (STP_ZBUFFER_PARAMS_Z * reversedZ + STP_ZBUFFER_PARAMS_W);
+}
+// Fetches the per-pixel value that feeds StpPatFixRF.
+// With ENABLE_STENCIL_RESPONSIVE defined this is GetStencilValue() applied to the .xy of _StpInputStencil at o (LOD 0);
+// without it no texture is read and the result is always 0.
+StpU1 StpPatDatRF(StpMU2 o)
+{
+#if defined(ENABLE_STENCIL_RESPONSIVE)
+    return GetStencilValue(LOAD_TEXTURE2D_X_LOD(_StpInputStencil, o, 0).xy);
+#else
+    return StpU1_(0);
+#endif
+}
+// Converts the value produced by StpPatDatRF into a 0/1 factor.
+// v: value from StpPatDatRF (only inspected when ENABLE_STENCIL_RESPONSIVE is defined).
+// Returns 1.0 when the history is valid and the pixel is not excluded by the stencil mask, otherwise 0.0.
+// The 0.0 case is the one taken when there are no valid history textures, i.e. when the "responsive" feature is activated.
+StpMF1 StpPatFixRF(StpU1 v)
+{
+    bool keepHistory = DecodeHasValidHistory(STP_COMMON_CONSTANT);
+#if defined(ENABLE_STENCIL_RESPONSIVE)
+    // Any overlap between v and the stencil mask decoded from the common constant excludes the pixel from TAA.
+    keepHistory = keepHistory && ((v & DecodeStencilMask(STP_COMMON_CONSTANT)) == 0);
+#endif
+    return keepHistory ? StpMF1_(1.0) : StpMF1_(0.0);
+}
+
+StpMF1 StpPatDitF(StpMU2 o) { return (StpMF1)StpDitF1(o); } // Forwards to StpDitF1(o), cast to StpMF1 (presumably the per-pixel dither value - confirm in StpCommon).
+StpMF4 StpPatPriFedF(StpF2 p) { return (StpMF4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0); } // Samples _StpPriorFeedback at p (LOD 0) with s_linear_clamp_sampler.
+StpMF4 StpPatPriFedR4F(StpF2 p) { return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpPriorFeedback at p.
+StpMF4 StpPatPriFedG4F(StpF2 p) { return (StpMF4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the green channel of _StpPriorFeedback at p.
+StpMF4 StpPatPriFedB4F(StpF2 p) { return (StpMF4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the blue channel of _StpPriorFeedback at p.
+StpMF2 StpPatPriLumF(StpF2 p) { return (StpMF2)SAMPLE_TEXTURE2D_X_LOD(_StpPriorLuma, s_linear_clamp_sampler, p, 0); } // Samples _StpPriorLuma at p (LOD 0) with s_linear_clamp_sampler, cast to StpMF2.
+StpU4 StpPatPriMot4F(StpF2 p) { return GATHER_RED_TEXTURE2D_X(_StpPriorDepthMotion, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpPriorDepthMotion at p; returned as StpU4 without a cast.
+
+void StpPatStMotF(StpMU2 p, StpU1 v) { _StpDepthMotion[COORD_TEXTURE2D_X(p)] = v; } // Stores v into _StpDepthMotion at p.
+void StpPatStColF(StpMU2 p, StpMF4 v) { _StpIntermediateColor[COORD_TEXTURE2D_X(p)] = v; } // Stores v into _StpIntermediateColor at p.
+void StpPatStLumF(StpMU2 p, StpMF2 v) { _StpLuma[COORD_TEXTURE2D_X(p)] = v; } // Stores v into _StpLuma at p.
+void StpPatStCnvF(StpMU2 p, StpMF1 v) { _StpIntermediateConvergence[COORD_TEXTURE2D_X(p >> StpMU1(2))] = v; } // Stores v into _StpIntermediateConvergence at p >> 2, i.e. each coordinate divided by 4.
+#endif
+
+#define THREADING_BLOCK_SIZE STP_GROUP_SIZE
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Threading.hlsl"
+
+// Kernel entry point of the STP setup pass.
+// Each thread derives its position from the group ID (ComputeGroupPos) plus a per-lane offset
+// (StpRemapLaneTo8x16*), then calls StpPatH on the 16-bit path or StpPatF otherwise, passing the
+// setup constants reinterpreted as uint.
+[numthreads(STP_GROUP_SIZE, 1, 1)]
+void StpSetup(Threading::Group group)
+{
+    // The Z component of the group ID is used as the XR view index.
+    UNITY_XR_ASSIGN_VIEW_INDEX(group.groupID.z);
+
+    // The two precision paths differ only in the coordinate types and the H/F function suffix.
+    // Each branch ends on the opening of the call; the argument list that follows is shared.
+#if defined(STP_16BIT)
+    StpW1 lane = StpW1_(group.groupIndex);
+    StpW2 groupPos = ComputeGroupPos(StpW2(group.groupID.xy));
+    StpW2 pos = groupPos + StpRemapLaneTo8x16H(lane);
+
+    StpPatH(
+#else
+    StpMU1 lane = StpMU1_(group.groupIndex);
+    StpMU2 groupPos = ComputeGroupPos(StpMU2(group.groupID.xy));
+    StpMU2 pos = groupPos + StpRemapLaneTo8x16F(lane);
+
+    StpPatF(
+#endif
+        lane,
+        pos,
+
+        // Constants shared by all views.
+        asuint(_StpSetupConstants0),
+        asuint(_StpSetupConstants1),
+        asuint(_StpSetupConstants2),
+        asuint(_StpSetupConstants3),
+        asuint(_StpSetupConstants4),
+        asuint(_StpSetupConstants5),
+
+        // Eight consecutive per-view constants, starting at the stereo offset.
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 0]),
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 1]),
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 2]),
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 3]),
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 4]),
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 5]),
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 6]),
+        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 7])
+    );
+}
+
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpSetup.compute.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpSetup.compute.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 33be2e9a5506b2843bdb2bdff9cad5e1
+ComputeShaderImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpTaa.compute
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpTaa.compute
@@ -0,0 +1,110 @@
+#pragma kernel StpTaa
+
+#pragma multi_compile _ ENABLE_DEBUG_MODE
+#pragma multi_compile _ ENABLE_LARGE_KERNEL
+
+#pragma multi_compile _ UNITY_DEVICE_SUPPORTS_NATIVE_16BIT
+
+#pragma multi_compile _ DISABLE_TEXTURE2D_X_ARRAY
+
+#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch glcore
+
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/UnityInstancing.hlsl"
+
+#define STP_TAA 1
+
+#include "Packages/com.unity.render-pipelines.core/Runtime/STP/StpCommon.hlsl"
+
+//
+// Input
+//
+
+TEXTURE2D_X(_StpIntermediateColor); // Read through per-channel gathers (StpTaaCol4R/G/B/A*).
+TEXTURE2D_X(_StpIntermediateWeights); // Red channel gathered by StpTaaCtl4*.
+
+//
+// History Input/Output
+//
+
+TEXTURE2D_X(_StpPriorFeedback); // Sampled with the linear-clamp sampler and gathered per channel (StpTaaPriFed*).
+TYPED_TEXTURE2D_X(uint, _StpDepthMotion); // uint texture; red channel gathered by StpTaaMot4*.
+TEXTURE2D_X(_StpConvergence); // Sampled with the linear-clamp sampler by StpTaaCon*.
+
+RW_TEXTURE2D_X(float4, _StpFeedback); // Written by the StpTaa kernel at the thread's position.
+RW_TEXTURE2D_X(float4, _StpOutput); // Written by the StpTaa kernel at the thread's position.
+
+#if defined(STP_16BIT)
+StpH4 StpTaaCtl4H(StpF2 p) { return (StpH4)GATHER_RED_TEXTURE2D_X(_StpIntermediateWeights, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpIntermediateWeights at p.
+StpH4 StpTaaCol4RH(StpF2 p) { return (StpH4)GATHER_RED_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpIntermediateColor at p.
+StpH4 StpTaaCol4GH(StpF2 p) { return (StpH4)GATHER_GREEN_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the green channel of _StpIntermediateColor at p.
+StpH4 StpTaaCol4BH(StpF2 p) { return (StpH4)GATHER_BLUE_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the blue channel of _StpIntermediateColor at p.
+StpH4 StpTaaCol4AH(StpF2 p) { return (StpH4)GATHER_ALPHA_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the alpha channel of _StpIntermediateColor at p.
+StpH1 StpTaaConH(StpF2 p) { return (StpH1)SAMPLE_TEXTURE2D_X_LOD(_StpConvergence, s_linear_clamp_sampler, p, 0); } // Samples _StpConvergence at p (LOD 0) with s_linear_clamp_sampler; result is cast to StpH1.
+StpH1 StpTaaDitH(StpW2 o) { return StpDitH1(o); } // Forwards to StpDitH1(o) (presumably the per-pixel dither value - confirm in StpCommon).
+StpU4 StpTaaMot4H(StpF2 p) { return GATHER_RED_TEXTURE2D_X(_StpDepthMotion, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of the uint _StpDepthMotion texture at p; no cast.
+StpH4 StpTaaPriFedH(StpF2 p) { return (StpH4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0); } // Samples _StpPriorFeedback at p (LOD 0) with s_linear_clamp_sampler.
+StpH4 StpTaaPriFed4RH(StpF2 p) { return (StpH4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpPriorFeedback at p.
+StpH4 StpTaaPriFed4GH(StpF2 p) { return (StpH4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the green channel of _StpPriorFeedback at p.
+StpH4 StpTaaPriFed4BH(StpF2 p) { return (StpH4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the blue channel of _StpPriorFeedback at p.
+#endif
+
+#if defined(STP_32BIT)
+StpMF4 StpTaaCtl4F(StpF2 p) { return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpIntermediateWeights, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpIntermediateWeights at p.
+StpMF4 StpTaaCol4RF(StpF2 p) { return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpIntermediateColor at p.
+StpMF4 StpTaaCol4GF(StpF2 p) { return (StpMF4)GATHER_GREEN_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the green channel of _StpIntermediateColor at p.
+StpMF4 StpTaaCol4BF(StpF2 p) { return (StpMF4)GATHER_BLUE_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the blue channel of _StpIntermediateColor at p.
+StpMF4 StpTaaCol4AF(StpF2 p) { return (StpMF4)GATHER_ALPHA_TEXTURE2D_X(_StpIntermediateColor, s_point_clamp_sampler, p); } // Four-texel gather of the alpha channel of _StpIntermediateColor at p.
+StpMF1 StpTaaConF(StpF2 p) { return (StpMF1)SAMPLE_TEXTURE2D_X_LOD(_StpConvergence, s_linear_clamp_sampler, p, 0); } // Samples _StpConvergence at p (LOD 0) with s_linear_clamp_sampler; result is cast to StpMF1.
+StpMF1 StpTaaDitF(StpMU2 o) { return (StpMF1)StpDitF1(o); } // Forwards to StpDitF1(o), cast to StpMF1 (presumably the per-pixel dither value - confirm in StpCommon).
+StpU4 StpTaaMot4F(StpF2 p) { return GATHER_RED_TEXTURE2D_X(_StpDepthMotion, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of the uint _StpDepthMotion texture at p; no cast.
+StpMF4 StpTaaPriFedF(StpF2 p) { return (StpMF4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0); } // Samples _StpPriorFeedback at p (LOD 0) with s_linear_clamp_sampler.
+StpMF4 StpTaaPriFed4RF(StpF2 p) { return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the red channel of _StpPriorFeedback at p.
+StpMF4 StpTaaPriFed4GF(StpF2 p) { return (StpMF4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the green channel of _StpPriorFeedback at p.
+StpMF4 StpTaaPriFed4BF(StpF2 p) { return (StpMF4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); } // Four-texel gather of the blue channel of _StpPriorFeedback at p.
+#endif
+
+#define THREADING_BLOCK_SIZE STP_GROUP_SIZE
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Threading.hlsl"
+
+// Kernel entry point of the STP TAA pass.
+// Each thread derives its position from the group ID (ComputeGroupPos) plus a per-lane offset
+// (StpRemapLaneTo8x16*), calls StpTaaH on the 16-bit path or StpTaaF otherwise, and stores the two
+// results into _StpFeedback and _StpOutput at that position.
+[numthreads(STP_GROUP_SIZE, 1, 1)]
+void StpTaa(Threading::Group group)
+{
+    // The Z component of the group ID is used as the XR view index.
+    UNITY_XR_ASSIGN_VIEW_INDEX(group.groupID.z);
+
+    // Filled in by StpTaaH / StpTaaF below.
+    half4 feedback;
+    half4 output;
+
+    // The two precision paths differ only in the coordinate types and the H/F function suffix.
+    // Each branch ends on the opening of the call; the argument list that follows is shared.
+#if defined(STP_16BIT)
+    StpW1 lane = StpW1_(group.groupIndex);
+    StpW2 groupPos = ComputeGroupPos(StpW2(group.groupID.xy));
+    StpW2 pos = groupPos + StpRemapLaneTo8x16H(lane);
+
+    StpTaaH(
+#else
+    StpMU1 lane = StpMU1_(group.groupIndex);
+    StpMU2 groupPos = ComputeGroupPos(StpMU2(group.groupID.xy));
+    StpMU2 pos = groupPos + StpRemapLaneTo8x16F(lane);
+
+    StpTaaF(
+#endif
+        lane,
+        pos,
+        feedback,
+        output,
+
+        // TAA constants, reinterpreted as uint.
+        asuint(_StpTaaConstants0),
+        asuint(_StpTaaConstants1),
+        asuint(_StpTaaConstants2),
+        asuint(_StpTaaConstants3)
+    );
+
+    _StpFeedback[COORD_TEXTURE2D_X(pos)] = feedback;
+    _StpOutput[COORD_TEXTURE2D_X(pos)] = output;
+}
+
--- a/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpTaa.compute.meta
+++ b/Library/PackageCache/com.unity.render-pipelines.core@67f868dbad82/Runtime/STP/StpTaa.compute.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 3923900e2b41b5e47bc25bfdcbcdc9e6
+ComputeShaderImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: