From b41e57c4be60c3f2e6276f38e54baddb0dcd6994 Mon Sep 17 00:00:00 2001 From: Daniele Pieroni Date: Mon, 24 Oct 2022 22:30:23 -0400 Subject: [PATCH] Add a memory barrier when the wave lane count is < 32 On Intel GPUs the wave lane count can be less than 32, and therefore the mask created with WaveActiveBitOr in the FFX_DNSR_Shadows_CopyResult function can be incomplete. --- ffx-shadows-dnsr/ffx_denoiser_shadows_prepare.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ffx-shadows-dnsr/ffx_denoiser_shadows_prepare.h b/ffx-shadows-dnsr/ffx_denoiser_shadows_prepare.h index ea853fa..79eb744 100644 --- a/ffx-shadows-dnsr/ffx_denoiser_shadows_prepare.h +++ b/ffx-shadows-dnsr/ffx_denoiser_shadows_prepare.h @@ -31,6 +31,11 @@ void FFX_DNSR_Shadows_CopyResult(uint2 gtid, uint2 gid) const uint linear_tile_index = FFX_DNSR_Shadows_LinearTileIndex(gid, FFX_DNSR_Shadows_GetBufferDimensions().x); const bool hit_light = FFX_DNSR_Shadows_HitsLight(did, gtid, gid); const uint lane_mask = hit_light ? FFX_DNSR_Shadows_GetBitMaskFromPixelPosition(did) : 0; + + //adding a memory barrier before WaveActiveBitOr for GPUs that can have wave lanes size < 32 + if (WaveGetLaneCount() < 32) + GroupMemoryBarrierWithGroupSync(); + FFX_DNSR_Shadows_WriteMask(linear_tile_index, WaveActiveBitOr(lane_mask)); }