From 60fe2f6eccc263e89dc78f5488148c65b2824623 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 2 Nov 2018 18:50:48 +0000 Subject: [PATCH 001/220] egl: add EGL_EXT_device_base entrypoints eglQueryDevicesEXT (unlike the other three functions) does not depend on the display. It is implemented in GLVND, which calls into each driver collecting the list of devices and presenting it to the user. For the other entrypoints, GLVND acts as a pass-through stub calling into the vendor library. The vendor implementation calls back into GLVND to get the vendor dispatch. Then the driver proceeds to call itself via the said dispatch. This design makes it possible to keep using "old" GLVND with newer vendor drivers, since effectively all the extension code is within the latter itself. Without said entrypoints, any user will outright crash - as reported in the bug report. Note: there's a follow-up fix needed to our GLVND code, to make piglit happy. v2: add some beefy documentation in the commit message. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108635 Fixes: 7552fcb7b9b ("egl: add base EGL_EXT_device_base implementation") Reported-by: kyle.devir@mykolab.com Cc: kyle.devir@mykolab.com Acked-by: Eric Engestrom Signed-off-by: Emil Velikov Tested-by: Emil Velikov (cherry picked from commit 2a8fefdeb0f4e259cc01e32dae40bc2f3063f5e0) --- src/egl/generate/eglFunctionList.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/egl/generate/eglFunctionList.py b/src/egl/generate/eglFunctionList.py index fb5b3c30bdf..667704eb2cb 100644 --- a/src/egl/generate/eglFunctionList.py +++ b/src/egl/generate/eglFunctionList.py @@ -199,5 +199,12 @@ def _eglFunc(name, method, static=None, public=False, inheader=None, prefix="dis # EGL_EXT_image_dma_buf_import_modifiers _eglFunc("eglQueryDmaBufFormatsEXT", "display"), _eglFunc("eglQueryDmaBufModifiersEXT", "display"), + + # EGL_EXT_device_base + _eglFunc("eglQueryDeviceAttribEXT", "device"), + _eglFunc("eglQueryDeviceStringEXT", "device"), + _eglFunc("eglQueryDevicesEXT", "none"), + _eglFunc("eglQueryDisplayAttribEXT", "display"), + ) From 22201d2048c1834ed46fa340cdb60273acd720b0 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 2 Nov 2018 18:34:19 +0000 Subject: [PATCH 002/220] egl/glvnd: correctly report errors when vendor cannot be found If the user provides an invalid display or device the ToVendor lookup will fail. In this case, the local [Mesa vendor] error code will be set. Thus on the subsequent eglGetError(), the error will be EGL_SUCCESS. To be more specific, GLVND remembers the last vendor and calls back into its eglGetError, although there's no guarantee to ever have had one. v2: - Add _eglError call, so the debug callback is executed (Kyle) - Drop XXX comment. 
Piglit: tests/egl/spec/egl_ext_device_query Fixes: ce562f9e3fa ("EGL: Implement the libglvnd interface for EGL (v3)") Cc: Eric Engestrom Signed-off-by: Emil Velikov Reviewed-by: Kyle Brenneman (cherry picked from commit b3ade6538798ad9bf397d8b386eab3dd8af57f3d) --- src/egl/main/egldispatchstubs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/egl/main/egldispatchstubs.c b/src/egl/main/egldispatchstubs.c index bfc3195c779..96708aeb0dc 100644 --- a/src/egl/main/egldispatchstubs.c +++ b/src/egl/main/egldispatchstubs.c @@ -59,6 +59,11 @@ static __eglMustCastToProperFunctionPointerType FetchVendorFunc(__EGLvendorInfo } if (func == NULL) { if (errorCode != EGL_SUCCESS) { + // Since we have no vendor, the follow-up eglGetError() call will + // end up using the GLVND error code. Set it here. + if (vendor == NULL) { + exports->setEGLError(errorCode); + } _eglError(errorCode, __EGL_DISPATCH_FUNC_NAMES[index]); } return NULL; From 949b1048f731e67f34acc0a6fbe188782b31e261 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 6 Nov 2018 15:30:57 +0000 Subject: [PATCH 003/220] Update version to 18.3.0-rc1 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 8b16de0851f..bb28ad2e2bd 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-devel +18.3.0-rc1 From 12c5eb2fd3fbd7725b169dccc32c9b29b1d0a8d0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 31 Oct 2018 23:38:20 +0000 Subject: [PATCH 004/220] radv: apply xfb buffer offset at buffer binding time not later. (v2) In order to handle pause/resume properly, the offset should be added to the buffer binding not to the begin/end paths. 
v2: don't add offset to size Fixes ext_transform_feedback-alignment* under zink Fixes: b4eb029062 (radv: implement VK_EXT_transform_feedback) Reviewed-by: Samuel Pitoiset (cherry picked from commit 7f37a52a21a15fc28b2c452fff54dd871d5dfe53) --- src/amd/vulkan/radv_cmd_buffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c43e12f6d62..296b626b19c 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1950,6 +1950,8 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) va = radv_buffer_get_va(buffer->bo) + buffer->offset; + va += sb[i].offset; + /* Set the descriptor. * * On VI, the format must be non-INVALID, otherwise @@ -4754,7 +4756,7 @@ void radv_CmdBeginTransformFeedbackEXT( * SGPRs what to do. */ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); - radeon_emit(cs, (sb[i].offset + sb[i].size) >> 2); /* BUFFER_SIZE (in DW) */ + radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ if (pCounterBuffers && pCounterBuffers[i]) { @@ -4783,7 +4785,7 @@ void radv_CmdBeginTransformFeedbackEXT( STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ radeon_emit(cs, 0); /* unused */ radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, sb[i].offset >> 2); /* buffer offset in DW */ + radeon_emit(cs, 0); /* unused */ radeon_emit(cs, 0); /* unused */ } } From 52e01585c4714c0bfcf04b4a4e99319f08ef13bf Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 31 Oct 2018 23:55:29 +0000 Subject: [PATCH 005/220] radv: fix begin/end transform feedback with 0 counter buffers. If the user gives 0 counterBuffers then the driver should still enable transform feedback on all targets. This changes the driver to always enable xfb, and use counter buffers where one is defined for the target in question. 
Fixes: b4eb029062 (radv: implement VK_EXT_transform_feedback) Reviewed-by: Samuel Pitoiset (cherry picked from commit 677b496b6bd07cbe05dd429344ba525619cdd08c) --- src/amd/vulkan/radv_cmd_buffer.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 296b626b19c..6510a5c4425 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -4743,13 +4743,15 @@ void radv_CmdBeginTransformFeedbackEXT( struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t i; radv_flush_vgt_streamout(cmd_buffer); assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) { - if (!(so->enabled_mask & (1 << i))) - continue; + for_each_bit(i, so->enabled_mask) { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount) + counter_buffer_idx = -1; /* SI binds streamout buffers as shader resources. * VGT only counts primitives and tells the shader through @@ -4759,12 +4761,12 @@ void radv_CmdBeginTransformFeedbackEXT( radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ - if (pCounterBuffers && pCounterBuffers[i]) { + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { /* The array of counter buffers is optional. 
*/ - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]); + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); uint64_t va = radv_buffer_get_va(buffer->bo); - va += buffer->offset + pCounterBufferOffsets[i]; + va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx]; /* Append */ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); @@ -4803,20 +4805,22 @@ void radv_CmdEndTransformFeedbackEXT( RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t i; radv_flush_vgt_streamout(cmd_buffer); assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) { - if (!(so->enabled_mask & (1 << i))) - continue; + for_each_bit(i, so->enabled_mask) { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount) + counter_buffer_idx = -1; - if (pCounterBuffers && pCounterBuffers[i]) { + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { /* The array of counters buffer is optional. 
*/ - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]); + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); uint64_t va = radv_buffer_get_va(buffer->bo); - va += buffer->offset + pCounterBufferOffsets[i]; + va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx]; radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | From 959a9d42d7169b75d1ab92b71c292174555bf8cb Mon Sep 17 00:00:00 2001 From: Mauro Rossi Date: Wed, 31 Oct 2018 15:53:21 +0100 Subject: [PATCH 006/220] android: gallium/auxiliary: add include to get u_debug.h header To avoid build error in u_debug_stack_android.cpp due to now missing u_debug.h header: external/mesa/src/gallium/auxiliary/util/u_debug_stack_android.cpp:26:10: fatal error: 'u_debug.h' file not found #include "u_debug.h" ^ 1 error generated. Fixes: 37db383abb ("util: Move u_debug to utils") Signed-off-by: Mauro Rossi Reviewed-by: Dylan Baker (cherry picked from commit b9dec214f5151f3ad87eb2faf982c57e5323f534) --- src/gallium/auxiliary/Android.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk index acd243b8346..7618c6fcd93 100644 --- a/src/gallium/auxiliary/Android.mk +++ b/src/gallium/auxiliary/Android.mk @@ -36,7 +36,8 @@ LOCAL_SRC_FILES := \ util/u_debug_stack_android.cpp LOCAL_C_INCLUDES := \ - $(GALLIUM_TOP)/auxiliary/util + $(GALLIUM_TOP)/auxiliary/util \ + $(MESA_TOP)/src/util ifeq ($(MESA_ENABLE_LLVM),true) LOCAL_SRC_FILES += \ From d5e33d2aa6cc7484a1a6bed2cd65d18f62f55f49 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 2 Nov 2018 08:53:16 +1100 Subject: [PATCH 007/220] nir: allow propagation of if evaluation for bcsel Shader-db results Skylake: total instructions in shared programs: 13109035 -> 13109024 (<.01%) instructions in affected programs: 4777 -> 4766 (-0.23%) helped: 11 HURT: 0 total cycles in shared programs: 332090418 -> 332090443 (<.01%) 
cycles in affected programs: 19474 -> 19499 (0.13%) helped: 6 HURT: 4 Reviewed-by: Jason Ekstrand (cherry picked from commit c7bdda8aa5f1fb1d797512a0a54a032153755c6c) --- src/compiler/nir/nir_opt_if.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index 1fe95e53766..ed93cac9ce9 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -448,7 +448,7 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, if (!evaluate_if_condition(nif, b->cursor, &bool_value)) return false; - nir_ssa_def *def[2] = {0}; + nir_ssa_def *def[4] = {0}; for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { if (alu->src[i].src.ssa == use_src->ssa) { def[i] = nir_imm_bool(b, bool_value); @@ -456,7 +456,7 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, def[i] = alu->src[i].src.ssa; } } - nir_ssa_def *nalu = nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL); + nir_ssa_def *nalu = nir_build_alu(b, alu->op, def[0], def[1], def[2], def[3]); /* Rewrite use to use new alu instruction */ nir_src new_src = nir_src_for_ssa(nalu); @@ -472,14 +472,21 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, static bool can_propagate_through_alu(nir_src *src) { - if (src->parent_instr->type == nir_instr_type_alu && - (nir_instr_as_alu(src->parent_instr)->op == nir_op_ior || - nir_instr_as_alu(src->parent_instr)->op == nir_op_iand || - nir_instr_as_alu(src->parent_instr)->op == nir_op_inot || - nir_instr_as_alu(src->parent_instr)->op == nir_op_b2i)) - return true; + if (src->parent_instr->type != nir_instr_type_alu) + return false; - return false; + nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr); + switch (alu->op) { + case nir_op_ior: + case nir_op_iand: + case nir_op_inot: + case nir_op_b2i: + return true; + case nir_op_bcsel: + return src == &alu->src[0].src; + default: + return 
false; + } } static bool From 6adbf17ce91fbc907935af0b8e1c7d2eb26dd9a9 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 2 Nov 2018 22:10:36 +1100 Subject: [PATCH 008/220] nir: fix condition propagation when src has a swizzle We cannot use nir_build_alu() to create the new alu as it has no way to know how many components of the src we will use. This results in it guessing the max number of components from one of its inputs. Fixes the following CTS tests: dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_frag dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_geom dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_tessc dEQP-VK.spirv_assembly.instruction.graphics.selection_block_order.out_of_order_vert Fixes: 2975422ceb6c ("nir: propagates if condition evaluation down some alu chains") Reviewed-by: Jason Ekstrand (cherry picked from commit 769ae9fb7f8cea1d4a03e31f7f4a1c988e424c03) --- src/compiler/nir/nir_opt_if.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index ed93cac9ce9..8a971c43f24 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -391,6 +391,34 @@ evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value) } } +static nir_ssa_def * +clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu, + nir_ssa_def **src_defs) +{ + nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op); + nalu->exact = alu->exact; + + nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest, + alu->dest.dest.ssa.num_components, + alu->dest.dest.ssa.bit_size, alu->dest.dest.ssa.name); + + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + assert(alu->src[i].src.is_ssa); + nalu->src[i].src = nir_src_for_ssa(src_defs[i]); + 
nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + sizeof(nalu->src[i].swizzle)); + } + + nir_builder_instr_insert(b, &nalu->instr); + + return &nalu->dest.dest.ssa;; +} + /* * This propagates if condition evaluation down the chain of some alu * instructions. For example by checking the use of some of the following alu @@ -456,7 +484,8 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, def[i] = alu->src[i].src.ssa; } } - nir_ssa_def *nalu = nir_build_alu(b, alu->op, def[0], def[1], def[2], def[3]); + + nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def); /* Rewrite use to use new alu instruction */ nir_src new_src = nir_src_for_ssa(nalu); From 5adc1920ee391b0d231fbd593b6a8cb0dd281bd5 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 1 Nov 2018 14:12:57 -0700 Subject: [PATCH 009/220] meson: link gallium nine with pthreads In some cases (not building with llvm, which automatically pulls in pthreads) nine needs to be directly linked with pthreads. Fixes building on x86 (32 bit) without llvm. 
Distro bug: https://bugs.gentoo.org/670094 Fixes: 6b4c7047d57178d3362a710ad503057c6a582ca3 ("meson: build gallium nine state_tracker") Tested-by: Rafal Lalik Reviewed-by: Matt Turner (cherry picked from commit 7652931d33b36c93b2e84713baa8c283d568402e) --- src/gallium/targets/d3dadapter9/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/targets/d3dadapter9/meson.build b/src/gallium/targets/d3dadapter9/meson.build index bd05b4f9692..bc72b1110a0 100644 --- a/src/gallium/targets/d3dadapter9/meson.build +++ b/src/gallium/targets/d3dadapter9/meson.build @@ -53,7 +53,7 @@ libgallium_nine = shared_library( libswkmsdri, ], dependencies : [ - dep_selinux, dep_expat, dep_libdrm, dep_llvm, + dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread, driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau, driver_i915, driver_svga, ], From 45fe51a0eefd1b4f7bb70869da6eb3286788a659 Mon Sep 17 00:00:00 2001 From: Mauro Rossi Date: Tue, 30 Oct 2018 22:35:43 +0100 Subject: [PATCH 010/220] android: radv: add libmesa_git_sha1 static dependency libmesa_git_sha1 whole static dependency is added to get git_sha1.h header and avoid following building error: external/mesa/src/amd/vulkan/radv_device.c:46:10: fatal error: 'git_sha1.h' file not found ^ 1 error generated. 
Fixes: 9d40ec2cf6 ("radv: Add support for VK_KHR_driver_properties.") Signed-off-by: Mauro Rossi Reviewed-by: Eric Engestrom (cherry picked from commit 5c0cff868a48202df1ddb30d1dca848101063cb6) --- src/amd/vulkan/Android.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/Android.mk b/src/amd/vulkan/Android.mk index 51b03561fa7..9574bf54e5a 100644 --- a/src/amd/vulkan/Android.mk +++ b/src/amd/vulkan/Android.mk @@ -74,7 +74,8 @@ LOCAL_C_INCLUDES := \ $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util LOCAL_WHOLE_STATIC_LIBRARIES := \ - libmesa_vulkan_util + libmesa_vulkan_util \ + libmesa_git_sha1 LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.c LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.h From 6463af186c803eb96cd70df521d6915300ea497c Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 31 Oct 2018 15:35:23 -0700 Subject: [PATCH 011/220] r600/sb: Fix constant logical operand in assert. Fixes: da977ad90747 ("r600/sb: start adding GDS support") Signed-off-by: Vinson Lee Reviewed-By: Gert Wollny (cherry picked from commit 64a9ed8848ed5c2c909b59e0e4b600b2b01bba28) --- src/gallium/drivers/r600/sb/sb_bc_builder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp index 5681fdc4425..b7d87eac9f4 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp @@ -567,7 +567,7 @@ int bc_builder::build_fetch_gds(fetch_node *n) { const fetch_op_info *fop = bc.op_ptr; unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f; unsigned mem_op = 4; - assert(fop->flags && FF_GDS); + assert(fop->flags & FF_GDS); if (bc.op == FETCH_OP_TF_WRITE) { mem_op = 5; From 97a3ef3d1cdcea64681e1350ea864ec7f7b5dec4 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 31 Oct 2018 11:43:34 +0100 Subject: [PATCH 012/220] radv: add missing TFB 
queries support to CmdCopyQueryPoolsResults() Cc: 18.3 Fixes: b4eb029062a ("radv: implement VK_EXT_transform_feedback") Signed-off-by: Samuel Pitoiset Reviewed-by: Dave Airlie (cherry picked from commit b1b2dd06a7b777e862b525302b15bcaf407d3648) --- src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_query.c | 277 ++++++++++++++++++++++++++++++++++ 2 files changed, 278 insertions(+) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 7e9e82e3158..253e6455604 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -595,6 +595,7 @@ struct radv_meta_state { VkPipelineLayout p_layout; VkPipeline occlusion_query_pipeline; VkPipeline pipeline_statistics_query_pipeline; + VkPipeline tfb_query_pipeline; } query; }; diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 57ea22fb847..ae8fc3834bf 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -512,11 +512,233 @@ build_pipeline_statistics_query_shader(struct radv_device *device) { return b.shader; } +static nir_shader * +build_tfb_query_shader(struct radv_device *device) +{ + /* the shader this builds is roughly + * + * uint32_t src_stride = 32; + * + * location(binding = 0) buffer dst_buf; + * location(binding = 1) buffer src_buf; + * + * void main() { + * uint64_t result[2] = {}; + * bool available = false; + * uint64_t src_offset = src_stride * global_id.x; + * uint64_t dst_offset = dst_stride * global_id.x; + * uint64_t *src_data = src_buf[src_offset]; + * uint32_t avail = (src_data[0] >> 32) & + * (src_data[1] >> 32) & + * (src_data[2] >> 32) & + * (src_data[3] >> 32); + * if (avail & 0x80000000) { + * result[0] = src_data[3] - src_data[1]; + * result[1] = src_data[2] - src_data[0]; + * available = true; + * } + * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 
16 : 8; + * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) { + * if (flags & VK_QUERY_RESULT_64_BIT) { + * dst_buf[dst_offset] = result; + * } else { + * dst_buf[dst_offset] = (uint32_t)result; + * } + * } + * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + * dst_buf[dst_offset + result_size] = available; + * } + * } + */ + nir_builder b; + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "tfb_query"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; + + /* Create and initialize local variables. */ + nir_variable *result = + nir_local_variable_create(b.impl, + glsl_vector_type(GLSL_TYPE_UINT64, 2), + "result"); + nir_variable *available = + nir_local_variable_create(b.impl, glsl_int_type(), "available"); + + nir_store_var(&b, result, + nir_vec2(&b, nir_imm_int64(&b, 0), + nir_imm_int64(&b, 0)), 0x3); + nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1); + + nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); + + /* Load resources. */ + nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, + nir_intrinsic_vulkan_resource_index); + dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_intrinsic_set_desc_set(dst_buf, 0); + nir_intrinsic_set_binding(dst_buf, 0); + nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL); + nir_builder_instr_insert(&b, &dst_buf->instr); + + nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader, + nir_intrinsic_vulkan_resource_index); + src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_intrinsic_set_desc_set(src_buf, 0); + nir_intrinsic_set_binding(src_buf, 1); + nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); + nir_builder_instr_insert(&b, &src_buf->instr); + + /* Compute global ID. 
*/ + nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); + nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + global_id = nir_channel(&b, global_id, 0); // We only care about x here. + + /* Compute src/dst strides. */ + nir_ssa_def *input_stride = nir_imm_int(&b, 32); + nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id); + nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride"); + nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id); + + /* Load data from the query pool. */ + nir_intrinsic_instr *load1 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); + load1->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); + load1->src[1] = nir_src_for_ssa(input_base); + nir_ssa_dest_init(&load1->instr, &load1->dest, 4, 32, NULL); + load1->num_components = 4; + nir_builder_instr_insert(&b, &load1->instr); + + nir_intrinsic_instr *load2 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); + load2->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); + load2->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base, nir_imm_int(&b, 16))); + nir_ssa_dest_init(&load2->instr, &load2->dest, 4, 32, NULL); + load2->num_components = 4; + nir_builder_instr_insert(&b, &load2->instr); + + /* Check if result is available. */ + nir_ssa_def *avails[2]; + avails[0] = nir_iand(&b, nir_channel(&b, &load1->dest.ssa, 1), + nir_channel(&b, &load1->dest.ssa, 3)); + avails[1] = nir_iand(&b, nir_channel(&b, &load2->dest.ssa, 1), + nir_channel(&b, &load2->dest.ssa, 3)); + nir_ssa_def *result_is_available = + nir_iand(&b, nir_iand(&b, avails[0], avails[1]), + nir_imm_int(&b, 0x80000000)); + + /* Only compute result if available. 
*/ + nir_if *available_if = nir_if_create(b.shader); + available_if->condition = nir_src_for_ssa(result_is_available); + nir_cf_node_insert(b.cursor, &available_if->cf_node); + + b.cursor = nir_after_cf_list(&available_if->then_list); + + /* Pack values. */ + nir_ssa_def *packed64[4]; + packed64[0] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load1->dest.ssa, 0), + nir_channel(&b, &load1->dest.ssa, 1))); + packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load1->dest.ssa, 2), + nir_channel(&b, &load1->dest.ssa, 3))); + packed64[2] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load2->dest.ssa, 0), + nir_channel(&b, &load2->dest.ssa, 1))); + packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load2->dest.ssa, 2), + nir_channel(&b, &load2->dest.ssa, 3))); + + /* Compute result. */ + nir_ssa_def *num_primitive_written = + nir_isub(&b, packed64[3], packed64[1]); + nir_ssa_def *primitive_storage_needed = + nir_isub(&b, packed64[2], packed64[0]); + + nir_store_var(&b, result, + nir_vec2(&b, num_primitive_written, + primitive_storage_needed), 0x3); + nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1); + + b.cursor = nir_after_cf_node(&available_if->cf_node); + + /* Determine if result is 64 or 32 bit. */ + nir_ssa_def *result_is_64bit = + nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_ssa_def *result_size = + nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), + nir_imm_int(&b, 8)); + + /* Store the result if complete or partial results have been requested. */ + nir_if *store_if = nir_if_create(b.shader); + store_if->condition = + nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, + nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), + nir_load_var(&b, available))); + nir_cf_node_insert(b.cursor, &store_if->cf_node); + + b.cursor = nir_after_cf_list(&store_if->then_list); + + /* Store result. 
*/ + nir_if *store_64bit_if = nir_if_create(b.shader); + store_64bit_if->condition = nir_src_for_ssa(result_is_64bit); + nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node); + + b.cursor = nir_after_cf_list(&store_64bit_if->then_list); + + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(nir_load_var(&b, result)); + store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); + store->src[2] = nir_src_for_ssa(output_base); + nir_intrinsic_set_write_mask(store, 0x3); + store->num_components = 2; + nir_builder_instr_insert(&b, &store->instr); + + b.cursor = nir_after_cf_list(&store_64bit_if->else_list); + + store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(nir_u2u32(&b, nir_load_var(&b, result))); + store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); + store->src[2] = nir_src_for_ssa(output_base); + nir_intrinsic_set_write_mask(store, 0x3); + store->num_components = 2; + nir_builder_instr_insert(&b, &store->instr); + + b.cursor = nir_after_cf_node(&store_64bit_if->cf_node); + + b.cursor = nir_after_cf_node(&store_if->cf_node); + + /* Store the availability bit if requested. 
*/ + nir_if *availability_if = nir_if_create(b.shader); + availability_if->condition = + nir_src_for_ssa(nir_iand(&b, flags, + nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + nir_cf_node_insert(b.cursor, &availability_if->cf_node); + + b.cursor = nir_after_cf_list(&availability_if->then_list); + + store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(nir_load_var(&b, available)); + store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); + store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base)); + nir_intrinsic_set_write_mask(store, 0x1); + store->num_components = 1; + nir_builder_instr_insert(&b, &store->instr); + + b.cursor = nir_after_cf_node(&availability_if->cf_node); + + return b.shader; +} + static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device) { VkResult result; struct radv_shader_module occlusion_cs = { .nir = NULL }; struct radv_shader_module pipeline_statistics_cs = { .nir = NULL }; + struct radv_shader_module tfb_cs = { .nir = NULL }; mtx_lock(&device->meta_state.mtx); if (device->meta_state.query.pipeline_statistics_query_pipeline) { @@ -525,6 +747,7 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d } occlusion_cs.nir = build_occlusion_query_shader(device); pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device); + tfb_cs.nir = build_tfb_query_shader(device); VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, @@ -611,12 +834,34 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, &pipeline_statistics_vk_pipeline_info, NULL, &device->meta_state.query.pipeline_statistics_query_pipeline); + if (result != VK_SUCCESS) + goto fail; + VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&tfb_cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo tfb_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = tfb_pipeline_shader_stage, + .flags = 0, + .layout = device->meta_state.query.p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&device->meta_state.cache), + 1, &tfb_pipeline_info, NULL, + &device->meta_state.query.tfb_query_pipeline); fail: if (result != VK_SUCCESS) radv_device_finish_meta_query_state(device); ralloc_free(occlusion_cs.nir); ralloc_free(pipeline_statistics_cs.nir); + ralloc_free(tfb_cs.nir); mtx_unlock(&device->meta_state.mtx); return result; } @@ -631,6 +876,11 @@ VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_d void radv_device_finish_meta_query_state(struct radv_device *device) { + if (device->meta_state.query.tfb_query_pipeline) + radv_DestroyPipeline(radv_device_to_handle(device), + device->meta_state.query.tfb_query_pipeline, + &device->meta_state.alloc); + if (device->meta_state.query.pipeline_statistics_query_pipeline) radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.pipeline_statistics_query_pipeline, @@ -1115,6 +1365,33 @@ void radv_CmdCopyQueryPoolResults( assert(cs->cdw <= cdw_max); } break; + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + for(unsigned i = 0; i < queryCount; i++) { + unsigned query = firstQuery + i; + uint64_t src_va = va + query * pool->stride; + + /* Wait on the upper word of all results. 
*/ + for (unsigned j = 0; j < 4; j++, src_va += 8) { + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL | + WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, (src_va + 4)); + radeon_emit(cs, (src_va + 4) >> 32); + radeon_emit(cs, 0x80000000); /* reference value */ + radeon_emit(cs, 0xffffffff); /* mask */ + radeon_emit(cs, 4); /* poll interval */ + } + } + } + + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline, + pool->bo, dst_buffer->bo, + firstQuery * pool->stride, + dst_buffer->offset + dstOffset, + pool->stride, stride, + queryCount, flags, 0, 0); + break; default: unreachable("trying to get results of unhandled query type"); } From 1348e6e25558743fbbdf4b2130b334fb9a8ff475 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Mon, 5 Nov 2018 09:55:02 +0000 Subject: [PATCH 013/220] wsi/wayland: use proper VkResult type Signed-off-by: Eric Engestrom Reviewed-by: Dave Airlie (cherry picked from commit dcee22afed935a434118935bcdad5c4fc7246077) --- src/vulkan/wsi/wsi_common_wayland.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index e9cc22ec603..12014b0c143 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -455,10 +455,10 @@ wsi_wl_get_presentation_support(struct wsi_device *wsi_device, (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; struct wsi_wl_display display; - int ret = wsi_wl_display_init(wsi, &display, wl_display, false); + VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false); wsi_wl_display_finish(&display); - return ret == 0; + return ret == VK_SUCCESS; } static VkResult From 422c905f4b6739e0913fe58fd987826b1fbd119e Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Mon, 5 Nov 2018 09:57:09 +0000 Subject: [PATCH 014/220] wsi/wayland: only finish() a successfully init()ed display Fixes: 
43691024982b3ea734ad0 "vulkan/wsi/wayland: Stop caching Wayland displays" Signed-off-by: Eric Engestrom Reviewed-by: Dave Airlie Reviewed-by: Philipp Zabel (cherry picked from commit d515ded4d951b830b560c352d64918a89027bee5) --- src/vulkan/wsi/wsi_common_wayland.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 12014b0c143..3d3a60167bf 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -456,7 +456,8 @@ wsi_wl_get_presentation_support(struct wsi_device *wsi_device, struct wsi_wl_display display; VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false); - wsi_wl_display_finish(&display); + if (ret == VK_SUCCESS) + wsi_wl_display_finish(&display); return ret == VK_SUCCESS; } From 327330e77c04aa1516ceb70ccad2c0599b7772a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 2 Nov 2018 20:56:42 -0400 Subject: [PATCH 015/220] st/va: fix incorrect use of resource_destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 4373dd32154 ("st/va: Support YUV formats in vaCreateSurfaces") Cc: Drew Davenport Reviewed-by: Emil Velikov Reviewed-by: Michel Dänzer (cherry picked from commit 04298a2f24455541f28ccffd2f0f73b831833d57) --- src/gallium/state_trackers/va/surface.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 5376be28531..9646427ea5f 100644 --- a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -598,10 +598,8 @@ surface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface, return VA_STATUS_SUCCESS; fail: - for (i = 0; i < VL_NUM_COMPONENTS; i++) { - if (resources[i]) - pscreen->resource_destroy(pscreen, resources[i]); - } + for (i = 0; i < VL_NUM_COMPONENTS; i++) + 
pipe_resource_reference(&resources[i], NULL); return result; } From 7053fe50c3cda1709b728003703c11c5080a82b2 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 5 Nov 2018 20:42:40 +0000 Subject: [PATCH 016/220] intel/decoders: fix instruction base address parsing Signed-off-by: Lionel Landwerlin Fixes: 00103db04ab879 ("intel: Fix decoding for partial STATE_BASE_ADDRESS updates.") Reviewed-by: Kenneth Graunke (cherry picked from commit b47a69ed4c907df5917de10cbe5ef12e2d900bce) --- src/intel/common/gen_batch_decoder.c | 2 +- src/intel/tools/aubinator_viewer_decoder.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c index 63f04627572..36ee7706e40 100644 --- a/src/intel/common/gen_batch_decoder.c +++ b/src/intel/common/gen_batch_decoder.c @@ -214,7 +214,7 @@ handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p) surface_modify = iter.raw_value; } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) { dynamic_modify = iter.raw_value; - } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) { + } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) { instruction_modify = iter.raw_value; } } diff --git a/src/intel/tools/aubinator_viewer_decoder.cpp b/src/intel/tools/aubinator_viewer_decoder.cpp index 5311a8afc31..59cde530409 100644 --- a/src/intel/tools/aubinator_viewer_decoder.cpp +++ b/src/intel/tools/aubinator_viewer_decoder.cpp @@ -172,7 +172,7 @@ handle_state_base_address(struct aub_viewer_decode_ctx *ctx, surface_modify = iter.raw_value; } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) { dynamic_modify = iter.raw_value; - } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) { + } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) { instruction_modify = iter.raw_value; } } From 
a93d19f5426d6069b9858d87fb2e9c78fbbbb02d Mon Sep 17 00:00:00 2001 From: Olivier Fourdan Date: Thu, 25 Oct 2018 14:48:15 +0200 Subject: [PATCH 017/220] wayland/egl: Resize EGL surface on update buffer for swrast After commit a9fb331ea ("wayland/egl: update surface size on window resize"), the surface size is updated as soon as the resize is done, and `update_buffers()` would resize only if the surface size differs from the attached size. However, in the case of swrast, there is no resize callback and the attached size is updated in `dri2_wl_swrast_commit_backbuffer()` prior to the `swrast_update_buffers()` so the attached size is always up to date when it reaches `swrast_update_buffers()` and the surface is never resized. This can be observed with "totem" using the GDK backend on Wayland (the default) when running on software rendering: $ LIBGL_ALWAYS_SOFTWARE=true CLUTTER_BACKEND=gdk totem Resizing the window would leave the EGL surface size unchanged. To avoid the issue, partially revert the part of commit a9fb331ea for `swrast_update_buffers()` and resize on the win size and not the attached size. Fixes: a9fb331ea - wayland/egl: update surface size on window resize Signed-off-by: Olivier Fourdan CC: Daniel Stone CC: Juan A. Suarez Romero CC: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. 
Suarez (cherry picked from commit 55af17ffed289bdcc1396829e461245644175ed4) --- src/egl/drivers/dri2/platform_wayland.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index eb9f5e2b1e2..dc16a69dfbc 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1661,8 +1661,8 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf) if (dri2_surf->back) return 0; - if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width || - dri2_surf->base.Height != dri2_surf->wl_win->attached_height) { + if (dri2_surf->base.Width != dri2_surf->wl_win->width || + dri2_surf->base.Height != dri2_surf->wl_win->height) { dri2_wl_release_buffers(dri2_surf); From ecb1bef8714ede7b3b5b00a62c749b4c1b722e1c Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 6 Nov 2018 11:37:51 +0000 Subject: [PATCH 018/220] anv/android: mark gralloc allocated BOs as external MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocating through Gralloc implies buffers are going to be used outside the driver. We have special MOCS settings for external BOs and we probably want to use them here too. 
Signed-off-by: Lionel Landwerlin Fixes: a1220e73116bad7 ("anv/android: Set the BO flags in bo_cache_import (v2)") Reviewed-by: Tapani Pälli (cherry picked from commit 421fa01d64d9f2a7191ded3a819118d216436ab8) --- src/intel/vulkan/anv_android.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index 46c41d57861..916e76c93ff 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -128,7 +128,7 @@ anv_image_from_gralloc(VkDevice device_h, */ int dma_buf = gralloc_info->handle->data[0]; - uint64_t bo_flags = 0; + uint64_t bo_flags = ANV_BO_EXTERNAL; if (device->instance->physicalDevice.supports_48bit_addresses) bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; if (device->instance->physicalDevice.use_softpin) From ede46c67ea35c076721aac0d1b751eb589e50629 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 7 Nov 2018 15:59:07 +0000 Subject: [PATCH 019/220] docs: document the staging branch and add reference to it A while back we agreed that having a live/staging branch is beneficial. Sadly we forgot to document that, so here is my first attempt. Document the caveat that the branch history is not stable. CC: Andres Gomez CC: Dylan Baker CC: Juan A. Suarez Romero Signed-off-by: Emil Velikov (cherry picked from commit 0a60708870e256432bdef1a22c98b09f83d8a440) --- docs/releasing.html | 20 ++++++++++++++++++++ docs/submittingpatches.html | 3 +++ 2 files changed, 23 insertions(+) diff --git a/docs/releasing.html b/docs/releasing.html index 52e102207d1..c79a020efa7 100644 --- a/docs/releasing.html +++ b/docs/releasing.html @@ -21,6 +21,7 @@

Releasing process

  • Overview
  • Release schedule
  • Cherry-pick and test +
  • Staging branch
  • Making a branchpoint
  • Pre-release announcement
  • Making a new release @@ -209,6 +210,25 @@

    Regression/functionality testing

    idea too.

    +

    Staging branch

    + +

    +A live branch, which contains the currently merged/rejected patches, is available +in the main repository under staging/X.Y. For example: +

    +
    +	staging/18.1 - WIP branch for the 18.1 series
    +	staging/18.2 - WIP branch for the 18.2 series
    +
    + +

    +Notes: +

    +
      +
    • People are encouraged to test the branch and report regressions.
    • +
    • The branch history is not stable and it will be rebased.
    • +
    +

    Making a branchpoint

    diff --git a/docs/submittingpatches.html b/docs/submittingpatches.html index e5350bdb2cf..d7ea0a310db 100644 --- a/docs/submittingpatches.html +++ b/docs/submittingpatches.html @@ -251,6 +251,9 @@

    Nominating a commit for a stable branch

    nomination request.

    +

    +The current patch status can be observed in the staging branch. +

    The stable tag

    From 5b35600422ddbe1e17f11e9235cbcff043e303bf Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 2 Nov 2018 13:33:52 +1100 Subject: [PATCH 020/220] ac/nir_to_llvm: fix b2f for f64 Fixes: d7e0d47b9de3 ("nir: Add a bunch of b2[if] optimizations") Reviewed-by: Dave Airlie (cherry picked from commit 9aa3c1915ea039a0301849f2ac78fbe579972864) --- src/amd/common/ac_nir_to_llvm.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index e5fbe003f53..c950b81dca2 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -311,9 +311,18 @@ static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, } static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, - LLVMValueRef src0) + LLVMValueRef src0, + unsigned bitsize) { - return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); + LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, + LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), + ""); + result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, ""); + + if (bitsize == 32) + return result; + + return LLVMBuildFPExt(ctx->builder, result, ctx->f64, ""); } static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, @@ -932,7 +941,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); break; case nir_op_b2f: - result = emit_b2f(&ctx->ac, src[0]); + result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; case nir_op_f2b: result = emit_f2b(&ctx->ac, src[0]); From 61c64f64d73096acb9ee352ac9535ca7e1a930e8 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 5 Nov 2018 09:54:28 +0100 Subject: [PATCH 021/220] radv: disable conditional rendering for vkCmdCopyQueryPoolResults() VK_EXT_conditional_rendering says that copy 
commands should not be affected by conditional rendering. Cc: 18.2 18.3 Signed-off-by: Samuel Pitoiset Reviewed-by: Dave Airlie (cherry picked from commit 0a0aa2ba6c37085948cc7efe11695ebe4fdf12fe) Conflicts: src/amd/vulkan/radv_query.c --- src/amd/vulkan/radv_query.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index ae8fc3834bf..d538170c67d 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -913,6 +913,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, { struct radv_device *device = cmd_buffer->device; struct radv_meta_saved_state saved_state; + bool old_predicating; if (!*pipeline) { VkResult ret = radv_device_init_meta_query_state_internal(device); @@ -927,6 +928,12 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); + /* VK_EXT_conditional_rendering says that copy commands should not be + * affected by conditional rendering. + */ + old_predicating = cmd_buffer->state.predicating; + cmd_buffer->state.predicating = false; + struct radv_buffer dst_buffer = { .bo = dst_bo, .offset = dst_offset, @@ -1008,6 +1015,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 | RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH; + /* Restore conditional rendering. */ + cmd_buffer->state.predicating = old_predicating; radv_meta_restore(&saved_state, cmd_buffer); } From 15442cac5c747e54952fcc972f5c43faa208dbbf Mon Sep 17 00:00:00 2001 From: Sergii Romantsov Date: Thu, 1 Nov 2018 13:02:43 +0200 Subject: [PATCH 022/220] autotools: library-dependency when no sse and 32-bit Building of 32bit Mesa may fail if __SSE__ is not specified. Added missed dependency from libm. 
v2: avoided dependecy on any flag, just link v3: meson doesn't fail, but have added dependency on libm CC: Dylan Baker CC: Lionel G Landwerlin Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108560 Signed-off-by: Sergii Romantsov Reviewed-by: Dylan Baker (cherry picked from commit ce837a537282f10bc48c4ac426fa9a31241beea2) --- src/util/Makefile.am | 3 ++- src/util/meson.build | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/util/Makefile.am b/src/util/Makefile.am index b857db8a866..4bda54c551d 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -60,7 +60,8 @@ libmesautil_la_LIBADD = \ $(PTHREAD_LIBS) \ $(CLOCK_LIB) \ $(ZLIB_LIBS) \ - $(LIBATOMIC_LIBS) + $(LIBATOMIC_LIBS) \ + -lm libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES) libxmlconfig_la_CFLAGS = \ diff --git a/src/util/meson.build b/src/util/meson.build index 7caea27d660..156621aff65 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -119,7 +119,7 @@ libmesa_util = static_library( 'mesa_util', [files_mesa_util, format_srgb], include_directories : inc_common, - dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic], + dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic, dep_m], c_args : [c_msvc_compat_args, c_vis_args], build_by_default : false ) From 09c5e548c4eb1b9a4f5a8b3c2bac3da857338464 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 25 Oct 2018 21:33:52 +0200 Subject: [PATCH 023/220] glsl: do not allow implicit casts of unsized array initializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GLSL 4.6 specification (section 4.1.14. "Implicit Conversions") says: "There are no implicit array or structure conversions. For example, an array of int cannot be implicitly converted to an array of float." 
So let's add a check in place when assigning array initializers to implicitly sized arrays, to avoid incorrectly allowing code on the form: int[] foo = float[](1.0, 2.0, 3.0) This fixes the following dEQP test-cases: - dEQP-GLES31.functional.shaders.implicit_conversions.es31.invalid.arrays.int_to_float_vertex - dEQP-GLES31.functional.shaders.implicit_conversions.es31.invalid.arrays.int_to_float_fragment - dEQP-GLES31.functional.shaders.implicit_conversions.es31.invalid.arrays.int_to_uint_vertex - dEQP-GLES31.functional.shaders.implicit_conversions.es31.invalid.arrays.int_to_uint_fragment - dEQP-GLES31.functional.shaders.implicit_conversions.es31.invalid.arrays.uint_to_float_vertex - dEQP-GLES31.functional.shaders.implicit_conversions.es31.invalid.arrays.uint_to_float_fragment - dEQP-GLES31.functional.shaders.implicit_conversions.es32.invalid.arrays.int_to_float_vertex - dEQP-GLES31.functional.shaders.implicit_conversions.es32.invalid.arrays.int_to_float_fragment - dEQP-GLES31.functional.shaders.implicit_conversions.es32.invalid.arrays.int_to_uint_vertex - dEQP-GLES31.functional.shaders.implicit_conversions.es32.invalid.arrays.int_to_uint_fragment - dEQP-GLES31.functional.shaders.implicit_conversions.es32.invalid.arrays.uint_to_float_vertex - dEQP-GLES31.functional.shaders.implicit_conversions.es32.invalid.arrays.uint_to_float_fragment Signed-off-by: Erik Faye-Lund Reviewed-by: Tapani Pälli (cherry picked from commit 742dace8251b764775ee049cf529715f90afecc1) --- src/compiler/glsl/ast_to_hir.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 084b7021a9f..cf52f079df2 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -892,7 +892,8 @@ validate_assignment(struct _mesa_glsl_parse_state *state, } if (unsized_array) { if (is_initializer) { - return rhs; + if (rhs->type->get_scalar_type() == lhs->type->get_scalar_type()) + return rhs; } else { 
_mesa_glsl_error(&loc, state, "implicitly sized arrays cannot be assigned"); From 4c995fcae76ed241fe963206dbe473d0f801f3ad Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 1 Nov 2018 10:49:58 +0100 Subject: [PATCH 024/220] virgl/vtest-winsys: Use virgl version of bind flags The bind flags defined by mesa/gallium might not always be in sync with the ones copied to virglrenderer/gallium. Therefore, use the flags defined in virgl like it is done for all the other calls to create resources. Signed-off-by: Gert Wollny Reviewed-by: Dave Airlie (cherry picked from commit b710680093a29d4f7dcde2e5a3ee61bba80404ca) --- src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c index a589f694bb0..f44d4d74ff1 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c @@ -559,7 +559,7 @@ virgl_cs_create_fence(struct virgl_winsys *vws) res = virgl_vtest_winsys_resource_cache_create(vws, PIPE_BUFFER, PIPE_FORMAT_R8_UNORM, - PIPE_BIND_CUSTOM, + VIRGL_BIND_CUSTOM, 8, 1, 1, 0, 0, 0, 8); return (struct pipe_fence_handle *)res; From f55265776fed8db8d738e5a25b1a1126b48f45ee Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 9 Nov 2018 11:00:00 +0000 Subject: [PATCH 025/220] Update version to 18.3.0-rc2 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index bb28ad2e2bd..9bdced0d72e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-rc1 +18.3.0-rc2 From 73f457f486468724674883796fb3e8e7649a4236 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Thu, 1 Nov 2018 15:44:45 -0700 Subject: [PATCH 026/220] Revert "i965/batch: avoid reverting batch buffer if saved state is an empty" This reverts commit a9031bf9b55602d93cccef6c926e2179c23205b4. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108630 (cherry picked from commit 5f312e95f8775e050ef9f8218be0eed6da1ee21d) --- src/mesa/drivers/dri/i965/brw_compute.c | 3 +-- src/mesa/drivers/dri/i965/brw_draw.c | 3 +-- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 3 +-- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 7 ------- src/mesa/drivers/dri/i965/intel_batchbuffer.h | 1 - 5 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 5c8e3a5d4de..de08fc3ac16 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -167,7 +167,7 @@ static void brw_dispatch_compute_common(struct gl_context *ctx) { struct brw_context *brw = brw_context(ctx); - bool fail_next; + bool fail_next = false; if (!_mesa_check_conditional_render(ctx)) return; @@ -185,7 +185,6 @@ brw_dispatch_compute_common(struct gl_context *ctx) intel_batchbuffer_require_space(brw, 600); brw_require_statebuffer_space(brw, 2500); intel_batchbuffer_save_state(brw); - fail_next = intel_batchbuffer_saved_state_is_empty(brw); retry: brw->batch.no_wrap = true; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 19ee3962d74..8536c040109 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -885,7 +885,7 @@ brw_draw_single_prim(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; - bool fail_next; + bool fail_next = false; /* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have * atoms that happen on every draw call. 
@@ -898,7 +898,6 @@ brw_draw_single_prim(struct gl_context *ctx, intel_batchbuffer_require_space(brw, 1500); brw_require_statebuffer_space(brw, 2400); intel_batchbuffer_save_state(brw); - fail_next = intel_batchbuffer_saved_state_is_empty(brw); if (brw->num_instances != prim->num_instances || brw->basevertex != prim->basevertex || diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index fd9ce93c6c7..34bfcad03eb 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -268,7 +268,7 @@ genX(blorp_exec)(struct blorp_batch *batch, assert(batch->blorp->driver_ctx == batch->driver_batch); struct brw_context *brw = batch->driver_batch; struct gl_context *ctx = &brw->ctx; - bool check_aperture_failed_once; + bool check_aperture_failed_once = false; #if GEN_GEN >= 11 /* The PIPE_CONTROL command description says: @@ -309,7 +309,6 @@ genX(blorp_exec)(struct blorp_batch *batch, intel_batchbuffer_require_space(brw, 1400); brw_require_statebuffer_space(brw, 600); intel_batchbuffer_save_state(brw); - check_aperture_failed_once = intel_batchbuffer_saved_state_is_empty(brw); brw->batch.no_wrap = true; #if GEN_GEN == 6 diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 6207de5a06f..8b769eaf534 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -301,13 +301,6 @@ intel_batchbuffer_save_state(struct brw_context *brw) brw->batch.saved.exec_count = brw->batch.exec_count; } -bool -intel_batchbuffer_saved_state_is_empty(struct brw_context *brw) -{ - struct intel_batchbuffer *batch = &brw->batch; - return (batch->saved.map_next == batch->batch.map); -} - void intel_batchbuffer_reset_to_saved(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 91720dad5b4..0632142cd31 100644 --- 
a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -24,7 +24,6 @@ struct intel_batchbuffer; void intel_batchbuffer_init(struct brw_context *brw); void intel_batchbuffer_free(struct intel_batchbuffer *batch); void intel_batchbuffer_save_state(struct brw_context *brw); -bool intel_batchbuffer_saved_state_is_empty(struct brw_context *brw); void intel_batchbuffer_reset_to_saved(struct brw_context *brw); void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz); int _intel_batchbuffer_flush_fence(struct brw_context *brw, From c64a78ec0a5daf862e10d8a4fb931348167b6d3f Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 7 Nov 2018 17:06:27 +0100 Subject: [PATCH 027/220] radv: only expose VK_SUBGROUP_FEATURE_ARITHMETIC_BIT for VI+ Inclusive and exclusives scan are missing because older chips don't have llvm.amdgcn.update.dpp. This fixes crashes with dEQP-VK.subgroups.arithmetic.*. CC: mesa-stable@lists.freedesktop.org Signed-off-by: Samuel Pitoiset Reviewed-by: Dave Airlie (cherry picked from commit 0dcd99c6870aae5b15c8709a8afcc942d116c976) --- src/amd/vulkan/radv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index d68111c25bf..92254bed2e1 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1058,12 +1058,12 @@ void radv_GetPhysicalDeviceProperties2( * is fixed in LLVM. 
*/ properties->supportedOperations = - VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT; if (pdevice->rad_info.chip_class >= VI) { properties->supportedOperations |= + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; } From 5f137e94b992a62e98dfa604cd43f57c5385e53b Mon Sep 17 00:00:00 2001 From: Andre Heider Date: Tue, 6 Nov 2018 09:27:12 +0100 Subject: [PATCH 028/220] st/nine: fix stack corruption due to ABI mismatch This fixes various crashes and hangs when using nine's 'thread_submit' feature. On 64bit, the thread function's data argument would just be NULL. On 32bit, the data argument would be garbage depending on the compiler flags (in my case -march>=core2). Fixes: f3fa7e3068512d ("st/nine: Use WINE thread for threadpool") Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Andre Heider Reviewed-by: Axel Davy (cherry picked from commit 10598c9667a9c5ea04ac8279549b1df8c026ef51) --- src/gallium/state_trackers/nine/threadpool.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/threadpool.c b/src/gallium/state_trackers/nine/threadpool.c index cc62fd25799..19721aab2dd 100644 --- a/src/gallium/state_trackers/nine/threadpool.c +++ b/src/gallium/state_trackers/nine/threadpool.c @@ -37,6 +37,7 @@ #include "os/os_thread.h" #include "threadpool.h" +/* POSIX thread function */ static void * threadpool_worker(void *data) { @@ -76,6 +77,15 @@ threadpool_worker(void *data) return NULL; } +/* Windows thread function */ +static DWORD NINE_WINAPI +wthreadpool_worker(void *data) +{ + threadpool_worker(data); + + return 0; +} + struct threadpool * _mesa_threadpool_create(struct NineSwapChain9 *swapchain) { @@ -87,7 +97,9 @@ _mesa_threadpool_create(struct NineSwapChain9 *swapchain) pthread_mutex_init(&pool->m, NULL); pthread_cond_init(&pool->new_work, NULL); - pool->wthread 
= NineSwapChain9_CreateThread(swapchain, threadpool_worker, pool); + /* This uses WINE's CreateThread, so the thread function needs to use + * the Windows ABI */ + pool->wthread = NineSwapChain9_CreateThread(swapchain, wthreadpool_worker, pool); if (!pool->wthread) { /* using pthread as fallback */ pthread_create(&pool->pthread, NULL, threadpool_worker, pool); From fff64af317a62dae8cd4ac451aa1a643714cb3b6 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Mon, 12 Nov 2018 13:24:42 +1100 Subject: [PATCH 029/220] nir: add glsl_type_is_integer() helper Fixes: 1c9c42d16b4c ("nir: add varying component packing helpers") Reviewed-by: Jason Ekstrand (cherry picked from commit 9dd737bb02923d26d4d947ebf265b05f13dd2c0c) --- src/compiler/nir_types.cpp | 5 +++++ src/compiler/nir_types.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index d24f0941519..3cd61f66056 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -301,6 +301,11 @@ glsl_type_is_boolean(const struct glsl_type *type) { return type->is_boolean(); } +bool +glsl_type_is_integer(const struct glsl_type *type) +{ + return type->is_integer(); +} const glsl_type * glsl_void_type(void) diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 77454fa9fab..70d593b96ab 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -142,6 +142,7 @@ bool glsl_type_is_image(const struct glsl_type *type); bool glsl_type_is_dual_slot(const struct glsl_type *type); bool glsl_type_is_numeric(const struct glsl_type *type); bool glsl_type_is_boolean(const struct glsl_type *type); +bool glsl_type_is_integer(const struct glsl_type *type); bool glsl_sampler_type_is_shadow(const struct glsl_type *type); bool glsl_sampler_type_is_array(const struct glsl_type *type); bool glsl_contains_atomic(const struct glsl_type *type); From 9b8380a4f9ca28b2ba569c272e4951084b011210 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Mon, 12 Nov 
2018 13:25:27 +1100 Subject: [PATCH 030/220] nir: don't pack varyings ints with floats unless flat Fixes: 1c9c42d16b4c ("nir: add varying component packing helpers") Reviewed-by: Jason Ekstrand (cherry picked from commit a068958692cc662203f04e488356005533568a56) --- src/compiler/nir/nir_linking_helpers.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index de6f2481def..3845ed66b49 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ b/src/compiler/nir/nir_linking_helpers.c @@ -195,9 +195,12 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer) } static uint8_t -get_interp_type(nir_variable *var, bool default_to_smooth_interp) +get_interp_type(nir_variable *var, const struct glsl_type *type, + bool default_to_smooth_interp) { - if (var->data.interpolation != INTERP_MODE_NONE) + if (glsl_type_is_integer(type)) + return INTERP_MODE_FLAT; + else if (var->data.interpolation != INTERP_MODE_NONE) return var->data.interpolation; else if (default_to_smooth_interp) return INTERP_MODE_SMOOTH; @@ -252,7 +255,7 @@ get_slot_component_masks_and_interp_types(struct exec_list *var_list, unsigned comps_slot2 = 0; for (unsigned i = 0; i < slots; i++) { interp_type[location + i] = - get_interp_type(var, default_to_smooth_interp); + get_interp_type(var, type, default_to_smooth_interp); interp_loc[location + i] = get_interp_loc(var); if (dual_slot) { @@ -424,7 +427,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps, continue; bool found_new_offset = false; - uint8_t interp = get_interp_type(var, default_to_smooth_interp); + uint8_t interp = get_interp_type(var, type, default_to_smooth_interp); for (; cursor[interp] < 32; cursor[interp]++) { uint8_t cursor_used_comps = comps[cursor[interp]]; From 6a706763d0266bb1c4f86faff8b8a71fd4386641 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Fri, 9 Nov 2018 13:27:56 -0800 Subject: 
[PATCH 031/220] meson: Don't set -Wall meson does this for you with its warn levels, so we don't need to set it ourselves. Fixes: d1992255bb29054fa51763376d125183a9f602f3 ("meson: Add build Intel "anv" vulkan driver") Reviewed-by: Eric Engestrom (cherry picked from commit 9c2a95b29868f1388408b5eb9193fff39f942217) --- meson.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index 18667988bac..dabfb9abddd 100644 --- a/meson.build +++ b/meson.build @@ -787,7 +787,7 @@ endif # Check for generic C arguments c_args = [] -foreach a : ['-Wall', '-Werror=implicit-function-declaration', +foreach a : ['-Werror=implicit-function-declaration', '-Werror=missing-prototypes', '-Werror=return-type', '-fno-math-errno', '-fno-trapping-math', '-Qunused-arguments'] @@ -809,7 +809,7 @@ endif # Check for generic C++ arguments cpp_args = [] -foreach a : ['-Wall', '-Werror=return-type', +foreach a : ['-Werror=return-type', '-fno-math-errno', '-fno-trapping-math', '-Qunused-arguments'] if cpp.has_argument(a) From 7ea4e43c554c2ccecb3f4cd737aa34506d55ba0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 12 Nov 2018 15:43:58 -0500 Subject: [PATCH 032/220] st/mesa: disable L3 thread pinning This implementation can have massive drawbacks. Cc: 18.3 Reviewed-by: Edmondo Tommasina (cherry picked from commit e0c7114eb3c19d4c2653f661698a6baa3bc9bedf) --- src/mesa/state_tracker/st_manager.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index ceb48dd4903..776b563e50e 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -1069,15 +1069,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, * of the referenced drawables no longer exist. */ st_framebuffers_purge(st); - - /* Notify the driver that the context thread may have been changed. 
- * This should pin all driver threads to a specific L3 cache for optimal - * performance on AMD Zen CPUs. - */ - struct glthread_state *glthread = st->ctx->GLThread; - thrd_t *upper_thread = glthread ? &glthread->queue.threads[0] : NULL; - - util_context_thread_changed(st->pipe, upper_thread); } else { ret = _mesa_make_current(NULL, NULL, NULL); From ad99afdce6c2d54ae40aec10b91fe3b65f7f3904 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Fri, 9 Nov 2018 12:56:00 -0800 Subject: [PATCH 033/220] meson: fix libatomic tests There are two problems: 1) the extra underscore in MISSING_64BIT_ATOMICS 2) we should link with libatomic if the previous test decided we needed it Fixes: d1992255bb29054fa51763376d125183a9f602f3 ("meson: Add build Intel "anv" vulkan driver") Reviewed-and-Tested-by: Matt Turner (cherry picked from commit 4eab98b66e7dc495f26ac3b0e356e405c0796b74) --- meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index dabfb9abddd..58ff3ea9735 100644 --- a/meson.build +++ b/meson.build @@ -905,8 +905,9 @@ if not cc.links('''#include int main() { return __sync_add_and_fetch(&v, (uint64_t)1); }''', + dependencies : dep_atomic, name : 'GCC 64bit atomics') - pre_args += '-DMISSING_64_BIT_ATOMICS' + pre_args += '-DMISSING_64BIT_ATOMICS' endif # TODO: shared/static? Is this even worth doing? From cc572038bf81025c30825178b390f3e38a664887 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sun, 11 Nov 2018 13:44:41 -0800 Subject: [PATCH 034/220] util/ralloc: Make sizeof(linear_header) a multiple of 8 Prior to this patch sizeof(linear_header) was 20 bytes in a non-debug build on 32-bit platforms. We do some pointer arithmetic to calculate the next available location with ptr = (linear_size_chunk *)((char *)&latest[1] + latest->offset); in linear_alloc_child(). The &latest[1] adds 20 bytes, so an allocation would only be 4-byte aligned. 
On 32-bit SPARC a 'sttw' instruction (which stores a consecutive pair of 4-byte registers to memory) requires an 8-byte aligned address. Such an instruction is used to store to an 8-byte integer type, like intmax_t which is used in glcpp's expression_value_t struct. As a result of the 4-byte alignment returned by linear_alloc_child() we would generate a SIGBUS (unaligned exception) on SPARC. According to the GNU libc manual malloc() always returns memory that has at least an alignment of 8-bytes [1]. I think our allocator should do the same. So, simple fix with two parts: (1) Increase SUBALLOC_ALIGNMENT to 8 unconditionally. (2) Mark linear_header with an aligned attribute, which will cause its sizeof to be rounded up to that alignment. (We already do this for ralloc_header) With this done, all Mesa's unit tests now pass on SPARC. [1] https://www.gnu.org/software/libc/manual/html_node/Aligned-Memory-Blocks.html Fixes: 47e17586924f ("glcpp: use the linear allocator for most objects") Bug: https://bugs.gentoo.org/636326 Reviewed-by: Eric Anholt (cherry picked from commit efb1ccadca89b1b3f39fb52b7b83154dff764a15) Conflicts: src/util/ralloc.c --- src/util/ralloc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/util/ralloc.c b/src/util/ralloc.c index 5d77f75ee85..5a7fa7e84e9 100644 --- a/src/util/ralloc.c +++ b/src/util/ralloc.c @@ -554,10 +554,18 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, */ #define MIN_LINEAR_BUFSIZE 2048 -#define SUBALLOC_ALIGNMENT sizeof(uintptr_t) +#define SUBALLOC_ALIGNMENT 8 #define LMAGIC 0x87b9c7d3 -struct linear_header { +struct +#ifdef _MSC_VER + __declspec(align(8)) +#elif defined(__LP64__) + __attribute__((aligned(16))) +#else + __attribute__((aligned(8))) +#endif + linear_header { #ifdef DEBUG unsigned magic; /* for debugging */ #endif @@ -651,6 +659,8 @@ linear_alloc_child(void *parent, unsigned size) ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset); 
ptr->size = size; latest->offset += full_size; + + assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0); return &ptr[1]; } From 8a79c536d57fbbd77d804dc47635872c356edf9b Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 12 Nov 2018 11:37:20 +0100 Subject: [PATCH 035/220] radv: binding streamout buffers doesn't change context regs Cc: 18.3 Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit b5f213bb1dcde22949dffe9d3a431fecd5d0f33b) --- src/amd/vulkan/radv_cmd_buffer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 6510a5c4425..b9d5726bfba 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3520,8 +3520,13 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; - /* Index & Vertex buffer don't change context regs, and pipeline is handled later. */ - used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_PIPELINE); + /* Index, vertex and streamout buffers don't change context regs, and + * pipeline is handled later. + */ + used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | + RADV_CMD_DIRTY_VERTEX_BUFFER | + RADV_CMD_DIRTY_STREAMOUT_BUFFER | + RADV_CMD_DIRTY_PIPELINE); /* Assume all state changes except these two can imply context rolls. 
*/ if (cmd_buffer->state.dirty & used_states) From e2494a9387860354051f945c7dd423a59e4ffb08 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Mon, 12 Nov 2018 12:34:26 +0100 Subject: [PATCH 036/220] mesa: Reference count shaders that are used by transform feedback objects Transform feedback objects may hold a pointer to a shader program, and at least in Gallium, this must be a valid pointer until ctx->Driver.EndTransformFeedback in glEndTransformFeedback has been called - which conforms to the spec that any program that is part of a current rendering state should only be flagged for deletion by glDeleteProgram. This was not handled properly for the transform feedback objects so that a call sequence glUseProgram(x) glBeginTransformFeedback(...) glPauseTransformFeedback(...) glDeleteProgram(x) glEndTransformFeedback(...) would result in a use-after-free bug. With this patch the transform feedback object also updates the reference count to the used program thereby keeping the program valid as long as the transform feedback object links to it. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108713 Fixes: 654587696b4234d09a6b471b70e9629cf2887c27 mesa: add end_transform_feedback() helper Signed-off-by: Gert Wollny Reviewed-by: Emil Velikov (cherry picked from commit caa964b422152788a95a1b248c884df8918a2bbd) --- src/mesa/main/transformfeedback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index a46c9f94bca..8eccdc20b76 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -40,6 +40,7 @@ #include "shaderapi.h" #include "shaderobj.h" +#include "program/program.h" #include "program/prog_parameter.h" struct using_program_tuple @@ -470,6 +471,7 @@ begin_transform_feedback(struct gl_context *ctx, GLenum mode, bool no_error) if (obj->program != source) { ctx->NewDriverState |= ctx->DriverFlags.NewTransformFeedbackProg; + _mesa_reference_program_(ctx, &obj->program, source); obj->program = source; } @@ -504,6 +506,7 @@ end_transform_feedback(struct gl_context *ctx, assert(ctx->Driver.EndTransformFeedback); ctx->Driver.EndTransformFeedback(ctx, obj); + _mesa_reference_program_(ctx, &obj->program, NULL); ctx->TransformFeedback.CurrentObject->Active = GL_FALSE; ctx->TransformFeedback.CurrentObject->Paused = GL_FALSE; ctx->TransformFeedback.CurrentObject->EndedAnytime = GL_TRUE; From 9d92b603f1ad710041c280d6b12ac4c6254d9209 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Thu, 11 Oct 2018 16:21:14 +0100 Subject: [PATCH 037/220] meson: only run vulkan's meson.build when building vulkan Fixes: d1992255bb29054fa5176 "meson: Add build Intel "anv" vulkan driver" Signed-off-by: Eric Engestrom Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 3832db275efdb235b3b7b27c9b41e64d5507aa2c) --- src/meson.build | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/meson.build b/src/meson.build index 73146d37143..2c0bff73432 100644 --- a/src/meson.build +++ 
b/src/meson.build @@ -52,7 +52,9 @@ subdir('mapi') # TODO: opengl subdir('compiler') subdir('egl/wayland/wayland-drm') -subdir('vulkan') +if with_any_vk + subdir('vulkan') +endif if with_gallium_radeonsi or with_amd_vk subdir('amd') endif From 940d3a4ef8c101a75e439613c5f8c2c402389ca1 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Thu, 11 Oct 2018 16:27:07 +0100 Subject: [PATCH 038/220] gbm: remove unnecessary meson include `inc_wayland_drm` is only used if wayland is built, and it's already added in that case a few lines below. Fixes: a29869e8720b385d3692f "gbm: Don't traverse backwards for includes" Signed-off-by: Eric Engestrom Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 7df80de6e645ba8c20d97f5f2b1f6c12aa962e29) --- src/gbm/meson.build | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gbm/meson.build b/src/gbm/meson.build index 2e9d380c0b4..719f9c1a9b8 100644 --- a/src/gbm/meson.build +++ b/src/gbm/meson.build @@ -32,7 +32,6 @@ args_gbm = [] deps_gbm = [] incs_gbm = [ include_directories('main'), inc_include, inc_src, inc_loader, - inc_wayland_drm, ] if with_dri2 From fbe2a549913784fb4c2b3bb2fe73809807d134f8 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Thu, 11 Oct 2018 16:00:04 +0100 Subject: [PATCH 039/220] meson: fix wayland-less builds Those empty variables in the !wayland case are useless and running that meson.build with them breaks the build: [287/850] Generating wayland-drm-client-protocol.h with a custom command. FAILED: src/egl/wayland/wayland-drm/wayland-drm-client-protocol.h client-header ../src/egl/wayland/wayland-drm/wayland-drm.xml src/egl/wayland/wayland-drm/wayland-drm-client-protocol.h /bin/sh: client-header: command not found ninja: build stopped: subcommand failed. 
Fixes: d1992255bb29054fa5176 "meson: Add build Intel "anv" vulkan driver" Signed-off-by: Eric Engestrom Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 4fa2fb35245448f8b5d81fb5b37afec4f343f18a) --- meson.build | 7 ------- src/meson.build | 4 +++- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/meson.build b/meson.build index 58ff3ea9735..1b475877827 100644 --- a/meson.build +++ b/meson.build @@ -1318,13 +1318,6 @@ if with_platform_wayland 'linux-dmabuf', 'linux-dmabuf-unstable-v1.xml' ) pre_args += ['-DHAVE_WAYLAND_PLATFORM', '-DWL_HIDE_DEPRECATED'] -else - prog_wl_scanner = [] - wl_scanner_arg = '' - dep_wl_protocols = null_dep - dep_wayland_client = null_dep - dep_wayland_server = null_dep - wayland_dmabuf_xml = '' endif dep_x11 = null_dep diff --git a/src/meson.build b/src/meson.build index 2c0bff73432..0d0ecf2c530 100644 --- a/src/meson.build +++ b/src/meson.build @@ -51,7 +51,9 @@ subdir('util') subdir('mapi') # TODO: opengl subdir('compiler') -subdir('egl/wayland/wayland-drm') +if with_platform_wayland + subdir('egl/wayland/wayland-drm') +endif if with_any_vk subdir('vulkan') endif From e1dc5715b23bfdffdd2d6f4cbc86ae1120276cc5 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 26 Apr 2018 16:54:26 +0200 Subject: [PATCH 040/220] nir/spirv: cast shift operand to u32 v2: fix for specialization constants as well Reviewed-by: Jason Ekstrand Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Karol Herbst (cherry picked from commit b4380cb070c0865f1fbfb3720056545665759bc3) --- src/compiler/spirv/spirv_to_nir.c | 20 ++++++++++++++++++++ src/compiler/spirv/vtn_alu.c | 11 +++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 96ff09c3659..77ce0be369e 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1811,6 +1811,26 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, src[j] = 
src_val->constant->values[0]; } + /* fix up fixed size sources */ + switch (op) { + case nir_op_ishl: + case nir_op_ishr: + case nir_op_ushr: { + if (bit_size == 32) + break; + for (unsigned i = 0; i < num_components; ++i) { + switch (bit_size) { + case 64: src[1].u32[i] = src[1].u64[i]; break; + case 16: src[1].u32[i] = src[1].u16[i]; break; + case 8: src[1].u32[i] = src[1].u8[i]; break; + } + } + break; + } + default: + break; + } + val->constant->values[0] = nir_eval_const_opcode(op, num_components, bit_size, src); break; diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index 6860e7dc090..a23f8c29b5c 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -696,6 +696,17 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, src[1] = tmp; } + switch (op) { + case nir_op_ishl: + case nir_op_ishr: + case nir_op_ushr: + if (src[1]->bit_size != 32) + src[1] = nir_u2u32(&b->nb, src[1]); + break; + default: + break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); break; } /* default */ From 9650fc05a5f4fcb8b62151127ca752a5b93d5e8c Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:13 +0000 Subject: [PATCH 041/220] bin/get-pick-list.sh: simplify git oneline printing Currently we force disable the pager via "|cat" where --no-pager exists. Additionally we could use git show instead of git log -n1. Use those for a slightly more understandable code. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. 
Suarez Reviewed-by: Eric Engestrom Signed-off-by: Emil Velikov (cherry picked from commit 559c32d2412b2ea602bb59aa61da75403d01a872) --- bin/get-pick-list.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 9e9a39e494b..ba741cc4114 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -32,7 +32,7 @@ do continue fi - git log -n1 --pretty=oneline $sha | cat + git --no-pager show --summary --oneline $sha done rm -f already_picked From fc99358bdc57494c91ffe26dbd224747f648b1aa Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:14 +0000 Subject: [PATCH 042/220] bin/get-pick-list.sh: prefix output with "[stable] " With later commits we'll fold all the different scripts into one. Add the explicit prefix, so that we know the origin of the nomination. v2: - pass the sha as argument to the function - swap $tag = none for an else statement (Juan) - grep -q instead of using a variable (Eric) - print the tag and commit oneline separately (Eric) v3: - drop unused "tag=none" assignment (Juan) - typo nomination Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. 
Suarez Reviewed-by: Eric Engestrom (v2) Signed-off-by: Emil Velikov (cherry picked from commit fac10169bbad2da918ef07a62c01e0b321508cfe) --- bin/get-pick-list.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index ba741cc4114..2c7d87db0c6 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -7,6 +7,14 @@ # $ bin/get-pick-list.sh # $ bin/get-pick-list.sh > picklist # $ bin/get-pick-list.sh | tee picklist +# +# The output is as follows: +# [nomination_type] commit_sha commit summary + +is_stable_nomination() +{ + git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable" +} # Use the last branchpoint as our limit for the search latest_branchpoint=`git merge-base origin/master HEAD` @@ -32,6 +40,13 @@ do continue fi + if is_stable_nomination "$sha"; then + tag=stable + else + continue + fi + + printf "[ %8s ] " "$tag" git --no-pager show --summary --oneline $sha done From 98c0d87acd655314761c1ea607c19fb39fe4acfc Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:15 +0000 Subject: [PATCH 043/220] bin/get-pick-list.sh: handle "typod" usecase. As the comment in get-typod-pick-list.sh says, there's little point in having a duplicate file. Add the new pattern + tag to get-pick-list.sh and nuke this file. v2: - pass the sha as argument to the function - grep -q instead of using a variable (Eric) Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. 
Suarez Reviewed-by: Eric Engestrom Signed-off-by: Emil Velikov (cherry picked from commit e6b3a3b2014413366110f6deeced8095e7262b1d) --- bin/get-pick-list.sh | 9 +++++++- bin/get-typod-pick-list.sh | 42 -------------------------------------- 2 files changed, 8 insertions(+), 43 deletions(-) delete mode 100755 bin/get-typod-pick-list.sh diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 2c7d87db0c6..2476b593f14 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -16,6 +16,11 @@ is_stable_nomination() git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable" } +is_typod_nomination() +{ + git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev" +} + # Use the last branchpoint as our limit for the search latest_branchpoint=`git merge-base origin/master HEAD` @@ -25,7 +30,7 @@ git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_bra sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked # Grep for commits that were marked as a candidate for the stable tree. -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable' $latest_branchpoint..origin/master |\ +git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev' $latest_branchpoint..origin/master |\ while read sha do # Check to see whether the patch is on the ignore list. 
@@ -42,6 +47,8 @@ do if is_stable_nomination "$sha"; then tag=stable + elif is_typod_nomination "$sha"; then + tag=typod else continue fi diff --git a/bin/get-typod-pick-list.sh b/bin/get-typod-pick-list.sh deleted file mode 100755 index eb4181d66b8..00000000000 --- a/bin/get-typod-pick-list.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/sh - -# Script for generating a list of candidates which have typos in the nomination line -# -# Usage examples: -# -# $ bin/get-typod-pick-list.sh -# $ bin/get-typod-pick-list.sh > picklist -# $ bin/get-typod-pick-list.sh | tee picklist - -# NB: -# This script intentionally _never_ checks for specific version tag -# Should we consider folding it with the original get-pick-list.sh - -# Use the last branchpoint as our limit for the search -latest_branchpoint=`git merge-base origin/master HEAD` - -# Grep for commits with "cherry picked from commit" in the commit message. -git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\ - grep "cherry picked from commit" |\ - sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked - -# Grep for commits that were marked as a candidate for the stable tree. -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' $latest_branchpoint..origin/master |\ -while read sha -do - # Check to see whether the patch is on the ignore list. - if [ -f bin/.cherry-ignore ] ; then - if grep -q ^$sha bin/.cherry-ignore ; then - continue - fi - fi - - # Check to see if it has already been picked over. - if grep -q ^$sha already_picked ; then - continue - fi - - git log -n1 --pretty=oneline $sha | cat -done - -rm -f already_picked From 3853b9c14d25b6d770a14d078acf80210385e7fa Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:16 +0000 Subject: [PATCH 044/220] bin/get-pick-list.sh: handle the fixes tag Having a separate script to handle the fixes tag, brings a number of issues, so let's fold it in get-pick-list.sh. 
v2: - pass the sha as argument to the function - Keep original sed pattern Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. Suarez Signed-off-by: Emil Velikov (cherry picked from commit 181203f3c5e9c6b3e95e0b93ad3b22630c3c0437) --- bin/get-fixes-pick-list.sh | 81 -------------------------------------- bin/get-pick-list.sh | 46 ++++++++++++++++++++-- 2 files changed, 43 insertions(+), 84 deletions(-) delete mode 100755 bin/get-fixes-pick-list.sh diff --git a/bin/get-fixes-pick-list.sh b/bin/get-fixes-pick-list.sh deleted file mode 100755 index 047ea3bec10..00000000000 --- a/bin/get-fixes-pick-list.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/sh - -# Script for generating a list of candidates [referenced by a Fixes tag] for -# cherry-picking to a stable branch -# -# Usage examples: -# -# $ bin/get-fixes-pick-list.sh -# $ bin/get-fixes-pick-list.sh > picklist -# $ bin/get-fixes-pick-list.sh | tee picklist - -# Use the last branchpoint as our limit for the search -latest_branchpoint=`git merge-base origin/master HEAD` - -# List all the commits between day 1 and the branch point... -git log --reverse --pretty=%H $latest_branchpoint > already_landed - -# ... and the ones cherry-picked. -git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\ - grep "cherry picked from commit" |\ - sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked - -# Grep for commits with Fixes tag -git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\ -while read sha -do - # Check to see whether the patch is on the ignore list ... - if [ -f bin/.cherry-ignore ] ; then - if grep -q ^$sha bin/.cherry-ignore ; then - continue - fi - fi - - # Skip if it has been already cherry-picked. - if grep -q ^$sha already_picked ; then - continue - fi - - # Place every "fixes:" tag on its own line and join with the next word - # on its line or a later one. 
- fixes=`git show --pretty=medium -s $sha | tr -d "\n" | sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'` - - # For each one try to extract the tag - fixes_count=`echo "$fixes" | wc -l` - warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0` - while [ $fixes_count -gt 0 ] ; do - # Treat only the current line - id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` - fixes_count=$(($fixes_count-1)) - - # Bail out if we cannot find suitable id. - # Any specific validation the $id is valid and not some junk, is - # implied with the follow up code - if [ "x$id" = x ] ; then - continue - fi - - # Check if the offending commit is in branch. - - # Be that cherry-picked ... - # ... or landed before the branchpoint. - if grep -q ^$id already_picked || - grep -q ^$id already_landed ; then - - printf "Commit \"%s\" fixes %s\n" \ - "`git log -n1 --pretty=oneline $sha`" \ - "$id" - warn=$(($warn-1)) - fi - - done - - if [ $warn -gt 0 ] ; then - printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \ - "`git log -n1 --pretty=oneline $sha`" - fi - -done - -rm -f already_picked -rm -f already_landed diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 2476b593f14..c1f2bf11ca5 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -21,16 +21,53 @@ is_typod_nomination() git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev" } +is_fixes_nomination() +{ + fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \ + sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | \ + grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'` + + fixes_count=`echo "$fixes" | wc -l` + if [ $fixes_count -eq 0 ] ; then + return 0 + fi + while [ $fixes_count -gt 0 ] ; do + # Treat only the current line + id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` + fixes_count=$(($fixes_count-1)) + + # Bail out if we cannot find suitable id. 
+ # Any specific validation the $id is valid and not some junk, is + # implied with the follow up code + if [ "x$id" = x ] ; then + continue + fi + + #Check if the offending commit is in branch. + + # Be that cherry-picked ... + # ... or landed before the branchpoint. + if grep -q ^$id already_picked || + grep -q ^$id already_landed ; then + return 0 + fi + done + return 1 +} + # Use the last branchpoint as our limit for the search latest_branchpoint=`git merge-base origin/master HEAD` -# Grep for commits with "cherry picked from commit" in the commit message. +# List all the commits between day 1 and the branch point... +git log --reverse --pretty=%H $latest_branchpoint > already_landed + +# ... and the ones cherry-picked. git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\ grep "cherry picked from commit" |\ sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked -# Grep for commits that were marked as a candidate for the stable tree. -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev' $latest_branchpoint..origin/master |\ +# Grep for potential candidates +git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|fixes:' $latest_branchpoint..origin/master |\ while read sha do # Check to see whether the patch is on the ignore list. @@ -49,6 +86,8 @@ do tag=stable elif is_typod_nomination "$sha"; then tag=typod + elif is_fixes_nomination "$sha"; then + tag=fixes else continue fi @@ -58,3 +97,4 @@ do done rm -f already_picked +rm -f already_landed From cfd333c7686ba955b6b0d47d063279a338866d7b Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:17 +0000 Subject: [PATCH 045/220] bin/get-pick-list.sh: tweak the commit sha matching pattern Currently we match on: - any arbitrary length of, - any a-z A-Z and 0-9 characters At the same time, a commit sha consists of lowercase hexadecimal numbers. 
Any sha shorter than 8 characters is ambiguous - in some cases even 11+ are required. So change the pattern to a-f0-9 and adjust the length to 8-40. As we're here we could use a single grep, instead of the grep/sed combo. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. Suarez Signed-off-by: Emil Velikov (cherry picked from commit 533fead4236459c3f04700ff130ffaee1503cb69) --- bin/get-pick-list.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index c1f2bf11ca5..05dd3820e12 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -25,7 +25,7 @@ is_fixes_nomination() { fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \ sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | \ - grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'` + grep -Eo 'fixes:[a-f0-9]{8,40}'` fixes_count=`echo "$fixes" | wc -l` if [ $fixes_count -eq 0 ] ; then From 8b5ce5fa709ec70fa35b4b51f0d7a18a05898f7d Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:18 +0000 Subject: [PATCH 046/220] bin/get-pick-list.sh: flesh out is_sha_nomination Refactor is_fixes_nomination into a is_sha_nomination helper. This way we can reuse it for more than the usual "Fixes:" tag. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. Suarez Signed-off-by: Emil Velikov (cherry picked from commit b7418d1f3f102aeed8d3d38195f9b7d672216df0) --- bin/get-pick-list.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 05dd3820e12..c456fdb3b81 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -21,10 +21,12 @@ is_typod_nomination() git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev" } -is_fixes_nomination() +# Helper to handle various mistypos of the fixes tag. +# The tag string itself is passed as argument and normalised within. 
+is_sha_nomination() { fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \ - sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | \ + sed -e 's/'"$2"'/\nfixes:/Ig' | \ grep -Eo 'fixes:[a-f0-9]{8,40}'` fixes_count=`echo "$fixes" | wc -l` @@ -55,6 +57,11 @@ is_fixes_nomination() return 1 } +is_fixes_nomination() +{ + is_sha_nomination "$1" "fixes:[[:space:]]*" +} + # Use the last branchpoint as our limit for the search latest_branchpoint=`git merge-base origin/master HEAD` From 4505df167696c2b535c7e5d6582351741b5638ea Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:19 +0000 Subject: [PATCH 047/220] bin/get-pick-list.sh: handle fixes tag with missing colon Every so often, we forget to add the colon after "fixes". Trivially tweak the script to catch it. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. Suarez Signed-off-by: Emil Velikov (cherry picked from commit 209525aafb8314f827838dedeb771b72c256a4d3) --- bin/get-pick-list.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index c456fdb3b81..d327c61d254 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -60,6 +60,10 @@ is_sha_nomination() is_fixes_nomination() { is_sha_nomination "$1" "fixes:[[:space:]]*" + if test $? -eq 0; then + return 0 + fi + is_sha_nomination "$1" "fixes[[:space:]]\+" } # Use the last branchpoint as our limit for the search @@ -74,7 +78,7 @@ git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_bra sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked # Grep for potential candidates -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|fixes:' $latest_branchpoint..origin/master |\ +git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>' $latest_branchpoint..origin/master |\ while read sha do # Check to see whether the patch is on the ignore list. 
From c8fef27cd37c0d4f09275fda12d0ab9882ec68fe Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:20 +0000 Subject: [PATCH 048/220] bin/get-pick-list.sh: handle unofficial "broken by" tag We have a number of cases where devs will use a tag "broken by". While it's not something officially documented or recommended, checking for it is trivial enough. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. Suarez Signed-off-by: Emil Velikov (cherry picked from commit 77ff0bfb5f915bb841623ec181ee359a099f2c52) --- bin/get-pick-list.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index d327c61d254..a540cedfbd0 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -66,6 +66,11 @@ is_fixes_nomination() is_sha_nomination "$1" "fixes[[:space:]]\+" } +is_brokenby_nomination() +{ + is_sha_nomination "$1" "broken by" +} + # Use the last branchpoint as our limit for the search latest_branchpoint=`git merge-base origin/master HEAD` @@ -78,7 +83,7 @@ git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_bra sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked # Grep for potential candidates -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>' $latest_branchpoint..origin/master |\ +git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>' $latest_branchpoint..origin/master |\ while read sha do # Check to see whether the patch is on the ignore list. @@ -99,6 +104,8 @@ do tag=typod elif is_fixes_nomination "$sha"; then tag=fixes + elif is_brokenby_nomination "$sha"; then + tag=brokenby else continue fi From 221a8e3366dcaef54c4773fbbbc9e2caa786d5b9 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 8 Nov 2018 15:05:21 +0000 Subject: [PATCH 049/220] bin/get-pick-list.sh: use test instead of [ ] Latter is rather picky wrt surrounding white space. 
The explicit `test` doesn't have that problem, plus the statements read a bit easier. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. Suarez Signed-off-by: Emil Velikov (cherry picked from commit c0012a07088e86f6d30405d0522a0d72801e2ec7) --- bin/get-pick-list.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index a540cedfbd0..e7fffdd1280 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -30,10 +30,10 @@ is_sha_nomination() grep -Eo 'fixes:[a-f0-9]{8,40}'` fixes_count=`echo "$fixes" | wc -l` - if [ $fixes_count -eq 0 ] ; then + if test $fixes_count -eq 0; then return 0 fi - while [ $fixes_count -gt 0 ] ; do + while test $fixes_count -gt 0; do # Treat only the current line id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` fixes_count=$(($fixes_count-1)) @@ -41,7 +41,7 @@ is_sha_nomination() # Bail out if we cannot find suitable id. # Any specific validation the $id is valid and not some junk, is # implied with the follow up code - if [ "x$id" = x ] ; then + if test "x$id" = x; then continue fi @@ -87,7 +87,7 @@ git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\ Date: Wed, 14 Nov 2018 18:49:54 +0000 Subject: [PATCH 050/220] bin/get-pick-list.sh: handle reverts prior to the branchpoint Currently we detect when a breaking commit: - has landed in stable, and - is referenced by a untagged fix in master Yet we did not consider the case of breaking commit: - prior to the branchpoint, and - is referenced by a untagged fix in master Addressing the latter is extremely slow, due to the size of the lookup. That said, we can trivially use the existing is_sha_nomination() helper to catch reverts. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Juan A. 
Suarez Signed-off-by: Emil Velikov (cherry picked from commit adbdfc6666052d604a97009d736b6dee957908a0) --- bin/get-pick-list.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index e7fffdd1280..9f9cbc44026 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -71,6 +71,11 @@ is_brokenby_nomination() is_sha_nomination "$1" "broken by" } +is_revert_nomination() +{ + is_sha_nomination "$1" "This reverts commit " +} + # Use the last branchpoint as our limit for the search latest_branchpoint=`git merge-base origin/master HEAD` @@ -83,7 +88,7 @@ git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_bra sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked # Grep for potential candidates -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>' $latest_branchpoint..origin/master |\ +git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>\|This reverts commit' $latest_branchpoint..origin/master |\ while read sha do # Check to see whether the patch is on the ignore list. @@ -106,6 +111,8 @@ do tag=fixes elif is_brokenby_nomination "$sha"; then tag=brokenby + elif is_revert_nomination "$sha"; then + tag=revert else continue fi From 971ce9f854893c4fb6aabb3e60912332ba61edc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 13 Nov 2018 18:37:39 -0500 Subject: [PATCH 051/220] radeonsi: fix video APIs on Raven2 This was missed when I added the new enum. 
Cc: 18.3 Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Leo Liu (cherry picked from commit 9367514524f70faad99c721bac92339c8ff8bad9) --- src/gallium/drivers/radeonsi/si_get.c | 9 ++++++--- src/gallium/drivers/radeonsi/si_uvd.c | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index b440230d227..91f38329d59 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -580,10 +580,12 @@ static int si_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_SUPPORTED: return (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && (si_vce_is_fw_version_supported(sscreen) || - sscreen->info.family == CHIP_RAVEN)) || + sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2)) || (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN && (sscreen->info.family == CHIP_RAVEN || - si_radeon_uvd_enc_supported(sscreen))); + sscreen->info.family == CHIP_RAVEN2 || + si_radeon_uvd_enc_supported(sscreen))); case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; case PIPE_VIDEO_CAP_MAX_WIDTH: @@ -631,7 +633,8 @@ static int si_get_video_param(struct pipe_screen *screen, return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN; return false; case PIPE_VIDEO_FORMAT_JPEG: - if (sscreen->info.family == CHIP_RAVEN) + if (sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2) return true; if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10) return false; diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c index 1a9d8f8d9fa..8c9553acbf3 100644 --- a/src/gallium/drivers/radeonsi/si_uvd.c +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -146,7 +146,8 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templ) { struct si_context *ctx = (struct si_context *)context; - bool vcn = (ctx->family == CHIP_RAVEN) ? 
true : false; + bool vcn = ctx->family == CHIP_RAVEN || + ctx->family == CHIP_RAVEN2; if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { if (vcn) { From a57a727617affa30ae92d11869f493825099103f Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Wed, 14 Nov 2018 12:51:38 +0000 Subject: [PATCH 052/220] egl: add missing glvnd entrypoint for EGL_ANDROID_blob_cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes dEQP-EGL.functional.get_proc_address.extension.egl_android_blob_cache on builds with glvnd enabled. Fixes: 6f5b57093b3462a54e9c7 "egl: add support for EGL_ANDROID_blob_cache" Signed-off-by: Eric Engestrom Reviewed-by: Tapani Pälli Reviewed-by: Emil Velikov (cherry picked from commit c9733649670243a1a6eb7ca2c376bd27960f8d8a) Squashed with commit: egl: fix bad rebase I screwed up a rebase over a refactor and didn't notice locally because the uncommitted refactor hid the issue. Fixes: c9733649670243a1a6eb "egl: add missing glvnd entrypoint for EGL_ANDROID_blob_cache" Signed-off-by: Eric Engestrom (cherry picked from commit 2b2f790e594cde3aa0d4dcebafc9a072651cd62a) --- src/egl/generate/eglFunctionList.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/egl/generate/eglFunctionList.py b/src/egl/generate/eglFunctionList.py index 667704eb2cb..2cd35557bc4 100644 --- a/src/egl/generate/eglFunctionList.py +++ b/src/egl/generate/eglFunctionList.py @@ -196,6 +196,9 @@ def _eglFunc(name, method, static=None, public=False, inheader=None, prefix="dis # EGL_ANDROID_native_fence_sync _eglFunc("eglDupNativeFenceFDANDROID", "display"), + # EGL_ANDROID_blob_cache + _eglFunc("eglSetBlobCacheFuncsANDROID", "display"), + # EGL_EXT_image_dma_buf_import_modifiers _eglFunc("eglQueryDmaBufFormatsEXT", "display"), _eglFunc("eglQueryDmaBufModifiersEXT", "display"), From 087f1534ae5bcfb9e4aa736e6c559ab4b768527a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Nov 2018 13:24:34 -0600 Subject: [PATCH 053/220] 
nir/lower_alu_to_scalar: Don't try to lower unpack_32_2x16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It messes up when trying to lower. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Samuel Iglesias Gonsálvez (cherry picked from commit 4266932c0b301005dcc747fb6c2fef36a3af6ffe) --- src/compiler/nir/nir_lower_alu_to_scalar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 0be3aba9456..7ef032cd164 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -194,6 +194,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) } case nir_op_unpack_64_2x32: + case nir_op_unpack_32_2x16: return false; LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); From 0b48c82ad9c04fe263393cc38692fe1aaefbede7 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Thu, 15 Nov 2018 12:03:31 +0200 Subject: [PATCH 054/220] i965: Fix calculation of layers array length for isl_view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle all cases in calculation of layers count for isl_view taking into account texture view and image unit. st_convert_image was taken as a reference. When u->Layered is true the whole level is taken with respect to image view. In other case only one layer is taken. 
v3: (Józef Kucia and Ilia Mirkin) - Rewrote patch by taking st_convert_image as a reference - Removed now unused get_image_num_layers function - Changed commit message v4: (Jason Ekstrand) - Added assert Fixes: 5a8c8903 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107856 Signed-off-by: Danylo Piliaiev Reviewed-by: Jason Ekstrand (cherry picked from commit f9fd0cf4790cb2a530e75d1a2206dbb9d8af7cb2) --- .../drivers/dri/i965/brw_wm_surface_state.c | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 8d21cf5fa70..3286c222e5b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1499,18 +1499,6 @@ update_buffer_image_param(struct brw_context *brw, param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); } -static unsigned -get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target, - unsigned level) -{ - if (target == GL_TEXTURE_CUBE_MAP) - return 6; - - return target == GL_TEXTURE_3D ? - minify(mt->surf.logical_level0_px.depth, level) : - mt->surf.logical_level0_px.array_len; -} - static void update_image_surface(struct brw_context *brw, struct gl_image_unit *u, @@ -1541,14 +1529,29 @@ update_image_surface(struct brw_context *brw, } else { struct intel_texture_object *intel_obj = intel_texture_object(obj); struct intel_mipmap_tree *mt = intel_obj->mt; - const unsigned num_layers = u->Layered ? - get_image_num_layers(mt, obj->Target, u->Level) : 1; + + unsigned base_layer, num_layers; + if (u->Layered) { + if (obj->Target == GL_TEXTURE_3D) { + base_layer = 0; + num_layers = minify(mt->surf.logical_level0_px.depth, u->Level); + } else { + assert(obj->Immutable || obj->MinLayer == 0); + base_layer = obj->MinLayer; + num_layers = obj->Immutable ? 
+ obj->NumLayers : + mt->surf.logical_level0_px.array_len; + } + } else { + base_layer = obj->MinLayer + u->_Layer; + num_layers = 1; + } struct isl_view view = { .format = format, .base_level = obj->MinLevel + u->Level, .levels = 1, - .base_array_layer = obj->MinLayer + u->_Layer, + .base_array_layer = base_layer, .array_len = num_layers, .swizzle = ISL_SWIZZLE_IDENTITY, .usage = ISL_SURF_USAGE_STORAGE_BIT, From f25fb52eae78ff3b29edba5c0a18db366c5ab0e8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 25 Oct 2018 09:12:50 -0700 Subject: [PATCH 055/220] vc4: Make sure we make ro scanout resources for create_with_modifiers. The DRI3 create_with_modifiers paths don't set tmpl.bind to SCANOUT or SHARED, with the theory that given that you've got modifiers, that's all you need. However, we were looking at the tmpl.bind for setting up the KMS handle in the renderonly case, so we'd end up trying to use vc4's handle on the hx8357d fd. Fixes: 84ed8b67c56b ("vc4: Set shareable BOs as T tiled if possible") (cherry picked from commit cc0bc76a382f908b4412ee8ab7a8409766ecf16a) --- src/gallium/drivers/vc4/vc4_resource.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 94784bbdc0a..41e6ec5c1cb 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -572,7 +572,15 @@ vc4_resource_create_with_modifiers(struct pipe_screen *pscreen, goto fail; } - if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) { + /* Set up the "scanout resource" (the dmabuf export of our buffer to + * the KMS handle) if the buffer might ever have + * resource_get_handle(WINSYS_HANDLE_TYPE_KMS) called on it. + * create_with_modifiers() doesn't give us usage flags, so we have to + * assume that all calls with modifiers are scanout-possible. 
+ */ + if (screen->ro && + ((tmpl->bind & PIPE_BIND_SCANOUT) || + !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) { rsc->scanout = renderonly_scanout_for_resource(prsc, screen->ro, NULL); if (!rsc->scanout) From 65926d5d949a00954917d2c2f715f27960907a8b Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 17 Oct 2018 16:57:01 +0200 Subject: [PATCH 056/220] Revert "radv: disable VK_SUBGROUP_FEATURE_VOTE_BIT" This reverts commit 647c2b90e96a9ab8571baf958a7c67c1e816911a. There was one recently-introduced bug in ac for dvec3 loads, but the other test failures were actually bugs in the tests. See https://github.com/KhronosGroup/VK-GL-CTS/commit/9429e621c48848d224e35f30a1ae45a4a079922c Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit ba94a00c7ce1514372bfe9b35c8e0c7fb8cd710e) --- src/amd/vulkan/radv_device.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 92254bed2e1..957d6ac9bad 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1054,13 +1054,11 @@ void radv_GetPhysicalDeviceProperties2( (VkPhysicalDeviceSubgroupProperties*)ext; properties->subgroupSize = 64; properties->supportedStages = VK_SHADER_STAGE_ALL; - /* TODO: Enable VK_SUBGROUP_FEATURE_VOTE_BIT when wwm - * is fixed in LLVM. 
- */ properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | - VK_SUBGROUP_FEATURE_QUAD_BIT; + VK_SUBGROUP_FEATURE_QUAD_BIT | + VK_SUBGROUP_FEATURE_VOTE_BIT; if (pdevice->rad_info.chip_class >= VI) { properties->supportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | From 4b3f884673ac8a0d88593f207be8cd94392338be Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 16 Nov 2018 11:58:55 +0000 Subject: [PATCH 057/220] Update version to 18.3.0-rc3 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 9bdced0d72e..bd71fb7fc65 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-rc2 +18.3.0-rc3 From 2e393b483d7d0007d3e8d1771a52f41aeea0a704 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 13 Nov 2018 14:10:45 +0000 Subject: [PATCH 058/220] egl/dri: fix error value with unknown drm format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the EGL_EXT_image_dma_buf_import spec, creating an EGL image with a DRM format not supported should yield the BAD_MATCH error : " * If <target> is EGL_LINUX_DMA_BUF_EXT, and the EGL_LINUX_DRM_FOURCC_EXT attribute is set to a format not supported by the EGL, EGL_BAD_MATCH is generated. 
" Signed-off-by: Lionel Landwerlin Fixes: 20de7f9f226401 ("egl/dri2: support for creating images out of dma buffers") Reviewed-by: Emil Velikov Reviewed-by: Tapani Pälli Reviewed-by: Eric Engestrom Reviewed-by: Chad Versace (cherry picked from commit 1c56d211563300e8b837378962dd455d45d7956e) --- src/egl/drivers/dri2/egl_dri2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 87e1a704c6e..4f226b27126 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -2309,7 +2309,7 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs) { unsigned plane_n = dri2_num_fourcc_format_planes(attrs->DMABufFourCC.Value); if (plane_n == 0) { - _eglError(EGL_BAD_ATTRIBUTE, "invalid format"); + _eglError(EGL_BAD_MATCH, "unknown drm fourcc format"); return 0; } From e299f1ba5964cc68a4a542422941f7648ca4e6d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 16 Nov 2018 17:20:26 +0100 Subject: [PATCH 059/220] radeonsi: fix an out-of-bounds read reported by ASAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We read 4 values out of sample_locs_8x, so make sure the array is big enough. 
Fixes: ac76aeef20 ("radeonsi: switch back to standard DX sample positions") Reviewed-by: Marek Olšák (cherry picked from commit 46a59ce0262a44d6520787741085a716c99200ed) --- src/gallium/drivers/radeonsi/si_state_msaa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c b/src/gallium/drivers/radeonsi/si_state_msaa.c index b741bcadec8..e6d97fe6727 100644 --- a/src/gallium/drivers/radeonsi/si_state_msaa.c +++ b/src/gallium/drivers/radeonsi/si_state_msaa.c @@ -101,6 +101,10 @@ static const uint64_t centroid_priority_4x = 0x3210321032103210ull; static const uint32_t sample_locs_8x[] = { FILL_SREG(-3,-5, 5, 1, -1, 3, 7,-7), FILL_SREG(-7,-1, 3, 7, -5, 5, 1,-3), + /* The following are unused by hardware, but we emit them to IBs + * instead of multiple SET_CONTEXT_REG packets. */ + 0, + 0, }; static const uint64_t centroid_priority_8x = 0x3546012735460127ull; From d7795a8431b133f04744c4e0fdbec804b61b8877 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 16 Nov 2018 11:10:57 +0000 Subject: [PATCH 060/220] travis: drop unneeded x11proto-xf86vidmode-dev The only place where the package is needed is for building the DRI based libGL library. 
Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Emil Velikov Acked-by: Dylan Baker Acked-by: Eric Engestrom (cherry picked from commit 84445a86d192c0d7f07bc25a84080458de764149) --- .travis.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8d6ddb2f201..81ac7696050 100644 --- a/.travis.yml +++ b/.travis.yml @@ -120,7 +120,6 @@ matrix: - llvm-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -150,7 +149,6 @@ matrix: - llvm-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -183,7 +181,6 @@ matrix: - llvm-3.9-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -222,7 +219,6 @@ matrix: - libclang-3.9-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -258,7 +254,6 @@ matrix: - libclang-4.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -294,7 +289,6 @@ matrix: - libclang-5.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -327,7 +321,6 @@ matrix: - libclang-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -361,7 +354,6 @@ matrix: - libclang-7-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -397,7 +389,6 @@ matrix: - libedit-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -427,7 +418,6 @@ matrix: - llvm-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev From 200004fe0312580cf5e678affe97926a65595c2b Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 16 Nov 2018 11:15:37 +0000 Subject: [PATCH 061/220] glx: make xf86vidmode mandatory for direct rendering Currently we detect the module and if 
missing, the glXGetMsc* API is effectively a stub, always returning false. This is what effectively has been happening with our meson build :-( Thus users have no chance of using it - they cannot even distinguish if the failure is due to a misconfigured build. There's no reason for keeping xf86vidmode optional - it has been available in all distributions for years. Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Emil Velikov Acked-by: Dylan Baker Reviewed-by: Eric Engestrom Fixes: a47c525f3281a2753180e "meson: build glx" (cherry picked from commit 5bc509363b6dbc42af72668fe500b6aec988dbf0) --- configure.ac | 12 +----------- meson.build | 6 ++---- src/glx/Makefile.am | 5 ----- src/glx/SConscript | 5 +---- src/glx/glxcmds.c | 7 +------ src/glx/meson.build | 6 +----- 6 files changed, 6 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index d782f56205d..a3d10cf40e1 100644 --- a/configure.ac +++ b/configure.ac @@ -1725,11 +1725,7 @@ xdri) fi fi - # add xf86vidmode if available - PKG_CHECK_MODULES([XF86VIDMODE], [xxf86vm], HAVE_XF86VIDMODE=yes, HAVE_XF86VIDMODE=no) - if test "$HAVE_XF86VIDMODE" = yes ; then - dri_modules="$dri_modules xxf86vm" - fi + dri_modules="$dri_modules xxf86vm" PKG_CHECK_MODULES([DRIGL], [$dri_modules]) GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules" @@ -1742,10 +1738,6 @@ xdri) ;; esac -# This is outside the case (above) so that it is invoked even for non-GLX -# builds. 
-AM_CONDITIONAL(HAVE_XF86VIDMODE, test "x$HAVE_XF86VIDMODE" = xyes) - GLESv1_CM_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS" GLESv1_CM_PC_LIB_PRIV="-lm $PTHREAD_LIBS $DLOPEN_LIBS" GLESv2_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS" @@ -1762,8 +1754,6 @@ AC_SUBST([GLESv1_CM_PC_LIB_PRIV]) AC_SUBST([GLESv2_LIB_DEPS]) AC_SUBST([GLESv2_PC_LIB_PRIV]) -AC_SUBST([HAVE_XF86VIDMODE]) - dnl dnl More GLX setup dnl diff --git a/meson.build b/meson.build index 1b475877827..33f4e5ad3cf 100644 --- a/meson.build +++ b/meson.build @@ -1350,7 +1350,7 @@ if with_platform_x11 dep_xdamage = dependency('xdamage', version : '>= 1.1') dep_xfixes = dependency('xfixes') dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1') - dep_xxf86vm = dependency('xxf86vm', required : false) + dep_xxf86vm = dependency('xxf86vm') endif if (with_any_vk or with_glx == 'dri' or (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or @@ -1428,13 +1428,11 @@ elif with_glx == 'dri' if with_dri_platform == 'drm' gl_priv_reqs += 'xcb-dri2 >= 1.8' endif + gl_priv_reqs += 'xxf86vm' endif if dep_libdrm.found() gl_priv_reqs += 'libdrm >= 2.4.75' endif -if dep_xxf86vm.found() - gl_priv_reqs += 'xxf86vm' -endif gl_priv_libs = [] if dep_thread.found() diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index 8f9d80c9f41..d06ae2972e9 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -24,10 +24,6 @@ SUBDIRS = EXTRA_DIST = SConscript meson.build -if HAVE_XF86VIDMODE -EXTRA_DEFINES_XF86VIDMODE = -DXF86VIDMODE -endif - AM_CFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/include/GL/internal \ @@ -38,7 +34,6 @@ AM_CFLAGS = \ -I$(top_builddir)/src/mapi/glapi \ -I$(top_srcdir)/src/mapi/glapi \ $(VISIBILITY_CFLAGS) \ - $(EXTRA_DEFINES_XF86VIDMODE) \ -D_REENTRANT \ -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \ $(DEFINES) \ diff --git a/src/glx/SConscript b/src/glx/SConscript index 8ce17715814..051f55b7669 100644 --- a/src/glx/SConscript +++ b/src/glx/SConscript @@ -36,10 +36,7 
@@ env.Prepend(LIBS = [ env.PkgUseModules('X11') env.PkgUseModules('XCB') env.PkgUseModules('DRM') - -if env['HAVE_XF86VIDMODE']: - env.Append(CPPDEFINES = ['XF86VIDMODE']) - env.PkgUseModules('XF86VIDMODE') +env.PkgUseModules('XF86VIDMODE') sources = [ 'clientattrib.c', diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 4db0228eaba..79e3503be8f 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -46,11 +46,9 @@ #include "util/debug.h" #else #include -#ifdef XF86VIDMODE #include #endif #endif -#endif #include #include @@ -2071,7 +2069,6 @@ _X_HIDDEN GLboolean __glxGetMscRate(struct glx_screen *psc, int32_t * numerator, int32_t * denominator) { -#ifdef XF86VIDMODE XF86VidModeModeLine mode_line; int dot_clock; int i; @@ -2118,8 +2115,6 @@ __glxGetMscRate(struct glx_screen *psc, return True; } - else -#endif return False; } @@ -2145,7 +2140,7 @@ _X_HIDDEN GLboolean __glXGetMscRateOML(Display * dpy, GLXDrawable drawable, int32_t * numerator, int32_t * denominator) { -#if defined( GLX_DIRECT_RENDERING ) && defined( XF86VIDMODE ) +#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) __GLXDRIdrawable *draw = GetGLXDRIDrawable(dpy, drawable); if (draw == NULL) diff --git a/src/glx/meson.build b/src/glx/meson.build index dd8ba60ad80..f3bbcb433ad 100644 --- a/src/glx/meson.build +++ b/src/glx/meson.build @@ -137,10 +137,6 @@ gl_lib_cargs = [ '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), ] -if dep_xxf86vm.found() - gl_lib_cargs += '-DHAVE_XF86VIDMODE' -endif - libglx = static_library( 'glx', [files_libglx, glx_generated], @@ -167,7 +163,7 @@ if with_glx == 'dri' link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl], dependencies : [ dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb, - dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, + dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, dep_xxf86vm, extra_deps_libgl, ], version : gl_lib_version, From 
1869f3f6af6776da675c19ea295326c06a05c58e Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 7 Sep 2018 14:58:56 +0100 Subject: [PATCH 062/220] travis: adding missing x11-xcb for meson+vulkan Required by the x11 WSI Fixes: df82012b2cb ("travis: add meson build for vulkan drivers.") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 982e012b3ac924dab56b41c5407f722bd2a4c359) --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 81ac7696050..6b50d49e143 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,6 +52,7 @@ matrix: # Common - xz-utils - libexpat1-dev + - libx11-xcb-dev - libelf-dev - python3.5 - python3-pip From 8168ee771278436bb103a0a86394fd80cada7226 Mon Sep 17 00:00:00 2001 From: Andrii Simiklit Date: Mon, 5 Nov 2018 09:48:26 +0200 Subject: [PATCH 063/220] i965/batch: avoid reverting batch buffer if saved state is an empty There's no point reverting to the last saved point if that save point is the empty batch, we will just repeat ourselves. v2: Merge with new commits, changes were minimized, added the 'fixes' tag v3: Added to the patch series v4: Fixed the regression which was introduced by this patch Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108630 Reported-by: Mark Janes The solution provided by: Jordan Justen CC: Chris Wilson Fixes: 3faf56ffbdeb "intel: Add an interface for saving/restoring the batchbuffer state."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107626 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108630 (fixed in v4) Signed-off-by: Andrii Simiklit Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke (cherry picked from commit b787dcf57b7298868ce9b6885a827d57a6127ba1) --- src/mesa/drivers/dri/i965/brw_compute.c | 3 ++- src/mesa/drivers/dri/i965/brw_draw.c | 3 ++- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 1 + src/mesa/drivers/dri/i965/intel_batchbuffer.c | 7 +++++++ src/mesa/drivers/dri/i965/intel_batchbuffer.h | 1 + 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index de08fc3ac16..5c8e3a5d4de 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -167,7 +167,7 @@ static void brw_dispatch_compute_common(struct gl_context *ctx) { struct brw_context *brw = brw_context(ctx); - bool fail_next = false; + bool fail_next; if (!_mesa_check_conditional_render(ctx)) return; @@ -185,6 +185,7 @@ brw_dispatch_compute_common(struct gl_context *ctx) intel_batchbuffer_require_space(brw, 600); brw_require_statebuffer_space(brw, 2500); intel_batchbuffer_save_state(brw); + fail_next = intel_batchbuffer_saved_state_is_empty(brw); retry: brw->batch.no_wrap = true; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 8536c040109..19ee3962d74 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -885,7 +885,7 @@ brw_draw_single_prim(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; - bool fail_next = false; + bool fail_next; /* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have * atoms that happen on every draw call. 
@@ -898,6 +898,7 @@ brw_draw_single_prim(struct gl_context *ctx, intel_batchbuffer_require_space(brw, 1500); brw_require_statebuffer_space(brw, 2400); intel_batchbuffer_save_state(brw); + fail_next = intel_batchbuffer_saved_state_is_empty(brw); if (brw->num_instances != prim->num_instances || brw->basevertex != prim->basevertex || diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 34bfcad03eb..a62b88e166c 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -309,6 +309,7 @@ genX(blorp_exec)(struct blorp_batch *batch, intel_batchbuffer_require_space(brw, 1400); brw_require_statebuffer_space(brw, 600); intel_batchbuffer_save_state(brw); + check_aperture_failed_once |= intel_batchbuffer_saved_state_is_empty(brw); brw->batch.no_wrap = true; #if GEN_GEN == 6 diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 8b769eaf534..6207de5a06f 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -301,6 +301,13 @@ intel_batchbuffer_save_state(struct brw_context *brw) brw->batch.saved.exec_count = brw->batch.exec_count; } +bool +intel_batchbuffer_saved_state_is_empty(struct brw_context *brw) +{ + struct intel_batchbuffer *batch = &brw->batch; + return (batch->saved.map_next == batch->batch.map); +} + void intel_batchbuffer_reset_to_saved(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 0632142cd31..91720dad5b4 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -24,6 +24,7 @@ struct intel_batchbuffer; void intel_batchbuffer_init(struct brw_context *brw); void intel_batchbuffer_free(struct intel_batchbuffer *batch); void intel_batchbuffer_save_state(struct brw_context *brw); +bool 
intel_batchbuffer_saved_state_is_empty(struct brw_context *brw); void intel_batchbuffer_reset_to_saved(struct brw_context *brw); void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz); int _intel_batchbuffer_flush_fence(struct brw_context *brw, From 3036ffa1a2300111a5808fc9b295488723176287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 13 Nov 2018 16:19:42 -0500 Subject: [PATCH 064/220] radeonsi: go back to using bottom-of-pipe for beginning of TIME_ELAPSED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102597 Cc: 18.3 Tested-by: Dieter Nützel Reviewed-by: Dave Airlie (cherry picked from commit ea9f95e2a67eca90bb84eea24e7b4b804b3b1345) --- src/gallium/drivers/radeonsi/si_query.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 9b09c74d48a..7a2c7afdbfd 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -793,17 +793,10 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, emit_sample_streamout(cs, va + 32 * stream, stream); break; case PIPE_QUERY_TIME_ELAPSED: - /* Write the timestamp from the CP not waiting for - * outstanding draws (top-of-pipe). 
- */ - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_COUNT_SEL | - COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, + EOP_DATA_SEL_TIMESTAMP, NULL, va, + 0, query->b.type); break; case PIPE_QUERY_PIPELINE_STATISTICS: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); From 3dd73ab2483be6f1ca0ebf08c6a73ca1dea9aa4b Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 16 Nov 2018 12:48:08 +0100 Subject: [PATCH 065/220] r600: clean up the GS ring buffers when the context is destroyed This fixes two memory leaks reported by ASAN: Direct leak of 248 byte(s) in 1 object(s) allocated from: in malloc (/usr/lib64/gcc/x86_64-pc-linux-gnu/7.3.0/libasan.so+0xdb880) in r600_alloc_buffer_struct ../../samba/mesa/src/gallium/drivers/r600/r600_buffer_common.c:578 in r600_buffer_create ../../samba/mesa/src/gallium/drivers/r600/r600_buffer_common.c:600 in r600_resource_create_common ../../samba/mesa/src/gallium/drivers/r600/r600_pipe_common.c:1265 in r600_resource_create ../../samba/mesa/src/gallium/drivers/r600/r600_pipe.c:725 in pipe_buffer_create ../../samba/mesa/src/gallium/auxiliary/util/u_inlines.h:291 in update_gs_block_state ../../samba/mesa/src/gallium/drivers/r600/r600_state_common.c:1482 Direct leak of 248 byte(s) in 1 object(s) allocated from: in malloc (/usr/lib64/gcc/x86_64-pc-linux-gnu/7.3.0/libasan.so+0xdb880) in r600_alloc_buffer_struct ../../samba/mesa/src/gallium/drivers/r600/r600_buffer_common.c:578 in r600_buffer_create ../../samba/mesa/src/gallium/drivers/r600/r600_buffer_common.c:600 in r600_resource_create_common ../../samba/mesa/src/gallium/drivers/r600/r600_pipe_common.c:1265 in r600_resource_create ../../samba/mesa/src/gallium/drivers/r600/r600_pipe.c:722 in pipe_buffer_create 
../../samba/mesa/src/gallium/auxiliary/util/u_inlines.h:291 in update_gs_block_state ../../samba/mesa/src/gallium/drivers/r600/r600_state_common.c:1489 Signed-off-by: Gert Wollny Fixes: 1371d65a7fbd695d3516861fe733685569d890d0 r600g: initial support for geometry shaders on evergreen (v2) Reviewed-by: Roland Scheidegger (cherry picked from commit 61b535437e2ea1115d6915fbd62d9b8745071525) --- src/gallium/drivers/r600/r600_pipe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 2680396c3d6..41e83af1db1 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -105,6 +105,12 @@ static void r600_destroy_context(struct pipe_context *context) } util_unreference_framebuffer_state(&rctx->framebuffer.state); + if (rctx->gs_rings.gsvs_ring.buffer) + pipe_resource_reference(&rctx->gs_rings.gsvs_ring.buffer, NULL); + + if (rctx->gs_rings.esgs_ring.buffer) + pipe_resource_reference(&rctx->gs_rings.esgs_ring.buffer, NULL); + for (sh = 0; sh < PIPE_SHADER_TYPES; ++sh) for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; ++i) rctx->b.b.set_constant_buffer(context, sh, i, NULL); From 33f1569f02230b080a0a33414121fd95a7cbea6d Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 22 Nov 2018 12:58:48 +0000 Subject: [PATCH 066/220] Update version to 18.3.0-rc4 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index bd71fb7fc65..09c62b68556 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-rc3 +18.3.0-rc4 From b8502f15177b55b0fb28ca0a7f4c2fb058340586 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Nov 2018 17:15:37 -0600 Subject: [PATCH 067/220] anv: Put robust buffer access in the pipeline hash It affects apply_pipeline_layout. Shaders compiled with the wrong value will work but they may not be robust as requested by the app. 
Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga (cherry picked from commit 617e402b3d1be185f200b1667540096d9a8b2aec) --- src/intel/vulkan/anv_pipeline.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index ad0f08253e7..f170366d030 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -446,6 +446,9 @@ anv_pipeline_hash_graphics(struct anv_pipeline *pipeline, if (layout) _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + const bool rba = pipeline->device->robust_buffer_access; + _mesa_sha1_update(&ctx, &rba, sizeof(rba)); + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { if (stages[s].entrypoint) anv_pipeline_hash_shader(&ctx, &stages[s]); @@ -466,6 +469,9 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline, if (layout) _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + const bool rba = pipeline->device->robust_buffer_access; + _mesa_sha1_update(&ctx, &rba, sizeof(rba)); + anv_pipeline_hash_shader(&ctx, stage); _mesa_sha1_final(&ctx, sha1_out); From f7040d91078d8a903e3f81db44e48928ae44c6fa Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Thu, 22 Nov 2018 13:33:28 +0000 Subject: [PATCH 068/220] glapi: add missing visibility args Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108829 Fixes: 3218056e0eb375eeda470 "meson: Build i965 and dri stack" Signed-off-by: Eric Engestrom Reviewed-by: Emil Velikov (cherry picked from commit 896c59d690e38e92682f9bc509b5e3658aba5670) --- src/mapi/shared-glapi/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mapi/shared-glapi/meson.build b/src/mapi/shared-glapi/meson.build index dcc6079af3d..3f041471fb9 100644 --- a/src/mapi/shared-glapi/meson.build +++ b/src/mapi/shared-glapi/meson.build @@ -40,7 +40,7 @@ libglapi = shared_library( 'glapi', [files_mapi_glapi, files_mapi_util, shared_glapi_mapi_tmp_h], c_args : [ - 
c_msvc_compat_args, '-DMAPI_MODE_GLAPI', + c_msvc_compat_args, c_vis_args, '-DMAPI_MODE_GLAPI', '-DMAPI_ABI_HEADER="@0@"'.format(shared_glapi_mapi_tmp_h.full_path()), ], link_args : [ld_args_gc_sections], From a941399117c3706aeb31b11f28a3332d9fca83b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 19 Nov 2018 18:12:12 -0500 Subject: [PATCH 069/220] winsys/amdgpu: fix a buffer leak in amdgpu_bo_from_handle Cc: 18.2 18.3 Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit 82aa07f81fcc5ed696eea16f48cec7e39c3cd3d1) --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 68f0562a644..f108058052d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1310,6 +1310,12 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, if (bo) { p_atomic_inc(&bo->base.reference.count); simple_mtx_unlock(&ws->bo_export_table_lock); + + /* Release the buffer handle, because we don't need it anymore. + * This function is returning an existing buffer, which has its own + * handle. 
+ */ + amdgpu_bo_free(result.buf_handle); return &bo->base; } From 825cb768602235a9bbe4d0597aba69830f4efe9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 19 Nov 2018 18:17:40 -0500 Subject: [PATCH 070/220] winsys/amdgpu: fix a device handle leak in amdgpu_winsys_create Cc: 18.2 18.3 Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit d4e7d8b7f053db081a4ffdb59dc53f3531b0e60b) --- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index f32bbd9d086..b20d702670d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -280,6 +280,12 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, if (ws) { pipe_reference(NULL, &ws->reference); simple_mtx_unlock(&dev_tab_mutex); + + /* Release the device handle, because we don't need it anymore. + * This function is returning an existing winsys instance, which + * has its own device handle. + */ + amdgpu_device_deinitialize(dev); return &ws->base; } From 02566b97258d12a045f2de0b2d6ef7bf05ae1ceb Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 24 Nov 2018 20:52:20 +0100 Subject: [PATCH 071/220] radv: Fix opaque metadata descriptor last layer. We used the layer count which results in an off by one error. Not sure this really affects anything. 
Fixes: f4e499ec791 "radv: add initial non-conformant radv vulkan driver" Reviewed-by: Dave Airlie (cherry picked from commit 3c96a1e3a97ba89dad803e7be8f9e3d4f6516fa3) --- src/amd/vulkan/radv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 64346aa340f..a0fa0506350 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -691,7 +691,7 @@ radv_query_opaque_metadata(struct radv_device *device, si_make_texture_descriptor(device, image, false, (VkImageViewType)image->type, image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0, - image->info.array_size, + image->info.array_size - 1, image->info.width, image->info.height, image->info.depth, desc, NULL); From 6b9b7ce38ca3ae7745ed6faf95f4a7b03843ebb4 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 16 Nov 2018 19:12:46 +0100 Subject: [PATCH 072/220] glsl: free or reuse memory allocated for TF varying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a shader program is de-serialized the gl_shader_program passed in may actually still hold memory allocations for the transform feedback varyings. If that is the case, free the varying names and reallocate the new storage for the names array. This fixes a memory leak: Direct leak of 48 byte(s) in 6 object(s) allocated from: in malloc (/usr/lib64/gcc/x86_64-pc-linux-gnu/7.3.0/libasan.so+0xdb880) in transform_feedback_varyings ../../samba/mesa/src/mesa/main/transformfeedback.c:875 in _mesa_TransformFeedbackVaryings ../../samba/mesa/src/mesa/main/transformfeedback.c:985 ... 
Indirect leak of 42 byte(s) in 6 object(s) allocated from: in __interceptor_strdup (/usr/lib64/gcc/x86_64-pc-linux-gnu/7.3.0/libasan.so+0x761c8) in transform_feedback_varyings ../../samba/mesa/src/mesa/main/transformfeedback.c:887 in _mesa_TransformFeedbackVaryings ../../samba/mesa/src/mesa/main/transformfeedback.c:985 Fixes: ab2643e4b06f63c93a57624003679903442634a8 glsl: serialize data from glTransformFeedbackVaryings Signed-off-by: Gert Wollny Reviewed-by: Tapani Pälli (cherry picked from commit f5d053702fa976a3112d9c6a2425430365db40f8) --- src/compiler/glsl/serialize.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp index 267700e7e78..26d8ec4b75b 100644 --- a/src/compiler/glsl/serialize.cpp +++ b/src/compiler/glsl/serialize.cpp @@ -360,13 +360,20 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg) if (xfb_stage == ~0u) return; + if (shProg->TransformFeedback.VaryingNames) { + for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; ++i) + free(shProg->TransformFeedback.VaryingNames[i]); + } + /* Data set by glTransformFeedbackVaryings. */ shProg->TransformFeedback.BufferMode = blob_read_uint32(metadata); blob_copy_bytes(metadata, &shProg->TransformFeedback.BufferStride, sizeof(shProg->TransformFeedback.BufferStride)); shProg->TransformFeedback.NumVarying = blob_read_uint32(metadata); + shProg->TransformFeedback.VaryingNames = (char **) - malloc(shProg->TransformFeedback.NumVarying * sizeof(GLchar *)); + realloc(shProg->TransformFeedback.VaryingNames, + shProg->TransformFeedback.NumVarying * sizeof(GLchar *)); /* Note, malloc used with VaryingNames. 
*/ for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++) shProg->TransformFeedback.VaryingNames[i] = From 1a905e4c5b3bff052085b3efdc17fe2fb252bad3 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 22 Nov 2018 15:17:13 +0100 Subject: [PATCH 073/220] mesa/main: remove bogus error for zero-sized images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The explanation quotes the spec on the following wording to justify the error: "An INVALID_VALUE error is generated if xoffset + width is greater than the texture’s width, yoffset + height is greater than the texture’s height, or zoffset + depth is greater than the texture’s depth." However, this shouldn't generate an error in the case where *all three* of width, xoffset and the texture's width are zero. In this case, we end up generating an unspecified error. So let's remove this check, and instead make sure that we consider this as an empty texture. So let's not generate an error, there's none mandated in the spec in the xoffset/yoffset/zoffset = 0 case. We already avoid doing any work in this case, because of the final, non-error generating check in this function. Fixes: b37b35a5d26 "getteximage: assume texture image is empty for non defined levels" Signed-off-by: Erik Faye-Lund Reviewed-by: Juan A. Suarez (cherry picked from commit 38bbb61252aa503571986080afddd98a56bcf2e7) --- src/mesa/main/texgetimage.c | 49 ++++++++++--------------------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 0ab9ed445d6..0c1e5d208b8 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -900,8 +900,7 @@ select_tex_image(const struct gl_texture_object *texObj, GLenum target, /** * Error-check the offset and size arguments to - * glGet[Compressed]TextureSubImage(). Also checks if the specified - * texture image is missing. + * glGet[Compressed]TextureSubImage().
* \return true if error, false if no error. */ static bool @@ -913,6 +912,7 @@ dimensions_error_check(struct gl_context *ctx, const char *caller) { const struct gl_texture_image *texImage; + GLuint imageWidth = 0, imageHeight = 0, imageDepth = 0; if (xoffset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset); @@ -1002,61 +1002,38 @@ dimensions_error_check(struct gl_context *ctx, } texImage = select_tex_image(texObj, target, level, zoffset); - if (!texImage) { - /* Trying to return a non-defined level is a valid operation per se, as - * OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries") does not - * handle this case as an error. - * - * Rather, we need to look at section 8.22 ("Texture State and Proxy - * State"): - * - * "Each initial texture image is null. It has zero width, height, and - * depth, internal format RGBA, or R8 for buffer textures, component - * sizes set to zero and component types set to NONE, the compressed - * flag set to FALSE, a zero compressed size, and the bound buffer - * object name is zero." - * - * This means we need to assume the image for the non-defined level is - * an empty image. With this assumption, we can go back to section - * 8.11.4 and checking again the errors: - * - * "An INVALID_VALUE error is generated if xoffset + width is greater - * than the texture’s width, yoffset + height is greater than the - * texture’s height, or zoffset + depth is greater than the texture’s - * depth." - * - * Thus why we return INVALID_VALUE. 
- */ - _mesa_error(ctx, GL_INVALID_VALUE, "%s(missing image)", caller); - return true; + if (texImage) { + imageWidth = texImage->Width; + imageHeight = texImage->Height; + imageDepth = texImage->Depth; } - if (xoffset + width > texImage->Width) { + if (xoffset + width > imageWidth) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset %d + width %d > %u)", - caller, xoffset, width, texImage->Width); + caller, xoffset, width, imageWidth); return true; } - if (yoffset + height > texImage->Height) { + if (yoffset + height > imageHeight) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(yoffset %d + height %d > %u)", - caller, yoffset, height, texImage->Height); + caller, yoffset, height, imageHeight); return true; } if (target != GL_TEXTURE_CUBE_MAP) { /* Cube map error checking was done above */ - if (zoffset + depth > texImage->Depth) { + if (zoffset + depth > imageDepth) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(zoffset %d + depth %d > %u)", - caller, zoffset, depth, texImage->Depth); + caller, zoffset, depth, imageDepth); return true; } } /* Extra checks for compressed textures */ - { + if (texImage) { GLuint bw, bh, bd; _mesa_get_format_block_size_3d(texImage->TexFormat, &bw, &bh, &bd); if (bw > 1 || bh > 1 || bd > 1) { From 35e9cd34283a44a671db662e6f434ba3a2e5490b Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 22 Nov 2018 12:17:32 +0100 Subject: [PATCH 074/220] mesa/main: factor out tex-image error-checking This will be useful when we split error-checking for getteximage and gettexsubimage later. Signed-off-by: Erik Faye-Lund Reviewed-by: Juan A. 
Suarez (cherry picked from commit 5e0a84f31cac14f1ccc5c74ce2e7cd997f267752) --- src/mesa/main/texgetimage.c | 110 +++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 46 deletions(-) diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 0c1e5d208b8..5fce932c3c8 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -1139,53 +1139,15 @@ pbo_error_check(struct gl_context *ctx, GLenum target, /** - * Do error checking for all (non-compressed) get-texture-image functions. - * \return true if any error, false if no errors. + * Do teximage-related error checking for getting uncompressed images. + * \return true if there was an error */ static bool -getteximage_error_check(struct gl_context *ctx, - struct gl_texture_object *texObj, - GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, GLsizei bufSize, - GLvoid *pixels, const char *caller) +teximage_error_check(struct gl_context *ctx, + struct gl_texture_image *texImage, + GLenum format, const char *caller) { - struct gl_texture_image *texImage; - GLenum baseFormat, err; - GLint maxLevels; - - assert(texObj); - - if (texObj->Target == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller); - return true; - } - - maxLevels = _mesa_max_texture_levels(ctx, target); - if (level < 0 || level >= maxLevels) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level); - return true; - } - - err = _mesa_error_check_format_and_type(ctx, format, type); - if (err != GL_NO_ERROR) { - _mesa_error(ctx, err, "%s(format/type)", caller); - return true; - } - - if (dimensions_error_check(ctx, texObj, target, level, - xoffset, yoffset, zoffset, - width, height, depth, caller)) { - return true; - } - - if (pbo_error_check(ctx, target, width, height, depth, - format, type, bufSize, pixels, caller)) { - return true; - } - - texImage = 
select_tex_image(texObj, target, level, zoffset); + GLenum baseFormat; assert(texImage); /* @@ -1218,8 +1180,8 @@ getteximage_error_check(struct gl_context *ctx, return true; } else if (_mesa_is_stencil_format(format) - && !_mesa_is_depthstencil_format(baseFormat) - && !_mesa_is_stencil_format(baseFormat)) { + && !_mesa_is_depthstencil_format(baseFormat) + && !_mesa_is_stencil_format(baseFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(format mismatch)", caller); return true; @@ -1248,6 +1210,62 @@ getteximage_error_check(struct gl_context *ctx, } +/** + * Do error checking for all (non-compressed) get-texture-image functions. + * \return true if any error, false if no errors. + */ +static bool +getteximage_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) +{ + struct gl_texture_image *texImage; + GLenum err; + GLint maxLevels; + + assert(texObj); + + if (texObj->Target == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller); + return true; + } + + maxLevels = _mesa_max_texture_levels(ctx, target); + if (level < 0 || level >= maxLevels) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level); + return true; + } + + err = _mesa_error_check_format_and_type(ctx, format, type); + if (err != GL_NO_ERROR) { + _mesa_error(ctx, err, "%s(format/type)", caller); + return true; + } + + if (dimensions_error_check(ctx, texObj, target, level, + xoffset, yoffset, zoffset, + width, height, depth, caller)) { + return true; + } + + if (pbo_error_check(ctx, target, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + + texImage = select_tex_image(texObj, target, level, zoffset); + if (teximage_error_check(ctx, texImage, format, caller)) { + return true; + } + + return false; +} 
+ + /** * Return the width, height and depth of a texture image. * This function must be resilient to bad parameter values since From 5598426132a6bf6c71ecc6bd2c701696bb5da8fb Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 22 Nov 2018 12:37:33 +0100 Subject: [PATCH 075/220] mesa/main: factor out common error-checking This error checking is the same for teximage and texsubimage getters, so let's factor it out to its own function. This will be useful when getteximage and gettexsubimage gets their own error checking routines a bit later. Signed-off-by: Erik Faye-Lund Reviewed-by: Juan A. Suarez (cherry picked from commit 42820c572750c30ae86175ae58bb70439dc2e644) --- src/mesa/main/texgetimage.c | 46 +++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 5fce932c3c8..792535c3245 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -1211,24 +1211,20 @@ teximage_error_check(struct gl_context *ctx, /** - * Do error checking for all (non-compressed) get-texture-image functions. - * \return true if any error, false if no errors. + * Do common teximage-related error checking for getting uncompressed images. 
+ * \return true if there was an error */ static bool -getteximage_error_check(struct gl_context *ctx, - struct gl_texture_object *texObj, - GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, GLsizei bufSize, - GLvoid *pixels, const char *caller) +common_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) { - struct gl_texture_image *texImage; GLenum err; GLint maxLevels; - assert(texObj); - if (texObj->Target == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller); return true; @@ -1246,6 +1242,32 @@ getteximage_error_check(struct gl_context *ctx, return true; } + return false; +} + + +/** + * Do error checking for all (non-compressed) get-texture-image functions. + * \return true if any error, false if no errors. 
+ */ +static bool +getteximage_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) +{ + struct gl_texture_image *texImage; + + assert(texObj); + + if (common_error_check(ctx, texObj, target, level, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + if (dimensions_error_check(ctx, texObj, target, level, xoffset, yoffset, zoffset, width, height, depth, caller)) { From 7d8a9087ae8902acfa87bd51f025fc39d43a99e7 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 22 Nov 2018 17:40:47 +0100 Subject: [PATCH 076/220] mesa/main: check cube-completeness in common code This check is the only part of dimensions_error_check that isn't about error-checking the offset and size arguments of glGet[Compressed]TextureSubImage(), so it doesn't really belong in here. This doesn't make a difference right now, apart from changing the precedence of this error. But it will make a difference for the next patch, where we no longer call this method from the non-sub tex-image getters. Signed-off-by: Erik Faye-Lund Reviewed-by: Juan A. 
Suarez (cherry picked from commit 38af69adfaf47019926bfe3a8cf352752068d389) --- src/mesa/main/texgetimage.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 792535c3245..1876efc1311 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -981,21 +981,6 @@ dimensions_error_check(struct gl_context *ctx, "%s(zoffset + depth = %d)", caller, zoffset + depth); return true; } - /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"): - * - * "An INVALID_OPERATION error is generated by GetTextureImage if the - * effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY , - * and the texture object is not cube complete or cube array complete, - * respectively." - * - * This applies also to GetTextureSubImage, GetCompressedTexImage, - * GetCompressedTextureImage, and GetnCompressedTexImage. - */ - if (!_mesa_cube_complete(texObj)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(cube incomplete)", caller); - return true; - } break; default: ; /* nothing */ @@ -1242,6 +1227,22 @@ common_error_check(struct gl_context *ctx, return true; } + /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"): + * + * "An INVALID_OPERATION error is generated by GetTextureImage if the + * effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY , + * and the texture object is not cube complete or cube array complete, + * respectively." + * + * This applies also to GetTextureSubImage, GetCompressedTexImage, + * GetCompressedTextureImage, and GetnCompressedTexImage. 
+ */ + if (target == GL_TEXTURE_CUBE_MAP && !_mesa_cube_complete(texObj)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(cube incomplete)", caller); + return true; + } + return false; } From d575455be6420e9e9121a0941b2e8df765cd6b0a Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 22 Nov 2018 11:10:50 +0100 Subject: [PATCH 077/220] mesa/main: fix incorrect depth-error If glGetTexImage or glGetnTexImage is called with a level that doesn't exist, we get an error message of this form: Mesa: User error: GL_INVALID_VALUE in glGetTexImage(depth = 0) This is clearly nonsensical, because these APIs don't even have a depth-parameter. The reason is that get_texture_image_dims() returns all-zero dimensions for non-existent texture-images, and we go on to validate these dimensions as if they were user-input, because glGetTextureSubImage requires checking. So let's split this logic in two, so glGetTextureSubImage can have stricter input-validation. All arguments that are no longer validated are generated internally by mesa, so there's no use in validating them. Fixes: 42891dbaa12 "gettextsubimage: verify zoffset and depth are correct" Signed-off-by: Erik Faye-Lund Reviewed-by: Juan A. 
Suarez (cherry picked from commit c120dbfe4d18240315ecec9b43a61aeb9ab239ac) --- src/mesa/main/texgetimage.c | 57 ++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 1876efc1311..bb4f7006618 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -1255,7 +1255,6 @@ static bool getteximage_error_check(struct gl_context *ctx, struct gl_texture_object *texObj, GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, GLvoid *pixels, const char *caller) @@ -1269,6 +1268,49 @@ getteximage_error_check(struct gl_context *ctx, return true; } + if (width == 0 || height == 0 || depth == 0) { + /* Not an error, but nothing to do. Return 'true' so that the + * caller simply returns. + */ + return true; + } + + if (pbo_error_check(ctx, target, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + + texImage = select_tex_image(texObj, target, level, 0); + if (teximage_error_check(ctx, texImage, format, caller)) { + return true; + } + + return false; +} + + +/** + * Do error checking for all (non-compressed) get-texture-image functions. + * \return true if any error, false if no errors. 
+ */ +static bool +gettexsubimage_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) +{ + struct gl_texture_image *texImage; + + assert(texObj); + + if (common_error_check(ctx, texObj, target, level, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + if (dimensions_error_check(ctx, texObj, target, level, xoffset, yoffset, zoffset, width, height, depth, caller)) { @@ -1417,7 +1459,7 @@ _mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type, get_texture_image_dims(texObj, target, level, &width, &height, &depth); if (getteximage_error_check(ctx, texObj, target, level, - 0, 0, 0, width, height, depth, + width, height, depth, format, type, bufSize, pixels, caller)) { return; } @@ -1448,7 +1490,7 @@ _mesa_GetTexImage(GLenum target, GLint level, GLenum format, GLenum type, get_texture_image_dims(texObj, target, level, &width, &height, &depth); if (getteximage_error_check(ctx, texObj, target, level, - 0, 0, 0, width, height, depth, + width, height, depth, format, type, INT_MAX, pixels, caller)) { return; } @@ -1482,7 +1524,7 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type, &width, &height, &depth); if (getteximage_error_check(ctx, texObj, texObj->Target, level, - 0, 0, 0, width, height, depth, + width, height, depth, format, type, bufSize, pixels, caller)) { return; } @@ -1515,9 +1557,10 @@ _mesa_GetTextureSubImage(GLuint texture, GLint level, return; } - if (getteximage_error_check(ctx, texObj, texObj->Target, level, - xoffset, yoffset, zoffset, width, height, depth, - format, type, bufSize, pixels, caller)) { + if (gettexsubimage_error_check(ctx, texObj, texObj->Target, level, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, bufSize, 
pixels, caller)) { return; } From a32c568d39eb51b21cd9abecaabb77b676a5de35 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Fri, 23 Nov 2018 17:08:28 +0000 Subject: [PATCH 078/220] anv: correctly use vulkan 1.0 by default Per chapter 3.2 "Instances": > Providing a NULL VkInstanceCreateInfo::pApplicationInfo or providing > an apiVersion of 0 is equivalent to providing an apiVersion of > VK_MAKE_VERSION(1,0,0). Reported-by: Niklas Haas Fixes: 8c048af5890d43578ca4 "anv: Copy the appliation info into the instance" Signed-off-by: Eric Engestrom Reviewed-by: Lionel Landwerlin Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit 56d126f8fd210dbd2c946bfbc2e3c81b04d27d09) --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index ee35e013329..924470b3005 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -636,7 +636,7 @@ VkResult anv_CreateInstance( } if (instance->app_info.api_version == 0) - anv_EnumerateInstanceVersion(&instance->app_info.api_version); + instance->app_info.api_version = VK_API_VERSION_1_0; instance->enabled_extensions = enabled_extensions; From a1f6ae4e27413727fc548d0201d7f6663f906e92 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 24 Nov 2018 23:21:05 +0100 Subject: [PATCH 079/220] radv: Clamp gfx9 image view extents to the allocated image extents. Mirrors AMDVLK. Looks like if we go over the alignment of height we actually start to change the addressing. Seems like the extra miplevels actually work with this. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108245 Fixes: f6cc15dccd5 "radv/gfx9: fix block compression texture views. 
(v2)" Reviewed-by: Dave Airlie Reviewed-by: Samuel Pitoiset (cherry picked from commit 08ea6b9d9bb047603c249468dfe00d7bb9603d5e) --- src/amd/vulkan/radv_image.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index a0fa0506350..6eb108c7e36 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -1175,8 +1175,6 @@ radv_image_view_init(struct radv_image_view *iview, if (device->physical_device->rad_info.chip_class >= GFX9 && vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) { - unsigned rounded_img_w = util_next_power_of_two(iview->extent.width); - unsigned rounded_img_h = util_next_power_of_two(iview->extent.height); unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel); unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel); @@ -1186,8 +1184,8 @@ radv_image_view_init(struct radv_image_view *iview, lvl_width <<= range->baseMipLevel; lvl_height <<= range->baseMipLevel; - iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w); - iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h); + iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->surface.u.gfx9.surf_pitch); + iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->surface.u.gfx9.surf_height); } } From ec659efcbaae8fe3030437f9a46d036f4fe93b33 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 26 Nov 2018 03:28:05 +0100 Subject: [PATCH 080/220] radv: Align large buffers to the fragment size. Improves performance in Talos by about 15% (and significant improvements in RotR and possibly other but did not bench with final patch) on kernel 4.19 and earlier. On 4.20+ a similar effect comes from 433ca054949a "drm/amdgpu: try allocating VRAM as power of two" v2: Do not impact the alignment of the physical memory. 
Reviewed-by: Dave Airlie Reviewed-by: Samuel Pitoiset CC: (cherry picked from commit 6569644bb6e1f58fd739d83bd4dc42e6af6b6097) --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 25764d93f6a..482cf0f6659 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -304,8 +304,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, return NULL; } + unsigned virt_alignment = alignment; + if (size >= ws->info.pte_fragment_size) + virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size); + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - size, alignment, 0, &va, &va_handle, + size, virt_alignment, 0, &va, &va_handle, (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH); if (r) From 41671f5dc0532be81bb21e14660a66dc8beb3777 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Tue, 27 Nov 2018 13:34:37 +0000 Subject: [PATCH 081/220] wsi/display: fix mem leak when freeing swapchains Fixes: da997ebec92942193955 "vulkan: Add KHR_display extension using DRM [v10]" Signed-off-by: Eric Engestrom Reviewed-by: Keith Packard (cherry picked from commit 9575cd289325ddbfa96291d7886cfc32a0487e79) --- src/vulkan/wsi/wsi_common_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index fd0d30ad80c..856040b4fe1 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -1062,6 +1062,8 @@ wsi_display_swapchain_destroy(struct wsi_swapchain *drv_chain, for (uint32_t i = 0; i < chain->base.image_count; i++) wsi_display_image_finish(drv_chain, allocator, &chain->images[i]); + + wsi_swapchain_finish(&chain->base); vk_free(allocator, chain); return VK_SUCCESS; } From 
ace4860a4ff0d04a45743aeca080b314c7c7d289 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 23 Nov 2018 12:55:38 +0000 Subject: [PATCH 082/220] egl/wayland: bail out when drmGetMagic fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently as the function fails, we pass uninitialized data to the authentication function. Stop doing that and print a warning when the function fails. v2: Plug memory leak in error path (Eric) Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Emil Velikov Reviewed-by: Tapani Pälli (v1) Reviewed-by: Eric Engestrom (cherry picked from commit c59d3aa4b9bc58994e199052171a8119aaa8195c) --- src/egl/drivers/dri2/platform_wayland.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index dc16a69dfbc..73335ee2ad9 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1133,7 +1133,14 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device) if (drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER) { dri2_dpy->authenticated = true; } else { - drmGetMagic(dri2_dpy->fd, &magic); + if (drmGetMagic(dri2_dpy->fd, &magic)) { + close(dri2_dpy->fd); + dri2_dpy->fd = -1; + free(dri2_dpy->device_name); + dri2_dpy->device_name = NULL; + _eglLog(_EGL_WARNING, "wayland-egl: drmGetMagic failed"); + return; + } wl_drm_authenticate(dri2_dpy->wl_drm, magic); } } From bcc8332606de70560a83c9865ef6ea9b0d34e155 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 27 Nov 2018 11:36:01 +0000 Subject: [PATCH 083/220] egl/wayland: plug memory leak in drm_handle_device() As we fail to open the node, we leak the node/device name. 
v2: Log and then free() (Eric) Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Emil Velikov Reviewed-by: Eric Engestrom (cherry picked from commit ce74a7bb8de7f5b921d53384582de3324290cd60) --- src/egl/drivers/dri2/platform_wayland.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 73335ee2ad9..817e9b1988a 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1127,6 +1127,8 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device) if (dri2_dpy->fd == -1) { _eglLog(_EGL_WARNING, "wayland-egl: could not open %s (%s)", dri2_dpy->device_name, strerror(errno)); + free(dri2_dpy->device_name); + dri2_dpy->device_name = NULL; return; } From ce6a9169f09a0fb16ff6ab395b0677aca4a43ba3 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Tue, 20 Nov 2018 17:35:27 +0000 Subject: [PATCH 084/220] vulkan/wsi: fix s/,/;/ typo Fixes: 59e58c348e6af16a5f2dd "vulkan/wsi: Only wait on semaphores on the first swapchain" Signed-off-by: Eric Engestrom Reviewed-by: Jason Ekstrand (cherry picked from commit e0f1f74eda6e1bdb3bcee075f6cc5082d4137069) --- src/vulkan/wsi/wsi_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c index 1cd5f8d62c5..58e25214149 100644 --- a/src/vulkan/wsi/wsi_common.c +++ b/src/vulkan/wsi/wsi_common.c @@ -954,8 +954,8 @@ wsi_common_queue_present(const struct wsi_device *wsi, /* We only need/want to wait on semaphores once. After that, we're * guaranteed ordering since it all happens on the same queue. 
*/ - submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount, - submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores, + submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount; + submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores; /* Set up the pWaitDstStageMasks */ stage_flags = vk_alloc(&swapchain->alloc, From bb4bbb5c2dddcdfb384e21c38b39b47f93d399df Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 28 Nov 2018 17:52:54 +0000 Subject: [PATCH 085/220] cherry-ignore: egl/wayland: rather obvious build fix Commit was squashed into the respective offenders Signed-off-by: Emil Velikov --- bin/.cherry-ignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 bin/.cherry-ignore diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore new file mode 100644 index 00000000000..9c4a21d82d1 --- /dev/null +++ b/bin/.cherry-ignore @@ -0,0 +1,2 @@ +# fixes: Commit was squashed into the respective offenders +c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix From b28aa1178a1e336bf6f73bb777a0aebce8e3c3c7 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 29 Nov 2018 11:56:27 +0000 Subject: [PATCH 086/220] Update version to 18.3.0-rc5 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 09c62b68556..6742d1dc011 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-rc4 +18.3.0-rc5 From fe460ee8cdb0afb41f2e36afa318576a5d4f03fd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 11 Oct 2018 13:44:02 +1000 Subject: [PATCH 087/220] r600: make suballocator 256-bytes align Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108311 Cc: (cherry picked from commit 2ddd44d941648d49dc0d917e03a579baec3590d9) --- src/gallium/drivers/r600/r600_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index ccabab9cdb0..92f243b5c9a 100644 --- 
a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -1636,7 +1636,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx, } if (query->buffer.previous) { - u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16, + u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 256, &tmp_buffer_offset, &tmp_buffer); if (!tmp_buffer) return; From c2a22a44a19850c771a899e83d5ab55b98031a82 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 26 Nov 2018 19:02:08 +0100 Subject: [PATCH 088/220] st/xa: Fix a memory leak Free the context after destruction. Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Reviewed-by: Emil Velikov (cherry picked from commit 7fce3ca3759e2e156e2e3bf1bcc4ee378dc7fa2d) --- src/gallium/state_trackers/xa/xa_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/state_trackers/xa/xa_context.c b/src/gallium/state_trackers/xa/xa_context.c index ba220877c84..67d9eac53bb 100644 --- a/src/gallium/state_trackers/xa/xa_context.c +++ b/src/gallium/state_trackers/xa/xa_context.c @@ -91,6 +91,7 @@ xa_context_destroy(struct xa_context *r) } r->pipe->destroy(r->pipe); + free(r); } XA_EXPORT int From 56f90f6213ecefa04160fec6b2d7e3c552ad9cb9 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 26 Nov 2018 19:05:47 +0100 Subject: [PATCH 089/220] winsys/svga: Fix a memory leak The ioctl.cap_3d member was never freed. 
Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Reviewed-by: Emil Velikov (cherry picked from commit 058f85d41cbe3534b1a06d321fab9afb8fbadfc0) --- src/gallium/winsys/svga/drm/vmw_screen_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c index 739e4ea131f..0ec8c1abe11 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c @@ -1198,4 +1198,6 @@ void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws) { VMW_FUNC; + + free(vws->ioctl.cap_3d); } From 98d571d212701e235266ad2db999f4b57a13517f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 29 Nov 2018 13:02:03 +0000 Subject: [PATCH 090/220] anv: flush pipeline before query result copies Pipeline state pending bits should be taken into account when copying results. In the particular bug below, the results of the vkCmdCopyQueryPoolResults() command was being overwritten by the preceding vkCmdCopyBuffer() with a same destination buffer. This is because we copy the buffers using the 3D pipeline whereas we copy the query results using the command streamer. Those pieces of HW work in parallel and the results are somewhat undefined. 
v2: Unconditionally flush the pipeline before copying the results (Jason) v3: Wrap & expressions (Jason) Signed-off-by: Lionel Landwerlin Suggested-by: Jason Ekstrand Reviewed-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108894 Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 37f9788e9a8e443772b5ad6f339567e6ae6a8320) --- src/intel/vulkan/genX_query.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index ce8757f2643..4831c4ea334 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -729,11 +729,10 @@ void genX(CmdCopyQueryPoolResults)( ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = true; - pc.StallAtPixelScoreboard = true; - } + if ((flags & VK_QUERY_RESULT_WAIT_BIT) || + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) { + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } struct anv_address dest_addr = anv_address_add(buffer->address, destOffset); From ab83cfd2bfd551ca5520f0bdbdd3a4d245544ffc Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 5 Nov 2018 10:50:41 -0800 Subject: [PATCH 091/220] st/xvmc: Add X11 include path. This patch fixes this build error. 
CC tests/xvmc_bench.o In file included from tests/xvmc_bench.c:35: tests/testlib.h:38:10: fatal error: 'X11/Xlib.h' file not found ^~~~~~~~~~~~ Signed-off-by: Vinson Lee Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Emil Velikov (cherry picked from commit 4f74580d3038eca1b751a71e0c098ea9eb9cdb05) --- src/gallium/state_trackers/xvmc/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/state_trackers/xvmc/Makefile.am b/src/gallium/state_trackers/xvmc/Makefile.am index 85d0b5f4953..dc278099030 100644 --- a/src/gallium/state_trackers/xvmc/Makefile.am +++ b/src/gallium/state_trackers/xvmc/Makefile.am @@ -27,6 +27,7 @@ AM_CFLAGS = \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) \ $(VL_CFLAGS) \ + $(X11_INCLUDES) \ $(XCB_DRI3_CFLAGS) \ $(XVMC_CFLAGS) From a7c4368a66fb203786d13eec6dbe394b1eea6292 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sat, 24 Nov 2018 20:00:00 +0100 Subject: [PATCH 092/220] nv50,nvc0: Fix gallium nine regression regarding sampler bindings The new approach is that samplers don't get unbound even if they won't be used in a draw and we should just leave them be as well. Fixes a regression in multiple windows games using gallium nine and nouveau. 
v2: adjust num_samplers to keep track of the highest sampler bound v3: rework how to set the new value of num_samplers Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106577 Fixes: 4d6fab245eec3880e2a59424a579851f44857ce8 "cso: don't track the number of sampler states bound" Signed-off-by: Karol Herbst Reviewed-by: Ilia Mirkin (cherry picked from commit fc0139d28339f58bcbb4946fea7608ecdaff93e7) --- src/gallium/drivers/nouveau/nv50/nv50_state.c | 14 ++++++-------- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 14 ++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index fb4a259ce16..e1b2e20810a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -600,25 +600,23 @@ static inline void nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, unsigned nr, void **hwcso) { + unsigned highest_found = 0; unsigned i; assert(nr <= PIPE_MAX_SAMPLERS); for (i = 0; i < nr; ++i) { struct nv50_tsc_entry *old = nv50->samplers[s][i]; + if (hwcso[i]) + highest_found = i; + nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]); if (old) nv50_screen_tsc_unlock(nv50->screen, old); } assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS); - for (; i < nv50->num_samplers[s]; ++i) { - if (nv50->samplers[s][i]) { - nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]); - nv50->samplers[s][i] = NULL; - } - } - - nv50->num_samplers[s] = nr; + if (nr >= nv50->num_samplers[s]) + nv50->num_samplers[s] = highest_found + 1; nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index f2393cb27b5..9653de86fe9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -464,11 +464,15 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, unsigned s, 
unsigned nr, void **hwcso) { + unsigned highest_found = 0; unsigned i; for (i = 0; i < nr; ++i) { struct nv50_tsc_entry *old = nvc0->samplers[s][i]; + if (hwcso[i]) + highest_found = i; + if (hwcso[i] == old) continue; nvc0->samplers_dirty[s] |= 1 << i; @@ -477,14 +481,8 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, if (old) nvc0_screen_tsc_unlock(nvc0->screen, old); } - for (; i < nvc0->num_samplers[s]; ++i) { - if (nvc0->samplers[s][i]) { - nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); - nvc0->samplers[s][i] = NULL; - } - } - - nvc0->num_samplers[s] = nr; + if (nr >= nvc0->num_samplers[s]) + nvc0->num_samplers[s] = highest_found + 1; } static void From 3985a62afce60ef2c9b3588934d87082e0ebcb39 Mon Sep 17 00:00:00 2001 From: Tobias Klausmann Date: Sat, 1 Dec 2018 18:30:20 +0100 Subject: [PATCH 093/220] amd/vulkan: meson build - use radv_deps for libvulkan_radeon Without this the build breaks with: FAILED: src/amd/vulkan/src@amd@vulkan@@vulkan_radeon@sha/radv_pipeline.c.o cc -Isrc/amd/vulkan/src@amd@vulkan@@vulkan_radeon@sha -Isrc/amd/vulkan -I../src/amd/vulkan -Isrc/../include -I../src/../include -Isrc -I../src -Isrc/mapi -I../src/mapi -Isrc/mesa -I../src/mesa -I../src/gallium/include -Isrc/gallium/auxiliary -I../src/gallium/auxiliary -Isrc/amd -I../src/amd -Isrc/amd/common -I../src/amd/common -Isrc/compiler -I../src/compiler -Isrc/vulkan/util -I../src/vulkan/util -Isrc/vulkan/wsi -I../src/vulkan/wsi -Isrc/compiler/nir -I../src/compiler/nir -I/usr/include -I/usr/include/libdrm -fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -std=c99 -O2 -g '-DVERSION="18.3.0-rc5"' -DPACKAGE_VERSION=VERSION '-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa"' -DGLX_USE_TLS -DHAVE_ST_VDPAU -DENABLE_ST_OMX_BELLAGIO=0 -DENABLE_ST_OMX_TIZONIA=0 -DHAVE_X11_PLATFORM -DGLX_INDIRECT_RENDERING -DGLX_DIRECT_RENDERING -DGLX_USE_DRM -DHAVE_DRM_PLATFORM -DENABLE_SHADER_CACHE -DHAVE___BUILTIN_BSWAP32 
-DHAVE___BUILTIN_BSWAP64 -DHAVE___BUILTIN_CLZ -DHAVE___BUILTIN_CLZLL -DHAVE___BUILTIN_CTZ -DHAVE___BUILTIN_EXPECT -DHAVE___BUILTIN_FFS -DHAVE___BUILTIN_FFSLL -DHAVE___BUILTIN_POPCOUNT -DHAVE___BUILTIN_POPCOUNTLL -DHAVE___BUILTIN_UNREACHABLE -DHAVE_FUNC_ATTRIBUTE_CONST -DHAVE_FUNC_ATTRIBUTE_FLATTEN -DHAVE_FUNC_ATTRIBUTE_MALLOC -DHAVE_FUNC_ATTRIBUTE_PURE -DHAVE_FUNC_ATTRIBUTE_UNUSED -DHAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT -DHAVE_FUNC_ATTRIBUTE_WEAK -DHAVE_FUNC_ATTRIBUTE_FORMAT -DHAVE_FUNC_ATTRIBUTE_PACKED -DHAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL -DHAVE_FUNC_ATTRIBUTE_VISIBILITY -DHAVE_FUNC_ATTRIBUTE_ALIAS -DHAVE_FUNC_ATTRIBUTE_NORETURN -DUSE_SSE41 -DUSE_GCC_ATOMIC_BUILTINS -DUSE_X86_64_ASM -DMAJOR_IN_SYSMACROS -DHAVE_SYS_SYSCTL_H -DHAVE_LINUX_FUTEX_H -DHAVE_ENDIAN_H -DHAVE_DLFCN_H -DHAVE_STRTOF -DHAVE_MKOSTEMP -DHAVE_POSIX_MEMALIGN -DHAVE_TIMESPEC_GET -DHAVE_MEMFD_CREATE -DHAVE_STRTOD_L -DHAVE_DLADDR -DHAVE_DL_ITERATE_PHDR -DHAVE_ZLIB -DHAVE_PTHREAD -DHAVE_PTHREAD_SETAFFINITY -DHAVE_LIBDRM -DHAVE_LLVM=0x0600 -DMESA_LLVM_VERSION_PATCH=1 -DHAVE_WAYLAND_PLATFORM -DWL_HIDE_DEPRECATED -DHAVE_DRI3 -DHAVE_DRI3_MODIFIERS -Werror=implicit-function-declaration -Werror=missing-prototypes -Werror=return-type -fno-math-errno -fno-trapping-math -Wno-missing-field-initializers -Wno-format-truncation -O2 -Wall -D_FORTIFY_SOURCE=2 -fstack-protector-strong -funwind-tables -fasynchronous-unwind-tables -fstack-clash-protection -DNDEBUG -fPIC -pthread -D__STDC_FORMAT_MACROS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS -fvisibility=hidden -Wno-override-init -DVK_USE_PLATFORM_XCB_KHR -DVK_USE_PLATFORM_XLIB_KHR -DVK_USE_PLATFORM_WAYLAND_KHR -DVK_USE_PLATFORM_DISPLAY_KHR -DVK_USE_PLATFORM_XLIB_XRANDR_EXT -MD -MQ 'src/amd/vulkan/src@amd@vulkan@@vulkan_radeon@sha/radv_pipeline.c.o' -MF 'src/amd/vulkan/src@amd@vulkan@@vulkan_radeon@sha/radv_pipeline.c.o.d' -o 'src/amd/vulkan/src@amd@vulkan@@vulkan_radeon@sha/radv_pipeline.c.o' -c ../src/amd/vulkan/radv_pipeline.c In file included 
from ../src/vulkan/util/vk_alloc.h:29, from ../src/amd/vulkan/radv_private.h:52, from ../src/amd/vulkan/radv_debug.h:27, from ../src/amd/vulkan/radv_pipeline.c:30: ../src/../include/vulkan/vulkan.h:54:10: fatal error: wayland-client.h: Datei oder Verzeichnis nicht gefunden #include <wayland-client.h> ^~~~~~~~~~~~~~~~~~ compilation terminated. The above command misses the include directory for wayland: -I/usr/include/wayland The missing include is contained in the (until now) unused radv_deps: if with_platform_wayland radv_deps += dep_wayland_client radv_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR' libradv_files += files('radv_wsi_wayland.c') endif Fixes: 673dda83307 "meson: build "radv" vulkan driver for radeon hardware" Signed-off-by: Tobias Klausmann Reviewed-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 9401a2f2e64bc04401a547d06810adbf0660edb8) --- src/amd/vulkan/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 0f1261d4809..cc2aa7fd17a 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -140,7 +140,7 @@ libvulkan_radeon = shared_library( ], dependencies : [ dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m, - dep_valgrind, + dep_valgrind, radv_deps, idep_nir, ], c_args : [c_vis_args, no_override_init_args, radv_flags], From babf9ab7da992299515ef01e46b15e627c8f976d Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 29 Nov 2018 15:46:18 -0800 Subject: [PATCH 094/220] mesa: Revert INTEL_fragment_shader_ordering support This extension is not properly tested (testing for GL_ARB_fragment_shader_interlock is not sufficient), and since this was noted in review on August 28th no tests have been sent. Revert "i965: Add INTEL_fragment_shader_ordering support." Revert "mesa: Add GL/GLSL plumbing for INTEL_fragment_shader_ordering" This reverts commit 03ecec9ed2099f6e2b62994b33dc948dc731e7b8. 
This reverts commit 119435c8778dd26cb7c8bcde9f04b3982239fe60. Cc: mesa-stable@lists.freedesktop.org Acked-by: Jason Ekstrand Acked-by: Eric Anholt (cherry picked from commit 017199d2d2e4c57015bc60edfcc656062c3a7472) --- docs/relnotes/18.3.0.html | 1 - src/compiler/glsl/builtin_functions.cpp | 17 ----------------- src/compiler/glsl/glsl_parser_extras.cpp | 1 - src/compiler/glsl/glsl_parser_extras.h | 2 -- src/compiler/glsl/glsl_to_nir.cpp | 6 ------ src/compiler/glsl/ir.h | 1 - src/compiler/nir/nir_intrinsics.py | 1 - src/intel/compiler/brw_fs_nir.cpp | 1 - src/mesa/drivers/dri/i965/intel_extensions.c | 1 - src/mesa/main/extensions_table.h | 1 - src/mesa/main/mtypes.h | 1 - 11 files changed, 33 deletions(-) diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html index 8af225a61e1..aa924391919 100644 --- a/docs/relnotes/18.3.0.html +++ b/docs/relnotes/18.3.0.html @@ -61,7 +61,6 @@

    New features

  • GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
  • GL_EXT_window_rectangles on radeonsi.
  • GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
  • -
  • GL_INTEL_fragment_shader_ordering on i965.
  • GL_NV_fragment_shader_interlock on i965.
  • EGL_EXT_device_base for all drivers.
  • EGL_EXT_device_drm for all drivers.
  • diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 5650365d1d5..b6018806865 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -525,12 +525,6 @@ supports_nv_fragment_shader_interlock(const _mesa_glsl_parse_state *state) return state->NV_fragment_shader_interlock_enable; } -static bool -supports_intel_fragment_shader_ordering(const _mesa_glsl_parse_state *state) -{ - return state->INTEL_fragment_shader_ordering_enable; -} - static bool shader_clock(const _mesa_glsl_parse_state *state) { @@ -1311,11 +1305,6 @@ builtin_builder::create_intrinsics() supports_arb_fragment_shader_interlock, ir_intrinsic_end_invocation_interlock), NULL); - add_function("__intrinsic_begin_fragment_shader_ordering", - _invocation_interlock_intrinsic( - supports_intel_fragment_shader_ordering, - ir_intrinsic_begin_fragment_shader_ordering), NULL); - add_function("__intrinsic_shader_clock", _shader_clock_intrinsic(shader_clock, glsl_type::uvec2_type), @@ -3430,12 +3419,6 @@ builtin_builder::create_builtins() supports_nv_fragment_shader_interlock), NULL); - add_function("beginFragmentShaderOrderingINTEL", - _invocation_interlock( - "__intrinsic_begin_fragment_shader_ordering", - supports_intel_fragment_shader_ordering), - NULL); - add_function("anyInvocationARB", _vote("__intrinsic_vote_any", vote), NULL); diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 1bdd7c4bf17..efd1a013dbd 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -727,7 +727,6 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT_AEP(EXT_texture_buffer), EXT_AEP(EXT_texture_cube_map_array), EXT(INTEL_conservative_rasterization), - EXT(INTEL_fragment_shader_ordering), EXT(INTEL_shader_atomic_float_minmax), EXT(MESA_shader_integer_functions), EXT(NV_fragment_shader_interlock), diff --git 
a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index 966d848509c..69aa6cf9cf3 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -812,8 +812,6 @@ struct _mesa_glsl_parse_state { bool EXT_texture_cube_map_array_warn; bool INTEL_conservative_rasterization_enable; bool INTEL_conservative_rasterization_warn; - bool INTEL_fragment_shader_ordering_enable; - bool INTEL_fragment_shader_ordering_warn; bool INTEL_shader_atomic_float_minmax_enable; bool INTEL_shader_atomic_float_minmax_warn; bool MESA_shader_integer_functions_enable; diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 0479f8fcfe4..0956d2f6303 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -742,9 +742,6 @@ nir_visitor::visit(ir_call *ir) case ir_intrinsic_end_invocation_interlock: op = nir_intrinsic_end_invocation_interlock; break; - case ir_intrinsic_begin_fragment_shader_ordering: - op = nir_intrinsic_begin_fragment_shader_ordering; - break; case ir_intrinsic_group_memory_barrier: op = nir_intrinsic_group_memory_barrier; break; @@ -983,9 +980,6 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_end_invocation_interlock: nir_builder_instr_insert(&b, &instr->instr); break; - case nir_intrinsic_begin_fragment_shader_ordering: - nir_builder_instr_insert(&b, &instr->instr); - break; case nir_intrinsic_store_ssbo: { exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index f478b29a6b5..d05d1998a50 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -1122,7 +1122,6 @@ enum ir_intrinsic_id { ir_intrinsic_memory_barrier_shared, ir_intrinsic_begin_invocation_interlock, ir_intrinsic_end_invocation_interlock, - ir_intrinsic_begin_fragment_shader_ordering, ir_intrinsic_vote_all, ir_intrinsic_vote_any, diff --git 
a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index ec3049ca06d..910f9c336f8 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -199,7 +199,6 @@ def barrier(name): barrier("memory_barrier_shared") barrier("begin_invocation_interlock") barrier("end_invocation_interlock") -barrier("begin_fragment_shader_ordering") # A conditional discard, with a single boolean source. intrinsic("discard_if", src_comp=[1]) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index c845d87d59b..c33394d10d4 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4804,7 +4804,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_begin_fragment_shader_ordering: case nir_intrinsic_begin_invocation_interlock: { const fs_builder ubld = bld.group(8, 0); const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index d7e02efb54d..0cfe2acbdd4 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -247,7 +247,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.OES_primitive_bounding_box = true; ctx->Extensions.OES_texture_buffer = true; ctx->Extensions.ARB_fragment_shader_interlock = true; - ctx->Extensions.INTEL_fragment_shader_ordering = true; if (can_do_pipelined_register_writes(brw->screen)) { ctx->Extensions.ARB_draw_indirect = true; diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 47db1583135..aac96290ded 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -317,7 +317,6 @@ EXT(IBM_texture_mirrored_repeat , dummy_true EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) EXT(INTEL_conservative_rasterization , 
INTEL_conservative_rasterization , x , GLC, x , 31, 2013) -EXT(INTEL_fragment_shader_ordering , INTEL_fragment_shader_ordering , GLL, GLC, x , x , 2013) EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013) EXT(INTEL_shader_atomic_float_minmax , INTEL_shader_atomic_float_minmax , GLL, GLC, x , x , 2018) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 9ed49b7ff24..f30b778a7b1 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4296,7 +4296,6 @@ struct gl_extensions GLboolean ATI_fragment_shader; GLboolean GREMEDY_string_marker; GLboolean INTEL_conservative_rasterization; - GLboolean INTEL_fragment_shader_ordering; GLboolean INTEL_performance_query; GLboolean INTEL_shader_atomic_float_minmax; GLboolean KHR_blend_equation_advanced; From cc451083824a602298338cc4a34ff99123338f94 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 3 Dec 2018 16:20:43 -0800 Subject: [PATCH 095/220] Revert "st/mesa: silenced unhanded enum warning in st_glsl_to_tgsi.cpp" This reverts commit 198c50f4873758e9f64d89eea262af5dd1644df9. 
This needs to be reverted after commit 017199d2d2e4 ("mesa: Revert INTEL_fragment_shader_ordering support") (cherry picked from commit dd53bb7e1f69740a5712decbae79dc79df8ecaa1) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5322903b93a..0783f67f2b7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4072,7 +4072,6 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) case ir_intrinsic_generic_atomic_comp_swap: case ir_intrinsic_begin_invocation_interlock: case ir_intrinsic_end_invocation_interlock: - case ir_intrinsic_begin_fragment_shader_ordering: unreachable("Invalid intrinsic"); } } From d369bd91c3b501941886e0fde9eab5266dde6f06 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 26 Nov 2018 16:26:35 +0100 Subject: [PATCH 096/220] radv/android: Mark android WSI image as shareable. Fixes: b1444c9ccb0 "radv: Implement VK_ANDROID_native_buffer." 
Acked-by: Samuel Pitoiset (cherry picked from commit 51091b3e1f212be956f91ac5214191c14e83ac59) --- src/amd/vulkan/radv_android.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c index f5d70825dd2..93799b87b8f 100644 --- a/src/amd/vulkan/radv_android.c +++ b/src/amd/vulkan/radv_android.c @@ -110,9 +110,19 @@ radv_image_from_gralloc(VkDevice device_h, struct radv_bo *bo = NULL; VkResult result; + VkImageCreateInfo updated_base_info = *base_info; + + VkExternalMemoryImageCreateInfo external_memory_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = updated_base_info.pNext, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + + updated_base_info.pNext = &external_memory_info; + result = radv_image_create(device_h, &(struct radv_image_create_info) { - .vk_info = base_info, + .vk_info = &updated_base_info, .scanout = true, .no_metadata_planes = true}, alloc, From 5594bb584d538164e1b35b1eb6fcacd8c2b26c2f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 26 Nov 2018 16:26:36 +0100 Subject: [PATCH 097/220] radv/android: Use buffer metadata to determine scanout compat. These days we don't always allocate scanout compatible textures anymore. That does mean we have to fix the radv android WSI though. Fixes: b1444c9ccb0 "radv: Implement VK_ANDROID_native_buffer." 
Acked-by: Samuel Pitoiset (cherry picked from commit 3bf48741e128b60f6430b32cc47197f62075b1e9) --- src/amd/vulkan/radv_android.c | 73 ++++++++++--------- src/amd/vulkan/radv_radeon_winsys.h | 2 + src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 53 ++++++++++++++ 3 files changed, 93 insertions(+), 35 deletions(-) diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c index 93799b87b8f..1a4425f26a5 100644 --- a/src/amd/vulkan/radv_android.c +++ b/src/amd/vulkan/radv_android.c @@ -110,27 +110,6 @@ radv_image_from_gralloc(VkDevice device_h, struct radv_bo *bo = NULL; VkResult result; - VkImageCreateInfo updated_base_info = *base_info; - - VkExternalMemoryImageCreateInfo external_memory_info = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, - .pNext = updated_base_info.pNext, - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, - }; - - updated_base_info.pNext = &external_memory_info; - - result = radv_image_create(device_h, - &(struct radv_image_create_info) { - .vk_info = &updated_base_info, - .scanout = true, - .no_metadata_planes = true}, - alloc, - &image_h); - - if (result != VK_SUCCESS) - return result; - if (gralloc_info->handle->numFds != 1) { return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, "VkNativeBufferANDROID::handle::numFds is %d, " @@ -143,23 +122,14 @@ radv_image_from_gralloc(VkDevice device_h, */ int dma_buf = gralloc_info->handle->data[0]; - image = radv_image_from_handle(image_h); - VkDeviceMemory memory_h; - const VkMemoryDedicatedAllocateInfoKHR ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, - .pNext = NULL, - .buffer = VK_NULL_HANDLE, - .image = image_h - }; - const VkImportMemoryFdInfoKHR import_info = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - .pNext = &ded_alloc, .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, .fd = dup(dma_buf), }; + /* Find the first VRAM memory type, or GART for PRIME images. 
*/ int memory_type_index = -1; for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) { @@ -178,14 +148,49 @@ radv_image_from_gralloc(VkDevice device_h, &(VkMemoryAllocateInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = &import_info, - .allocationSize = image->size, + /* Max buffer size, unused for imports */ + .allocationSize = 0x7FFFFFFF, .memoryTypeIndex = memory_type_index, }, alloc, &memory_h); + if (result != VK_SUCCESS) + return result; + + struct radeon_bo_metadata md; + device->ws->buffer_get_metadata(radv_device_memory_from_handle(memory_h)->bo, &md); + + bool is_scanout; + if (device->physical_device->rad_info.chip_class >= GFX9) { + /* Copied from radeonsi, but is hacky so should be cleaned up. */ + is_scanout = md.u.gfx9.swizzle_mode == 0 || md.u.gfx9.swizzle_mode % 4 == 2; + } else { + is_scanout = md.u.legacy.scanout; + } + + VkImageCreateInfo updated_base_info = *base_info; + + VkExternalMemoryImageCreateInfo external_memory_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = updated_base_info.pNext, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + + updated_base_info.pNext = &external_memory_info; + + result = radv_image_create(device_h, + &(struct radv_image_create_info) { + .vk_info = &updated_base_info, + .scanout = is_scanout, + .no_metadata_planes = true}, + alloc, + &image_h); + if (result != VK_SUCCESS) goto fail_create_image; + image = radv_image_from_handle(image_h); + radv_BindImageMemory(device_h, image_h, memory_h, 0); image->owned_memory = memory_h; @@ -195,9 +200,7 @@ radv_image_from_gralloc(VkDevice device_h, return VK_SUCCESS; fail_create_image: -fail_size: - radv_DestroyImage(device_h, image_h, alloc); - + radv_FreeMemory(device_h, memory_h, alloc); return result; } diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 7977d46229e..e9d541ab150 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h 
+++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -223,6 +223,8 @@ struct radeon_winsys { void (*buffer_set_metadata)(struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md); + void (*buffer_get_metadata)(struct radeon_winsys_bo *bo, + struct radeon_bo_metadata *md); void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent, uint64_t offset, uint64_t size, diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 482cf0f6659..ec126bfc7cb 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -540,6 +540,21 @@ radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, return true; } +static unsigned eg_tile_split(unsigned tile_split) +{ + switch (tile_split) { + case 0: tile_split = 64; break; + case 1: tile_split = 128; break; + case 2: tile_split = 256; break; + case 3: tile_split = 512; break; + default: + case 4: tile_split = 1024; break; + case 5: tile_split = 2048; break; + case 6: tile_split = 4096; break; + } + return tile_split; +} + static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split) { switch (eg_tile_split) { @@ -593,6 +608,43 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo, amdgpu_bo_set_metadata(bo->bo, &metadata); } +static void +radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo, + struct radeon_bo_metadata *md) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + struct amdgpu_bo_info info = {0}; + + int r = amdgpu_bo_query_info(bo->bo, &info); + if (r) + return; + + uint64_t tiling_flags = info.metadata.tiling_info; + + if (bo->ws->info.chip_class >= GFX9) { + md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); + } else { + md->u.legacy.microtile = RADEON_LAYOUT_LINEAR; + md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR; + + if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */ + md->u.legacy.macrotile = RADEON_LAYOUT_TILED; + else if 
(AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */ + md->u.legacy.microtile = RADEON_LAYOUT_TILED; + + md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); + md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); + md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); + md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT)); + md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); + md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); + md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */ + } + + md->size_metadata = info.metadata.size_metadata; + memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata)); +} + void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws) { ws->base.buffer_create = radv_amdgpu_winsys_bo_create; @@ -603,5 +655,6 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws) ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd; ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd; ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata; + ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata; ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind; } From 5b50e6a7ec0dd3714619422c578e5efa8b4eed4f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 3 Dec 2018 14:38:28 +1000 Subject: [PATCH 098/220] radv: use 3d shader for gfx9 copies if dst is 3d This fixes some crucible 3d miptree tests I've been working on when executed using the compute shader path. Fixes: d08f267814 (radv/gfx9: fix 3d image to image transfers on compute queues.) 
Reviewed-by: Samuel Pitoiset (cherry picked from commit 1363a47c9c4f8482fea9e8a2582a1d8e9db0e8a6) --- src/amd/vulkan/radv_meta_bufimage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c index 6f074a70b4c..e9d680437e4 100644 --- a/src/amd/vulkan/radv_meta_bufimage.c +++ b/src/amd/vulkan/radv_meta_bufimage.c @@ -2061,7 +2061,7 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); if (device->physical_device->rad_info.chip_class >= GFX9 && - src->image->type == VK_IMAGE_TYPE_3D) + (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D)) pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d; radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); From 055e0d71261e489bf0dfe95e312e84f838e21ffe Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 3 Dec 2018 22:45:03 +0100 Subject: [PATCH 099/220] radv: rework the TC-compat HTILE hardware bug with COND_EXEC After investigating on this, it appears that COND_WRITE doesn't work correctly in some situations. I don't know exactly why does it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK also uses COND_EXEC I think there is a reason. Now the driver stores a new metadata value in order to reflect the last fast depth clear state. If a TC-compat HTILE is fast cleared with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to work around that hardware bug. This fixes rendering issues with The Forest and DXVK and doesn't seem to introduce any regressions. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914 Fixes: 68dead112e7 ("radv: update the ZRANGE_PRECISION value for the TC-compat bug") Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit 824cfc1ee5e0aba15b676b9363ff32046d96eb42) --- src/amd/vulkan/radv_cmd_buffer.c | 91 ++++++++++++++++++++++---------- src/amd/vulkan/radv_image.c | 10 +++- src/amd/vulkan/radv_private.h | 8 +++ 3 files changed, 81 insertions(+), 28 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index b9d5726bfba..3eb4b312aa7 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1068,7 +1068,7 @@ static void radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, struct radv_image *image, VkImageLayout layout, - bool requires_cond_write) + bool requires_cond_exec) { uint32_t db_z_info = ds->db_z_info; uint32_t db_z_info_reg; @@ -1092,38 +1092,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, } /* When we don't know the last fast clear value we need to emit a - * conditional packet, otherwise we can update DB_Z_INFO directly. + * conditional packet that will eventually skip the following + * SET_CONTEXT_REG packet. */ - if (requires_cond_write) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0)); - - const uint32_t write_space = 0 << 8; /* register */ - const uint32_t poll_space = 1 << 4; /* memory */ - const uint32_t function = 3 << 0; /* equal to the reference */ - const uint32_t options = write_space | poll_space | function; - radeon_emit(cmd_buffer->cs, options); - - /* poll address - location of the depth clear value */ + if (requires_cond_exec) { uint64_t va = radv_buffer_get_va(image->bo); - va += image->offset + image->clear_value_offset; - - /* In presence of stencil format, we have to adjust the base - * address because the first value is the stencil clear value. 
- */ - if (vk_format_is_stencil(image->vk_format)) - va += 4; + va += image->offset + image->tc_compat_zrange_offset; + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); - - radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */ - radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */ - radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */ - radeon_emit(cmd_buffer->cs, 0u); /* write address high */ - radeon_emit(cmd_buffer->cs, db_z_info); - } else { - radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ } + + radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); } static void @@ -1270,6 +1253,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, fui(ds_clear_value.depth)); } +/** + * Update the TC-compat metadata value for this image. + */ +static void +radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + uint32_t value) +{ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->tc_compat_zrange_offset; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, value); +} + +static void +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + VkClearDepthStencilValue ds_clear_value) +{ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->tc_compat_zrange_offset; + uint32_t cond_val; + + /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last + * depth clear value is 0.0f. 
+ */ + cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; + + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val); +} + /** * Update the clear depth/stencil values for this image. */ @@ -1283,6 +1305,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects); + if (radv_image_is_tc_compat_htile(image) && + (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { + radv_update_tc_compat_zrange_metadata(cmd_buffer, image, + ds_clear_value); + } + radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value, aspects); } @@ -4192,6 +4220,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects); + + if (radv_image_is_tc_compat_htile(image)) { + /* Initialize the TC-compat metadata value to 0 because by + * default DB_Z_INFO.ZRANGE_PRECISION is set to 1, and we only + * have to conditionally update its value when performing + * a fast depth clear. + */ + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0); + } } static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 6eb108c7e36..4f02be40185 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image) /* + 8 for storing the clear values */ image->clear_value_offset = image->htile_offset + image->surface.htile_size; image->size = image->clear_value_offset + 8; + if (radv_image_is_tc_compat_htile(image)) { + /* Metadata for the TC-compatible HTILE hardware bug which + * has to be fixed by updating ZRANGE_PRECISION when doing + * fast depth clears to 0.0f. 
+ */ + image->tc_compat_zrange_offset = image->clear_value_offset + 8; + image->size = image->clear_value_offset + 16; + } image->alignment = align64(image->alignment, image->surface.htile_alignment); } @@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device, /* Otherwise, try to enable HTILE for depth surfaces. */ if (radv_image_can_enable_htile(image) && !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) { - radv_image_alloc_htile(image); image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE; + radv_image_alloc_htile(image); } else { image->surface.htile_size = 0; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 253e6455604..585702a88b2 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1498,6 +1498,14 @@ struct radv_image { uint64_t clear_value_offset; uint64_t dcc_pred_offset; + /* + * Metadata for the TC-compat zrange workaround. If the 32-bit value + * stored at this offset is UINT_MAX, the driver will emit + * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the + * SET_CONTEXT_REG packet. + */ + uint64_t tc_compat_zrange_offset; + /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */ VkDeviceMemory owned_memory; }; From 462bc0d5d450d4520046f655da5f52e563ef521e Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Wed, 5 Dec 2018 09:45:26 +0000 Subject: [PATCH 100/220] radv: Flush before vkCmdWriteTimestamp() if needed As done for vkCmdBeginQuery() already. Prevents timestamps from being overwritten by previous vkCmdResetQueryPool() calls if the shader path was used to do the reset. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108925 Fixes: a41e2e9cf5 ("radv: allow to use a compute shader for resetting the query pool") Signed-off-by: Alex Smith Reviewed-by: Samuel Pitoiset (cherry picked from commit c1b6cb068c4dfe49c309624610e8610b3f0b27c3) --- src/amd/vulkan/radv_query.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index d538170c67d..8f988d9de26 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1447,6 +1447,22 @@ static unsigned event_type_for_stream(unsigned stream) } } +static void emit_query_flush(struct radv_cmd_buffer *cmd_buffer, + struct radv_query_pool *pool) +{ + if (cmd_buffer->pending_reset_query) { + if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) { + /* Only need to flush caches if the query pool size is + * large enough to be resetted using the compute shader + * path. Small pools don't need any cache flushes + * because we use a CP dma clear. + */ + si_emit_cache_flush(cmd_buffer); + cmd_buffer->pending_reset_query = false; + } + } +} + static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkQueryType query_type, @@ -1593,17 +1609,7 @@ void radv_CmdBeginQueryIndexedEXT( radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo); - if (cmd_buffer->pending_reset_query) { - if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) { - /* Only need to flush caches if the query pool size is - * large enough to be resetted using the compute shader - * path. Small pools don't need any cache flushes - * because we use a CP dma clear. 
- */ - si_emit_cache_flush(cmd_buffer); - cmd_buffer->pending_reset_query = false; - } - } + emit_query_flush(cmd_buffer, pool); va += pool->stride * query; @@ -1680,6 +1686,8 @@ void radv_CmdWriteTimestamp( radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo); + emit_query_flush(cmd_buffer, pool); + int num_queries = 1; if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask); From 54acae83e0a76b5fbba2c642b234909cf5d4e6cf Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 3 Dec 2018 14:33:35 +0000 Subject: [PATCH 101/220] anv/query: flush render target before copying results This change tracks render target writes in the pipeline and applies a render target flush before copying the query results to make sure the preceding operations have landed in memory before the command streamer initiates the copy. v2: Simplify logic in CopyQueryResults (Jason) Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108909 Fixes: 37f9788e9a8e44 ("anv: flush pipeline before query result copies") Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 9a7b3199037ac4b798974f561067cb3d66be8010) --- src/intel/vulkan/anv_private.h | 7 +++++++ src/intel/vulkan/genX_blorp_exec.c | 1 + src/intel/vulkan/genX_cmd_buffer.c | 14 ++++++++++++++ src/intel/vulkan/genX_gpu_memcpy.c | 1 + src/intel/vulkan/genX_query.c | 9 +++++++++ 5 files changed, 32 insertions(+) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d8a08d9d67f..ce6bb302a0c 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1747,6 +1747,13 @@ enum anv_pipe_bits { * we would have to CS stall on every flush which could be bad. */ ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21), + + /* This bit does not exist directly in PIPE_CONTROL. It means that render + * target operations are ongoing. 
Some operations like copies on the + * command streamer might need to be aware of this to trigger the + * appropriate stall before they can proceed with the copy. + */ + ANV_PIPE_RENDER_TARGET_WRITES = (1 << 22), }; #define ANV_PIPE_FLUSH_BITS ( \ diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 2035017ce0e..c573e890946 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -263,4 +263,5 @@ genX(blorp_exec)(struct blorp_batch *batch, cmd_buffer->state.gfx.vb_dirty = ~0; cmd_buffer->state.gfx.dirty = ~0; cmd_buffer->state.push_constants_dirty = ~0; + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 43a02f22567..eea699be8ea 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1758,6 +1758,12 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) pipe.StallAtPixelScoreboard = true; } + /* If a render target flush was emitted, then we can toggle off the bit + * saying that render target writes are ongoing. 
+ */ + if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT) + bits &= ~(ANV_PIPE_RENDER_TARGET_WRITES); + bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT); } @@ -2769,6 +2775,8 @@ void genX(CmdDraw)( prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = 0; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexed)( @@ -2808,6 +2816,8 @@ void genX(CmdDrawIndexed)( prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = vertexOffset; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } /* Auto-Draw / Indirect Registers */ @@ -2941,6 +2951,8 @@ void genX(CmdDrawIndirect)( offset += stride; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexedIndirect)( @@ -2980,6 +2992,8 @@ void genX(CmdDrawIndexedIndirect)( offset += stride; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } static VkResult diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 81522986550..1bee1c6dc17 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -302,4 +302,5 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, } cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 4831c4ea334..71b7a1352f0 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -729,6 +729,15 @@ void genX(CmdCopyQueryPoolResults)( ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + /* If render target writes are ongoing, request a render target cache flush + * to ensure proper ordering of the commands from the 3d pipe and the + * command streamer. 
+ */ + if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_WRITES) { + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + } + if ((flags & VK_QUERY_RESULT_WAIT_BIT) || (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) { cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; From d92bbe54eaf8406d2b3ceb8b6b7eba6c69681b76 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 5 Dec 2018 11:45:19 +0100 Subject: [PATCH 102/220] radv: wait on the high 32 bits of timestamp queries In case we are unlucky if the low part is 0xffffffff. Fixes: 5d6a560a29 ("radv: do not use the availability bit for timestamp queries") Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit c7ada4901aaf192d7aacd51c3ab0ebbbb0ceeb3e) [Emil: resolve trivial conflicts] Signed-off-by: Emil Velikov Conflicts: src/amd/vulkan/radv_query.c --- src/amd/vulkan/radv_query.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 8f988d9de26..cdff336f8a3 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1341,10 +1341,13 @@ void radv_CmdCopyQueryPoolResults( if (flags & VK_QUERY_RESULT_WAIT_BIT) { + /* Wait on the high 32 bits of the timestamp in + * case the low part is 0xffffffff. + */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, local_src_va); - radeon_emit(cs, local_src_va >> 32); + radeon_emit(cs, local_src_va + 4); + radeon_emit(cs, (local_src_va + 4) >> 32); radeon_emit(cs, TIMESTAMP_NOT_READY >> 32); radeon_emit(cs, 0xffffffff); radeon_emit(cs, 4); From a34228e1b079b087cd4644cc2018345f8fd183dc Mon Sep 17 00:00:00 2001 From: Michal Srb Date: Fri, 23 Nov 2018 16:02:27 +0100 Subject: [PATCH 103/220] gallium: Constify drisw_loader_funcs struct The content is not expected to change. 
Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Michal Srb Reviewed-by: Emil Velikov (cherry picked from commit c0ac038c97b89a8266375339c297b17b3700dfb0) --- src/gallium/auxiliary/pipe-loader/pipe_loader.h | 2 +- src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c | 2 +- src/gallium/winsys/sw/dri/dri_sw_winsys.c | 4 ++-- src/gallium/winsys/sw/dri/dri_sw_winsys.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h index 05be94cae31..9b264145347 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h @@ -142,7 +142,7 @@ pipe_loader_release(struct pipe_loader_device **devs, int ndev); */ bool pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, - struct drisw_loader_funcs *drisw_lf); + const struct drisw_loader_funcs *drisw_lf); /** * Initialize a kms backed sw device given an fd. diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index d387ce90d32..587b6f8567b 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -132,7 +132,7 @@ pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev) #ifdef HAVE_PIPE_LOADER_DRI bool -pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf) +pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, const struct drisw_loader_funcs *drisw_lf) { struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); int i; diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c index d519bcfedd3..cd44b036c6f 100644 --- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c +++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c @@ -62,7 +62,7 @@ struct dri_sw_winsys { struct sw_winsys base; - struct drisw_loader_funcs *lf; + const struct 
drisw_loader_funcs *lf; }; static inline struct dri_sw_displaytarget * @@ -282,7 +282,7 @@ dri_destroy_sw_winsys(struct sw_winsys *winsys) } struct sw_winsys * -dri_create_sw_winsys(struct drisw_loader_funcs *lf) +dri_create_sw_winsys(const struct drisw_loader_funcs *lf) { struct dri_sw_winsys *ws; diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.h b/src/gallium/winsys/sw/dri/dri_sw_winsys.h index 329ac06a05b..47e3777d4cd 100644 --- a/src/gallium/winsys/sw/dri/dri_sw_winsys.h +++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.h @@ -33,6 +33,6 @@ struct sw_winsys; -struct sw_winsys *dri_create_sw_winsys(struct drisw_loader_funcs *lf); +struct sw_winsys *dri_create_sw_winsys(const struct drisw_loader_funcs *lf); #endif From bd7edf473e4a4a02e72a77a282975a440d8b094f Mon Sep 17 00:00:00 2001 From: Michal Srb Date: Fri, 23 Nov 2018 17:03:53 +0100 Subject: [PATCH 104/220] drisw: Use separate drisw_loader_funcs for shm The original code was modifying the global drisw_lf variable, which is bad when there are multiple contexts in a single process, each initialized with a different loader. One may support put_image_shm and the other not. Since there are currently only two possible combinations, let's create two global tables, one for each. Let's make them const, since we won't change them and they can be shared. This fixes a crash in VLC. It used two GL contexts (each in a different thread), one was initialized by its Qt GUI, the other by its video output plugin. The first one set the put_image_shm=drisw_put_image_shm, the second did not, but since the same structure was used, the drisw_put_image_shm was used too. Then it crashed because the second loader did not have putImageShm set. Downstream bug: https://bugzilla.opensuse.org/show_bug.cgi?id=1113533 v2: Added Fixes and described the VLC bug. 
Fixes: 63c427fa71a ("drisw: use putImageShm if available") Signed-off-by: Michal Srb Reviewed-by: Emil Velikov (cherry picked from commit 63c0916ada7eed7eddc0453dfbfed3cc7f42ca85) --- src/gallium/state_trackers/dri/drisw.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c index 886f94dc02c..5a0d2e1354d 100644 --- a/src/gallium/state_trackers/dri/drisw.c +++ b/src/gallium/state_trackers/dri/drisw.c @@ -421,12 +421,19 @@ static const __DRIextension *drisw_screen_extensions[] = { NULL }; -static struct drisw_loader_funcs drisw_lf = { +static const struct drisw_loader_funcs drisw_lf = { .get_image = drisw_get_image, .put_image = drisw_put_image, .put_image2 = drisw_put_image2 }; +static const struct drisw_loader_funcs drisw_shm_lf = { + .get_image = drisw_get_image, + .put_image = drisw_put_image, + .put_image2 = drisw_put_image2, + .put_image_shm = drisw_put_image_shm +}; + static const __DRIconfig ** drisw_init_screen(__DRIscreen * sPriv) { @@ -434,6 +441,7 @@ drisw_init_screen(__DRIscreen * sPriv) const __DRIconfig **configs; struct dri_screen *screen; struct pipe_screen *pscreen = NULL; + const struct drisw_loader_funcs *lf = &drisw_lf; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -448,10 +456,10 @@ drisw_init_screen(__DRIscreen * sPriv) sPriv->extensions = drisw_screen_extensions; if (loader->base.version >= 4) { if (loader->putImageShm) - drisw_lf.put_image_shm = drisw_put_image_shm; + lf = &drisw_shm_lf; } - if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf)) { + if (pipe_loader_sw_probe_dri(&screen->dev, lf)) { dri_init_options(screen); pscreen = pipe_loader_create_screen(screen->dev); From 4b715e3e597122e53807944682dd97595dcf461c Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 26 Nov 2018 09:54:03 -0800 Subject: [PATCH 105/220] virgl: quadruple command buffer size Tested running WebGL aquarium on Nvidia host (10,000 fishes) This 
moves us from 7 fps to 9 fps. After quadrupling, performance gains diminish. v2: Remove change ID (Erik) Tested-By: Gert Wollny Reviewed-by: Erik Faye-Lund (cherry picked from commit c0773315af76cd735152c322cb7f710b4b053148) --- src/gallium/drivers/virgl/virgl_winsys.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/virgl/virgl_winsys.h b/src/gallium/drivers/virgl/virgl_winsys.h index 0e6cb7953f6..b44f8aaa54a 100644 --- a/src/gallium/drivers/virgl/virgl_winsys.h +++ b/src/gallium/drivers/virgl/virgl_winsys.h @@ -31,7 +31,7 @@ struct pipe_fence_handle; struct winsys_handle; struct virgl_hw_res; -#define VIRGL_MAX_CMDBUF_DWORDS (16*1024) +#define VIRGL_MAX_CMDBUF_DWORDS (64 * 1024) struct virgl_drm_caps { union virgl_caps caps; From a69ef11424dad4d5346c892f74e269352736bbd8 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 26 Nov 2018 09:54:04 -0800 Subject: [PATCH 106/220] virgl: avoid large inline transfers We flush every time the command buffer (16 kB) is full, which is quite costly. This improves dEQP-GLES3.performance.buffer.data_upload.function_call.buffer_data.new_buffer.usage_stream_draw from 111.16 MB/s to 1930.36 MB/s. In addition, I made the benchmark produce buffers from 0 --> VIRGL_MAX_CMDBUF_DWORDS * 4, and tried ((VIRGL_MAX_CMDBUF_DWORDS * 4) / 2), ((VIRGL_MAX_CMDBUF_DWORDS * 4) / 4), etc. I didn't notice any clear differences, so let's just go with the most obvious heuristic. 
Tested-By: Gert Wollny Reviewed-by: Erik Faye-Lund (cherry picked from commit d18492c64f0abb4eb638d2b213b4b1ff3d775965) --- src/gallium/drivers/virgl/virgl_resource.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c index db5e7dd61af..9174ec5cbbd 100644 --- a/src/gallium/drivers/virgl/virgl_resource.c +++ b/src/gallium/drivers/virgl/virgl_resource.c @@ -95,7 +95,11 @@ static void virgl_buffer_subdata(struct pipe_context *pipe, usage |= PIPE_TRANSFER_DISCARD_RANGE; u_box_1d(offset, size, &box); - virgl_transfer_inline_write(pipe, resource, 0, usage, &box, data, 0, 0); + + if (size >= (VIRGL_MAX_CMDBUF_DWORDS * 4)) + u_default_buffer_subdata(pipe, resource, usage, offset, size, data); + else + virgl_transfer_inline_write(pipe, resource, 0, usage, &box, data, 0, 0); } void virgl_init_context_resource_functions(struct pipe_context *ctx) From c694d84f10cd5ef8392f95fe31d8804742c5c8bd Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 26 Nov 2018 09:54:05 -0800 Subject: [PATCH 107/220] virgl: don't mark buffers as unclean after a write We can mark the buffer unclean if it's ever bound as a TBO, SSBO, ABO, or image. This improves dEQP-GLES3.performance.buffer.data_upload.function_call.map_buffer_range.new_specified_buffer.flag_write_full.stream_draw from 9.58 MB/s to 451.17 MB/s. v2: Track buffer cleanliness as a function of bindings (Ilia). 
v3: virgl_modify_clean --> virgl_dirty_res (Erik) Tested-By: Gert Wollny Reviewed-by: Erik Faye-Lund (cherry picked from commit 89b4798c0619a2ba99046d5ad36f0e6851625f7a) With this and previous two patches, the performance of virgl on top of a r600 (AMD 6870 HD) host improves as follows: | FPS avg | Score -------------------------------- before | 8.2 | 343 after | 21.9 | 916 | FPS avg | Score -------------------------------- before | 13.2 | 333 after | 32.3 | 790 --- src/gallium/drivers/virgl/virgl_buffer.c | 1 - src/gallium/drivers/virgl/virgl_encode.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c index 88a22b56f9a..f72c93f4995 100644 --- a/src/gallium/drivers/virgl/virgl_buffer.c +++ b/src/gallium/drivers/virgl/virgl_buffer.c @@ -106,7 +106,6 @@ static void virgl_buffer_transfer_unmap(struct pipe_context *ctx, if (trans->base.usage & PIPE_TRANSFER_WRITE) { if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { struct virgl_screen *vs = virgl_screen(ctx->screen); - vbuf->base.clean = FALSE; vctx->num_transfers++; vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level); diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c index e86d0711a57..ee2764d74ea 100644 --- a/src/gallium/drivers/virgl/virgl_encode.c +++ b/src/gallium/drivers/virgl/virgl_encode.c @@ -61,6 +61,12 @@ static void virgl_encoder_write_res(struct virgl_context *ctx, } } +static void virgl_dirty_res(struct virgl_resource *res) +{ + if (res) + res->clean = FALSE; +} + int virgl_encode_bind_object(struct virgl_context *ctx, uint32_t handle, uint32_t object) { @@ -615,6 +621,7 @@ int virgl_encode_sampler_view(struct virgl_context *ctx, if (res->u.b.target == PIPE_BUFFER) { virgl_encoder_write_dword(ctx->cbuf, state->u.buf.offset / elem_size); 
virgl_encoder_write_dword(ctx->cbuf, (state->u.buf.offset + state->u.buf.size) / elem_size - 1); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_layer | state->u.tex.last_layer << 16); virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_level | state->u.tex.last_level << 8); @@ -949,6 +956,7 @@ int virgl_encode_set_shader_buffers(struct virgl_context *ctx, virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset); virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size); virgl_encoder_write_res(ctx, res); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, 0); virgl_encoder_write_dword(ctx->cbuf, 0); @@ -972,6 +980,7 @@ int virgl_encode_set_hw_atomic_buffers(struct virgl_context *ctx, virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset); virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size); virgl_encoder_write_res(ctx, res); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, 0); virgl_encoder_write_dword(ctx->cbuf, 0); @@ -999,6 +1008,7 @@ int virgl_encode_set_shader_images(struct virgl_context *ctx, virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.offset); virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.size); virgl_encoder_write_res(ctx, res); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, 0); virgl_encoder_write_dword(ctx->cbuf, 0); From af223b57a4d168186fd08042d14c638926d9a8b3 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 5 Dec 2018 21:36:54 +0000 Subject: [PATCH 108/220] Update version to 18.3.0-rc6 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 6742d1dc011..ae2a4f60c6b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-rc5 +18.3.0-rc6 From cb55bc8771d6f1baed61e82d51a2b67221662cb4 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 6 Dec 2018 18:03:25 +0000 Subject: [PATCH 109/220] Update version to 18.3.0 (final) 
Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index ae2a4f60c6b..ef9129a61ac 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-rc6 +18.3.0 From d603cd9d84c8293c22407030c7664ac775ffb97f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 7 Dec 2018 11:20:00 +0000 Subject: [PATCH 110/220] docs: update 18.3.0 release notes Signed-off-by: Emil Velikov --- docs/relnotes/18.3.0.html | 204 +++++++++++++++++++++++++++++++++++++- 1 file changed, 201 insertions(+), 3 deletions(-) diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html index aa924391919..3cfa2825071 100644 --- a/docs/relnotes/18.3.0.html +++ b/docs/relnotes/18.3.0.html @@ -14,7 +14,7 @@

    The Mesa 3D Graphics Library

    -

    Mesa 18.3.0 Release Notes / TBD

    +

    Mesa 18.3.0 Release Notes / December 7, 2018

    Mesa 18.3.0 is a new development release. People who are concerned @@ -70,8 +70,206 @@

    New features

    Bug fixes

      -
    • TBD
    • -
    + +
  • Bug 13728 - [G965] Some objects in Neverwinter Nights Linux version not displayed correctly
  • + +
  • Bug 91433 - piglit.spec.arb_depth_buffer_float.fbo-depth-gl_depth_component32f-copypixels fails
  • + +
  • Bug 93355 - [BXT,SKLGT4e] intermittent ext_framebuffer_multisample.accuracy fails
  • + +
  • Bug 94957 - dEQP failures on llvmpipe
  • + +
  • Bug 98699 - "float[a+++4 ? 1:1] f;" crashes glsl_compiler
  • + +
  • Bug 99507 - Corrupted frame contents with Vulkan version of DOTA2, Talos Principle and Sascha Willems' demos when they're run Vsynched in fullscreen
  • + +
  • Bug 99730 - Metro Redux game(s) needs override for midshader extension declaration
  • + +
  • Bug 100200 - Default Unreal Engine 4 frag shader fails to compile
  • + +
  • Bug 101247 - Mesa fails to link GLSL programs with unused output blocks
  • + +
  • Bug 102597 - [Regression] mpv, high rendering times (two to three times higher)
  • + +
  • Bug 103241 - Anv crashes when using 64-bit vertex inputs
  • + +
  • Bug 104602 - [apitrace] Graphical artifacts in Civilization VI on RX Vega
  • + +
  • Bug 104809 - anv: DOOM 2016 and Wolfenstein II:The New Colossus crash due to not having depthBoundsTest
  • + +
  • Bug 104926 - swrast: Mesa 17.3.3 produces: HW cursor for format 875713089 not supported
  • + +
  • Bug 105333 - [gallium-nine] missing geometry after commit ac: replace ac_build_kill with ac_build_kill_if_false
  • + +
  • Bug 105371 - r600_shader_from_tgsi - GPR limit exceeded - shader requires 360 registers
  • + +
  • Bug 105731 - linker error "fragment shader input ... has no matching output in the previous stage" when previous stage's output declaration in a separate shader object
  • + +
  • Bug 105904 - Needed to delete mesa shader cache after driver upgrade for 32 bit wine vulkan programs to work.
  • + +
  • Bug 105975 - i965 always reports 0 viewport subpixel bits
  • + +
  • Bug 106231 - llvmpipe blends produce bad code after llvm patch https://reviews.llvm.org/D44785
  • + +
  • Bug 106283 - Shader replacements works only for limited use cases
  • + +
  • Bug 106577 - broken rendering with nine and nouveau (GM107)
  • + +
  • Bug 106833 - glLinkProgram is expected to fail when vertex attribute aliasing happens on ES3.0 context or later
  • + +
  • Bug 106865 - [GLK] piglit.spec.ext_framebuffer_multisample.accuracy stencil tests fail
  • + +
  • Bug 106980 - Basemark GPU vulkan benchmark hangs on GFX9
  • + +
  • Bug 106997 - [Regression]. Dying light game is crashing on latest mesa
  • + +
  • Bug 107088 - [GEN8+] Hang when discarding a fragment if dual source blending is enabled but shader doesn't support it
  • + +
  • Bug 107098 - Segfault after munmap(kms_sw_dt->ro_mapped)
  • + +
  • Bug 107212 - Dual-Core CPU E5500 / G45: RetroArch with reicast core results in corrupted graphics
  • + +
  • Bug 107223 - [GEN9+] 50% perf drop in SynMark Fill* tests (E2E RBC gets disabled?)
  • + +
  • Bug 107276 - radv: OpBitfieldUExtract returns incorrect result when count is zero
  • + +
  • Bug 107280 - [DXVK] Batman: Arkham City with tessellation enabled hangs on SKL GT4
  • + +
  • Bug 107313 - Meson instructions on web site are non-optimal
  • + +
  • Bug 107359 - [Regression] [bisected] [OpenGL CTS] [SKL,BDW] KHR-GL46.texture_barrier*-texels, GTF-GL46.gtf21.GL2FixedTests.buffer_corners.buffer_corners, and GTF-GL46.gtf21.GL2FixedTests.stencil_plane_corners.stencil_plane_corners fail with some configuration
  • + +
  • Bug 107460 - radv: OpControlBarrier does not always work correctly (bisected)
  • + +
  • Bug 107477 - [DXVK] Setting high shader quality in GTA V results in LLVM error
  • + +
  • Bug 107483 - DispatchSanity_test.GL31_CORE regression
  • + +
  • Bug 107487 - [intel] [tools] intel gpu tools don't honor -D tools=[]
  • + +
  • Bug 107488 - gl.h:2090: error: redefinition of typedef ‘GLeglImageOES’
  • + +
  • Bug 107510 - [GEN8+] up to 10% perf drop on several 3D benchmarks
  • + +
  • Bug 107511 - KHR/khrplatform.h not always installed when needed
  • + +
  • Bug 107524 - Broken packDouble2x32 at llvmpipe
  • + +
  • Bug 107544 - intel/decoder: out of bounds group_iter
  • + +
  • Bug 107547 - shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)
  • + +
  • Bug 107550 - "0[2]" as function parameter hits assert
  • + +
  • Bug 107563 - [RADV] Broken rendering in Unity demos
  • + +
  • Bug 107565 - TypeError: __init__() got an unexpected keyword argument 'future_imports'
  • + +
  • Bug 107579 - [SNB] The graphic corruption when we reuse the GS compiled and used for TFB when statebuffer contain magic trash in the unused space
  • + +
  • Bug 107601 - Rise of the Tomb Raider Segmentation Fault when the game starts
  • + +
  • Bug 107610 - Dolphin emulator mis-renders shadow overlay in Super Mario Sunshine
  • + +
  • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
  • + +
  • Bug 107658 - [Regression] [bisected] [OpenGLES CTS] KHR-GLES3.packed_pixels.*rectangle.r*8_snorm
  • + +
  • Bug 107734 - [GLSL] glsl-fface-invariant, glsl-fcoord-invariant and glsl-pcoord-invariant should fail
  • + +
  • Bug 107745 - [bisected] [bdw bsw] piglit.­spec.­arb_fragment_shader_interlock.­arb_fragment_shader_interlock-image-load-store failure
  • + +
  • Bug 107760 - GPU Hang when Playing DiRT 3 Complete Edition using Steam Play with DXVK
  • + +
  • Bug 107765 - [regression] Batman Arkham City crashes with DXVK under wine
  • + +
  • Bug 107772 - Mesa preprocessor matches if(def)s & endifs incorrectly
  • + +
  • Bug 107779 - Access violation with some games
  • + +
  • Bug 107786 - [DXVK] MSAA reflections are broken in GTA V
  • + +
  • Bug 107806 - glsl_get_natural_size_align_bytes() ABORT with GfxBench Vulkan AztecRuins
  • + +
  • Bug 107810 - The 'va_end' call is missed after 'va_copy' in 'util_vsnprintf' function under windows
  • + +
  • Bug 107832 - Gallium picking A16L16 formats when emulating INTENSITY16 conflicts with mesa
  • + +
  • Bug 107843 - 32bit Mesa build failes with meson.
  • + +
  • Bug 107856 - i965 incorrectly calculates the number of layers for texture views (assert)
  • + +
  • Bug 107857 - GPU hang - GS_EMIT without shader outputs
  • + +
  • Bug 107865 - swr fail to build with llvm-libs 6.0.1
  • + +
  • Bug 107869 - u_thread.h:87:4: error: use of undeclared identifier 'cpu_set_t'
  • + +
  • Bug 107870 - Undefined symbols for architecture x86_64: "_util_cpu_caps"
  • + +
  • Bug 107879 - crash happens when link program
  • + +
  • Bug 107891 - [wine, regression, bisected] RAGE, Wolfenstein The New Order hangs in menu
  • + +
  • Bug 107923 - build_id.c:126: multiple definition of `build_id_length'
  • + +
  • Bug 107926 - [anv] Rise of the Tomb Raider always misrendering, segfault and gpu hang.
  • + +
  • Bug 107941 - GPU hang and system crash with Dota 2 using Vulkan
  • + +
  • Bug 107971 - SPV_GOOGLE_hlsl_functionality1 / SPV_GOOGLE_decorate_string
  • + +
  • Bug 108012 - Compiler crashes on access of non-existent member incremental operations
  • + +
  • Bug 108024 - [Debian Stretch]Fail to build because "xcb_randr_lease_t"
  • + +
  • Bug 108082 - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]
  • + +
  • Bug 108109 - [GLSL] no-overloads.vert fails
  • + +
  • Bug 108112 - [vulkancts] some of the coherent memory tests fail.
  • + +
  • Bug 108113 - [vulkancts] r32g32b32 transfer operations not implemented
  • + +
  • Bug 108115 - [vulkancts] dEQP-VK.subgroups.vote.graphics.subgroupallequal.* fails
  • + +
  • Bug 108164 - [radv] VM faults since 5d6a560a2986c9ab421b3c7904d29bb7bc35e36f
  • + +
  • Bug 108245 - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage
  • + +
  • Bug 108272 - [polaris10] opencl-mesa: Anything using OpenCL segfaults, XFX Radeon RX 580
  • + +
  • Bug 108311 - Query buffer object support is broken on r600.
  • + +
  • Bug 108319 - [GLK BXT BSW] Assertion in piglit.spec.arb_gpu_shader_fp64.execution.built-in-functions.vs-sign-sat-neg-abs
  • + +
  • Bug 108491 - Commit baa38c14 causes output issues on my VEGA with RADV
  • + +
  • Bug 108524 - [RADV] GPU lockup on event synchronization
  • + +
  • Bug 108530 - (mesa-18.3) [Tracker] Mesa 18.3 Release Tracker
  • + +
  • Bug 108532 - make check nir_copy_prop_vars_test.store_store_load_different_components regression
  • + +
  • Bug 108560 - Mesa 32 is built without sse
  • + +
  • Bug 108595 - ir3_compiler valgrind build error
  • + +
  • Bug 108617 - [deqp] Mesa fails conformance for egl_ext_device
  • + +
  • Bug 108630 - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever
  • + +
  • Bug 108635 - Mesa master commit 68dc591af16ebb36814e4c187e4998948103c99c causes XWayland to segfault
  • + +
  • Bug 108713 - Gallium: use after free with transform feedback
  • + +
  • Bug 108829 - [meson] libglapi exports internal API
  • + +
  • Bug 108894 - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard
  • + +
  • Bug 108909 - Vkd3d test failure test_resolve_non_issued_query_data()
  • + +
  • Bug 108914 - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this
  • Changes

    From d81beab96afb403915805435fd4b810a00291b99 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Fri, 7 Dec 2018 11:27:49 +0000 Subject: [PATCH 111/220] docs: add sha256 checksums for 18.3.0 Signed-off-by: Emil Velikov --- docs/relnotes/18.3.0.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html index 3cfa2825071..370d5e823e2 100644 --- a/docs/relnotes/18.3.0.html +++ b/docs/relnotes/18.3.0.html @@ -40,7 +40,8 @@

    Mesa 18.3.0 Release Notes / December 7, 2018

    SHA256 checksums

    -TBD.
    +17a124d4dbc712505d22a7815c9b0cee22214c96c8abb91539a2b1351e38a000  mesa-18.3.0.tar.gz
    +b63f947e735d6ef3dfaa30c789a9adfbae18aea671191eaacde95a18c17fc38a  mesa-18.3.0.tar.xz
     
    From dff8f3bd0d5d4f79adfc28e2ac85221e82170878 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 10 Dec 2018 10:57:35 -0600 Subject: [PATCH 112/220] anv,radv: Disable VK_EXT_pci_bus_info The Vulkan working group recently discovered that we made a mistake in assuming that PCI domains are 16-bit even though they can potentially be 32-bit values. To fix this, the next spec update will change the types in the VK_EXT_pci_bus_info struct to be 32 bits which will be a backwards-incompatible change. Normally, Khronos tries very hard to never make backwards incompatible changes to specs. Hopefully, the extension is new enough (2 months) that there are no shipping apps which use the extension so this should be safe. This commit disables the extension for both anv and radv in mesa and should be back-ported to 18.3 ASAP so we avoid any potential issues with new apps running on old drivers. I'll send out a commit (which we can also back-port to 18.3 if we really care) to re-enable the extension in both drivers once this week's spec update ships. The one known use of this extension is internal to mesa and will continue working with the extension disabled and will naturally update when we get a new header. 
Cc: "18.3" Acked-by: Lionel Landwerlin Acked-by: Samuel Pitoiset (cherry picked from commit 8f401b0ce6e6650e1a85e9bb2be23d5ff08812b8) [Emil: resolve trivial conflict] Signed-off-by: Emil Velikov Conflicts: src/intel/vulkan/anv_extensions.py --- src/amd/vulkan/radv_extensions.py | 2 +- src/intel/vulkan/anv_extensions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 6bdf988d117..4a28f8bf41c 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -105,7 +105,7 @@ def __init__(self, name, ext_version, enable): Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'), Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), - Extension('VK_EXT_pci_bus_info', 1, True), + Extension('VK_EXT_pci_bus_info', 1, False), Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, True), diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index e9afe06bb13..e5b6c77a6d0 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -121,7 +121,7 @@ def __init__(self, version, enable): Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_global_priority', 1, 'device->has_context_priority'), - Extension('VK_EXT_pci_bus_info', 1, True), + Extension('VK_EXT_pci_bus_info', 1, False), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'), Extension('VK_EXT_vertex_attribute_divisor', 3, True), From 0e715ae29bb2694003dcd82109e088789102e8b1 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 11 Dec 2018 19:14:22 +0000 Subject: [PATCH 113/220] Update version to 18.3.1 Signed-off-by: Emil 
Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index ef9129a61ac..217b5c6e8ea 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0 +18.3.1 From f6ad9cf1ebd24ddc71be6e8a74e0327a8ea4fd9f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 11 Dec 2018 21:12:55 +0000 Subject: [PATCH 114/220] docs: add release notes for 18.3.1 Signed-off-by: Emil Velikov --- docs/relnotes/18.3.1.html | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 docs/relnotes/18.3.1.html diff --git a/docs/relnotes/18.3.1.html b/docs/relnotes/18.3.1.html new file mode 100644 index 00000000000..c0955885c2c --- /dev/null +++ b/docs/relnotes/18.3.1.html @@ -0,0 +1,62 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.3.1 Release Notes / December 11, 2018

    + +

    +Mesa 18.3.1 is a bug fix release which fixes bugs found since the 18.3.0 release. +

    +

    +Mesa 18.3.1 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    +

    None

    + + +

    Changes

    + +

    Emil Velikov (2):

    +
      +
    • docs: add sha256 checksums for 18.3.0
    • +
    • Update version to 18.3.1
    • +
    + +

    Jason Ekstrand (1):

    +
      +
    • anv,radv: Disable VK_EXT_pci_bus_info
    • +
    + + +
    + + From d92c9ba789196978ef2b3b332c3a3737211b5e73 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 11 Dec 2018 21:19:03 +0000 Subject: [PATCH 115/220] docs: add sha256 checksums for 18.3.1 Signed-off-by: Emil Velikov --- docs/relnotes/18.3.1.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/18.3.1.html b/docs/relnotes/18.3.1.html index c0955885c2c..8acbfb7a5f2 100644 --- a/docs/relnotes/18.3.1.html +++ b/docs/relnotes/18.3.1.html @@ -31,7 +31,8 @@

    Mesa 18.3.1 Release Notes / December 11, 2018

    SHA256 checksums

    -TBD
    +256d0c3d88e380c1b8e3fc5c6ac34001e3b7c30458b8b852407ec68b8ccd9fda  mesa-18.3.1.tar.gz
    +5b1f827d28684a25f6657289f8b7d47ac56395988c7ac23e0ec9a62b644bdc63  mesa-18.3.1.tar.xz
     
    From 6486d1c1a387655e92425168e63a6b282d086a20 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 17 Dec 2018 15:44:25 +0000 Subject: [PATCH 116/220] bin/get-pick-list.sh: rework handing of sha nominations Currently our is_sha_nomination does: - folds any whitespace, attempting to extract sha-like information - checks that at least one of the shas has landed Split it in two and do sha-like validation first. This way, commits with mesa-stable and sha nominations will feature the fixes/revert/etc instead of stable (a) or will be omitted if not applicable for the respective branch (b). Misc examples from 18.3 (a) -[ stable ] 5bc509363b6 glx: make xf86vidmode mandatory for direct rendering +[ fixes ] 5bc509363b6 glx: make xf86vidmode mandatory for direct rendering (b) -[ stable ] 9a7b3199037 anv/query: flush render target before copying results CC: Juan A. Suarez CC: Dylan Baker CC: mesa-stable@lists.freedesktop.org Signed-off-by: Emil Velikov Reviewed-by: Andres Gomez (cherry picked from commit 6b296f64af5d69a1d0e4cdcda5f8c6d090dd4449) --- bin/get-pick-list.sh | 46 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 9f9cbc44026..08a783f35a8 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -21,32 +21,36 @@ is_typod_nomination() git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev" } +fixes= + # Helper to handle various mistypos of the fixes tag. # The tag string itself is passed as argument and normalised within. 
+# +# Resulting string in the global variable "fixes" and contains entries +# in the form "fixes:$sha" is_sha_nomination() { fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \ sed -e 's/'"$2"'/\nfixes:/Ig' | \ grep -Eo 'fixes:[a-f0-9]{8,40}'` - fixes_count=`echo "$fixes" | wc -l` + fixes_count=`echo "$fixes" | grep "fixes:" | wc -l` if test $fixes_count -eq 0; then - return 0 + return 1 fi + return 0 +} + +# Checks if at least one of offending commits, listed in the global +# "fixes", is in branch. +sha_in_range() +{ + fixes_count=`echo "$fixes" | grep "fixes:" | wc -l` while test $fixes_count -gt 0; do # Treat only the current line id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` fixes_count=$(($fixes_count-1)) - # Bail out if we cannot find suitable id. - # Any specific validation the $id is valid and not some junk, is - # implied with the follow up code - if test "x$id" = x; then - continue - fi - - #Check if the offending commit is in branch. - # Be that cherry-picked ... # ... or landed before the branchpoint. if grep -q ^$id already_picked || @@ -103,20 +107,30 @@ do continue fi - if is_stable_nomination "$sha"; then - tag=stable - elif is_typod_nomination "$sha"; then - tag=typod - elif is_fixes_nomination "$sha"; then + if is_fixes_nomination "$sha"; then tag=fixes elif is_brokenby_nomination "$sha"; then tag=brokenby elif is_revert_nomination "$sha"; then tag=revert + elif is_stable_nomination "$sha"; then + tag=stable + elif is_typod_nomination "$sha"; then + tag=typod else continue fi + case "$tag" in + fixes | brokenby | revert ) + if ! 
sha_in_range; then + continue + fi + ;; + * ) + ;; + esac + printf "[ %8s ] " "$tag" git --no-pager show --summary --oneline $sha done From 6d3811d82b92609e2c3a449a6815e2563b3fa96b Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 17 Dec 2018 16:25:40 +0000 Subject: [PATCH 117/220] bin/get-pick-list.sh: warn when commit lists invalid sha We had cases where people would list old/invalid sha in the commit. Add a trivial checker to catch those and throw a warning. CC: Juan A. Suarez CC: Dylan Baker CC: mesa-stable@lists.freedesktop.org Signed-off-by: Emil Velikov Reviewed-by: Andres Gomez (cherry picked from commit e0dbfc995370756355f28ac31495eab96a410384) --- bin/get-pick-list.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 08a783f35a8..79b7a295ea6 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -38,6 +38,17 @@ is_sha_nomination() if test $fixes_count -eq 0; then return 1 fi + + # Throw a warning for each invalid sha + while test $fixes_count -gt 0; do + # Treat only the current line + id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` + fixes_count=$(($fixes_count-1)) + if ! git show $id &>/dev/null; then + echo WARNING: Commit $1 lists invalid sha $id + fi + done + return 0 } From 59c37f29888ea6568bac243466f5148de9a3bee6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 7 Dec 2018 14:11:01 -0500 Subject: [PATCH 118/220] freedreno/drm: fix memory leak Fix an embarrassing memory leak with the non-softpin submit/rb implementation.
Fixes: f3cc0d27475 freedreno: import libdrm_freedreno + redesign submit Signed-off-by: Rob Clark (cherry picked from commit d014af98b7afc69f4f733c8b8b6f2e3438e68407) --- src/gallium/drivers/freedreno/drm/msm_ringbuffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c b/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c index f1e96740231..9736aebd7f6 100644 --- a/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c +++ b/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c @@ -97,6 +97,7 @@ static void cmd_free(struct msm_cmd *cmd) { fd_bo_del(cmd->ring_bo); + free(cmd->relocs); free(cmd); } @@ -655,6 +656,7 @@ msm_ringbuffer_destroy(struct fd_ringbuffer *ring) _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); + free(msm_ring->u.reloc_bos); free(msm_ring); } else { struct fd_submit *submit = msm_ring->u.submit; @@ -663,6 +665,7 @@ msm_ringbuffer_destroy(struct fd_ringbuffer *ring) cmd_free(msm_ring->u.cmds[i]); } + free(msm_ring->u.cmds); slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); } } From 638287ba17e3a06b8d501a5160e26774561873d3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 7 Dec 2018 10:30:09 -0800 Subject: [PATCH 119/220] v3d: Fix a leak of the transfer helper on screen destroy. 
Fixes: 7a30517cce8f ("broadcom/vc5: Start adding support for rendering to Z32F_S8X24_UINT.") (cherry picked from commit 3bd73d31a862fd1e198a7c83ec656a4a376c593a) --- src/gallium/drivers/v3d/v3d_screen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 1d59dbfc12a..e8f0e291dc3 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -32,6 +32,7 @@ #include "util/u_format.h" #include "util/u_hash_table.h" #include "util/u_screen.h" +#include "util/u_transfer_helper.h" #include "util/ralloc.h" #include @@ -74,6 +75,7 @@ v3d_screen_destroy(struct pipe_screen *pscreen) v3d_simulator_destroy(screen); v3d_compiler_free(screen->compiler); + u_transfer_helper_destroy(pscreen->transfer_helper); close(screen->fd); ralloc_free(pscreen); From 61b8ae8cc8929e991d9ca6940400646813fe9197 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 7 Dec 2018 10:31:27 -0800 Subject: [PATCH 120/220] vc4: Fix a leak of the transfer helper on screen destroy. 
Fixes: d009463a6549 ("vc4: Switch to using u_transfer_helper for MSAA maps.") (cherry picked from commit 7f8d8b7d27868037a146f7fca04fef56b29bb85e) --- src/gallium/drivers/vc4/vc4_screen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 14ee6cf09e5..e7f7c82c271 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include "util/u_hash_table.h" #include "util/u_screen.h" +#include "util/u_transfer_helper.h" #include "util/ralloc.h" #include @@ -110,6 +111,8 @@ vc4_screen_destroy(struct pipe_screen *pscreen) vc4_simulator_destroy(screen); #endif + u_transfer_helper_destroy(pscreen->transfer_helper); + close(screen->fd); ralloc_free(pscreen); } From d1f14bade80eade92fb3167e2bfd5a9f819f6067 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 7 Dec 2018 10:34:40 -0800 Subject: [PATCH 121/220] v3d: Fix a leak of the disassembled instruction string during debug dumps. 
Fixes: ade416d02369 ("broadcom: Add VC5 NIR compiler.") (cherry picked from commit f1d98204c34d36876e05e1d3f2242296ccec19e3) --- src/broadcom/compiler/vir_to_qpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index b5a7b841ef6..4baadce294c 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -364,6 +364,7 @@ v3d_dump_qpu(struct v3d_compile *c) for (int i = 0; i < c->qpu_inst_count; i++) { const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]); fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str); + ralloc_free((void *)str); } fprintf(stderr, "\n"); } From 734f78a783a9f470f3d318e9aab9cc359fb9c0c4 Mon Sep 17 00:00:00 2001 From: Kirill Burtsev Date: Wed, 5 Dec 2018 15:54:27 +0000 Subject: [PATCH 122/220] loader: free error state, when checking the drawable type Currently we distinguish if the drawable is a window or pixmap by checking xcb_present_select_input throws an error or not. Yet, we don't always free the error state returned by xcb. 
Cc: Kirill Burtsev Cc: Boyan Ding Fixes: 6bd9ba7d074 ("loader: Add dri3 helper") Reviewed-by: Emil Velikov [Emil: add commit message, fixes tag] Signed-off-by: Emil Velikov (cherry picked from commit a539316485ddda074ca1b71aebf4a29b65af87c3) --- src/loader/loader_dri3_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c index 1981b5f0515..7cd6b1e8ab6 100644 --- a/src/loader/loader_dri3_helper.c +++ b/src/loader/loader_dri3_helper.c @@ -1509,6 +1509,7 @@ dri3_update_drawable(struct loader_dri3_drawable *draw) mtx_unlock(&draw->mtx); return false; } + free(error); draw->is_pixmap = true; xcb_unregister_for_special_event(draw->conn, draw->special_event); draw->special_event = NULL; From 7594f63f211b17462f0c324c36781d0c6ef94b64 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 8 Dec 2018 13:21:52 -0500 Subject: [PATCH 123/220] mesa/st/nir: fix missing nir_compact_varyings LinkedTransformFeedback is normally populated, which had nerf'd varying packing since the check was introduced. Fixes: dbd52585fa9 st/nir: Disable varying packing when doing transform feedback. Signed-off-by: Rob Clark Reviewed-by: Timothy Arceri (cherry picked from commit cfe8220904b79ee45083970c0325bf59a140e163) --- src/mesa/state_tracker/st_glsl_to_nir.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index c58deadc957..581a8639ef0 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -749,7 +749,8 @@ st_link_nir(struct gl_context *ctx, * the pipe_stream_output->output_register field is based on the * pre-compacted driver_locations. 
*/ - if (!prev_shader->sh.LinkedTransformFeedback) + if (!(prev_shader->sh.LinkedTransformFeedback && + prev_shader->sh.LinkedTransformFeedback->NumVarying > 0)) nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir, nir, ctx->API != API_OPENGL_COMPAT); } From 1c70209c6d0ff2385a18b464a56b1eb68505171f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 18 Dec 2018 16:28:17 +0000 Subject: [PATCH 124/220] cherry-ignore: meson: libfreedreno depends upon libdrm (for fence support) The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a Signed-off-by: Emil Velikov --- bin/.cherry-ignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore index 9c4a21d82d1..92456c5c938 100644 --- a/bin/.cherry-ignore +++ b/bin/.cherry-ignore @@ -1,2 +1,4 @@ # fixes: Commit was squashed into the respective offenders c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix +# fixes: The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a +ff6f1dd0d3c6b4c15ca51b478b2884d14f6a1e06 meson: libfreedreno depends upon libdrm (for fence support) From 52f42f5b632328a399ee7239e086039cc7e2664d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 9 Oct 2018 14:15:12 +0200 Subject: [PATCH 125/220] radv: switch on EOP when primitive restart is enabled with triangle strips Otherwise, Yakuza hangs the GPU with DXVK. We don't know if linestrip and pointlist are affected, so my point is to do that only for triangle strips.
Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit d8325f1f07b67c8a31c6786c71e3fd0910bc3b82) --- src/amd/vulkan/radv_pipeline.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index bced19573c1..cc025f55ea3 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3396,8 +3396,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, (pipeline->graphics.prim_restart_enable && (device->physical_device->rad_info.family < CHIP_POLARIS10 || (prim != V_008958_DI_PT_POINTLIST && - prim != V_008958_DI_PT_LINESTRIP && - prim != V_008958_DI_PT_TRISTRIP)))) + prim != V_008958_DI_PT_LINESTRIP)))) ia_multi_vgt_param.wd_switch_on_eop = true; } From 31198005ec2589dc1abfa902137ffc3b2c055e02 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Tue, 11 Dec 2018 12:34:38 +0000 Subject: [PATCH 126/220] virgl: wrap vertex element state in a struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just has one member for now; the handle. But this is about to change. 
Signed-off-by: Erik Faye-Lund Reviewed-by: Mathias Fröhlich Tested-By: Gert Wollny (cherry picked from commit 8447b64238773db0e365802315772d1819c1923f) --- src/gallium/drivers/virgl/virgl_context.c | 28 +++++++++++++++-------- src/gallium/drivers/virgl/virgl_context.h | 2 ++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c index 4511bf3b2fb..b1cd5e7ba4e 100644 --- a/src/gallium/drivers/virgl/virgl_context.c +++ b/src/gallium/drivers/virgl/virgl_context.c @@ -47,6 +47,10 @@ #include "virgl_resource.h" #include "virgl_screen.h" +struct virgl_vertex_elements_state { + uint32_t handle; +}; + static uint32_t next_handle; uint32_t virgl_object_assign_handle(void) { @@ -386,28 +390,34 @@ static void *virgl_create_vertex_elements_state(struct pipe_context *ctx, const struct pipe_vertex_element *elements) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = virgl_object_assign_handle(); - virgl_encoder_create_vertex_elements(vctx, handle, - num_elements, elements); - return (void*)(unsigned long)handle; + struct virgl_vertex_elements_state *state = + CALLOC_STRUCT(virgl_vertex_elements_state); + state->handle = virgl_object_assign_handle(); + virgl_encoder_create_vertex_elements(vctx, state->handle, + num_elements, elements); + return state; } static void virgl_delete_vertex_elements_state(struct pipe_context *ctx, void *ve) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)ve; - - virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); + struct virgl_vertex_elements_state *state = + (struct virgl_vertex_elements_state *)ve; + virgl_encode_delete_object(vctx, state->handle, VIRGL_OBJECT_VERTEX_ELEMENTS); + FREE(state); } static void virgl_bind_vertex_elements_state(struct pipe_context *ctx, void *ve) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)ve; - 
virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); + struct virgl_vertex_elements_state *state = + (struct virgl_vertex_elements_state *)ve; + vctx->vertex_elements = state; + virgl_encode_bind_object(vctx, state ? state->handle : 0, + VIRGL_OBJECT_VERTEX_ELEMENTS); } static void virgl_set_vertex_buffers(struct pipe_context *ctx, diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h index 20988baa3c7..09cf0db2ae4 100644 --- a/src/gallium/drivers/virgl/virgl_context.h +++ b/src/gallium/drivers/virgl/virgl_context.h @@ -32,6 +32,7 @@ struct pipe_screen; struct tgsi_token; struct u_upload_mgr; struct virgl_cmd_buf; +struct virgl_vertex_elements_state; struct virgl_sampler_view { struct pipe_sampler_view base; @@ -53,6 +54,7 @@ struct virgl_context { struct virgl_cmd_buf *cbuf; struct virgl_textures_info samplers[PIPE_SHADER_TYPES]; + struct virgl_vertex_elements_state *vertex_elements; struct pipe_framebuffer_state framebuffer; From d66fcd1969979c7efa1861563236b4c2ff5a2d4d Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Tue, 11 Dec 2018 14:02:53 +0000 Subject: [PATCH 127/220] virgl: work around bad assumptions in virglrenderer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virglrenderer does the wrong thing when given an instance divisor; it tries to use the element-index rather than the binding-index as the argument to glVertexBindingDivisor(). This worked fine as long as there was a 1:1 relationship between elements and bindings, which was the case until 19a91841c34 "st/mesa: Use Array._DrawVAO in st_atom_array.c.". So let's detect instance divisors, and restore a 1:1 relationship in that case. This will make old versions of virglrenderer behave correctly. For newer versions, we can consider making a better interface, where the instance divisor isn't specified per element, but rather per binding. But let's save that for another day.
Signed-off-by: Erik Faye-Lund Fixes: 19a91841c34 "st/mesa: Use Array._DrawVAO in st_atom_array.c." Reviewed-by: Mathias Fröhlich Tested-By: Gert Wollny (cherry picked from commit e888f28d1fd9f125fc70b2f5d1b3c42d8f25ae53) --- src/gallium/drivers/virgl/virgl_context.c | 33 ++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c index b1cd5e7ba4e..61fb3f0636a 100644 --- a/src/gallium/drivers/virgl/virgl_context.c +++ b/src/gallium/drivers/virgl/virgl_context.c @@ -49,6 +49,8 @@ struct virgl_vertex_elements_state { uint32_t handle; + uint8_t binding_map[PIPE_MAX_ATTRIBS]; + uint8_t num_bindings; }; static uint32_t next_handle; @@ -389,10 +391,28 @@ static void *virgl_create_vertex_elements_state(struct pipe_context *ctx, unsigned num_elements, const struct pipe_vertex_element *elements) { + struct pipe_vertex_element new_elements[PIPE_MAX_ATTRIBS]; struct virgl_context *vctx = virgl_context(ctx); struct virgl_vertex_elements_state *state = CALLOC_STRUCT(virgl_vertex_elements_state); + for (int i = 0; i < num_elements; ++i) { + if (elements[i].instance_divisor) { + /* Virglrenderer doesn't deal with instance_divisor correctly if + * there isn't a 1:1 relationship between elements and bindings. + * So let's make sure there is, by duplicating bindings. + */ + for (int j = 0; j < num_elements; ++j) { + new_elements[j] = elements[j]; + new_elements[j].vertex_buffer_index = j; + state->binding_map[j] = elements[j].vertex_buffer_index; + } + elements = new_elements; + state->num_bindings = num_elements; + break; + } + } + state->handle = virgl_object_assign_handle(); virgl_encoder_create_vertex_elements(vctx, state->handle, num_elements, elements); @@ -418,6 +438,7 @@ static void virgl_bind_vertex_elements_state(struct pipe_context *ctx, vctx->vertex_elements = state; virgl_encode_bind_object(vctx, state ? 
state->handle : 0, VIRGL_OBJECT_VERTEX_ELEMENTS); + vctx->vertex_array_dirty = TRUE; } static void virgl_set_vertex_buffers(struct pipe_context *ctx, @@ -439,7 +460,17 @@ static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx) struct virgl_context *vctx = virgl_context(ctx); if (vctx->vertex_array_dirty) { - virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer); + struct virgl_vertex_elements_state *ve = vctx->vertex_elements; + + if (ve->num_bindings) { + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + for (int i = 0; i < ve->num_bindings; ++i) + vertex_buffers[i] = vctx->vertex_buffer[ve->binding_map[i]]; + + virgl_encoder_set_vertex_buffers(vctx, ve->num_bindings, vertex_buffers); + } else + virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer); + virgl_attach_res_vertex_buffers(vctx); } } From 76dd56bfd54a5003a8f8744b59398b88b4703330 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 11 Dec 2018 16:20:40 +0000 Subject: [PATCH 128/220] glx: mandate xf86vidmode only for "drm" dri platforms Currently we have the three dri "platforms" - drm, apple and windows. Since xf86vidmode is a thing only for the drm one, adjust the preprocessor guards and correctly check for the dependency. v2: terminate the GLX_USE_WINDOWSGL hunk Cc: Jon TURNEY Fixes: 5bc509363b6 ("glx: make xf86vidmode mandatory for direct rendering") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker Acked-by: Eric Engestrom (cherry picked from commit a95ec13879d4f04d01fc04a62503578e85c846a8) Squashed with commit: glx: Fix compilation with GLX_USE_WINDOWSGL Sadly, the GLX_USE_APPLEGL and GLX_USE_WINDOWSGL cases are not identical (because GLX_USE_WINDOWSGL uses vtables rather than a maze of ifdefs) Include again, as functions prototyped by it are used in the GLX_USE_WINDOWSGL path. 
Make the include guard around the __glxGetMscRate() definition match the one at it's declaration again, as it's referenced from dri_common.c which is built for GLX_USE_WINDOWSGL. Fixes: a95ec138 ("glx: mandate xf86vidmode only for "drm" dri platforms") Signed-off-by: Jon Turney Reviewed-by: Emil Velikov (cherry picked from commit d512b35b62ff928b880a67887d36f1568aaa5e4b) --- configure.ac | 4 ++-- meson.build | 4 ++-- src/glx/glxcmds.c | 6 +++++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index a3d10cf40e1..b1c6967afee 100644 --- a/configure.ac +++ b/configure.ac @@ -1716,6 +1716,8 @@ xdri) if test x"$enable_dri" = xyes; then dri_modules="$dri_modules xcb-dri2 >= $XCBDRI2_REQUIRED" fi + + dri_modules="$dri_modules xxf86vm" fi if test x"$dri_platform" = xapple ; then DEFINES="$DEFINES -DGLX_USE_APPLEGL" @@ -1725,8 +1727,6 @@ xdri) fi fi - dri_modules="$dri_modules xxf86vm" - PKG_CHECK_MODULES([DRIGL], [$dri_modules]) GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules" X11_INCLUDES="$X11_INCLUDES $DRIGL_CFLAGS" diff --git a/meson.build b/meson.build index 33f4e5ad3cf..89f3eae92fb 100644 --- a/meson.build +++ b/meson.build @@ -1350,7 +1350,6 @@ if with_platform_x11 dep_xdamage = dependency('xdamage', version : '>= 1.1') dep_xfixes = dependency('xfixes') dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1') - dep_xxf86vm = dependency('xxf86vm') endif if (with_any_vk or with_glx == 'dri' or (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or @@ -1377,6 +1376,7 @@ if with_platform_x11 if with_glx == 'dri' if with_dri_platform == 'drm' dep_dri2proto = dependency('dri2proto', version : '>= 2.8') + dep_xxf86vm = dependency('xxf86vm') endif dep_glproto = dependency('glproto', version : '>= 1.4.14') endif @@ -1427,8 +1427,8 @@ elif with_glx == 'dri' 'xcb-glx >= 1.8.1'] if with_dri_platform == 'drm' gl_priv_reqs += 'xcb-dri2 >= 1.8' + gl_priv_reqs += 'xxf86vm' endif - gl_priv_reqs += 'xxf86vm' endif if dep_libdrm.found() 
gl_priv_reqs += 'libdrm >= 2.4.75' diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 79e3503be8f..424008fd670 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -46,7 +46,9 @@ #include "util/debug.h" #else #include +#ifndef GLX_USE_WINDOWSGL #include +#endif /* GLX_USE_WINDOWSGL */ #endif #endif @@ -2069,6 +2071,7 @@ _X_HIDDEN GLboolean __glxGetMscRate(struct glx_screen *psc, int32_t * numerator, int32_t * denominator) { +#if !defined(GLX_USE_WINDOWSGL) XF86VidModeModeLine mode_line; int dot_clock; int i; @@ -2115,6 +2118,7 @@ __glxGetMscRate(struct glx_screen *psc, return True; } +#endif return False; } @@ -2140,7 +2144,7 @@ _X_HIDDEN GLboolean __glXGetMscRateOML(Display * dpy, GLXDrawable drawable, int32_t * numerator, int32_t * denominator) { -#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) +#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) && !defined(GLX_USE_WINDOWSGL) __GLXDRIdrawable *draw = GetGLXDRIDrawable(dpy, drawable); if (draw == NULL) From 8b7b2222f1b6260b99ab0291783bcd635d5bc950 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 13 Dec 2018 03:29:04 +0000 Subject: [PATCH 129/220] radv/xfb: fix counter buffer bounds checks. If we gave this function 0 counter buffers, we'd still try and access pCounterBuffers[0] as this check was incorrect. Fixes crash with ext_transform_feedback-pipeline-basic-primgen on zink on radv. Fixes: 677b496b6 (radv: fix begin/end transform feedback with 0 counter buffers.) 
Signed-off-by: Dave Airlie Reviewed-by: Samuel Pitoiset (cherry picked from commit b3f2b03ece06327276b183d34fc6f94a2424f094) --- src/amd/vulkan/radv_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 3eb4b312aa7..f4ad4b216e9 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -4792,7 +4792,7 @@ void radv_CmdBeginTransformFeedbackEXT( assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); for_each_bit(i, so->enabled_mask) { int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount) + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) counter_buffer_idx = -1; /* SI binds streamout buffers as shader resources. @@ -4854,7 +4854,7 @@ void radv_CmdEndTransformFeedbackEXT( assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); for_each_bit(i, so->enabled_mask) { int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx > counterBufferCount) + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) counter_buffer_idx = -1; if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { From 16aa279d8d7703ad6c7e1669635953d7a0caee0f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Dec 2018 16:09:16 -0500 Subject: [PATCH 130/220] pci_ids: add new vega10 pci ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 56cf25a114436dfc4006296ecef820057f7391b6) --- include/pci_ids/radeonsi_pci_ids.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index 35ea3559b02..f7defc4197a 100644 --- 
a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -227,8 +227,14 @@ CHIPSET(0x6863, VEGA10) CHIPSET(0x6864, VEGA10) CHIPSET(0x6867, VEGA10) CHIPSET(0x6868, VEGA10) -CHIPSET(0x687F, VEGA10) +CHIPSET(0x6869, VEGA10) +CHIPSET(0x686A, VEGA10) +CHIPSET(0x686B, VEGA10) CHIPSET(0x686C, VEGA10) +CHIPSET(0x686D, VEGA10) +CHIPSET(0x686E, VEGA10) +CHIPSET(0x686F, VEGA10) +CHIPSET(0x687F, VEGA10) CHIPSET(0x69A0, VEGA12) CHIPSET(0x69A1, VEGA12) From 721f6675094d51421ceb102b6ba60125e2a31832 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Dec 2018 16:10:33 -0500 Subject: [PATCH 131/220] pci_ids: add new vega20 pci id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 4db4b3447d5258a26a805766e156ea0a338c95b7) --- include/pci_ids/radeonsi_pci_ids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index f7defc4197a..a2bc9213207 100644 --- a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -246,6 +246,7 @@ CHIPSET(0x66A0, VEGA20) CHIPSET(0x66A1, VEGA20) CHIPSET(0x66A2, VEGA20) CHIPSET(0x66A3, VEGA20) +CHIPSET(0x66A4, VEGA20) CHIPSET(0x66A7, VEGA20) CHIPSET(0x66AF, VEGA20) From efc31a64eeee9600a26f4b0a201c6af8fed36167 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 11 Dec 2018 16:14:03 -0800 Subject: [PATCH 132/220] v3d: Make sure that a thrsw doesn't split a multop from its umul24. The thrsw will invalidate rtop, just like accumulators and flags. Caught by simulator assertions in CS imulextended/umulextended tests. 
Fixes: 90269ba35333 ("broadcom/vc5: Use THRSW to enable multi-threaded shaders.") (cherry picked from commit 3f9bcf9136af794d44fa4b0802c0d4df6b170175) --- src/broadcom/compiler/qpu_schedule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 4f3b621fd29..54483195952 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -392,6 +392,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) for (int i = 0; i < ARRAY_SIZE(state->last_r); i++) add_write_dep(state, &state->last_r[i], n); add_write_dep(state, &state->last_sf, n); + add_write_dep(state, &state->last_rtop, n); /* Scoreboard-locking operations have to stay after the last * thread switch. From 7d4cad25a6c4b1dd4ece47c2ab018f0bb107afa0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 10 Dec 2018 16:47:13 -0800 Subject: [PATCH 133/220] v3d: Add missing flagging of SYNCB as a TSY op. Fixes: f2e41daac577 ("broadcom/vc5: Update QPU instruction pack/unpack for v4.2.") (cherry picked from commit ff80e58b38a13c97a4ee598497e3e7b886918087) --- src/broadcom/qpu/qpu_instr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 0846cc86174..147017a6594 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -551,6 +551,7 @@ bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) { return (waddr == V3D_QPU_WADDR_SYNC || + waddr == V3D_QPU_WADDR_SYNCB || waddr == V3D_QPU_WADDR_SYNCU); } From ce2df23205d2befc578bbcf4c5992034654b3895 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Thu, 13 Dec 2018 15:53:42 -0500 Subject: [PATCH 134/220] clover: Fix build after clang r348827 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeGenOptions were moved to Basic. 
Signed-off-by: Jan Vesely Reviewed-by: Aaron Watry Tested-by: Aaron Watry Reviewed-by: Kai Wasserbäch CC: mesa-stable@lists.freedesktop.org (cherry picked from commit e4f9a37ace750646fd75fbbdff9b5e77a0b26cfb) --- src/gallium/state_trackers/clover/llvm/compat.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp b/src/gallium/state_trackers/clover/llvm/compat.hpp index 975012cbda4..b91cb95a295 100644 --- a/src/gallium/state_trackers/clover/llvm/compat.hpp +++ b/src/gallium/state_trackers/clover/llvm/compat.hpp @@ -58,9 +58,14 @@ #include #include -#include #include +#if HAVE_LLVM >= 0x0800 +#include +#else +#include +#endif + namespace clover { namespace llvm { namespace compat { From 083f5fccb9e3849d955034ff7455e3fb60f7984f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 6 Dec 2018 14:31:20 -0600 Subject: [PATCH 135/220] nir/constant_folding: Fix source bit size logic Instead of looking at input_sizes[i] which contains the number of components for each source, we look at the bit size of input_types[i]. This fixes a regression in the 1-bit boolean series though I have no idea how we haven't seen it before now. 
Fixes: 35baee5dce5 "nir/constant_folding: fix incorrect bit-size check" Fixes: 9076c4e289d "nir: update opcode definitions for different bit sizes" Reviewed-by: Eric Anholt Reviewed-by: Bas Nieuwenhuizen Tested-by: Bas Nieuwenhuizen (cherry picked from commit 3595a0abf43be3ce27d88f5939b257a74e90035b) [Emil: resolve trivial conflicts] Signed-off-by: Emil Velikov Conflicts: src/compiler/nir/nir_opt_constant_folding.c --- src/compiler/nir/nir_opt_constant_folding.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c index 5929a60aee8..be91a2a8fd6 100644 --- a/src/compiler/nir/nir_opt_constant_folding.c +++ b/src/compiler/nir/nir_opt_constant_folding.c @@ -64,9 +64,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) return false; if (bit_size == 0 && - !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_sizes[i])) { + !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_types[i])) bit_size = instr->src[i].src.ssa->bit_size; - } nir_instr *src_instr = instr->src[i].src.ssa->parent_instr; From e738fc1ec74d7108484155b22ac74af48e5672b4 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 14 Dec 2018 16:47:09 +0000 Subject: [PATCH 136/220] radv: don't set surf_index for stencil-only images Fixes: f8d5b377c8b ('radv: set cb base tile swizzles for MRT speedups (v4)') Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108116 Signed-off-by: Rhys Perry Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Samuel Pitoiset (cherry picked from commit bba94a3d85c8799b2441a5d41015091e8903738f) --- src/amd/vulkan/radv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 4f02be40185..daabc489afb 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -985,7 +985,7 @@ radv_image_create(VkDevice _device, image->shareable = 
vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL; - if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) { + if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) { image->info.surf_index = &device->image_mrt_offset_counter; } From e06618ca8031838f7df9a3d4bb5e4fee382734bb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 Jun 2018 17:18:30 -0700 Subject: [PATCH 137/220] i965/vec4/dce: Don't narrow the write mask if the flags are used In an instruction sequence like cmp(8).ge.f0.0 vgrf17:D, vgrf2.xxxx:D, vgrf9.xxxx:D (+f0.0) sel(8) vgrf1:UD, vgrf8.xyzw:UD, vgrf1.xyzw:UD The other fields of vgrf17 may be unused, but the CMP still needs to generate the other flag bits. To my surprise, nothing in shader-db or any test suite appears to hit this. However, I have a change to brw_vec4_cmod_propagation that creates cases where this can happen. This fix prevents a couple dozen regressions in that patch. 
Signed-off-by: Ian Romanick Reviewed-by: Lionel Landwerlin Fixes: 5df88c20 ("i965/vec4: Rewrite dead code elimination to use live in/out.") (cherry picked from commit 440c051340669e809511c05370d6d703c70f6d0e) --- src/intel/Makefile.compiler.am | 5 + .../compiler/brw_vec4_dead_code_eliminate.cpp | 47 ++++- src/intel/compiler/meson.build | 3 +- .../test_vec4_dead_code_eliminate.cpp | 163 ++++++++++++++++++ 4 files changed, 208 insertions(+), 10 deletions(-) create mode 100644 src/intel/compiler/test_vec4_dead_code_eliminate.cpp diff --git a/src/intel/Makefile.compiler.am b/src/intel/Makefile.compiler.am index cd7e6882fb9..7c33e35816b 100644 --- a/src/intel/Makefile.compiler.am +++ b/src/intel/Makefile.compiler.am @@ -64,6 +64,7 @@ COMPILER_TESTS = \ compiler/test_vf_float_conversions \ compiler/test_vec4_cmod_propagation \ compiler/test_vec4_copy_propagation \ + compiler/test_vec4_dead_code_eliminate \ compiler/test_vec4_register_coalesce TESTS += $(COMPILER_TESTS) @@ -97,6 +98,10 @@ compiler_test_vec4_cmod_propagation_SOURCES = \ compiler/test_vec4_cmod_propagation.cpp compiler_test_vec4_cmod_propagation_LDADD = $(TEST_LIBS) +compiler_test_vec4_dead_code_eliminate_SOURCES = \ + compiler/test_vec4_dead_code_eliminate.cpp +compiler_test_vec4_dead_code_eliminate_LDADD = $(TEST_LIBS) + # Strictly speaking this is neither a C++ test nor using gtest - we can address # address that at a later point. Until then, this allows us a to simplify things. 
compiler_test_eu_compact_SOURCES = \ diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp index c09a3d7ebe9..99e4c9cacaf 100644 --- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp @@ -81,17 +81,46 @@ vec4_visitor::dead_code_eliminate() result_live[3] = result; } - for (int c = 0; c < 4; c++) { - if (!result_live[c] && inst->dst.writemask & (1 << c)) { - inst->dst.writemask &= ~(1 << c); + if (inst->writes_flag()) { + /* Independently calculate the usage of the flag components and + * the destination value components. + */ + uint8_t flag_mask = inst->dst.writemask; + uint8_t dest_mask = inst->dst.writemask; + + for (int c = 0; c < 4; c++) { + if (!result_live[c] && dest_mask & (1 << c)) + dest_mask &= ~(1 << c); + + if (!BITSET_TEST(flag_live, c)) + flag_mask &= ~(1 << c); + } + + if (inst->dst.writemask != (flag_mask | dest_mask)) { progress = true; + inst->dst.writemask = flag_mask | dest_mask; + } - if (inst->dst.writemask == 0) { - if (inst->writes_accumulator || inst->writes_flag()) { - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); - } else { - inst->opcode = BRW_OPCODE_NOP; - break; + /* If none of the destination components are read, replace the + * destination register with the NULL register. 
+ */ + if (dest_mask == 0) { + progress = true; + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); + } + } else { + for (int c = 0; c < 4; c++) { + if (!result_live[c] && inst->dst.writemask & (1 << c)) { + inst->dst.writemask &= ~(1 << c); + progress = true; + + if (inst->dst.writemask == 0) { + if (inst->writes_accumulator) { + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); + } else { + inst->opcode = BRW_OPCODE_NOP; + break; + } } } } diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 3cdeb6214a8..f2854be779a 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -145,7 +145,8 @@ if with_tests foreach t : ['fs_cmod_propagation', 'fs_copy_propagation', 'fs_saturate_propagation', 'vf_float_conversions', 'vec4_register_coalesce', 'vec4_copy_propagation', - 'vec4_cmod_propagation', 'eu_compact', 'eu_validate'] + 'vec4_cmod_propagation', 'vec4_dead_code_eliminate', + 'eu_compact', 'eu_validate'] test( t, executable( diff --git a/src/intel/compiler/test_vec4_dead_code_eliminate.cpp b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp new file mode 100644 index 00000000000..25739c2895a --- /dev/null +++ b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp @@ -0,0 +1,163 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include "brw_vec4.h" +#include "program/program.h" + +using namespace brw; + +class dead_code_eliminate_test : public ::testing::Test { + virtual void SetUp(); + +public: + struct brw_compiler *compiler; + struct gen_device_info *devinfo; + struct gl_context *ctx; + struct gl_shader_program *shader_prog; + struct brw_vue_prog_data *prog_data; + vec4_visitor *v; +}; + +class dead_code_eliminate_vec4_visitor : public vec4_visitor +{ +public: + dead_code_eliminate_vec4_visitor(struct brw_compiler *compiler, + nir_shader *shader, + struct brw_vue_prog_data *prog_data) + : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL, + false /* no_spills */, -1) + { + prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + } + +protected: + virtual dst_reg *make_reg_for_system_value(int /* location */) + { + unreachable("Not reached"); + } + + virtual void setup_payload() + { + unreachable("Not reached"); + } + + virtual void emit_prolog() + { + unreachable("Not reached"); + } + + virtual void emit_thread_end() + { + unreachable("Not reached"); + } + + virtual void emit_urb_write_header(int /* mrf */) + { + unreachable("Not reached"); + } + + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) + { + unreachable("Not reached"); + } +}; + + +void dead_code_eliminate_test::SetUp() +{ + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct gen_device_info *)calloc(1, 
sizeof(*devinfo)); + prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data)); + compiler->devinfo = devinfo; + + nir_shader *shader = + nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL, NULL); + + v = new dead_code_eliminate_vec4_visitor(compiler, shader, prog_data); + + devinfo->gen = 4; +} + +static void +dead_code_eliminate(vec4_visitor *v) +{ + bool print = false; + + if (print) { + fprintf(stderr, "instructions before:\n"); + v->dump_instructions(); + } + + v->calculate_cfg(); + v->dead_code_eliminate(); + + if (print) { + fprintf(stderr, "instructions after:\n"); + v->dump_instructions(); + } +} + +TEST_F(dead_code_eliminate_test, some_dead_channels_all_flags_used) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + src_reg r1 = src_reg(v, glsl_type::vec4_type); + src_reg r2 = src_reg(v, glsl_type::vec4_type); + src_reg r3 = src_reg(v, glsl_type::vec4_type); + src_reg r4 = src_reg(v, glsl_type::vec4_type); + src_reg r5 = src_reg(v, glsl_type::vec4_type); + src_reg r6 = src_reg(v, glsl_type::vec4_type); + + /* Sequence like the following should not be modified by DCE. + * + * cmp.l.f0(8) g4<1>F g2<4,4,1>.wF g1<4,4,1>.xF + * mov(8) g5<1>.xF g4<4,4,1>.xF + * (+f0.x) sel(8) g6<1>UD g3<4>UD g6<4>UD + */ + vec4_instruction *test_cmp = + bld.CMP(dst_reg(r4), r2, r1, BRW_CONDITIONAL_L); + + test_cmp->src[0].swizzle = BRW_SWIZZLE_WWWW; + test_cmp->src[1].swizzle = BRW_SWIZZLE_XXXX; + + vec4_instruction *test_mov = + bld.MOV(dst_reg(r5), r4); + + test_mov->dst.writemask = WRITEMASK_X; + test_mov->src[0].swizzle = BRW_SWIZZLE_XXXX; + + vec4_instruction *test_sel = + bld.SEL(dst_reg(r6), r3, r6); + + set_predicate(BRW_PREDICATE_NORMAL, test_sel); + + /* The scratch write is here just to make r5 and r6 be live so that the + * whole program doesn't get eliminated by DCE. 
+ */ + v->emit(v->SCRATCH_WRITE(dst_reg(r4), r6, r5)); + + dead_code_eliminate(v); + + EXPECT_EQ(test_cmp->dst.writemask, WRITEMASK_XYZW); +} From f96bc5357f5573b17e6204353d4ab73df1e1d6e4 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 4 Dec 2018 09:28:10 -0800 Subject: [PATCH 138/220] meson: Fix ppc64 little endian detection Old versions of meson returned ppc64le as the cpu_family for little endian power8 cpus, versions >=0.48 don't do this, so the check wouldn't work in that case. This generalizes the check to work for both old and new versions of meson. Fixes: 34bbb24ce7702658cdc4e9d34a650e169716c39e ("meson: Add support for ppc assembly/optimizations") Reviewed-by: Eric Engestrom (cherry picked from commit e430a034b9d2be626557931cd29808a3161889f1) --- meson.build | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index 89f3eae92fb..7e3c0d2992c 100644 --- a/meson.build +++ b/meson.build @@ -628,7 +628,12 @@ if with_gallium_st_nine endif if get_option('power8') != 'false' - if host_machine.cpu_family() == 'ppc64le' + # on old versions of meson the cpu family would return as ppc64le on little + # endian power8, this was changed in 0.48 such that the family would always + # be ppc64 regardless of endianness, and the the machine.endian() value + # should be checked. Since we support versions < 0.48 we need to use + # startswith. 
+ if host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little' if cc.get_id() == 'gcc' and cc.version().version_compare('< 4.8') error('Altivec is not supported with gcc version < 4.8.') endif @@ -969,7 +974,7 @@ if with_asm with_asm_arch = 'sparc' pre_args += ['-DUSE_SPARC_ASM'] endif - elif host_machine.cpu_family() == 'ppc64le' + elif host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little' if system_has_kms_drm with_asm_arch = 'ppc64le' pre_args += ['-DUSE_PPC64LE_ASM'] From 40c012a5aa39dddaab34b25a91db5b1bb89fb369 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 4 Dec 2018 09:56:30 -0800 Subject: [PATCH 139/220] meson: Add support for gnu hurd CC: 18.3 Reviewed-by: Eric Engestrom (cherry picked from commit 8c77f4c76ddfe0b692b430b012b65f6981a53336) --- meson.build | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/meson.build b/meson.build index 7e3c0d2992c..5045dca0149 100644 --- a/meson.build +++ b/meson.build @@ -223,8 +223,6 @@ elif system_has_kms_drm else # FIXME: haiku doesn't use dri, and xlib doesn't use dri, probably should # assert here that one of those cases has been met. 
- # FIXME: GNU (hurd) ends up here as well, but meson doesn't officially - # support Hurd at time of writing (2017/11) # FIXME: illumos ends up here as well with_dri_platform = 'none' endif @@ -786,7 +784,7 @@ if cc.compiles('int foo(void) __attribute__((__noreturn__));', endif # TODO: this is very incomplete -if ['linux', 'cygwin'].contains(host_machine.system()) +if ['linux', 'cygwin', 'gnu'].contains(host_machine.system()) pre_args += '-D_GNU_SOURCE' endif @@ -945,7 +943,7 @@ endif with_asm_arch = '' if with_asm if host_machine.cpu_family() == 'x86' - if system_has_kms_drm + if system_has_kms_drm or host_machine.system() == 'gnu' with_asm_arch = 'x86' pre_args += ['-DUSE_X86_ASM', '-DUSE_MMX_ASM', '-DUSE_3DNOW_ASM', '-DUSE_SSE_ASM'] From 336c7bf5971c97949de8ca14537dce62c5ac5937 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 4 Dec 2018 10:06:08 -0800 Subject: [PATCH 140/220] meson: Add toggle for glx-direct GNU Hurd needs to turn off glx-direct, rather than special case it, we'll just add a toggle. 
CC: 18.3 Reviewed-by: Eric Engestrom (cherry picked from commit 7a90886921eb1d5d73b40aadd6fd3f340041bd26) --- meson.build | 4 +--- meson_options.txt | 6 ++++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 5045dca0149..2fa861bae2a 100644 --- a/meson.build +++ b/meson.build @@ -54,6 +54,7 @@ with_valgrind = get_option('valgrind') with_libunwind = get_option('libunwind') with_asm = get_option('asm') with_glx_read_only_text = get_option('glx-read-only-text') +with_glx_direct = get_option('glx-direct') with_osmesa = get_option('osmesa') with_swr_arches = get_option('swr-arches') with_tools = get_option('tools') @@ -368,9 +369,6 @@ if with_glvnd endif endif -# TODO: toggle for this -with_glx_direct = true - if with_vulkan_icd_dir == '' with_vulkan_icd_dir = join_paths(get_option('datadir'), 'vulkan/icd.d') endif diff --git a/meson_options.txt b/meson_options.txt index a1d5ab0e185..589d10bb3f3 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -318,3 +318,9 @@ option( choices : ['auto', 'true', 'false'], description : 'Enable VK_EXT_acquire_xlib_display.' ) +option( + 'glx-direct', + type : 'boolean', + value : true, + description : 'Enable direct rendering in GLX and EGL for DRI', +) From 72a6f5d1a3acdfc383b1968c4cabaf1deae0a09e Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 17 Dec 2018 18:48:17 -0800 Subject: [PATCH 141/220] Revert "nir/lower_indirect: Bail early if modes == 0" "There's no point in walking the program if we're never going to actually lower anything." Except we might lower compacted local arrays. In that case, modes will be 0, but there is still lowering to be done. This reverts commit 7f75cf2a9408b9af562e033ef6c1d1fd15141421. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109081 Suggested-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Lionel Landwerlin Tested-by: Clayton Craft Cc: Kenneth Graunke (cherry picked from commit 29e4b949b45b468c366b9865298391c89ba6642c) --- src/compiler/nir/nir_lower_indirect_derefs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c index 897a0620872..40b90e6a313 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs.c +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -205,9 +205,6 @@ nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes) { bool progress = false; - if (modes == 0) - return false; - nir_foreach_function(function, shader) { if (function->impl) progress = lower_indirects_impl(function->impl, modes) || progress; From f5461577e7c4aecabaa5c683688d17174d49a1c1 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sun, 16 Dec 2018 16:35:00 -0800 Subject: [PATCH 142/220] meson: Fix typo. 
Fixes: 6b4c7047d571 ("meson: build gallium nine state_tracker") Signed-off-by: Vinson Lee Reviewed-by: Emil Velikov (cherry picked from commit 84f39e5971d77549293a1b8abd479cca2ff4b97e) --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 2fa861bae2a..210b694f550 100644 --- a/meson.build +++ b/meson.build @@ -616,7 +616,7 @@ if with_gallium_st_nine error('The nine state tracker requires gallium softpipe/llvmpipe.') elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600 or with_gallium_r300 or with_gallium_svga or with_gallium_i915) - error('The nine state tracker requires at least on non-swrast gallium driver.') + error('The nine state tracker requires at least one non-swrast gallium driver.') endif if not with_dri3 error('Using nine with wine requires dri3') From 778d11ddd91732f0084de253991676d8f89f9b32 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 18 Dec 2018 09:42:04 -0800 Subject: [PATCH 143/220] meson: Fix libsensors detection. 
Fixes: 5e71efef44b9 ("meson: Add lmsensors support") Signed-off-by: Vinson Lee Reviewed-by: Dylan Baker (cherry picked from commit 0f7ba5758bd8a3e12b8b33ab471336eaa7dd6fbc) --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 210b694f550..53f3a48dc11 100644 --- a/meson.build +++ b/meson.build @@ -1398,7 +1398,7 @@ endif _sensors = get_option('lmsensors') if _sensors != 'false' - dep_lmsensors = cc.find_library('libsensors', required : _sensors == 'true') + dep_lmsensors = cc.find_library('sensors', required : _sensors == 'true') if dep_lmsensors.found() pre_args += '-DHAVE_LIBSENSORS=1' endif From 3af043858503c1ce066989f9531e95332bbf1bc0 Mon Sep 17 00:00:00 2001 From: Caio Marcelo de Oliveira Filho Date: Fri, 14 Dec 2018 22:19:24 -0800 Subject: [PATCH 144/220] nir: properly clear the entry sources in copy_prop_vars When updating a copy entry source value from a "non-SSA" (the data come from a copy instruction) to a "SSA" (the data or parts of it come from SSA values), it was possible to hold invalid data in ssa[0] depending on the writemask. Because of the union, ssa[0] could contain a pointer to a nir_deref_instr left-over from previous non-SSA usage. Change code to clean up the array before use to avoid invalid data around. 
Fixes: 62332d139c8 "nir: Add a local variable-based copy propagation pass" Reviewed-by: Jason Ekstrand (cherry picked from commit 0ddc911f4d83a3c698ea02696e1b8706b2cce381) --- src/compiler/nir/nir_opt_copy_prop_vars.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 7a21ad56c79..1e6158bf511 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -337,6 +337,9 @@ store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry, const struct value *value, unsigned write_mask) { if (value->is_ssa) { + /* Clear src if it was being used as non-SSA. */ + if (!entry->src.is_ssa) + memset(entry->src.ssa, 0, sizeof(entry->src.ssa)); entry->src.is_ssa = true; /* Only overwrite the written components */ for (unsigned i = 0; i < 4; i++) { From bcfca5b35cfec467ef36487116a56dce85c589db Mon Sep 17 00:00:00 2001 From: Caio Marcelo de Oliveira Filho Date: Fri, 14 Dec 2018 16:10:32 -0800 Subject: [PATCH 145/220] nir: properly find the entry to keep in copy_prop_vars When copy propagation handles a store/copy, it iterates the current copy entries to remove aliases, but keeps the "equal" entry (if exists) to be updated. The removal step may swap the entries around (to ensure there are no holes), invalidating previous iteration pointers. The bug was saving such pointer to use later. Change the code to first perform the removals and then find the remaining right entry. This was causing updates to be lost since they were being made to an entry that was not part of the current copies. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108624 Fixes: b3c61469255 "nir: Copy propagation between blocks" Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Jason Ekstrand (cherry picked from commit 947f7b452a550c66cfb9a8c9518e35635eb25947) --- src/compiler/nir/nir_opt_copy_prop_vars.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 1e6158bf511..29b2caba892 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -265,7 +265,7 @@ lookup_entry_and_kill_aliases(struct util_dynarray *copies, { /* TODO: Take into account the write_mask. */ - struct copy_entry *entry = NULL; + nir_deref_instr *dst_match = NULL; util_dynarray_foreach_reverse(copies, struct copy_entry, iter) { if (!iter->src.is_ssa) { /* If this write aliases the source of some entry, get rid of it */ @@ -278,13 +278,26 @@ lookup_entry_and_kill_aliases(struct util_dynarray *copies, nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref); if (comp & nir_derefs_equal_bit) { - assert(entry == NULL); - entry = iter; + /* Removing entries invalidate previous iter pointers, so we'll + * collect the matching entry later. Just make sure it is unique. + */ + assert(!dst_match); + dst_match = iter->dst; } else if (comp & nir_derefs_may_alias_bit) { copy_entry_remove(copies, iter); } } + struct copy_entry *entry = NULL; + if (dst_match) { + util_dynarray_foreach(copies, struct copy_entry, iter) { + if (iter->dst == dst_match) { + entry = iter; + break; + } + } + assert(entry); + } return entry; } From a8c9d5a9a2c2a2f996468d48910b9f8e40961ca5 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 13 Dec 2018 03:54:03 +0000 Subject: [PATCH 146/220] meson: don't require glx/egl/gbm with gallium drivers The gallium drivers do not require a DRI loader. Drop the artificial and unnecessary restriction. 
Fixes: af9d276134d ("meson: build libmesa_gallium") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 9d10581897ef7cfa0f6c392e2048cc04357281b9) --- meson.build | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 53f3a48dc11..94f891efa68 100644 --- a/meson.build +++ b/meson.build @@ -384,9 +384,9 @@ endif if with_any_vk and (with_platform_x11 and not with_dri3) error('Vulkan drivers require dri3 for X11 support') endif -if with_dri or with_gallium - if with_glx == 'disabled' and not with_egl and not with_platform_haiku - error('building dri or gallium drivers require at least one window system') +if with_dri + if with_glx == 'disabled' and not with_egl and not with_gbm + error('building dri drivers require at least one windowing system') endif endif From 1319c87304000d282984bf257a7bb4781752e2c7 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 13 Dec 2018 04:10:50 +0000 Subject: [PATCH 147/220] pipe-loader: meson: reference correct library The library is called libgalliumvl_stub - note singular. 
Fixes: 42ea0631f10 ("meson: build clover") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 2eedb79e1a2e92648bd245f9db88ecb7b587b7fd) --- src/gallium/targets/pipe-loader/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/targets/pipe-loader/meson.build b/src/gallium/targets/pipe-loader/meson.build index 5a44102a69d..e9454d5666a 100644 --- a/src/gallium/targets/pipe-loader/meson.build +++ b/src/gallium/targets/pipe-loader/meson.build @@ -31,7 +31,7 @@ if (with_gallium_va or with_gallium_vdpau or with_gallium_omx != 'disabled' or with_gallium_xvmc or with_dri) pipe_loader_link_with += libgalliumvl else - pipe_loader_link_with += libgalliumvl_stubs + pipe_loader_link_with += libgalliumvl_stub endif if (with_gallium_va or with_gallium_vdpau or with_gallium_omx != 'disabled' or with_gallium_xvmc) From 62e5d649b71cf30cb252c85b9495f7c3399a0ede Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 12 Dec 2018 17:47:36 +0000 Subject: [PATCH 148/220] TODO: glx: meson: build dri based glx tests, only with -Dglx=dri The library itself (libGL) is only built when -Dglx=dri, yet its accompanying tests are built even with -Dglx=xlib. 
Adjust the guards, so we don't build the tests when they are not applicable v2: - Reword commit message (Dylan) - Drop build_by_default hunk (Dylan) Fixes: a47c525f328 ("meson: build glx") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit 9527f9ea2611b0793377016c8c16ec58ded7f287) --- src/glx/meson.build | 33 +++++++++++++++------------------ src/meson.build | 2 +- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/glx/meson.build b/src/glx/meson.build index f3bbcb433ad..6bd3e462656 100644 --- a/src/glx/meson.build +++ b/src/glx/meson.build @@ -150,26 +150,23 @@ libglx = static_library( extra_libs_libglx, ], dependencies : [dep_libdrm, dep_dri2proto, dep_glproto, dep_x11, dep_glvnd], - build_by_default : false, ) -if with_glx == 'dri' - libgl = shared_library( - gl_lib_name, - [], - include_directories : [inc_common, inc_glapi, inc_loader, inc_gl_internal], - link_with : [libglapi_static, libglapi], - link_whole : libglx, - link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl], - dependencies : [ - dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb, - dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, dep_xxf86vm, - extra_deps_libgl, - ], - version : gl_lib_version, - install : true, - ) -endif +libgl = shared_library( + gl_lib_name, + [], + include_directories : [inc_common, inc_glapi, inc_loader, inc_gl_internal], + link_with : [libglapi_static, libglapi], + link_whole : libglx, + link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl], + dependencies : [ + dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb, + dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, dep_xxf86vm, + extra_deps_libgl, + ], + version : gl_lib_version, + install : true, +) if with_tests subdir('tests') diff --git a/src/meson.build b/src/meson.build index 0d0ecf2c530..3b91c6a88c5 100644 --- a/src/meson.build +++ b/src/meson.build @@ -71,7 +71,7 @@ 
subdir('loader') if with_platform_haiku subdir('hgl') endif -if with_glx != 'disabled' +if with_glx == 'dri' subdir('glx') endif if with_gbm From 068d7550fd7e05ea50bd12347ed9cfc5f47687a1 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 12 Dec 2018 17:55:08 +0000 Subject: [PATCH 149/220] glx: meson: drop includes from a link-only library When producing the final libGL.so/libGLX_mesa.so we only link the local static helper lib (libglx). Thus there's no reason for the includes. Fixes: a47c525f328 ("meson: build glx") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit b44875e2dc650c58986480f3360fccfc8d890605) --- src/glx/meson.build | 1 - 1 file changed, 1 deletion(-) diff --git a/src/glx/meson.build b/src/glx/meson.build index 6bd3e462656..a61f959e800 100644 --- a/src/glx/meson.build +++ b/src/glx/meson.build @@ -155,7 +155,6 @@ libglx = static_library( libgl = shared_library( gl_lib_name, [], - include_directories : [inc_common, inc_glapi, inc_loader, inc_gl_internal], link_with : [libglapi_static, libglapi], link_whole : libglx, link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl], From abd89156cf457b780e15b2737a5822bae894ad22 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 12 Dec 2018 19:07:52 +0000 Subject: [PATCH 150/220] glx: meson: wire up the dispatch-index-check test Accidentally dropped with an earlier commit. 
Fixes: 4ccb9816737 ("meson: Use consistent style for tests") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit e139d7a8a315502d538b6a753cb42e841c10e57f) --- src/glx/tests/meson.build | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/glx/tests/meson.build b/src/glx/tests/meson.build index fd9d4d433b2..81a77c3a3f6 100644 --- a/src/glx/tests/meson.build +++ b/src/glx/tests/meson.build @@ -33,6 +33,11 @@ if with_shared_glapi files_glx_test += files('query_renderer_implementation_unittest.cpp') endif + test( + 'dispatch-index-check', + files('dispatch-index-check'), + suite : ['glx'], + ) test( 'glx-test', executable( From 80bea2ba6e4c5b1152e6623717d460a1c9e0e6ec Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 12 Dec 2018 19:24:14 +0000 Subject: [PATCH 151/220] glx/test: meson: assorted include fixes Swap '..' with the symbolic inc_glx and add glproto as dependency. That will pull the correct include, effectively fixing the tests on macOS. Fixes: a47c525f328 ("meson: build glx") Signed-off-by: Emil Velikov Reviewed-by: Dylan Baker (cherry picked from commit f331419f262d3a0f270376cafbb9517b4627bb7a) Signed-off-by: Emil Velikov Conflicts: src/glx/tests/meson.build --- src/glx/tests/meson.build | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/glx/tests/meson.build b/src/glx/tests/meson.build index 81a77c3a3f6..e59b42d19a6 100644 --- a/src/glx/tests/meson.build +++ b/src/glx/tests/meson.build @@ -46,9 +46,9 @@ if with_shared_glapi link_with : [libglx, libglapi], include_directories : [ inc_src, inc_include, inc_mesa, inc_mapi, inc_gl_internal, - include_directories('..'), + inc_glx, ], - dependencies : [dep_libdrm, dep_thread, idep_gtest] - ) + dependencies : [dep_libdrm, dep_glproto, dep_thread, idep_gtest] + ), ) endif From 00c3af4f0cb37108b64dbec62fe0fd91be4e7788 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Dec 2018 10:11:01 -0500 Subject: [PATCH 152/220] pci_ids: add new VegaM pci id 
Reviewed-by: Samuel Pitoiset Signed-off-by: Alex Deucher Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 516160d717b81141a65a03060ac440a1cad8ae94) --- include/pci_ids/radeonsi_pci_ids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index a2bc9213207..75ac7761bb4 100644 --- a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -219,6 +219,7 @@ CHIPSET(0x699F, POLARIS12) CHIPSET(0x694C, VEGAM) CHIPSET(0x694E, VEGAM) +CHIPSET(0x694F, VEGAM) CHIPSET(0x6860, VEGA10) CHIPSET(0x6861, VEGA10) From df3fd9f73863a47a0928974f17705c64fb7f440a Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Sun, 25 Nov 2018 14:37:53 +0100 Subject: [PATCH 153/220] st/nine: Fix volumetexture dtor on ctor failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dtor is called on allocation failure, thus we must check the volumes are allocated before trying to release them. 
Signed-off-by: Axel Davy Tested-by: Dieter Nützel Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit f91f748fabd7fec8f571124df23296c07102a983) --- src/gallium/state_trackers/nine/volumetexture9.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c index 5dec4844864..c7191bce688 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -141,7 +141,8 @@ NineVolumeTexture9_dtor( struct NineVolumeTexture9 *This ) if (This->volumes) { for (l = 0; l <= This->base.base.info.last_level; ++l) - NineUnknown_Destroy(&This->volumes[l]->base); + if (This->volumes[l]) + NineUnknown_Destroy(&This->volumes[l]->base); FREE(This->volumes); } From bf6f68fc8322e692d4d37e54d60ce91908ddb283 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Mon, 3 Dec 2018 21:15:47 +0100 Subject: [PATCH 154/220] st/nine: Bind src not dst in nine_context_box_upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nine_context_box_upload uploads a ram buffer (from src) to a pipe_resource (dst). We already have a refcount on the pipe_resource, what needs to be protected from release is the ram buffer, thus a reference to src. 
Signed-off-by: Axel Davy Tested-by: Dieter Nützel Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 42d672fa6a766363e5703f119607f7c7975918aa) --- src/gallium/state_trackers/nine/nine_state.c | 6 +++--- src/gallium/state_trackers/nine/nine_state.h | 2 +- src/gallium/state_trackers/nine/surface9.c | 2 +- src/gallium/state_trackers/nine/volume9.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 74aaf57a549..ac18f8f6839 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -2429,7 +2429,7 @@ CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload, } CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload, - ARG_BIND_REF(struct NineUnknown, dst), + ARG_BIND_REF(struct NineUnknown, src_ref), ARG_BIND_RES(struct pipe_resource, res), ARG_VAL(unsigned, level), ARG_COPY_REF(struct pipe_box, dst_box), @@ -2444,8 +2444,8 @@ CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload, struct pipe_transfer *transfer = NULL; uint8_t *map; - /* We just bind dst for the bind count */ - (void)dst; + /* Binding src_ref avoids release before upload */ + (void)src_ref; map = pipe->transfer_map(pipe, res, diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 51e5e326527..8de9f84a256 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -568,7 +568,7 @@ nine_context_range_upload(struct NineDevice9 *device, void nine_context_box_upload(struct NineDevice9 *device, unsigned *counter, - struct NineUnknown *dst, + struct NineUnknown *src_ref, struct pipe_resource *res, unsigned level, const struct pipe_box *dst_box, diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 5fd662fa049..10518219a0a 100644 --- 
a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -660,7 +660,7 @@ NineSurface9_CopyMemToDefault( struct NineSurface9 *This, nine_context_box_upload(This->base.base.device, &From->pending_uploads_counter, - (struct NineUnknown *)This, + (struct NineUnknown *)From, r_dst, This->level, &dst_box, diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index ec811aeba13..840f01dae10 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -449,7 +449,7 @@ NineVolume9_CopyMemToDefault( struct NineVolume9 *This, nine_context_box_upload(This->base.device, &From->pending_uploads_counter, - (struct NineUnknown *)This, + (struct NineUnknown *)From, r_dst, This->level, &dst_box, From 5f76202eafbb960fb2da767574e10198a4246054 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Mon, 3 Dec 2018 21:24:54 +0100 Subject: [PATCH 155/220] st/nine: Add src reference to nine_context_range_upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like nine_context_box_upload, nine_context_range_upload should reference the src, which holds the ram source buffer. 
Fixes: https://github.com/iXit/Mesa-3D/issues/327 Signed-off-by: Axel Davy Tested-by: Dieter Nützel Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 104681c5d528a823a3fdc3f7d9c6f8133c27201c) --- src/gallium/state_trackers/nine/buffer9.h | 4 +++- src/gallium/state_trackers/nine/nine_state.c | 4 ++++ src/gallium/state_trackers/nine/nine_state.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h index b04a0a721bb..1803d8d6405 100644 --- a/src/gallium/state_trackers/nine/buffer9.h +++ b/src/gallium/state_trackers/nine/buffer9.h @@ -104,7 +104,9 @@ NineBuffer9_Upload( struct NineBuffer9 *This ) struct NineDevice9 *device = This->base.base.device; assert(This->base.pool == D3DPOOL_MANAGED && This->managed.dirty); - nine_context_range_upload(device, &This->managed.pending_upload, This->base.resource, + nine_context_range_upload(device, &This->managed.pending_upload, + (struct NineUnknown *)This, + This->base.resource, This->managed.dirty_box.x, This->managed.dirty_box.width, (char *)This->managed.data + This->managed.dirty_box.x); diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index ac18f8f6839..c5596a5ee94 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -2418,6 +2418,7 @@ CSMT_ITEM_NO_WAIT(nine_context_gen_mipmap, } CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload, + ARG_BIND_REF(struct NineUnknown, src_ref), ARG_BIND_RES(struct pipe_resource, res), ARG_VAL(unsigned, offset), ARG_VAL(unsigned, size), @@ -2425,6 +2426,9 @@ CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload, { struct nine_context *context = &device->context; + /* Binding src_ref avoids release before upload */ + (void)src_ref; + context->pipe->buffer_subdata(context->pipe, res, 0, offset, size, data); } diff --git 
a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 8de9f84a256..55960007bfb 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -560,6 +560,7 @@ nine_context_gen_mipmap(struct NineDevice9 *device, void nine_context_range_upload(struct NineDevice9 *device, unsigned *counter, + struct NineUnknown *src_ref, struct pipe_resource *res, unsigned offset, unsigned size, From be8c1c89819dd223b15a2c45615e02cbc92e6f88 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 3 Dec 2018 18:40:10 +0000 Subject: [PATCH 156/220] anv: don't do partial resolve on layer > 0 We've made the choice not to use fast clears on layer > 0 with multilayer images. This is partly because we would need to store multiple clear colors for each layer, making the existing memory layout, already including aux surfaces, fast clear color, image state, etc... even more complex. Partial resolves are the operations transferring the clear colors into the auxiliary buffers. This operation is currently implemented in Blorp by loading the clear color from the image's BO, into a shader that then samples from the auxiliary buffer and writes the color only if it isn't there already. The problem here is that we store only one clear color for all layers, and that single color is used for partial resolves. If you trigger a partial clear on a layer > 0, then you're likely to deal with a color that is not what you actually want. In the particular issues below, we have multiple layers, each cleared with a different color but the partial resolve just writes the wrong color into the auxiliary buffers for layers > 0.
Signed-off-by: Lionel Landwerlin Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108910 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108911 Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit e2ae5f2f0a0dbdae08e026b88e30552728c4abd6) --- src/intel/vulkan/genX_cmd_buffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index eea699be8ea..4f54533b377 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1137,6 +1137,14 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, level, array_layer, resolve_op, final_fast_clear); } else { + /* We only support fast-clear on the first layer so partial + * resolves should not be used on other layers as they will use + * the clear color stored in memory that is only valid for layer0. + */ + if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE && + array_layer != 0) + continue; + anv_cmd_predicated_mcs_resolve(cmd_buffer, image, aspect, array_layer, resolve_op, final_fast_clear); From 4569cec1737e6e57c82417f624a12ed1edd290ce Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 20 Dec 2018 08:12:50 -0800 Subject: [PATCH 157/220] gallium/ttn: Fix setup of outputs_written. We need a 64-bit value, otherwise we only handle the low 32, and happen to sign-extend to claim to write all varying slots if VARYING_SLOT_VAR2 was used. 
Fixes: 4d0b2c7aaac3 ("ttn: Update shader->info as we generate code.") Reviewed-by: Rob Clark (cherry picked from commit 7d7ecfbcbc2236ba76d9ad9dc7c589ce5a6bfa32) --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 0ad274b535a..4fa36cc7de4 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -375,7 +375,7 @@ ttn_emit_declaration(struct ttn_compile *c) c->outputs[idx] = var; for (int i = 0; i < array_size; i++) - b->shader->info.outputs_written |= 1 << (var->data.location + i); + b->shader->info.outputs_written |= 1ull << (var->data.location + i); } break; case TGSI_FILE_CONSTANT: From 7e3fb7c0f9f83b682ad7d9d6b50534f2d68a03c5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Dec 2018 16:09:19 +1000 Subject: [PATCH 158/220] virgl/vtest: fix front buffer flush with protocol version 0. Older versions of virglrenderer before 33da7361aec486290df0aec4ad8dfa8ff6adde2c in vtest mode, misrender gears. Fixes: 9d81cd8e7c (virgl: Pass resource size and transfer offsets) Reviewed-By: Gert Wollny (cherry picked from commit d1ce7eba8b056bfd7d8f29cc0f583777a7ee30e5) --- src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c index f44d4d74ff1..176d04388f2 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c @@ -639,7 +639,7 @@ static void virgl_vtest_flush_frontbuffer(struct virgl_winsys *vws, * get the data. */ virgl_vtest_recv_transfer_get_data(vtws, map + offset, size, valid_stride, &box, res->format, - util_format_get_stride(res->format, res->width)); + vtws->protocol_version == 0 ? 
valid_stride : util_format_get_stride(res->format, res->width)); vtws->sws->displaytarget_unmap(vtws->sws, res->dt); From 99da650b1f7bcff06456963285b9e0b56c537971 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 4 Dec 2018 13:52:19 -0800 Subject: [PATCH 159/220] meson: Override C++ standard to gnu++11 when building with altivec on ppc64 Otherwise there will be symbol collisions for the vector name. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108943 Distro Bug: https://bugs.gentoo.org/673622 Fixes: 42ea0631f108d82554339530d6c88aa1b448af1e ("meson: build clover") Acked-by: Matt Turner (cherry picked from commit 133a5b838306e25b469cc514ba2ea92b4e6ce4b5) --- meson.build | 9 +++++++++ src/gallium/state_trackers/clover/meson.build | 3 +++ 2 files changed, 12 insertions(+) diff --git a/meson.build b/meson.build index 94f891efa68..b7af08d92a7 100644 --- a/meson.build +++ b/meson.build @@ -651,6 +651,7 @@ if get_option('power8') != 'false' endif _opencl = get_option('gallium-opencl') +clover_cpp_std = [] if _opencl != 'disabled' if not with_gallium error('OpenCL Clover implementation requires at least one gallium driver.') @@ -659,6 +660,14 @@ if _opencl != 'disabled' dep_clc = dependency('libclc') with_gallium_opencl = true with_opencl_icd = _opencl == 'icd' + + if host_machine.cpu_family().startswith('ppc') and cpp.compiles(''' + #if !defined(__VEC__) || !defined(__ALTIVEC__) + #error "AltiVec not enabled" + #endif''', + name : 'Altivec') + clover_cpp_std += ['cpp_std=gnu++11'] + endif else dep_clc = null_dep with_gallium_opencl = false diff --git a/src/gallium/state_trackers/clover/meson.build b/src/gallium/state_trackers/clover/meson.build index 1a09d8f2ca9..a6729af2fb8 100644 --- a/src/gallium/state_trackers/clover/meson.build +++ b/src/gallium/state_trackers/clover/meson.build @@ -30,6 +30,7 @@ libcltgsi = static_library( files('tgsi/compiler.cpp', 'tgsi/invocation.hpp'), include_directories : clover_incs, cpp_args : [cpp_vis_args], + override_options 
: clover_cpp_std, ) libclllvm = static_library( @@ -56,6 +57,7 @@ libclllvm = static_library( )), ], dependencies : [dep_llvm, dep_elf], + override_options : clover_cpp_std, ) clover_files = files( @@ -119,4 +121,5 @@ libclover = static_library( include_directories : clover_incs, cpp_args : [clover_cpp_args, cpp_vis_args], link_with : [libcltgsi, libclllvm], + override_options : clover_cpp_std, ) From efa0c384dca7182301da05b95a44a5157930bb34 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 30 Dec 2018 23:37:11 +0100 Subject: [PATCH 160/220] radv: Do a cache flush if needed before reading predicates. This caused random failures for two conditional rendering tests: dEQP-VK.conditional_rendering.draw_clear.draw.update_with_rendering_discard dEQP-VK.conditional_rendering.draw_clear.draw.update_with_rendering_no_discard These wrote the predicate with the vertex shader, did a barrier and then started the conditional rendering. However the cache flushes for the barrier only happen on first draw, so after the predicate has been read. Fixes: e45ba51ea45 "radv: add support for VK_EXT_conditional_rendering" Reviewed-by: Dave Airlie (cherry picked from commit 8c93ef5de98a90a93434d351c769f3c06f206397) --- src/amd/vulkan/radv_cmd_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f4ad4b216e9..4ebb01c6810 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -4657,6 +4657,8 @@ void radv_CmdBeginConditionalRenderingEXT( draw_visible = false; } + si_emit_cache_flush(cmd_buffer); + /* Enable predication for this command buffer. 
*/ si_emit_set_predication_state(cmd_buffer, draw_visible, va); cmd_buffer->state.predicating = true; From 418fec283c81462cd29e3a7774035d37cbad6cfe Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 14 Dec 2018 15:09:38 +1100 Subject: [PATCH 161/220] tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just happened not to crash/assert because all loops have at least 1 if-statement and due to a second bug we end up matching the same ENDIF to exit both the iteration over the if-statement and the loop. The second bug is fixed in the following patch. Fixes: 386d165d8d09 ("tgsi/scan: add a new pass that analyzes tess factor writes") Reviewed-by: Marek Olšák (cherry picked from commit dd061eb0442a25cad0cc775103ae31d62280fa44) --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index e13500a7f7b..ecd0b379c2a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -1147,7 +1147,7 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, case TGSI_OPCODE_BGNLOOP: cond_block_tf_writemask |= - get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDIF); + get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDLOOP); continue; case TGSI_OPCODE_BARRIER: From a7a8f906d40205005f7cd3104f714c28593e87fd Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 14 Dec 2018 15:36:02 +1100 Subject: [PATCH 162/220] tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous code used a do while loop and continues after walking a nested loop/if-statement.
This means we end up evaluating the last instruction from the nested block against the while condition and potentially exit early if it matches the exit condition of the outer block. Fixes: 386d165d8d09 ("tgsi/scan: add a new pass that analyzes tess factor writes") Reviewed-by: Marek Olšák (cherry picked from commit 4dda4457504ee5cafa0388543620d4598b8561cf) --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 72 +++++++++++++++----------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index ecd0b379c2a..75c2e08632e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -1004,11 +1004,12 @@ get_block_tessfactor_writemask(const struct tgsi_shader_info *info, struct tgsi_full_instruction *inst; unsigned writemask = 0; - do { - tgsi_parse_token(parse); - assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); - inst = &parse->FullToken.FullInstruction; - check_no_subroutines(inst); + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + + while (inst->Instruction.Opcode != end_opcode) { /* Recursively process nested blocks. 
*/ switch (inst->Instruction.Opcode) { @@ -1016,20 +1017,26 @@ get_block_tessfactor_writemask(const struct tgsi_shader_info *info, case TGSI_OPCODE_UIF: writemask |= get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDIF); - continue; + break; case TGSI_OPCODE_BGNLOOP: writemask |= get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP); - continue; + break; case TGSI_OPCODE_BARRIER: unreachable("nested BARRIER is illegal"); - continue; + break; + + default: + writemask |= get_inst_tessfactor_writemask(info, inst); } - writemask |= get_inst_tessfactor_writemask(info, inst); - } while (inst->Instruction.Opcode != end_opcode); + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + } return writemask; } @@ -1043,18 +1050,20 @@ get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info, struct tgsi_full_instruction *inst; unsigned then_tessfactor_writemask = 0; unsigned else_tessfactor_writemask = 0; + unsigned writemask; bool is_then = true; - do { - tgsi_parse_token(parse); - assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); - inst = &parse->FullToken.FullInstruction; - check_no_subroutines(inst); + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + + while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF) { switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ELSE: is_then = false; - continue; + break; /* Recursively process nested blocks. */ case TGSI_OPCODE_IF: @@ -1063,28 +1072,33 @@ get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info, is_then ? 
&then_tessfactor_writemask : &else_tessfactor_writemask, cond_block_tf_writemask); - continue; + break; case TGSI_OPCODE_BGNLOOP: *cond_block_tf_writemask |= get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP); - continue; + break; case TGSI_OPCODE_BARRIER: unreachable("nested BARRIER is illegal"); - continue; - } - - /* Process an instruction in the current block. */ - unsigned writemask = get_inst_tessfactor_writemask(info, inst); + break; + default: + /* Process an instruction in the current block. */ + writemask = get_inst_tessfactor_writemask(info, inst); - if (writemask) { - if (is_then) - then_tessfactor_writemask |= writemask; - else - else_tessfactor_writemask |= writemask; + if (writemask) { + if (is_then) + then_tessfactor_writemask |= writemask; + else + else_tessfactor_writemask |= writemask; + } } - } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF); + + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + } if (then_tessfactor_writemask || else_tessfactor_writemask) { /* If both statements write the same tess factor channels, From 65f7c848352f8c5224bc3b5ee338409276d1ddf2 Mon Sep 17 00:00:00 2001 From: Alexander von Gluck IV Date: Thu, 27 Dec 2018 20:41:47 +0000 Subject: [PATCH 163/220] egl/haiku: Fix reference to disp vs dpy Reviewed-by: Eric Engestrom Fixes: 00992700c9a812a54563 "egl: set the EGLDevice when creating a display" (cherry picked from commit 1b97a72328b7b549aa8d050495f504815444c0dd) --- src/egl/drivers/haiku/egl_haiku.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index a9c5cf8d29b..d4b046c79b4 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -29,6 +29,7 @@ #include "eglconfig.h" #include "eglcontext.h" +#include "egldevice.h" #include "egldisplay.h" #include 
"egldriver.h" #include "eglcurrent.h" @@ -215,7 +216,7 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) _eglError(EGL_NOT_INITIALIZED, "DRI2: failed to find EGLDevice"); return EGL_FALSE; } - disp->Device = dev; + dpy->Device = dev; TRACE("Add configs\n"); if (!haiku_add_configs_for_visuals(dpy)) From cec0f1721616673886497e84886304467034672d Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Mon, 31 Dec 2018 19:23:52 -0800 Subject: [PATCH 164/220] meson: Error out if building nouveau and using LLVM without rtti Nouveau requires rtti. Often LLVM is configured without rtti, and code with and without cannot be linked safely. Lets just error out if nouveau is requested and llvm is built without rtti. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109202 Fixes: c5a97d658ec19cc02719d7f86c1b0715e3d9ffc4 ("meson: fix builds against LLVM built without rtti") Reviewed-by: Bas Nieuwenhuizen (cherry picked from commit a2596450ac7330c8965c819491038fb1ad454333) --- meson.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meson.build b/meson.build index b7af08d92a7..74673e5af9c 100644 --- a/meson.build +++ b/meson.build @@ -1234,6 +1234,9 @@ if with_llvm # programs, so we need to build all C++ code in mesa without rtti as well to # ensure that linking works. if dep_llvm.get_configtool_variable('has-rtti') == 'NO' + if with_gallium_nouveau + error('The Nouveau driver requires rtti. You either need to turn off nouveau or use an LLVM built with LLVM_ENABLE_RTTI.') + endif cpp_args += '-fno-rtti' endif elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr From 2a54a90e51347e3431ebb21faaa747fdc320d9d2 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 3 Jan 2019 16:17:04 +0000 Subject: [PATCH 165/220] i965: include draw_params/derived_draw_params for VF cache workaround These buffers are using VB slots and should be included in the workaround decision. 
Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand Reviewed-by: Kenneth Graunke Fixes: a363bb2cd0e2a1 ("i965: Allocate VMA in userspace for full-PPGTT systems.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109072 (cherry picked from commit 92b7407090b1f11af49133968b63d583eba9b803) --- src/mesa/drivers/dri/i965/genX_state_upload.c | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 9cd017a5cff..28c60c9edf5 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -505,9 +505,8 @@ vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw) { #if GEN_GEN >= 8 bool need_invalidate = false; - unsigned i; - for (i = 0; i < brw->vb.nr_buffers; i++) { + for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo); if (high_bits != brw->vb.last_bo_high_bits[i]) { @@ -516,9 +515,23 @@ vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw) } } - /* Don't bother with draw parameter buffers - those are generated by - * the driver so we can select a consistent memory zone. 
+ - */ + if (brw->draw.draw_params_bo) { + uint16_t high_bits = pinned_bo_high_bits(brw->draw.draw_params_bo); + + if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers] != high_bits) { + need_invalidate = true; + brw->vb.last_bo_high_bits[brw->vb.nr_buffers] = high_bits; + } + } + + if (brw->draw.derived_draw_params_bo) { + uint16_t high_bits = pinned_bo_high_bits(brw->draw.derived_draw_params_bo); + + if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] != high_bits) { + need_invalidate = true; + brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] = high_bits; + } + } if (need_invalidate) { brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE); From 6499126ea9ebfe8f1c299603a51cc0bcb8babefb Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 3 Jan 2019 16:18:48 +0000 Subject: [PATCH 166/220] i965: add CS stall on VF invalidation workaround Even with the previous commit, hangs are still happening. The problem there is that the VF cache invalidate happens immediately without waiting for previous rendering to complete. What happens is that we invalidate the cache the moment the PIPE_CONTROL is parsed but we still have old rendering in the pipe which continues to pull data into the cache with the old high address bits. The later rendering with the new high address bits then doesn't have the clean cache that it expects/needs.
v2: Update commit message/explanation with Jason's Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand Reviewed-by: Kenneth Graunke Fixes: a363bb2cd0e2a1 ("i965: Allocate VMA in userspace for full-PPGTT systems.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109072 (cherry picked from commit 31e4c9ce400341df9b0136419b3b3c73b8c9eb7e) --- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 2 +- src/mesa/drivers/dri/i965/genX_state_upload.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index a62b88e166c..97ae2707049 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -213,7 +213,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch, } if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); } #endif } diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 28c60c9edf5..ce9a3adcfc1 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -534,7 +534,7 @@ vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw) } if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); } #endif } From 9ccb447e727955b924b9581f4d0301b5bac7e9f8 Mon Sep 17 00:00:00 2001 From: Caio Marcelo de Oliveira Filho Date: Wed, 19 Dec 2018 15:23:28 -0800 Subject: [PATCH 167/220] nir: remove dead code from copy_prop_vars When copy_prop_vars also took care of dead write handling, intrin was used as part of store_to_entry. Now it isn't, so this assignment isn't really used. Add a comment clarifying what happens to intrin.
Fixes: 4dfa7adc100 "nir: Remove handling of dead writes from copy_prop_vars" Reviewed-by: Jordan Justen Reviewed-by: Jason Ekstrand (cherry picked from commit bbf9ee9b18b5917632e6bb74e5859398e6cea224) --- src/compiler/nir/nir_opt_copy_prop_vars.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 29b2caba892..594c4ddd0c2 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -721,9 +721,9 @@ copy_prop_vars_block(struct copy_prop_var_state *state, lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit); struct value value; if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) { + /* If load works, intrin (the copy_deref) is removed. */ if (value.is_ssa) { nir_store_deref(b, dst, value.ssa[0], 0xf); - intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b)); } else { /* If this would be a no-op self-copy, don't bother. */ if (nir_compare_derefs(value.deref, dst) & nir_derefs_equal_bit) From 666ffbbae591da56fecee04afa1240b044e636c2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Jan 2019 11:32:40 -0600 Subject: [PATCH 168/220] intel/blorp: Be more conservative about copying clear colors In 92eb5bbc68d7324 we attempted to avoid copying clear colors whenever we weren't doing a resolve. However, this broke MSAA resolves because we need the clear color in the source. This patch makes blorp much more conservative such that it only avoids the clear color copy if either aux_usage == NONE or it's explicitly doing a fast-clear. Fixes: 92eb5bbc68d7 "intel/blorp: Only copy clear color when doing..." 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107728 Reviewed-by: Rafael Antognolli (cherry picked from commit 19c608fe43ae7e1578920326690a361ff1be9d88) --- src/intel/blorp/blorp_genX_exec.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 065980616ec..01bea99d3d8 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1326,7 +1326,7 @@ blorp_emit_memcpy(struct blorp_batch *batch, static void blorp_emit_surface_state(struct blorp_batch *batch, const struct brw_blorp_surface_info *surface, - enum isl_aux_op op, + enum isl_aux_op aux_op, void *state, uint32_t state_offset, const bool color_write_disables[4], bool is_render_target) @@ -1382,7 +1382,7 @@ blorp_emit_surface_state(struct blorp_batch *batch, surface->aux_addr, *aux_addr); } - if (surface->clear_color_addr.buffer) { + if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) { #if GEN_GEN >= 10 assert((surface->clear_color_addr.offset & 0x3f) == 0); uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset; @@ -1390,7 +1390,10 @@ blorp_emit_surface_state(struct blorp_batch *batch, isl_dev->ss.clear_color_state_offset, surface->clear_color_addr, *clear_addr); #elif GEN_GEN >= 7 - if (op == ISL_AUX_OP_FULL_RESOLVE || op == ISL_AUX_OP_PARTIAL_RESOLVE) { + /* Fast clears just whack the AUX surface and don't actually use the + * clear color for anything. We can avoid the MI memcpy on that case. 
+ */ + if (aux_op != ISL_AUX_OP_FAST_CLEAR) { struct blorp_address dst_addr = blorp_get_surface_base_address(batch); dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, From e5b1fde8c2e35938cda8373dbd502bfa608fa8d9 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 24 Dec 2018 15:41:56 +0100 Subject: [PATCH 169/220] radv: Implement buffer stores with less than 4 components. We started using it in the btoi paths for r32g32b32, and the LLVM IR checker will complain about it because we end up with intrinsics with the wrong type extension in the name. Fixes: 593996bc02 ("radv: implement buffer to image operations for R32G32B32") Reviewed-by: Samuel Pitoiset (cherry picked from commit 9a45a190ad22849a492506389413046948e0b093) Signed-off-by: Emil Velikov Conflicts: src/amd/common/ac_nir_to_llvm.c --- src/amd/common/ac_nir_to_llvm.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c950b81dca2..bd0e93b8399 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2380,17 +2380,27 @@ static void visit_image_store(struct ac_nir_context *ctx, glc = ctx->ac.i1true; if (dim == GLSL_SAMPLER_DIM_BUF) { + char name[48]; + const char *types[] = { "f32", "v2f32", "v4f32" }; LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true); + LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); + unsigned src_channels = ac_get_llvm_num_components(src); - params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); /* data */ + if (src_channels == 3) + src = ac_build_expand(&ctx->ac, src, 3, 4); + + params[0] = src; /* data */ params[1] = rsrc; params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[3] = ctx->ac.i32_0; /* voffset */ + snprintf(name, sizeof(name), "%s.%s", + 
"llvm.amdgcn.buffer.store.format", + types[CLAMP(src_channels, 1, 3) - 1]); + params[4] = glc; /* glc */ params[5] = ctx->ac.i1false; /* slc */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, - params, 6, 0); + ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0); } else { struct ac_image_args args = {}; args.opcode = ac_image_store; From 2f2e8cc2c5e49f72b45e302fad25ab7ae2917480 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 5 Dec 2018 11:28:12 +0100 Subject: [PATCH 170/220] anv/android: Do not reject storage images. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do the ImageFormatProperties check already, and rejecting an usage flag when both ImageFormatProperties and the WSI (which is Android) support it is not allowed. Intel does support storage for some of the support WSI formats, such as R8G8B8A8_UNORM, and looking at the ISL_SURF_USAGE_DISABLE_AUX_BIT, the imported images do not have any form of compression that would prevent this fix. v2: Also consider STORAGE bit for Gralloc usage bits. (From Kevin Strasser ) Fixes: 053d4c328fa "anv: Implement VK_ANDROID_native_buffer (v9)" Reviewed-by: Tapani Pälli (cherry picked from commit 110564fdecd0948e7aa9b1a607af7631430092ff) --- src/intel/vulkan/anv_android.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index 916e76c93ff..cdca9f95caa 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( *grallocUsage = 0; intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage); - /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags + /* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags. 
* The relevant code in libvulkan/swapchain.cpp contains this fun comment: * @@ -268,19 +268,13 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( "inside %s", __func__); } - /* Reject STORAGE here to avoid complexity elsewhere. */ - if (imageUsage & VK_IMAGE_USAGE_STORAGE_BIT) { - return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED, - "VK_IMAGE_USAGE_STORAGE_BIT unsupported for gralloc " - "swapchain"); - } - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_RENDER; if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; From eac2b2161e06d44ab4e6662e5d4ee0648846a86e Mon Sep 17 00:00:00 2001 From: Andres Gomez Date: Mon, 7 Jan 2019 15:50:35 +0200 Subject: [PATCH 171/220] glsl: correct typo in GLSL compilation error message v2: Add the "fix" tag (Erik). 
Fixes: 037f68d81e1 ("glsl: apply align layout qualifier rules to block offsets") Cc: Timothy Arceri Signed-off-by: Andres Gomez Reviewed-by: Erik Faye-Lund (cherry picked from commit 0cc01f45e7294122a2ae171b09ac3375b2a75d83) --- src/compiler/glsl/ast_to_hir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index cf52f079df2..f4bd8c17db3 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -7423,7 +7423,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions, if (member_align == 0 || member_align & (member_align - 1)) { _mesa_glsl_error(&loc, state, "align layout qualifier " - "in not a power of 2"); + "is not a power of 2"); } else { fields[i].offset = glsl_align(offset, member_align); next_offset = glsl_align(fields[i].offset + size, align); From 7f29d45f915b00aa335bf3555388c3c8ff1c1bc4 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 2 Jan 2019 17:53:41 +0100 Subject: [PATCH 172/220] radv: Fix rasterization precision bits. Note that these limits are exact, not a "precision is at least x", as texel coords also get snapped to a multiple of this step size before filtering. 
This fixes CTS tests dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat dEQP-VK.texture.explicit_lod.2d.sizes.57x35_nearest_linear_mipmap_nearest_repeat Fixes: f4e499ec791 "radv: add initial non-conformant radv vulkan driver" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109151 Reviewed-by: Samuel Pitoiset (cherry picked from commit 3cc940277a45285d9932ed62398d7a54d2afffb8) --- src/amd/vulkan/radv_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 957d6ac9bad..ac6cff23d58 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -936,9 +936,9 @@ void radv_GetPhysicalDeviceProperties( 2048, 2048 }, - .subPixelPrecisionBits = 4 /* FIXME */, - .subTexelPrecisionBits = 4 /* FIXME */, - .mipmapPrecisionBits = 4 /* FIXME */, + .subPixelPrecisionBits = 8, + .subTexelPrecisionBits = 8, + .mipmapPrecisionBits = 8, .maxDrawIndexedIndexValue = UINT32_MAX, .maxDrawIndirectCount = UINT32_MAX, .maxSamplerLodBias = 16, From 8d09bd10d2a7b5e2a9be3f42fc00ddd5572a18bd Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 6 Jan 2019 12:34:24 +0100 Subject: [PATCH 173/220] spirv: Fix matrix parameters in function calls. They can be handled exactly the same as arrays, we just need to handle the base type correctly in the switches. 
Fixes: a45b6fb4524 "spirv: Pass SSA values through functions" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109204 Reviewed-by: Jason Ekstrand Reviewed-by: Samuel Pitoiset (cherry picked from commit 70ed049cc6a682fba084cf57e06728fa5934881b) --- src/compiler/spirv/vtn_cfg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c index 726f717e8d5..6406f4911df 100644 --- a/src/compiler/spirv/vtn_cfg.c +++ b/src/compiler/spirv/vtn_cfg.c @@ -47,6 +47,7 @@ vtn_type_count_function_params(struct vtn_type *type) { switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: return type->length * vtn_type_count_function_params(type->array_element); case vtn_base_type_struct: { @@ -76,6 +77,7 @@ vtn_type_add_to_function_params(struct vtn_type *type, switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: for (unsigned i = 0; i < type->length; i++) vtn_type_add_to_function_params(type->array_element, func, param_idx); break; @@ -123,6 +125,7 @@ vtn_ssa_value_add_to_call_params(struct vtn_builder *b, { switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: for (unsigned i = 0; i < type->length; i++) { vtn_ssa_value_add_to_call_params(b, value->elems[i], type->array_element, @@ -152,6 +155,7 @@ vtn_ssa_value_load_function_param(struct vtn_builder *b, { switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: for (unsigned i = 0; i < type->length; i++) { vtn_ssa_value_load_function_param(b, value->elems[i], type->array_element, param_idx); From 484efe75e4862b118351dd16e5a232d3e579c366 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 14 Dec 2018 10:54:08 -0600 Subject: [PATCH 174/220] spirv: Handle any bit size in vector_insert/extract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This crops up both in the actual SPIR-V VectorInsert/Extract opcodes as well as 
various places where we deal with vector derefs. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Alejandro Piñeiro Reviewed-by: Caio Marcelo de Oliveira Filho (cherry picked from commit 42b2f3e91f6d6c4eead40ef91e5edde68019f23c) --- src/compiler/spirv/spirv_to_nir.c | 10 ++++++++-- src/compiler/spirv/vtn_glsl450.c | 5 ++--- src/compiler/spirv/vtn_variables.c | 11 +++++------ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 77ce0be369e..16d9c92046e 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -2894,13 +2894,19 @@ vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, return &vec->dest.dest.ssa; } +static nir_ssa_def * +nir_ieq_imm(nir_builder *b, nir_ssa_def *x, uint64_t i) +{ + return nir_ieq(b, x, nir_imm_intN_t(b, i, x->bit_size)); +} + nir_ssa_def * vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *index) { nir_ssa_def *dest = vtn_vector_extract(b, src, 0); for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i), vtn_vector_extract(b, src, i), dest); return dest; @@ -2912,7 +2918,7 @@ vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, { nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i), vtn_vector_insert(b, src, insert, i), dest); return dest; diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 06a49e48e3f..0d8100384d6 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -807,10 +807,9 @@ handle_glsl450_interpolation(struct vtn_builder *b, enum GLSLstd450 opcode, if 
(vec_array_deref) { assert(vec_deref); - nir_const_value *const_index = nir_src_as_const_value(vec_deref->arr.index); - if (const_index) { + if (nir_src_is_const(vec_deref->arr.index)) { val->ssa->def = vtn_vector_extract(b, &intrin->dest.ssa, - const_index->u32[0]); + nir_src_as_uint(vec_deref->arr.index)); } else { val->ssa->def = vtn_vector_extract_dynamic(b, &intrin->dest.ssa, vec_deref->arr.index.ssa); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index c5cf345d02a..f3e64d2a819 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -512,9 +512,9 @@ vtn_local_load(struct vtn_builder *b, nir_deref_instr *src) if (src_tail != src) { val->type = src->type; - nir_const_value *const_index = nir_src_as_const_value(src->arr.index); - if (const_index) - val->def = vtn_vector_extract(b, val->def, const_index->u32[0]); + if (nir_src_is_const(src->arr.index)) + val->def = vtn_vector_extract(b, val->def, + nir_src_as_uint(src->arr.index)); else val->def = vtn_vector_extract_dynamic(b, val->def, src->arr.index.ssa); } @@ -532,10 +532,9 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); _vtn_local_load_store(b, true, dest_tail, val); - nir_const_value *const_index = nir_src_as_const_value(dest->arr.index); - if (const_index) + if (nir_src_is_const(dest->arr.index)) val->def = vtn_vector_insert(b, val->def, src->def, - const_index->u32[0]); + nir_src_as_uint(dest->arr.index)); else val->def = vtn_vector_insert_dynamic(b, val->def, src->def, dest->arr.index.ssa); From ca8ef8234e5564568cf4b96d3fc337d8941dbc6c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 13 Dec 2018 18:33:46 -0600 Subject: [PATCH 175/220] anv/apply_pipeline_layout: Set the cursor in lower_res_reindex_intrinsic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The loop through instructions doesn't set 
the cursor for us so unless we set it somewhere, we may end up emitting instructions in the wrong place. The only reason why we haven't been bitten by this in the past is that it only happens in a few variable pointers cases and the CTS tests for those don't use much control flow so things were getting emitted in the correct order by accident. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Alejandro Piñeiro Reviewed-by: Caio Marcelo de Oliveira Filho (cherry picked from commit f8992eb5badf91e0d9ad3732d56a4e3d78fd1abf) --- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 6868288e486..60c196e5c4f 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -171,6 +171,8 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, { nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&intrin->instr); + /* For us, the resource indices are just indices into the binding table and * array elements are sequential. A resource_reindex just turns into an * add of the two indices. From 39b2d2fe087f79f1f631254389411cab75817766 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 15 Dec 2018 09:57:26 -0600 Subject: [PATCH 176/220] spirv: Sign-extend array indices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SPIR-V spec was recently updated to clarify that array indices are treated as signed integers. 
Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Alejandro Piñeiro Reviewed-by: Caio Marcelo de Oliveira Filho (cherry picked from commit c59f07684c0c99fad0c246dd4a475f6bef5d3cbc) Signed-off-by: Emil Velikov Conflicts: src/compiler/spirv/vtn_variables.c --- src/compiler/spirv/vtn_variables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index f3e64d2a819..0eb9f263436 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -132,12 +132,12 @@ vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, } else if (stride == 1) { nir_ssa_def *ssa = vtn_ssa_value(b, link.id)->def; if (ssa->bit_size != 32) - ssa = nir_u2u32(&b->nb, ssa); + ssa = nir_i2i32(&b->nb, ssa); return ssa; } else { nir_ssa_def *src0 = vtn_ssa_value(b, link.id)->def; if (src0->bit_size != 32) - src0 = nir_u2u32(&b->nb, src0); + src0 = nir_i2i32(&b->nb, src0); return nir_imul(&b->nb, src0, nir_imm_int(&b->nb, stride)); } } From 39375bef8810878ca072fe40f237db68c6ef0777 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 24 Dec 2018 10:11:59 +0000 Subject: [PATCH 177/220] anv: explicitly specify format for blorp ccs/mcs op Resolve operations can happen when dealing with a view (begin/end subpasses) in which case the view's format needs to apply, not the image's format.
v2: Relayout arguments of a ccs_op() call (Jason) Signed-off-by: Lionel Landwerlin Suggested-by: Jason Ekstrand Reviewed-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108911 Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit 366eb656ac6fe677828f20295d8916506552a0e8) --- src/intel/vulkan/anv_blorp.c | 10 +++++---- src/intel/vulkan/anv_private.h | 2 ++ src/intel/vulkan/genX_cmd_buffer.c | 34 +++++++++++++++++++----------- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 478b8e7a3db..acc9a22c484 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1658,6 +1658,7 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, void anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op mcs_op, union isl_color_value *clear_value, @@ -1713,12 +1714,12 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, switch (mcs_op) { case ISL_AUX_OP_FAST_CLEAR: - blorp_fast_clear(&batch, &surf, surf.surf->format, + blorp_fast_clear(&batch, &surf, format, 0, base_layer, layer_count, 0, 0, image->extent.width, image->extent.height); break; case ISL_AUX_OP_PARTIAL_RESOLVE: - blorp_mcs_partial_resolve(&batch, &surf, surf.surf->format, + blorp_mcs_partial_resolve(&batch, &surf, format, base_layer, layer_count); break; case ISL_AUX_OP_FULL_RESOLVE: @@ -1736,6 +1737,7 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, void anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t level, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op ccs_op, union isl_color_value *clear_value, @@ -1799,14 +1801,14 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, switch (ccs_op) { case ISL_AUX_OP_FAST_CLEAR: - 
blorp_fast_clear(&batch, &surf, surf.surf->format, + blorp_fast_clear(&batch, &surf, format, level, base_layer, layer_count, 0, 0, level_width, level_height); break; case ISL_AUX_OP_FULL_RESOLVE: case ISL_AUX_OP_PARTIAL_RESOLVE: blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count, - surf.surf->format, ccs_op); + format, ccs_op); break; case ISL_AUX_OP_AMBIGUATE: for (uint32_t a = 0; a < layer_count; a++) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ce6bb302a0c..37c710ad09a 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2957,6 +2957,7 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, void anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op mcs_op, union isl_color_value *clear_value, @@ -2964,6 +2965,7 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, void anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t level, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op ccs_op, union isl_color_value *clear_value, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 4f54533b377..65ad566795f 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -737,6 +737,7 @@ anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer, static void anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t level, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -761,13 +762,14 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) resolve_op = ISL_AUX_OP_FULL_RESOLVE; - 
anv_image_ccs_op(cmd_buffer, image, aspect, level, + anv_image_ccs_op(cmd_buffer, image, format, aspect, level, array_layer, 1, resolve_op, NULL, true); } static void anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -781,7 +783,7 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, aspect, 0, array_layer, resolve_op, fast_clear_supported); - anv_image_mcs_op(cmd_buffer, image, aspect, + anv_image_mcs_op(cmd_buffer, image, format, aspect, array_layer, 1, resolve_op, NULL, true); #else unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail"); @@ -1037,8 +1039,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, uint32_t level_layer_count = MIN2(layer_count, aux_layers - base_layer); - anv_image_ccs_op(cmd_buffer, image, aspect, level, - base_layer, level_layer_count, + anv_image_ccs_op(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, level, base_layer, level_layer_count, ISL_AUX_OP_AMBIGUATE, NULL, false); if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { @@ -1055,8 +1058,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, } assert(base_level == 0 && level_count == 1); - anv_image_mcs_op(cmd_buffer, image, aspect, - base_layer, layer_count, + anv_image_mcs_op(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, base_layer, layer_count, ISL_AUX_OP_FAST_CLEAR, NULL, false); } return; @@ -1133,8 +1137,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, for (uint32_t a = 0; a < level_layer_count; a++) { uint32_t array_layer = base_layer + a; if (image->samples == 1) { - anv_cmd_predicated_ccs_resolve(cmd_buffer, image, aspect, - level, array_layer, resolve_op, + anv_cmd_predicated_ccs_resolve(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, level, array_layer, resolve_op, 
final_fast_clear); } else { /* We only support fast-clear on the first layer so partial @@ -1145,8 +1150,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, array_layer != 0) continue; - anv_cmd_predicated_mcs_resolve(cmd_buffer, image, aspect, - array_layer, resolve_op, + anv_cmd_predicated_mcs_resolve(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, array_layer, resolve_op, final_fast_clear); } } @@ -3667,12 +3673,16 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, union isl_color_value clear_color = {}; anv_clear_color_from_att_state(&clear_color, att_state, iview); if (iview->image->samples == 1) { - anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, + anv_image_ccs_op(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, false); } else { - anv_image_mcs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, + anv_image_mcs_op(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, false); From 57e5fe707455ce648a81d2d2205851bd2e426864 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 23 Dec 2018 17:49:23 +0000 Subject: [PATCH 178/220] anv: flush fast clear colors into compressed surfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the following scenario : 1. Create image format R8G8B8A8_UNORM 2. Create image view format R8G8B8A8_SRGB 3. Clear the view through a sub pass to a particular color 4. Barrier on the image from color attachment to source transfer 5. Copy the image into a linear buffer to check the content The step 4 resolving the clear color is unaware of the SRGB format of the view, because the blorp resolve operations operate on images the color associated with the resolve will not operate on SRGB format but UNORM. Leading to the wrong color being written into surfaces.
This change forces a clear color resolve at the end of the render pass so following resolves won't have to deal with the clear color with a format that doesn't match the image's format. On gfxbench vulkan_5_normal 1280x720, this appears to cost us ~0.5fps, from 49.316 down to 48.949. v2: Only fast clear resolve when image & view have different formats (Lionel) v3: Update warning (Jason) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108911 Signed-off-by: Lionel Landwerlin Suggested-by: Jason Ekstrand Reviewed-by: Samuel Iglesias Gonsálvez Reviewed-by: Jason Ekstrand Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit add5a2ec92f4b3f7ac8353e5986dc04186a7b6da) --- src/intel/vulkan/genX_cmd_buffer.c | 49 ++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 65ad566795f..dcf37654954 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3902,6 +3902,55 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) struct anv_image_view *iview = fb->attachments[a]; const struct anv_image *image = iview->image; + if ((image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && + image->vk_format != iview->vk_format) { + enum anv_fast_clear_type fast_clear_type = + anv_layout_to_fast_clear_type(&cmd_buffer->device->info, + image, VK_IMAGE_ASPECT_COLOR_BIT, + att_state->current_layout); + + /* If any clear color was used, flush it down the aux surfaces. If we + * don't do it now using the view's format we might use the clear + * color incorrectly in the following resolves (for example with an + * SRGB view & a UNORM image).
+ */ + if (fast_clear_type != ANV_FAST_CLEAR_NONE) { + anv_perf_warn(cmd_buffer->device->instance, fb, + "Doing a partial resolve to get rid of clear color at the " + "end of a renderpass due to an image/view format mismatch"); + + uint32_t base_layer, layer_count; + if (image->type == VK_IMAGE_TYPE_3D) { + base_layer = 0; + layer_count = anv_minify(iview->image->extent.depth, + iview->planes[0].isl.base_level); + } else { + base_layer = iview->planes[0].isl.base_array_layer; + layer_count = fb->layers; + } + + for (uint32_t a = 0; a < layer_count; a++) { + uint32_t array_layer = base_layer + a; + if (image->samples == 1) { + anv_cmd_predicated_ccs_resolve(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, + iview->planes[0].isl.base_level, + array_layer, + ISL_AUX_OP_PARTIAL_RESOLVE, + ANV_FAST_CLEAR_NONE); + } else { + anv_cmd_predicated_mcs_resolve(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, + base_layer, + ISL_AUX_OP_PARTIAL_RESOLVE, + ANV_FAST_CLEAR_NONE); + } + } + } + } + /* Transition the image into the final layout for this render pass */ VkImageLayout target_layout = cmd_state->pass->attachments[a].final_layout; From 4305b7abc3899c72a1ac32d894a1db344a499a3d Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 18 Dec 2018 14:37:52 -0800 Subject: [PATCH 179/220] autotools: Remove tegra vdpau driver This has never functioned and probably won't ever function, due to the way gallium media state trackers are architected and the tegra video decoder is architected.
Cc: Thierry Reding Reviewed-by: Ilia Mirkin Fixes: 1755f608f5201e0a23f00cc3ea1b01edd07eb6ef ("tegra: Initial support") (cherry picked from commit 401dca1c73cad4e6ac3137fdd5513e350d060f25) --- src/gallium/targets/vdpau/Makefile.am | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am index cd05a024451..2742c7acd44 100644 --- a/src/gallium/targets/vdpau/Makefile.am +++ b/src/gallium/targets/vdpau/Makefile.am @@ -57,8 +57,6 @@ include $(top_srcdir)/src/gallium/drivers/r300/Automake.inc include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc -include $(top_srcdir)/src/gallium/drivers/tegra/Automake.inc - if HAVE_GALLIUM_STATIC_TARGETS libvdpau_gallium_la_SOURCES += target.c From 74c80a0117bf176e964213307a9c2d5ae8cfccb2 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 9 Jan 2019 11:09:33 +0000 Subject: [PATCH 180/220] ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics Fixes artifacts in World of Warcraft when Multi-sample Alpha-Test is enabled with DXVK. It also fixes artifacts with Fallout 4's god rays with DXVK. Various piglit interpolateAt*() tests under NIR are also fixed. 
v2: formatting fix update commit message to include Fallout 4 and the Fixes tag Fixes: f4e499ec791 ('radv: add initial non-conformant radv vulkan driver') Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106595 Signed-off-by: Rhys Perry (cherry picked from commit ee8488ea3b99ad0632e5eac6defcef0264d8782c) --- src/amd/common/ac_nir_to_llvm.c | 2 +- src/amd/common/ac_shader_abi.h | 3 +++ src/amd/vulkan/radv_nir_to_llvm.c | 2 ++ src/gallium/drivers/radeonsi/si_shader_nir.c | 3 +++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index bd0e93b8399..836f518e047 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2812,7 +2812,7 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx, LLVMValueRef src0 = NULL; nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); - int input_index = var->data.location - VARYING_SLOT_VAR0; + int input_index = ctx->abi->fs_input_attr_indices[var->data.location - VARYING_SLOT_VAR0]; switch (instr->intrinsic) { case nir_intrinsic_interp_deref_at_centroid: location = INTERP_CENTROID; diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index 6b9a91c92a9..ee18e6c1923 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -77,6 +77,9 @@ struct ac_shader_abi { */ LLVMValueRef *inputs; + /* Varying -> attribute number mapping. 
Also NIR-only */ + unsigned fs_input_attr_indices[MAX_VARYING]; + void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index f56eb01dc52..8c21c423511 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2242,6 +2242,8 @@ handle_fs_inputs(struct radv_shader_context *ctx, if (LLVMIsUndef(interp_param)) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; + if (i >= VARYING_SLOT_VAR0) + ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index; ++index; } else if (i == VARYING_SLOT_CLIP_DIST0) { int length = ctx->shader_info->info.ps.num_input_clips_culls; diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 87ca0161b45..cd38145daec 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -989,6 +989,9 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) LLVMValueRef data[4]; unsigned loc = variable->data.location; + if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT) + ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4; + for (unsigned i = 0; i < attrib_count; i++) { /* Packed components share the same location so skip * them if we have already processed the location. From 78dd4d6536db38ec83791c0a81155767a051f156 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Mon, 7 Jan 2019 19:42:29 +0100 Subject: [PATCH 181/220] radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With Mesa 18.1, commit be973ed21f6e, si_llvm_load_input_vs() changed the number of source 32-bit wide dword components used for fetching vertex attributes into the vertex shader from a constant 4 to a variable num_channels number, depending on input data format, with some special case handling for input data formats like 64-Bit doubles. In the case of a GL_DOUBLE input data format with one or two components though, e.g, submitted via ... a) glTexCoordPointer(1, GL_DOUBLE, 0, buffer); b) glTexCoordPointer(2, GL_DOUBLE, 0, buffer); ... the input format would be SI_FIX_FETCH_RG_64_FLOAT, but no special case handling was implemented for that case, so in the default path the number of 32-bit dwords would be set to the number of float input components derived from info->input_usage_mask. This ends with corrupted input to the vertex shader, because fetching a 64-bit double from the vbo requires fetching two 32-bit dwords instead of 1, and fetching a two double input requires 4 dword fetches instead of 2, so in these cases the vertex shader receives incomplete/truncated input data: a) float v = gl_MultiTexCoord0.x; -> v.x is corrupted. b) vec2 v = gl_MultiTexCoord0.xy; -> v.x is assigned correctly, but v.y is corrupted. This happens with the standard TGSI IR compiled shaders. Under NIR with R600_DEBUG=nir, we got correct behavior because the current radeonsi nir code always assigns info->input_usage_mask = TGSI_WRITEMASK_XYZW, thereby always fetches 4 dwords regardless of what the shader actually needs. Fix this by properly assigning 2 or 4 dword fetches for one or two component GL_DOUBLE input. 
Fixes: be973ed21f6e ("radeonsi: load the right number of components for VS inputs and TBOs") Signed-off-by: Mario Kleiner Cc: mesa-stable@lists.freedesktop.org Cc: Marek Olšák Signed-off-by: Marek Olšák (cherry picked from commit 5e30e54e057300f65019c927482e633c367963fc) --- src/gallium/drivers/radeonsi/si_shader.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 19522cc97b1..f1d5ad31365 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -561,6 +561,14 @@ void si_llvm_load_input_vs( /* Do multiple loads for special formats. */ switch (fix_fetch) { + case SI_FIX_FETCH_RG_64_FLOAT: + num_fetches = 1; /* 1 2-dword or 4-dword load */ + fetch_stride = 0; + if (util_last_bit(info->input_usage_mask[input_index]) >= 2) + num_channels = 4; /* 2 doubles in 4 dwords */ + else + num_channels = 2; /* 1 double in 2 dwords */ + break; case SI_FIX_FETCH_RGB_64_FLOAT: num_fetches = 3; /* 3 2-dword loads */ fetch_stride = 8; From fe9ff38d51a79ab19505b2c0d09e94bed6010165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 8 Jan 2019 11:12:05 -0500 Subject: [PATCH 182/220] st/mesa: don't leak pipe_surface if pipe_context is not current We have found some pipe_surface leaks internally. This is the same code as surface_destroy in radeonsi. Ideally, surface_destroy would be in pipe_screen. 
Cc: 18.3 Reviewed-by: Brian Paul (cherry picked from commit e986c1ca1d61bd7492936f43e45996cd4f8bdb61) --- src/gallium/auxiliary/util/u_inlines.h | 19 +++++++++++++++++++ src/mesa/state_tracker/st_cb_fbo.c | 5 ++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index b06fb111709..fa1e920b509 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -154,6 +154,25 @@ pipe_resource_reference(struct pipe_resource **dst, struct pipe_resource *src) *dst = src; } +/** + * Same as pipe_surface_release, but used when pipe_context doesn't exist + * anymore. + */ +static inline void +pipe_surface_release_no_context(struct pipe_surface **ptr) +{ + struct pipe_surface *surf = *ptr; + + if (pipe_reference_described(&surf->reference, NULL, + (debug_reference_descriptor) + debug_describe_surface)) { + /* trivially destroy pipe_surface */ + pipe_resource_reference(&surf->texture, NULL); + free(surf); + } + *ptr = NULL; +} + /** * Set *dst to \p src with proper reference counting. 
* diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 0e535257cb4..bdc8dda82c2 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -285,8 +285,11 @@ st_renderbuffer_delete(struct gl_context *ctx, struct gl_renderbuffer *rb) struct st_context *st = st_context(ctx); pipe_surface_release(st->pipe, &strb->surface_srgb); pipe_surface_release(st->pipe, &strb->surface_linear); - strb->surface = NULL; + } else { + pipe_surface_release_no_context(&strb->surface_srgb); + pipe_surface_release_no_context(&strb->surface_linear); } + strb->surface = NULL; pipe_resource_reference(&strb->texture, NULL); free(strb->data); _mesa_delete_renderbuffer(ctx, rb); From 10b99dd59df85b05545b92f5e3822c46cdf9d11e Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 29 Dec 2018 01:44:00 -0800 Subject: [PATCH 183/220] intel/fs: Handle source modifiers in lower_integer_multiplication(). lower_integer_multiplication() implements 32x32-bit multiplication on some platforms by bit-casting one of the 32-bit sources into two 16-bit unsigned integer portions. This can give incorrect results if the original instruction specified a source modifier. Fix it by emitting an additional MOV instruction implementing the source modifiers where necessary. 
Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga (cherry picked from commit c5f9c0009d5161e059e54a76fbdb910a6c151f9f) --- src/intel/compiler/brw_fs.cpp | 20 ++++++++++++++++++-- src/intel/compiler/brw_fs.h | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 3e083723471..6220ee301ff 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3853,6 +3853,9 @@ fs_visitor::lower_integer_multiplication() high.offset = inst->dst.offset % REG_SIZE; if (devinfo->gen >= 7) { + if (inst->src[1].abs) + lower_src_modifiers(this, block, inst, 1); + if (inst->src[1].file == IMM) { ibld.MUL(low, inst->src[0], brw_imm_uw(inst->src[1].ud & 0xffff)); @@ -3865,6 +3868,9 @@ fs_visitor::lower_integer_multiplication() subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1)); } } else { + if (inst->src[0].abs) + lower_src_modifiers(this, block, inst, 0); + ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0), inst->src[1]); ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1), @@ -3882,6 +3888,18 @@ fs_visitor::lower_integer_multiplication() } } else if (inst->opcode == SHADER_OPCODE_MULH) { + /* According to the BDW+ BSpec page for the "Multiply Accumulate + * High" instruction: + * + * "An added preliminary mov is required for source modification on + * src1: + * mov (8) r3.0<1>:d -r3<8;8,1>:d + * mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw + * mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d" + */ + if (devinfo->gen >= 8 && (inst->src[1].negate || inst->src[1].abs)) + lower_src_modifiers(this, block, inst, 1); + /* Should have been lowered to 8-wide. 
*/ assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst)); const fs_reg acc = retype(brw_acc_reg(inst->exec_size), @@ -3897,8 +3915,6 @@ fs_visitor::lower_integer_multiplication() * On Gen8, the multiply instruction does a full 32x32-bit * multiply, but in order to do a 64-bit multiply we can simulate * the previous behavior and then use a MACH instruction. - * - * FINISHME: Don't use source modifiers on src1. */ assert(mul->src[1].type == BRW_REGISTER_TYPE_D || mul->src[1].type == BRW_REGISTER_TYPE_UD); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index aba19d5ab2c..e0e9759b6b5 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -529,6 +529,25 @@ namespace brw { return fs_reg(retype(brw_vec8_grf(regs[0], 0), type)); } } + + /** + * Remove any modifiers from the \p i-th source region of the instruction, + * including negate, abs and any implicit type conversion to the execution + * type. Instead any source modifiers will be implemented as a separate + * MOV instruction prior to the original instruction. + */ + inline bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) + { + assert(inst->components_read(i) == 1); + const fs_builder ibld(v, block, inst); + const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); + + ibld.MOV(tmp, inst->src[i]); + inst->src[i] = tmp; + + return true; + } } void shuffle_from_32bit_read(const brw::fs_builder &bld, From 9d8479d632884cf0c63596f253511f07cb74f43d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 6 Dec 2018 14:11:34 -0800 Subject: [PATCH 184/220] intel/fs: Implement quad swizzles on ICL+. Align16 is no longer a thing, so a new implementation is provided using Align1 instead. Not all possible swizzles can be represented as a single Align1 region, but some fast paths are provided for frequently used swizzles that can be represented efficiently in Align1 mode. Fixes ~90 subgroup quad swap Vulkan CTS tests. 
Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga (cherry picked from commit 812ede088f5f6bea4e6fba991bd59d5cce264212) --- src/intel/compiler/brw_fs.cpp | 29 ++++++++- src/intel/compiler/brw_fs.h | 4 ++ src/intel/compiler/brw_fs_generator.cpp | 82 ++++++++++++++++++++----- 3 files changed, 97 insertions(+), 18 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 6220ee301ff..6a17a9a1ec1 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -315,6 +315,24 @@ fs_inst::has_source_and_destination_hazard() const * may stomp all over it. */ return true; + case SHADER_OPCODE_QUAD_SWIZZLE: + switch (src[1].ud) { + case BRW_SWIZZLE_XXXX: + case BRW_SWIZZLE_YYYY: + case BRW_SWIZZLE_ZZZZ: + case BRW_SWIZZLE_WWWW: + case BRW_SWIZZLE_XXZZ: + case BRW_SWIZZLE_YYWW: + case BRW_SWIZZLE_XYXY: + case BRW_SWIZZLE_ZWZW: + /* These can be implemented as a single Align1 region on all + * platforms, so there's never a hazard between source and + * destination. C.f. fs_generator::generate_quad_swizzle(). + */ + return false; + default: + return !is_uniform(src[0]); + } default: /* The SIMD16 compressed instruction * @@ -5550,9 +5568,14 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: return MIN2(8, inst->exec_size); - case SHADER_OPCODE_QUAD_SWIZZLE: - return 8; - + case SHADER_OPCODE_QUAD_SWIZZLE: { + const unsigned swiz = inst->src[1].ud; + return (is_uniform(inst->src[0]) ? + get_fpu_lowered_simd_width(devinfo, inst) : + devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ? 8 : + swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 
4 : + get_fpu_lowered_simd_width(devinfo, inst)); + } case SHADER_OPCODE_MOV_INDIRECT: { /* From IVB and HSW PRMs: * diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index e0e9759b6b5..6e68c7a2836 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -478,6 +478,10 @@ class fs_generator struct brw_reg src, struct brw_reg idx); + void generate_quad_swizzle(const fs_inst *inst, + struct brw_reg dst, struct brw_reg src, + unsigned swiz); + bool patch_discard_jumps_to_fb_writes(); const struct brw_compiler *compiler; diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 08dd83dded7..84627e83132 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -582,6 +582,72 @@ fs_generator::generate_shuffle(fs_inst *inst, } } +void +fs_generator::generate_quad_swizzle(const fs_inst *inst, + struct brw_reg dst, struct brw_reg src, + unsigned swiz) +{ + /* Requires a quad. 
*/ + assert(inst->exec_size >= 4); + + if (src.file == BRW_IMMEDIATE_VALUE || + has_scalar_region(src)) { + /* The value is uniform across all channels */ + brw_MOV(p, dst, src); + + } else if (devinfo->gen < 11 && type_sz(src.type) == 4) { + /* This only works on 8-wide 32-bit values */ + assert(inst->exec_size == 8); + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.vstride == src.width + 1); + brw_set_default_access_mode(p, BRW_ALIGN_16); + struct brw_reg swiz_src = stride(src, 4, 4, 1); + swiz_src.swizzle = swiz; + brw_MOV(p, dst, swiz_src); + + } else { + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.vstride == src.width + 1); + const struct brw_reg src_0 = suboffset(src, BRW_GET_SWZ(swiz, 0)); + + switch (swiz) { + case BRW_SWIZZLE_XXXX: + case BRW_SWIZZLE_YYYY: + case BRW_SWIZZLE_ZZZZ: + case BRW_SWIZZLE_WWWW: + brw_MOV(p, dst, stride(src_0, 4, 4, 0)); + break; + + case BRW_SWIZZLE_XXZZ: + case BRW_SWIZZLE_YYWW: + brw_MOV(p, dst, stride(src_0, 2, 2, 0)); + break; + + case BRW_SWIZZLE_XYXY: + case BRW_SWIZZLE_ZWZW: + assert(inst->exec_size == 4); + brw_MOV(p, dst, stride(src_0, 0, 2, 1)); + break; + + default: + assert(inst->force_writemask_all); + brw_set_default_exec_size(p, cvt(inst->exec_size / 4) - 1); + + for (unsigned c = 0; c < 4; c++) { + brw_inst *insn = brw_MOV( + p, stride(suboffset(dst, c), + 4 * inst->dst.stride, 1, 4 * inst->dst.stride), + stride(suboffset(src, BRW_GET_SWZ(swiz, c)), 4, 1, 0)); + + brw_inst_set_no_dd_clear(devinfo, insn, c < 3); + brw_inst_set_no_dd_check(devinfo, insn, c > 0); + } + + break; + } + } +} + void fs_generator::generate_urb_read(fs_inst *inst, struct brw_reg dst, @@ -2303,23 +2369,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_QUAD_SWIZZLE: - /* This only works on 8-wide 32-bit values */ - assert(inst->exec_size == 8); - assert(type_sz(src[0].type) == 4); - assert(inst->force_writemask_all); assert(src[1].file == BRW_IMMEDIATE_VALUE); 
assert(src[1].type == BRW_REGISTER_TYPE_UD); - - if (src[0].file == BRW_IMMEDIATE_VALUE || - (src[0].vstride == 0 && src[0].hstride == 0)) { - /* The value is uniform across all channels */ - brw_MOV(p, dst, src[0]); - } else { - brw_set_default_access_mode(p, BRW_ALIGN_16); - struct brw_reg swiz_src = stride(src[0], 4, 4, 1); - swiz_src.swizzle = inst->src[1].ud; - brw_MOV(p, dst, swiz_src); - } + generate_quad_swizzle(inst, dst, src[0], src[1].ud); break; case SHADER_OPCODE_CLUSTER_BROADCAST: { From 7d5057bfe4ea630d18752a67d9dd69b90a17be66 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 7 Dec 2018 14:15:50 -0800 Subject: [PATCH 185/220] intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split. This seems to be a problem in combination with the lower_regioning pass introduced by a future commit, which can modify a SIMD-split instruction causing its execution size to become illegal again. A subsequent call to lower_simd_width() would hit this bug on a future platform. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga (cherry picked from commit bc781a0323d719634e29d82b5f14e22db943536e) --- src/intel/compiler/brw_fs.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 6a17a9a1ec1..f68c667a159 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5640,8 +5640,10 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i) static fs_reg emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i) { + assert(lbld.group() >= inst->group); + /* Specified channel group from the source region. 
*/ - const fs_reg src = horiz_offset(inst->src[i], lbld.group()); + const fs_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group); if (needs_src_copy(lbld, inst, i)) { /* Builder of the right width to perform the copy avoiding uninitialized @@ -5730,9 +5732,10 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after, { assert(lbld_before.dispatch_width() == lbld_after.dispatch_width()); assert(lbld_before.group() == lbld_after.group()); + assert(lbld_after.group() >= inst->group); /* Specified channel group from the destination region. */ - const fs_reg dst = horiz_offset(inst->dst, lbld_after.group()); + const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group); const unsigned dst_size = inst->size_written / inst->dst.component_size(inst->exec_size); From c44c83ddd27dfd9e82503ca12871a50312afa2f5 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 7 Dec 2018 14:05:52 -0800 Subject: [PATCH 186/220] intel/eu/gen7: Fix brw_MOV() with DF destination and strided source. I triggered this bug while prototyping code for a future platform on IVB. Could be a problem today though if a strided move is copy-propagated into a type-converting move with DF destination. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga (cherry picked from commit 464e79144f8090eb42b8994a983470628c248be0) --- src/intel/compiler/brw_eu_emit.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 4630b83b1a0..2618e9c2e93 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -925,8 +925,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) const struct gen_device_info *devinfo = p->devinfo; /* When converting F->DF on IVB/BYT, every odd source channel is ignored. - * To avoid the problems that causes, we use a <1,2,0> source region to read - * each element twice. 
+ * To avoid the problems that causes, we use an <X,2,0> source region to + * read each element twice. */ if (devinfo->gen == 7 && !devinfo->is_haswell && brw_get_default_access_mode(p) == BRW_ALIGN_1 && @@ -935,11 +935,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD) && !has_scalar_region(src0)) { - assert(src0.vstride == BRW_VERTICAL_STRIDE_4 && - src0.width == BRW_WIDTH_4 && - src0.hstride == BRW_HORIZONTAL_STRIDE_1); - - src0.vstride = BRW_VERTICAL_STRIDE_1; + assert(src0.vstride == src0.width + src0.hstride); + src0.vstride = src0.hstride; src0.width = BRW_WIDTH_2; src0.hstride = BRW_HORIZONTAL_STRIDE_0; } From 78d28da2092206d745f5070aa8b7263ad30124ad Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 7 Dec 2018 14:26:23 -0800 Subject: [PATCH 187/220] intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass. Currently the visitor attempts to enforce the regioning restrictions that apply to double-precision instructions on CHV/BXT at NIR-to-i965 translation time. It is possible though for the copy propagation pass to violate this restriction if a strided move is propagated into one of the affected instructions. I've only reproduced this issue on a future platform but it could affect CHV/BXT too under the right conditions.
Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga (cherry picked from commit c301f447ea8449804208e414f189c0571e4339a8) --- .../compiler/brw_fs_copy_propagation.cpp | 10 +++++++ src/intel/compiler/brw_ir_fs.h | 28 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index ab34b63748e..a76e0f3a6b5 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, if (stride > 4) return false; + /* Bail if the channels of the source need to be aligned to the byte offset + * of the corresponding channel of the destination, and the provided stride + * would break this restriction. + */ + if (has_dst_aligned_region_restriction(devinfo, inst) && + !(type_sz(inst->src[arg].type) * stride == + type_sz(inst->dst.type) * inst->dst.stride || + stride == 0)) + return false; + /* 3-source instructions can only be Align16, which restricts what strides * they can take. They can only take a stride of 1 (the usual case), or 0 * with a special "repctrl" bit. But the repctrl bit doesn't work for diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 07e7224e0f8..95b069a2e02 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst) return type_sz(get_exec_type(inst)); } +/** + * Return whether the following regioning restriction applies to the specified + * instruction. From the Cherryview PRM Vol 7. "Register Region + * Restrictions": + * + * "When source or destination datatype is 64b or operation is integer DWord + * multiply, regioning in Align1 must follow these rules: + * + * 1. Source and Destination horizontal stride must be aligned to the same qword. + * 2. 
Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. + * 3. Source and Destination offset must be the same, except the case of + * scalar source." + */ +static inline bool +has_dst_aligned_region_restriction(const gen_device_info *devinfo, + const fs_inst *inst) +{ + const brw_reg_type exec_type = get_exec_type(inst); + const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) && + (inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD); + + if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 || + (type_sz(exec_type) == 4 && is_int_multiply)) + return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo); + else + return false; +} + #endif From 37a8e85fa4accae9bba35d2a08f619ae2dc5ca8c Mon Sep 17 00:00:00 2001 From: Andres Gomez Date: Mon, 7 Jan 2019 16:11:44 +0200 Subject: [PATCH 188/220] glsl/linker: specify proper direction in location aliasing error The check for location aliasing was always assuming output variables but this validation is also called for input variables.
Fixes: e2abb75b0e4 ("glsl/linker: validate explicit locations for SSO programs") Cc: Iago Toral Quiroga Signed-off-by: Andres Gomez Reviewed-by: Timothy Arceri Reviewed-by: Iago Toral Quiroga (cherry picked from commit 428164d87f8dc1d378236b4913538803653770c6) --- src/compiler/glsl/link_varyings.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 52e493cb599..3969c0120b3 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -481,9 +481,10 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4], /* Component aliasing is not alloed */ if (comp >= component && comp < last_comp) { linker_error(prog, - "%s shader has multiple outputs explicitly " + "%s shader has multiple %sputs explicitly " "assigned to location %d and component %d\n", _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? "in" : "out", location, comp); return false; } else { @@ -502,10 +503,12 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4], if (info->interpolation != interpolation) { linker_error(prog, - "%s shader has multiple outputs at explicit " + "%s shader has multiple %sputs at explicit " "location %u with different interpolation " "settings\n", - _mesa_shader_stage_to_string(stage), location); + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? + "in" : "out", location); return false; } @@ -513,9 +516,11 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4], info->sample != sample || info->patch != patch) { linker_error(prog, - "%s shader has multiple outputs at explicit " + "%s shader has multiple %sputs at explicit " "location %u with different aux storage\n", - _mesa_shader_stage_to_string(stage), location); + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? 
+ "in" : "out", location); return false; } } From 79c865f2310df6b843702bc47d3393d125801701 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 17 Oct 2018 11:34:32 -0500 Subject: [PATCH 189/220] intel/peephole_ffma: Fix swizzle propagation The num_components value passed into get_mul_for_src is used to only compose the parts of the swizzle that we know will be used so we don't compose invalid swizzle components. However, we had a bug where we passed the number of components of the add all the way through. For the given source, we need the number of components read from that source. In the case where we have a narrow add, say 2 components, that is sourced from a chain of wider instructions, we may not compose all the swizzles. All we really need to do is pass through the right number of components at each level. Fixes: 2231cf0ba3a "nir: Fix output swizzle in get_mul_for_src" Reviewed-by: Ian Romanick (cherry picked from commit 1ede463b6e66eb0a6df5250261810b6985c35eb9) --- src/intel/compiler/brw_nir_opt_peephole_ffma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_nir_opt_peephole_ffma.c b/src/intel/compiler/brw_nir_opt_peephole_ffma.c index cc225e1847b..7271bdbca43 100644 --- a/src/intel/compiler/brw_nir_opt_peephole_ffma.c +++ b/src/intel/compiler/brw_nir_opt_peephole_ffma.c @@ -68,7 +68,7 @@ are_all_uses_fadd(nir_ssa_def *def) } static nir_alu_instr * -get_mul_for_src(nir_alu_src *src, int num_components, +get_mul_for_src(nir_alu_src *src, unsigned num_components, uint8_t swizzle[4], bool *negate, bool *abs) { uint8_t swizzle_tmp[4]; @@ -93,16 +93,19 @@ get_mul_for_src(nir_alu_src *src, int num_components, switch (alu->op) { case nir_op_imov: case nir_op_fmov: - alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, + swizzle, negate, abs); break; case nir_op_fneg: - alu = get_mul_for_src(&alu->src[0], 
num_components, swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, + swizzle, negate, abs); *negate = !*negate; break; case nir_op_fabs: - alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, + swizzle, negate, abs); *negate = false; *abs = true; break; From 0d5511f350600018075241b647429f34d18ffb17 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 7 Dec 2018 09:44:55 +0100 Subject: [PATCH 190/220] nv50/ir: fix use-after-free in ConstantFolding::visit opnd() might delete the passed in instruction, but it's used through i->srcExists() later in visit v2: use continue instead return v3: use brackets for the outer if/else chain Signed-off-by: Karol Herbst Reviewed-by: Ilia Mirkin (cherry picked from commit 77944fb2b7c9b40539084f600b5df4fff18e9640) --- .../nouveau/codegen/nv50_ir_peephole.cpp | 82 +++++++++++-------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 8767e5efb99..ca0192a9cc0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -370,7 +370,8 @@ class ConstantFolding : public Pass void expr(Instruction *, ImmediateValue&, ImmediateValue&); void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&); - void opnd(Instruction *, ImmediateValue&, int s); + /* true if i was deleted */ + bool opnd(Instruction *i, ImmediateValue&, int s); void opnd3(Instruction *, ImmediateValue&); void unary(Instruction *, const ImmediateValue&); @@ -414,18 +415,21 @@ ConstantFolding::visit(BasicBlock *bb) if (i->srcExists(2) && i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1) && - i->src(2).getImmediate(src2)) + i->src(2).getImmediate(src2)) { expr(i, src0, src1, src2); - else + } else if 
(i->srcExists(1) && - i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) + i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) { expr(i, src0, src1); - else - if (i->srcExists(0) && i->src(0).getImmediate(src0)) - opnd(i, src0, 0); - else - if (i->srcExists(1) && i->src(1).getImmediate(src1)) - opnd(i, src1, 1); + } else + if (i->srcExists(0) && i->src(0).getImmediate(src0)) { + if (opnd(i, src0, 0)) + continue; + } else + if (i->srcExists(1) && i->src(1).getImmediate(src1)) { + if (opnd(i, src1, 1)) + continue; + } if (i->srcExists(2) && i->src(2).getImmediate(src2)) opnd3(i, src2); } @@ -1010,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value * return false; } -void +bool ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) { const int t = !s; const operation op = i->op; Instruction *newi = i; + bool deleted = false; switch (i->op) { case OP_SPLIT: { @@ -1035,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) val >>= bitsize; } delete_Instruction(prog, i); + deleted = true; break; } case OP_MUL: @@ -1049,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0), TYPE_S32, i->getSrc(t), bld.mkImm(0)); delete_Instruction(prog, i); + deleted = true; } else if (imm0.isInteger(0) || imm0.isInteger(1)) { // The high bits can't be set in this case (either mul by 0 or // unsigned by 1) @@ -1099,8 +1106,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) if (!isFloatType(i->dType) && !i->src(t).mod) { bld.setPosition(i, false); int64_t b = typeSizeof(i->dType) == 8 ? 
imm0.reg.data.s64 : imm0.reg.data.s32; - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) { delete_Instruction(prog, i); + deleted = true; + } } else if (i->postFactor && i->sType == TYPE_F32) { /* Can't emit a postfactor with an immediate, have to fold it in */ @@ -1136,8 +1145,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) { bld.setPosition(i, false); int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32; - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) { delete_Instruction(prog, i); + deleted = true; + } } break; case OP_SUB: @@ -1207,6 +1218,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); delete_Instruction(prog, i); + deleted = true; } else if (imm0.reg.data.s32 == -1) { i->op = OP_NEG; @@ -1239,6 +1251,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); delete_Instruction(prog, i); + deleted = true; } break; @@ -1270,6 +1283,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2); delete_Instruction(prog, i); + deleted = true; } } else if (s == 1) { // In this case, we still want the optimized lowering that we get @@ -1286,6 +1300,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) newi->src(1).mod = Modifier(NV50_IR_MOD_NEG); delete_Instruction(prog, i); + deleted = true; } break; @@ -1298,7 +1313,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); CondCode cc, ccZ; if (imm0.reg.data.u32 != 0 || !si) - return; + return false; cc = 
si->setCond; ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); // We do everything assuming var (cmp) 0, reverse the condition if 0 is @@ -1324,7 +1339,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case CC_GT: break; // bool > 0 -- bool case CC_NE: break; // bool != 0 -- bool default: - return; + return false; } // Update the condition of this SET to be identical to the origin set, @@ -1359,13 +1374,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) } else if (src->asCmp()) { CmpInstruction *cmp = src->asCmp(); if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1) - return; + return false; if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32)) - return; + return false; if (imm0.reg.data.f32 != 1.0) - return; + return false; if (cmp->dType != TYPE_U32) - return; + return false; cmp->dType = TYPE_F32; if (i->src(t).mod != Modifier(0)) { @@ -1432,13 +1447,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case OP_MUL: int muls; if (isFloatType(si->dType)) - return; + return false; if (si->src(1).getImmediate(imm1)) muls = 1; else if (si->src(0).getImmediate(imm1)) muls = 0; else - return; + return false; bld.setPosition(i, false); i->op = OP_MUL; @@ -1449,15 +1464,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case OP_ADD: int adds; if (isFloatType(si->dType)) - return; + return false; if (si->op != OP_SUB && si->src(0).getImmediate(imm1)) adds = 0; else if (si->src(1).getImmediate(imm1)) adds = 1; else - return; + return false; if (si->src(!adds).mod != Modifier(0)) - return; + return false; // SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z)) // This is more operations, but if one of x, y is an immediate, then @@ -1472,7 +1487,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) bld.mkImm(imm0.reg.data.u32))); break; default: - return; + return false; } } break; @@ -1497,7 +1512,7 @@ ConstantFolding::opnd(Instruction *i, 
ImmediateValue &imm0, int s) case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break; case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break; default: - return; + return false; } if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0) res = 31 - res; @@ -1523,11 +1538,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) // TODO: handle 64-bit values properly if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8) - return; + return false; // TODO: handle single byte/word extractions if (i->subOp) - return; + return false; bld.setPosition(i, true); /* make sure bld is init'ed */ @@ -1564,7 +1579,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) CLAMP(imm0.reg.data.u16, umin, umax) : \ imm0.reg.data.u16; \ break; \ - default: return; \ + default: return false; \ } \ i->setSrc(0, bld.mkImm(res.data.dst)); \ break @@ -1591,7 +1606,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; default: - return; + return false; } i->setSrc(0, bld.mkImm(res.data.f32)); break; @@ -1612,12 +1627,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break; case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break; default: - return; + return false; } i->setSrc(0, bld.mkImm(res.data.f64)); break; default: - return; + return false; } #undef CASE @@ -1628,7 +1643,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) break; } default: - return; + return false; } // This can get left behind some of the optimizations which simplify @@ -1643,6 +1658,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) if (newi->op != op) foldCount++; + return deleted; } // ============================================================================= From 
2a9073301fc9c5ee2ce735f5345bb454bd5bebb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 5 Dec 2018 13:28:03 +0100 Subject: [PATCH 191/220] meson: link LLVM 'native' component when LLVM is available Linking against LLVM built with BUILD_SHARED_LIBS fails otherwise, as the component is required for the draw module. Reviewed-by: Dylan Baker (cherry picked from commit 4275cae95c8cb217d6164dfeeafae7b56484b13d) --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 74673e5af9c..5a20e1ea30d 100644 --- a/meson.build +++ b/meson.build @@ -1173,7 +1173,7 @@ endif llvm_modules = ['bitwriter', 'engine', 'mcdisassembler', 'mcjit'] llvm_optional_modules = [] if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 - llvm_modules += ['amdgpu', 'bitreader', 'ipo'] + llvm_modules += ['amdgpu', 'native', 'bitreader', 'ipo'] if with_gallium_r600 llvm_modules += 'asmparser' endif From f5e752c4152b9924d780e60f73c7afd36c77f5f5 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 17 Dec 2018 15:37:05 +0000 Subject: [PATCH 192/220] ac: split 16-bit ssbo loads that may not be dword aligned This ends up refactoring visit_load_buffer() a little. Fixes: 7e7ee826982 ('ac: add support for 16bit buffer loads') Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108114 Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset (cherry picked from commit bde9f482de69528db5ccf5dd6bbfd8359adfbb19) Note: this is a backport of the above commit provided by Rhys. 
--- src/amd/common/ac_llvm_build.c | 8 ++-- src/amd/common/ac_nir_to_llvm.c | 80 ++++++++++++++++----------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 1392ec0f238..8953da7f18d 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2882,9 +2882,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, if (count == num_components) return value; - LLVMValueRef masks[] = { - ctx->i32_0, ctx->i32_1, - LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)}; + LLVMValueRef masks[MAX2(count, 2)]; + masks[0] = ctx->i32_0; + masks[1] = ctx->i32_1; + for (unsigned i = 2; i < count; i++) + masks[i] = LLVMConstInt(ctx->i32, i, false); if (count == 1) return LLVMBuildExtractElement(ctx->builder, value, masks[0], diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 836f518e047..827cb5d85a8 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1622,37 +1622,45 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - LLVMValueRef results[2]; - int load_bytes; int elem_size_bytes = instr->dest.ssa.bit_size / 8; int num_components = instr->num_components; - int num_bytes = num_components * elem_size_bytes; enum gl_access_qualifier access = nir_intrinsic_access(instr); LLVMValueRef glc = ctx->ac.i1false; if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) glc = ctx->ac.i1true; - for (int i = 0; i < num_bytes; i += load_bytes) { - load_bytes = MIN2(num_bytes - i, 16); - const char *load_name; - LLVMTypeRef data_type; - LLVMValueRef offset = get_src(ctx, instr->src[1]); - LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false); - LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, - get_src(ctx, instr->src[0]), false); - LLVMValueRef vindex = 
ctx->ac.i32_0; + LLVMValueRef offset = get_src(ctx, instr->src[1]); + LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, + get_src(ctx, instr->src[0]), false); + LLVMValueRef vindex = ctx->ac.i32_0; + + LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa); + LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type; - int idx = i ? 1 : 0; + LLVMValueRef results[4]; + for (int i = 0; i < num_components;) { + int num_elems = num_components - i; + if (elem_size_bytes < 4) + num_elems = 1; + if (num_elems * elem_size_bytes > 16) + num_elems = 16 / elem_size_bytes; + int load_bytes = num_elems * elem_size_bytes; + + LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false); + + LLVMValueRef ret; if (load_bytes == 2) { - results[idx] = ac_build_tbuffer_load_short(&ctx->ac, - rsrc, - vindex, - offset, - ctx->ac.i32_0, - immoffset, - glc); + ret = ac_build_tbuffer_load_short(&ctx->ac, + rsrc, + vindex, + offset, + ctx->ac.i32_0, + immoffset, + glc); } else { + const char *load_name; + LLVMTypeRef data_type; switch (load_bytes) { case 16: case 12: @@ -1678,33 +1686,23 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, glc, ctx->ac.i1false, }; - results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); - unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes; - LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems); - results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, ""); + ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); } - } - assume(results[0]); - LLVMValueRef ret = results[0]; - if (num_bytes > 16 || num_components == 3) { - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), - }; + LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, 
ac_get_type_size(LLVMTypeOf(ret))); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, ""); + ret = ac_trim_vector(&ctx->ac, ret, load_bytes); - if (num_bytes > 16 && num_components == 3) { - /* we end up with a v2i64 and i64 but shuffle fails on that */ - results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2); - } + LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, ""); - LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0], - results[num_bytes > 16 ? 1 : 0], swizzle, ""); + for (unsigned j = 0; j < num_elems; j++) { + results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), ""); + } + i += num_elems; } - return LLVMBuildBitCast(ctx->ac.builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); + return ac_build_gather_values(&ctx->ac, results, num_components); } static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, From 0fabb09696ece097b99093bf4b35c07df2792838 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 17 Jan 2019 11:15:36 +0000 Subject: [PATCH 193/220] Update version to 18.3.2 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 217b5c6e8ea..a19b2d9a021 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.1 +18.3.2 From 95a3b709c0d4618d900f8b8bed429ee4f786fab2 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 17 Jan 2019 11:24:36 +0000 Subject: [PATCH 194/220] docs: add release notes for 18.3.2 Signed-off-by: Emil Velikov --- docs/relnotes/18.3.2.html | 264 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 docs/relnotes/18.3.2.html diff --git a/docs/relnotes/18.3.2.html b/docs/relnotes/18.3.2.html new file mode 100644 index 00000000000..f219d720301 --- /dev/null +++ b/docs/relnotes/18.3.2.html @@ -0,0 +1,264 @@ + + + + + Mesa 
Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.3.2 Release Notes / January 17, 2019

    + +

    +Mesa 18.3.2 is a bug fix release which fixes bugs found since the 18.3.1 release. +

    +

    +Mesa 18.3.2 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 106595 - [RADV] Rendering distortions only when MSAA is enabled
    • + +
    • Bug 107728 - Wrong background in Sascha Willem's Multisampling Demo
    • + +
    • Bug 108114 - [vulkancts] new VK_KHR_16bit_storage tests fail.
    • + +
    • Bug 108116 - [vulkancts] stencil partial clear tests fail.
    • + +
    • Bug 108624 - [regression][bisected] "nir: Copy propagation between blocks" regression
    • + +
    • Bug 108910 - Vkd3d test failure test_multisample_array_texture()
    • + +
    • Bug 108911 - Vkd3d test failure test_clear_render_target_view()
    • + +
    • Bug 108943 - Build fails on ppc64le with meson
    • + +
    • Bug 109072 - GPU hang in blender 2.80
    • + +
    • Bug 109081 - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests
    • + +
    • Bug 109151 - [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.
    • + +
    • Bug 109202 - nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti
    • + +
    • Bug 109204 - [regression, bisected] retroarch's crt-royale shader crash radv
    • + +
    + + +

    Changes

    + +

    Alex Deucher (3):

    +
      +
    • pci_ids: add new vega10 pci ids
    • +
    • pci_ids: add new vega20 pci id
    • +
    • pci_ids: add new VegaM pci id
    • +
    + +

    Alexander von Gluck IV (1):

    +
      +
    • egl/haiku: Fix reference to disp vs dpy
    • +
    + +

    Andres Gomez (2):

    +
      +
    • glsl: correct typo in GLSL compilation error message
    • +
    • glsl/linker: specify proper direction in location aliasing error
    • +
    + +

    Axel Davy (3):

    +
      +
    • st/nine: Fix volumetexture dtor on ctor failure
    • +
    • st/nine: Bind src not dst in nine_context_box_upload
    • +
    • st/nine: Add src reference to nine_context_range_upload
    • +
    + +

    Bas Nieuwenhuizen (5):

    +
      +
    • radv: Do a cache flush if needed before reading predicates.
    • +
    • radv: Implement buffer stores with less than 4 components.
    • +
    • anv/android: Do not reject storage images.
    • +
    • radv: Fix rasterization precision bits.
    • +
    • spirv: Fix matrix parameters in function calls.
    • +
    + +

    Caio Marcelo de Oliveira Filho (3):

    +
      +
    • nir: properly clear the entry sources in copy_prop_vars
    • +
    • nir: properly find the entry to keep in copy_prop_vars
    • +
    • nir: remove dead code from copy_prop_vars
    • +
    + +

    Dave Airlie (2):

    +
      +
    • radv/xfb: fix counter buffer bounds checks.
    • +
    • virgl/vtest: fix front buffer flush with protocol version 0.
    • +
    + +

    Dylan Baker (6):

    +
      +
    • meson: Fix ppc64 little endian detection
    • +
    • meson: Add support for gnu hurd
    • +
    • meson: Add toggle for glx-direct
    • +
    • meson: Override C++ standard to gnu++11 when building with altivec on ppc64
    • +
    • meson: Error out if building nouveau and using LLVM without rtti
    • +
    • autotools: Remove tegra vdpau driver
    • +
    + +

    Emil Velikov (12):

    +
      +
    • docs: add sha256 checksums for 18.3.1
    • +
    • bin/get-pick-list.sh: rework handling of sha nominations
    • +
    • bin/get-pick-list.sh: warn when commit lists invalid sha
    • +
    • cherry-ignore: meson: libfreedreno depends upon libdrm (for fence support)
    • +
    • glx: mandate xf86vidmode only for "drm" dri platforms
    • +
    • meson: don't require glx/egl/gbm with gallium drivers
    • +
    • pipe-loader: meson: reference correct library
    • +
    • TODO: glx: meson: build dri based glx tests, only with -Dglx=dri
    • +
    • glx: meson: drop includes from a link-only library
    • +
    • glx: meson: wire up the dispatch-index-check test
    • +
    • glx/test: meson: assorted include fixes
    • +
    • Update version to 18.3.2
    • +
    + +

    Eric Anholt (6):

    +
      +
    • v3d: Fix a leak of the transfer helper on screen destroy.
    • +
    • vc4: Fix a leak of the transfer helper on screen destroy.
    • +
    • v3d: Fix a leak of the disassembled instruction string during debug dumps.
    • +
    • v3d: Make sure that a thrsw doesn't split a multop from its umul24.
    • +
    • v3d: Add missing flagging of SYNCB as a TSY op.
    • +
    • gallium/ttn: Fix setup of outputs_written.
    • +
    + +

    Erik Faye-Lund (2):

    +
      +
    • virgl: wrap vertex element state in a struct
    • +
    • virgl: work around bad assumptions in virglrenderer
    • +
    + +

    Francisco Jerez (5):

    +
      +
    • intel/fs: Handle source modifiers in lower_integer_multiplication().
    • +
    • intel/fs: Implement quad swizzles on ICL+.
    • +
    • intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.
    • +
    • intel/eu/gen7: Fix brw_MOV() with DF destination and strided source.
    • +
    • intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.
    • +
    + +

    Ian Romanick (2):

    +
      +
    • i965/vec4/dce: Don't narrow the write mask if the flags are used
    • +
    • Revert "nir/lower_indirect: Bail early if modes == 0"
    • +
    + +

    Jan Vesely (1):

    +
      +
    • clover: Fix build after clang r348827
    • +
    + +

    Jason Ekstrand (6):

    +
      +
    • nir/constant_folding: Fix source bit size logic
    • +
    • intel/blorp: Be more conservative about copying clear colors
    • +
    • spirv: Handle any bit size in vector_insert/extract
    • +
    • anv/apply_pipeline_layout: Set the cursor in lower_res_reindex_intrinsic
    • +
    • spirv: Sign-extend array indices
    • +
    • intel/peephole_ffma: Fix swizzle propagation
    • +
    + +

    Karol Herbst (1):

    +
      +
    • nv50/ir: fix use-after-free in ConstantFolding::visit
    • +
    + +

    Kirill Burtsev (1):

    +
      +
    • loader: free error state, when checking the drawable type
    • +
    + +

    Lionel Landwerlin (5):

    +
      +
    • anv: don't do partial resolve on layer > 0
    • +
    • i965: include draw_params/derived_draw_params for VF cache workaround
    • +
    • i965: add CS stall on VF invalidation workaround
    • +
    • anv: explicitly specify format for blorp ccs/mcs op
    • +
    • anv: flush fast clear colors into compressed surfaces
    • +
    + +

    Marek Olšák (1):

    +
      +
    • st/mesa: don't leak pipe_surface if pipe_context is not current
    • +
    + +

    Mario Kleiner (1):

    +
      +
    • radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.
    • +
    + +

    Nicolai Hähnle (1):

    +
      +
    • meson: link LLVM 'native' component when LLVM is available
    • +
    + +

    Rhys Perry (3):

    +
      +
    • radv: don't set surf_index for stencil-only images
    • +
    • ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics
    • +
    • ac: split 16-bit ssbo loads that may not be dword aligned
    • +
    + +

    Rob Clark (2):

    +
      +
    • freedreno/drm: fix memory leak
    • +
    • mesa/st/nir: fix missing nir_compact_varyings
    • +
    + +

    Samuel Pitoiset (1):

    +
      +
    • radv: switch on EOP when primitive restart is enabled with triangle strips
    • +
    + +

    Timothy Arceri (2):

    +
      +
    • tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl()
    • +
    • tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl()
    • +
    + +

    Vinson Lee (2):

    +
      +
    • meson: Fix typo.
    • +
    • meson: Fix libsensors detection.
    • +
    + + + +
    + + From 8320a07221a342ea56528a1839ce5b33c8226b36 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 17 Jan 2019 11:30:49 +0000 Subject: [PATCH 195/220] docs: add sha256 checksums for 18.3.2 Signed-off-by: Emil Velikov --- docs/relnotes/18.3.2.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/18.3.2.html b/docs/relnotes/18.3.2.html index f219d720301..594b42cdf4e 100644 --- a/docs/relnotes/18.3.2.html +++ b/docs/relnotes/18.3.2.html @@ -31,7 +31,8 @@

    Mesa 18.3.2 Release Notes / January 17, 2019

    SHA256 checksums

    -TBD
    +1cde4fafd40cd1ad4ee3a13b364b7a0175a08b7afdd127fb46f918c1e1dfd4b0  mesa-18.3.2.tar.gz
    +f7ce7181c07b6d8e0132da879af1729523a6c8aa87f79a9d59dfd064024cfb35  mesa-18.3.2.tar.xz
     
    From bd92a16de7421c4170c1cc4db75950113e2b283f Mon Sep 17 00:00:00 2001 From: Kevin Strasser Date: Wed, 22 Mar 2017 03:38:07 -0700 Subject: [PATCH 196/220] AndroidIA: REVERTME: Enable sampling for imported dma_buf images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HWC Vulkan backend needs to be able to sample from source images, so for now enable that for all users of vkCreateDmaBufImageINTEL. We can revert this patch once we land support for VK_MESAX_external_image_dma_buf, which allows the application to fill the 'usage' field. Jira: IAHWC-40 Test: Enable Vulkan backend of IA-Hardware-Composer and try kmscube. The cube should be visible and animated, but at this time there is severe flickering. Signed-off-by: Kevin Strasser Acked-by: Tapani Pälli --- src/intel/vulkan/anv_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index ed1bc096c66..f6b8ded20a9 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -64,7 +64,8 @@ VkResult anv_CreateDmaBufImageINTEL( .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, .flags = 0, }}, pAllocator, &image_h); From c01ae2dfe2a137b0124a07b8216622f4133516be Mon Sep 17 00:00:00 2001 From: Yogesh Marathe Date: Fri, 8 Dec 2017 13:02:39 -0800 Subject: [PATCH 197/220] AndroidIA: enable O3 for Mesa compilation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improves performance of graphics tests significantly. 
Signed-off-by: Yogesh Marathe Acked-by: Tapani Pälli --- Android.common.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/Android.common.mk b/Android.common.mk index aa1b266a393..e9e03d4de80 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -31,6 +31,7 @@ LOCAL_C_INCLUDES += \ MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION) LOCAL_CFLAGS += \ + -O3 \ -Wno-error \ -Wno-unused-parameter \ -Wno-pointer-arith \ From a83b9439d9f106fae78294d8ed7d78bd6f4fa2de Mon Sep 17 00:00:00 2001 From: Munish Bhardwaj Date: Sun, 1 Jul 2018 03:12:19 -0700 Subject: [PATCH 198/220] AndroidIA: Adds support for gralloc1.0. --- src/egl/Android.mk | 4 + src/egl/drivers/dri2/egl_dri2.h | 13 +- src/egl/drivers/dri2/platform_android.c | 189 +++++++++++++++++++++--- 3 files changed, 186 insertions(+), 20 deletions(-) diff --git a/src/egl/Android.mk b/src/egl/Android.mk index 42b391e6d86..3317e1d2e9b 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -64,6 +64,10 @@ ifeq ($(BOARD_USES_DRM_GRALLOC),true) LOCAL_SHARED_LIBRARIES += libgralloc_drm endif +ifeq ($(strip $(BOARD_USES_GRALLOC1)),true) +LOCAL_CFLAGS += -DHAVE_GRALLOC1 +endif + ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),) LOCAL_SHARED_LIBRARIES += libnativewindow endif diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 4abe1ba1952..3e5a567472c 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -69,6 +69,10 @@ struct zwp_linux_dmabuf_v1; #include #endif /* HAVE_ANDROID_PLATFORM */ +#ifdef HAVE_GRALLOC1 +#include +#endif + #include "eglconfig.h" #include "eglcontext.h" #include "egldevice.h" @@ -237,7 +241,14 @@ struct dri2_egl_display #endif #ifdef HAVE_ANDROID_PLATFORM - const gralloc_module_t *gralloc; + const hw_module_t *gralloc; + uint16_t gralloc_version; +#ifdef HAVE_GRALLOC1 + gralloc1_device_t *gralloc1_dvc; + GRALLOC1_PFN_LOCK_FLEX pfn_lockflex; + GRALLOC1_PFN_GET_FORMAT pfn_getFormat; + GRALLOC1_PFN_UNLOCK pfn_unlock; +#endif 
#endif bool is_render_node; diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index 1e93ab4d4d2..a737895c9f4 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -49,6 +49,8 @@ #define ALIGN(val, align) (((val) + (align) - 1) & ~((align) - 1)) +#define GRALLOC_DRM_GET_FORMAT 1 + struct droid_yuv_format { /* Lookup keys */ int native; /* HAL_PIXEL_FORMAT_ */ @@ -59,6 +61,14 @@ struct droid_yuv_format { int fourcc; /* __DRI_IMAGE_FOURCC_ */ }; +/* This enumeration can be deleted if Android defined it in + * system/core/include/system/graphics.h + */ +enum { + HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100, + HAL_PIXEL_FORMAT_NV12 = 0x10F, +}; + /* The following table is used to look up a DRI image FourCC based * on native format and information contained in android_ycbcr struct. */ static const struct droid_yuv_format droid_yuv_formats[] = { @@ -67,6 +77,8 @@ static const struct droid_yuv_format droid_yuv_formats[] = { { HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 1, __DRI_IMAGE_FOURCC_YUV420 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, { HAL_PIXEL_FORMAT_YV12, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, + { HAL_PIXEL_FORMAT_NV12, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, /* HACK: See droid_create_image_from_prime_fd() and * https://issuetracker.google.com/32077885. 
*/ { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, @@ -248,6 +260,51 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf) return EGL_TRUE; } +static int +droid_resolve_format(struct dri2_egl_display *dri2_dpy, + struct ANativeWindowBuffer *buf) +{ + int format = -1; + int ret; + + if (buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return buf->format; +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + + if (!dri2_dpy->pfn_getFormat) { + _eglLog(_EGL_WARNING, "Gralloc does not support getFormat"); + return -1; + } + ret = dri2_dpy->pfn_getFormat(dri2_dpy->gralloc1_dvc, buf->handle, + &format); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->getFormat failed: %d", ret); + return -1; + } + } else { +#else + const gralloc_module_t *gralloc0; + gralloc0 = dri2_dpy->gralloc; + + if (!gralloc0->perform) { + _eglLog(_EGL_WARNING, "gralloc->perform not supported"); + return -1; + } + ret = gralloc0->perform(dri2_dpy->gralloc, + GRALLOC_DRM_GET_FORMAT, + buf->handle, &format); + if (ret){ + _eglLog(_EGL_WARNING, "gralloc->perform failed with error: %d", ret); + return -1; + } +#endif +#ifdef HAVE_GRALLOC1 + } +#endif + return format; +} + static EGLBoolean droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf) { @@ -730,6 +787,31 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) return EGL_TRUE; } +static int get_ycbcr_from_flexlayout(struct android_flex_layout *outFlexLayout, struct android_ycbcr *ycbcr) +{ + + for( int i = 0; i < outFlexLayout->num_planes; i++) { + switch(outFlexLayout->planes[i].component){ + case FLEX_COMPONENT_Y: + ycbcr->y = outFlexLayout->planes[i].top_left; + ycbcr->ystride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cb: + ycbcr->cb = outFlexLayout->planes[i].top_left; + ycbcr->cstride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cr: + ycbcr->cr = 
outFlexLayout->planes[i].top_left; + ycbcr->chroma_step = outFlexLayout->planes[i].h_increment; + break; + default: + _eglLog(_EGL_WARNING,"unknown component 0x%x", __func__, outFlexLayout->planes[i].component); + break; + } + } + return 0; +} + #if ANDROID_API_LEVEL >= 23 static EGLBoolean droid_set_damage_region(_EGLDriver *drv, @@ -773,30 +855,70 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct android_ycbcr ycbcr; +#ifdef HAVE_GRALLOC1 + struct android_flex_layout outFlexLayout; + gralloc1_rect_t accessRegion; +#endif size_t offsets[3]; size_t pitches[3]; int is_ycrcb; int fourcc; int ret; - if (!dri2_dpy->gralloc->lock_ycbcr) { - _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } memset(&ycbcr, 0, sizeof(ycbcr)); - ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle, - 0, 0, 0, 0, 0, &ycbcr); - if (ret) { - /* HACK: See droid_create_image_from_prime_fd() and - * https://issuetracker.google.com/32077885.*/ - if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) - return NULL; - - _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret); - return NULL; - } - dri2_dpy->gralloc->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + if (!dri2_dpy->pfn_lockflex) { + _eglLog(_EGL_WARNING, "Gralloc does not support lockflex"); + return NULL; + } + + ret = dri2_dpy->pfn_lockflex(dri2_dpy->gralloc1_dvc, buf->handle, + 0, 0, &accessRegion, &outFlexLayout, -1); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + ret = get_ycbcr_from_flexlayout(&outFlexLayout, &ycbcr); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + int 
outReleaseFence = 0; + dri2_dpy->pfn_unlock(dri2_dpy->gralloc1_dvc, buf->handle, &outReleaseFence); + } else { +#endif + const gralloc_module_t *gralloc0; + gralloc0 = dri2_dpy->gralloc; + + if (!gralloc0->lock_ycbcr) { + _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + return NULL; + } + + ret = gralloc0->lock_ycbcr(gralloc0, buf->handle, + 0, 0, 0, 0, 0, &ycbcr); + + if (ret) { + /* HACK: See droid_create_image_from_prime_fd() and + * https://issuetracker.google.com/32077885.*/ + if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return NULL; + + _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret); + return NULL; + } + + gralloc0->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + } +#endif /* When lock_ycbcr's usage argument contains no SW_READ/WRITE flags * it will return the .y/.cb/.cr pointers based on a NULL pointer, @@ -821,10 +943,10 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, /* .chroma_step is the byte distance between the same chroma channel * values of subsequent pixels, assumed to be the same for Cb and Cr. 
*/ - fourcc = get_fourcc_yuv(buf->format, is_ycrcb, ycbcr.chroma_step); + fourcc = get_fourcc_yuv(format, is_ycrcb, ycbcr.chroma_step); if (fourcc == -1) { _eglLog(_EGL_WARNING, "unsupported YUV format, native = %x, is_ycrcb = %d, chroma_step = %d", - buf->format, is_ycrcb, ycbcr.chroma_step); + format, is_ycrcb, ycbcr.chroma_step); return NULL; } @@ -870,9 +992,16 @@ static _EGLImage * droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, struct ANativeWindowBuffer *buf, int fd) { + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); unsigned int pitch; - if (is_yuv(buf->format)) { + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglLog(_EGL_WARNING, "Could not resolve buffer format"); + return NULL; + } + + if (is_yuv(format)) { _EGLImage *image; image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd); @@ -887,13 +1016,13 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, return image; } - const int fourcc = get_fourcc(buf->format); + const int fourcc = get_fourcc(format); if (fourcc == -1) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } - pitch = buf->stride * get_format_bpp(buf->format); + pitch = buf->stride * get_format_bpp(format); if (pitch == 0) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; @@ -1529,6 +1658,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) _EGLDevice *dev; struct dri2_egl_display *dri2_dpy; const char *err; + hw_device_t *device; int ret; /* Not supported yet */ @@ -1548,6 +1678,27 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) err = "DRI2: failed to get gralloc module"; goto cleanup; } + dri2_dpy->gralloc_version = dri2_dpy->gralloc->module_api_version; +#ifdef HAVE_GRALLOC1 + if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + ret = dri2_dpy->gralloc->methods->open(dri2_dpy->gralloc, GRALLOC_HARDWARE_MODULE_ID, &device); + if (ret) { + err = "Failed to open 
hw_device device"; + goto cleanup; + } else { + dri2_dpy->gralloc1_dvc = (gralloc1_device_t *)device; + + dri2_dpy->pfn_lockflex = (GRALLOC1_PFN_LOCK_FLEX)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_LOCK_FLEX); + + dri2_dpy->pfn_getFormat = (GRALLOC1_PFN_GET_FORMAT)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_GET_FORMAT); + + dri2_dpy->pfn_unlock = (GRALLOC1_PFN_UNLOCK)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_UNLOCK); + } + } +#endif disp->DriverData = (void *) dri2_dpy; From 42cf9b72b80d6aeabe282460e50b60af2280f8ae Mon Sep 17 00:00:00 2001 From: Lin Johnson Date: Sun, 15 Oct 2017 00:09:09 +0800 Subject: [PATCH 199/220] Add format P010 etc for 10bit/12bit/16bit YUV420 formats Add those definition in dri2_interface.h and in intel_screen.c This will make P010 formats be sampleable in OpenGL Signed-off-by: Lin Johnson --- include/GL/internal/dri_interface.h | 4 +++ src/egl/drivers/dri2/egl_dri2.c | 33 ++++++++++++++++++++++++ src/egl/drivers/dri2/platform_android.c | 5 +++- src/gallium/state_trackers/dri/dri2.c | 6 +++++ src/mesa/drivers/dri/i965/intel_screen.c | 12 +++++++++ 5 files changed, 59 insertions(+), 1 deletion(-) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 6f9c2c8b8cf..48060ac8de6 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1334,6 +1334,10 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FOURCC_YVU422 0x36315659 #define __DRI_IMAGE_FOURCC_YVU444 0x34325659 +#define __DRI_IMAGE_FOURCC_P010 0x30313050 +#define __DRI_IMAGE_FOURCC_P012 0x32313050 +#define __DRI_IMAGE_FOURCC_P016 0x36313050 + /** * Queryable on images created by createImageFromNames. 
* diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 4f226b27126..3958e6d94e5 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -65,6 +65,38 @@ #include "util/u_vector.h" #include "mapi/glapi/glapi.h" +/* The kernel header drm_fourcc.h defines the DRM formats below. We duplicate + * some of the definitions here so that building Mesa won't bleeding-edge + * kernel headers. + */ +#ifndef DRM_FORMAT_R8 +#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ +#endif + +#ifndef DRM_FORMAT_RG88 +#define DRM_FORMAT_RG88 fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */ +#endif + +#ifndef DRM_FORMAT_GR88 +#define DRM_FORMAT_GR88 fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */ +#endif + +#ifndef DRM_FORMAT_R16 +#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R 16 little endian */ +#endif + +#ifndef DRM_FORMAT_GR1616 +#define DRM_FORMAT_GR1616 fourcc_code('G', 'R', '3', '2') /* [31:0] R:G 16:16 little endian */ +#endif + +#ifndef DRM_FORMAT_P010 +#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cb:Cr plane 10 bits per channel */ +#endif + +#ifndef DRM_FORMAT_MOD_INVALID +#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1) +#endif + #define NUM_ATTRIBS 12 static void @@ -2284,6 +2316,7 @@ dri2_num_fourcc_format_planes(EGLint format) case DRM_FORMAT_NV21: case DRM_FORMAT_NV16: case DRM_FORMAT_NV61: + case DRM_FORMAT_P010: return 2; case DRM_FORMAT_YUV410: diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index a737895c9f4..0451844f6db 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -67,6 +67,7 @@ struct droid_yuv_format { enum { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100, HAL_PIXEL_FORMAT_NV12 = 0x10F, + HAL_PIXEL_FORMAT_P010_INTEL = 0x110 }; /* The following table is used to look up a DRI image FourCC based @@ -74,6 +75,7 
@@ enum { static const struct droid_yuv_format droid_yuv_formats[] = { /* Native format, YCrCb, Chroma step, DRI image FourCC */ { HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_P010_INTEL, 0, 4, __DRI_IMAGE_FOURCC_P010 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 1, __DRI_IMAGE_FOURCC_YUV420 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, { HAL_PIXEL_FORMAT_YV12, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, @@ -950,7 +952,8 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, return NULL; } - if (ycbcr.chroma_step == 2) { + /* FIXME? we should not rely on chroma_step */ + if (ycbcr.chroma_step == 2 || ycbcr.chroma_step == 4) { /* Semi-planar Y + CbCr or Y + CrCb format. */ const EGLint attr_list_2plane[] = { EGL_WIDTH, buf->width, diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index a09787bb215..d99187c6eaa 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -176,6 +176,12 @@ static int convert_fourcc(int format, int *dri_components_p) format = __DRI_IMAGE_FORMAT_R8; dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; break; + case __DRI_IMAGE_FOURCC_P010: + case __DRI_IMAGE_FOURCC_P012: + case __DRI_IMAGE_FOURCC_P016: + format = __DRI_IMAGE_FORMAT_R16; + dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; + break; default: return -1; } diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index c3bd30f7837..f2eea4e14dc 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -282,6 +282,18 @@ static const struct intel_image_format intel_image_formats[] = { { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + { __DRI_IMAGE_FOURCC_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + + { __DRI_IMAGE_FOURCC_P012, 
__DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + + { __DRI_IMAGE_FOURCC_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + { __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2, { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } }, From e8b922a5d7ee66b276c58095d9ff009d481422ab Mon Sep 17 00:00:00 2001 From: Kishore Kadiyala Date: Mon, 30 Oct 2017 22:33:50 +0530 Subject: [PATCH 200/220] Mesa Static link with libexpat In Android O, MESA needs to statically link libexpat so that it's in same namespace. Change-Id: I82b0be5c817c21e734dfdf5bfb6a9aa1d414ab33 Signed-off-by: Kishore Kadiyala --- src/gallium/targets/dri/Android.mk | 10 +++++++++- src/intel/Android.common.mk | 11 ++++++++++- src/mesa/drivers/dri/Android.mk | 9 ++++++++- src/util/Android.mk | 6 ++++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 9c43fa1e8fd..83f439071f8 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -43,9 +43,17 @@ LOCAL_SHARED_LIBRARIES := \ libbacktrace \ libdl \ libglapi \ - libexpat \ libz +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +else +LOCAL_SHARED_LIBRARIES += \ + libexpat +endif + $(foreach d, $(MESA_BUILD_GALLIUM), $(eval LOCAL_CFLAGS += $(patsubst HAVE_%,-D%,$(d)))) # sort GALLIUM_LIBS to remove any duplicates diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk index 12cea6e5472..20e78f518b5 100644 --- a/src/intel/Android.common.mk +++ b/src/intel/Android.common.mk @@ -38,7 +38,16 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa -LOCAL_SHARED_LIBRARIES := libexpat libz 
+LOCAL_SHARED_LIBRARIES := libz + +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +else +LOCAL_SHARED_LIBRARIES += \ + libexpat +endif LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml diff --git a/src/mesa/drivers/dri/Android.mk b/src/mesa/drivers/dri/Android.mk index 53ff4b4f632..dc1f98364c8 100644 --- a/src/mesa/drivers/dri/Android.mk +++ b/src/mesa/drivers/dri/Android.mk @@ -49,11 +49,18 @@ MESA_DRI_WHOLE_STATIC_LIBRARIES := \ MESA_DRI_SHARED_LIBRARIES := \ libcutils \ libdl \ - libexpat \ libglapi \ liblog \ libz +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +MESA_DRI_WHOLE_STATIC_LIBRARIES += \ + libexpat +else +MESA_DRI_SHARED_LIBRARIES += \ + libexpat +endif #----------------------------------------------- # Build drivers and libmesa_dri_common diff --git a/src/util/Android.mk b/src/util/Android.mk index 2d59e1ae15e..6d770ca9575 100644 --- a/src/util/Android.mk +++ b/src/util/Android.mk @@ -41,8 +41,14 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +else LOCAL_SHARED_LIBRARIES := \ libexpat +endif LOCAL_MODULE := libmesa_util From 6c7cb3af67867bb186f1749ffc468aa12130d814 Mon Sep 17 00:00:00 2001 From: Chen Lin Z Date: Sun, 1 Jul 2018 03:14:56 -0700 Subject: [PATCH 201/220] AndroidIA: Build fixes for OMR1. Jira: None. Test: Build passes. 
Signed-off-by: Chen Lin Z --- Android.common.mk | 13 +++++++++++-- src/egl/Android.mk | 8 +++++++- src/intel/Android.common.mk | 1 + src/intel/Android.vulkan.mk | 31 ++++++++++++++++++++++++++----- src/vulkan/Android.mk | 7 +++++-- 5 files changed, 50 insertions(+), 10 deletions(-) diff --git a/Android.common.mk b/Android.common.mk index e9e03d4de80..05a9e5917de 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -79,14 +79,23 @@ LOCAL_CFLAGS += \ -fvisibility=hidden \ -fno-math-errno \ -fno-trapping-math \ - -Wno-sign-compare + -Wno-sign-compare \ + -Wno-self-assign \ + -Wno-constant-logical-operand \ + -Wno-format \ + -Wno-incompatible-pointer-types \ + -Wno-enum-conversion LOCAL_CPPFLAGS += \ -D__STDC_CONSTANT_MACROS \ -D__STDC_FORMAT_MACROS \ -D__STDC_LIMIT_MACROS \ -Wno-error=non-virtual-dtor \ - -Wno-non-virtual-dtor + -Wno-non-virtual-dtor \ + -Wno-delete-non-virtual-dtor \ + -Wno-overloaded-virtual \ + -Wno-missing-braces \ + -Wno-deprecated-register # mesa requires at least c99 compiler LOCAL_CONLYFLAGS += \ diff --git a/src/egl/Android.mk b/src/egl/Android.mk index 3317e1d2e9b..7e149b4a4d2 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -45,7 +45,9 @@ LOCAL_CFLAGS := \ LOCAL_C_INCLUDES := \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/egl/main \ - $(MESA_TOP)/src/egl/drivers/dri2 + $(MESA_TOP)/src/egl/drivers/dri2 \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include LOCAL_STATIC_LIBRARIES := \ libmesa_util \ @@ -83,6 +85,10 @@ ifneq ($(MESA_BUILD_GALLIUM),) LOCAL_REQUIRED_MODULES += gallium_dri endif +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_HEADER_LIBRARIES += libnativebase_headers +endif + LOCAL_MODULE := libGLES_mesa LOCAL_MODULE_RELATIVE_PATH := egl diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk index 20e78f518b5..12bd8947e2e 100644 --- a/src/intel/Android.common.mk +++ b/src/intel/Android.common.mk @@ -44,6 +44,7 @@ LOCAL_SHARED_LIBRARIES := 
libz ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) LOCAL_STATIC_LIBRARIES := \ libexpat +LOCAL_HEADER_LIBRARIES += liblog_headers else LOCAL_SHARED_LIBRARIES += \ libexpat diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index db81fada277..1c5ee2598ca 100644 --- a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -38,7 +38,10 @@ VULKAN_COMMON_INCLUDES := \ $(MESA_TOP)/src/intel \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/intel/vulkan \ - frameworks/native/vulkan/include + frameworks/native/vulkan/include \ + frameworks/native/libs/nativebase/include \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include # libmesa_anv_entrypoints with header and dummy.c # @@ -74,6 +77,8 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := \ LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -107,6 +112,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -120,13 +127,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN75_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=75 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -140,13 +149,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN8_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=80 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := 
libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -160,13 +171,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN9_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=90 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -180,13 +193,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN10_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=100 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -207,6 +222,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -261,6 +278,8 @@ $(intermediates)/vulkan/anv_extensions.h: LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -310,5 +329,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk 
index 6253f1c3be9..730d036d18c 100644 --- a/src/vulkan/Android.mk +++ b/src/vulkan/Android.mk @@ -32,12 +32,15 @@ include $(LOCAL_PATH)/Makefile.sources include $(CLEAR_VARS) LOCAL_MODULE := libmesa_vulkan_util LOCAL_MODULE_CLASS := STATIC_LIBRARIES - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers intermediates := $(call local-generated-sources-dir) LOCAL_C_INCLUDES := \ $(MESA_TOP)/include/vulkan \ - $(MESA_TOP)/src/vulkan/util + $(MESA_TOP)/src/vulkan/util \ + frameworks/native/libs/nativebase/include \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, \ $(VULKAN_UTIL_GENERATED_FILES)) From 12af1e1b931589127bc7573617fd74bbd71fce7d Mon Sep 17 00:00:00 2001 From: Kalyan Kondapally Date: Sat, 3 Feb 2018 17:51:04 -0800 Subject: [PATCH 202/220] Internal: Relax permissions when we export prime handle for GEM. This is needed in case we want to use mmap with dma-buf and write into the buffer in CPU side. This is useful when layers are rendered using software and we will either need to scan or texture from these layers. 
Check: https://patchwork.freedesktop.org/patch/56380/ Signed-off-by: Kalyan Kondapally --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index f1675b191c1..d4e6ba039c9 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -1487,7 +1487,7 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd) brw_bo_make_external(bo); if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, - DRM_CLOEXEC, prime_fd) != 0) + DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) return -errno; bo->reusable = false; From 39d2de603242cc89d816c23ea8cd14e3a14980a1 Mon Sep 17 00:00:00 2001 From: Iosif Antochi Date: Wed, 14 Jun 2017 14:49:55 +0100 Subject: [PATCH 203/220] CHROMIUM: egl: automatically call eglReleaseThread on thread termination EGL thread cleanup conformance tests could run out of memory as the contexts were not freed even though the application requested to have them deleted. This was caused by the fact that the contexts were still current on their threads when delete was called and (in order not to block any potential pending renders) they were just marked for delete. Fix this by calling eglReleaseThread on thread termination. This is safe to do even if this was already called by the application since, according to the EGL 1.5 spec, eglReleaseThread can be called multiple times without error. Fixes: dEQP-EGL.functional.thread_cleanup.multi_context_* dEQP-EGL.functional.robustness.create_context.query_robust_access --- src/egl/main/eglcurrent.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c index 7af3011b757..545697e5662 100644 --- a/src/egl/main/eglcurrent.c +++ b/src/egl/main/eglcurrent.c @@ -137,13 +137,37 @@ _eglDestroyThreadInfo(_EGLThreadInfo *t) } +/** + * Delete/free a _EGLThreadInfo object. 
+ */ +static void +_eglDestroyThreadInfoCallback(_EGLThreadInfo *t) +{ + /* If this callback is called on thread termination then try to also give a + * chance to cleanup to the client drivers. If called for module termination + * then just release the thread information as calling eglReleaseThread + * would result in a deadlock. + */ + if (_egl_TSDInitialized) { + /* The callback handler has replaced the TLS entry, which is passed in as + * 't', with NULL. Restore it here so that the release thread finds it in + * the TLS entry. + */ + _eglSetTSD(t); + eglReleaseThread(); + } else { + _eglDestroyThreadInfo(t); + } +} + + /** * Make sure TSD is initialized and return current value. */ static inline _EGLThreadInfo * _eglCheckedGetTSD(void) { - if (_eglInitTSD(&_eglDestroyThreadInfo) != EGL_TRUE) { + if (_eglInitTSD(&_eglDestroyThreadInfoCallback) != EGL_TRUE) { _eglLog(_EGL_FATAL, "failed to initialize \"current\" system"); return NULL; } From 2a58bacab874f8f9f451a0c4842f5b3a2412db79 Mon Sep 17 00:00:00 2001 From: Min He Date: Wed, 18 Apr 2018 10:34:50 +0800 Subject: [PATCH 204/220] AndroidIA: prevent deadlock in droid_query_buffer_age To avoid blocking other EGL calls, release the display mutex before calling update_buffers(), which will call droid_window_dequeue_buffer(). This patch fixes some failure cases in android graphics cts test. 
Signed-off-by: Min He Signed-off-by: Chenglei Ren --- src/egl/drivers/dri2/platform_android.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index 0451844f6db..ffb7a7e80a0 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -722,11 +722,18 @@ droid_query_buffer_age(_EGLDriver *drv, { struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); + /* To avoid blocking other EGL calls, release the display mutex before + * we enter droid_window_dequeue_buffer() and re-acquire the mutex upon + * return. + */ + mtx_unlock(&disp->Mutex); if (update_buffers(dri2_surf) < 0) { _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age"); + mtx_lock(&disp->Mutex); return -1; } + mtx_lock(&disp->Mutex); return dri2_surf->back ? dri2_surf->back->age : 0; } From 0dd6cbcccfc03bbcafe42596f7737a256759243c Mon Sep 17 00:00:00 2001 From: samiuddi Date: Wed, 25 Apr 2018 16:23:11 +0530 Subject: [PATCH 205/220] From List: Check if the window is non-NULL before setting swap interval. This fixes crash due to NULL window when swap interval is set for pbuffer surface. 
Jira: 61995 Test: CtsDisplayTestCases pass Signed-off-by: samiuddi --- src/egl/drivers/dri2/platform_android.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index ffb7a7e80a0..0c79fe9b5e0 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -521,7 +521,7 @@ droid_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy, struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); struct ANativeWindow *window = dri2_surf->window; - if (window->setSwapInterval(window, interval)) + if (window && window->setSwapInterval(window, interval)) return EGL_FALSE; surf->SwapInterval = interval; From 8fe1b4fdf584574291f18b589bc5cba7e58d151f Mon Sep 17 00:00:00 2001 From: Kalyan Kondapally Date: Wed, 23 May 2018 15:42:46 -0700 Subject: [PATCH 206/220] Add ReadMe file. Signed-off-by: Kalyan Kondapally --- Readme.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Readme.md diff --git a/Readme.md b/Readme.md new file mode 100644 index 00000000000..5df295abc3a --- /dev/null +++ b/Readme.md @@ -0,0 +1,2 @@ +Any security related issues should be reported by following the instructions here: +https://01.org/security From 90e1a9c097af6da81a01a4c07ad34b00ce7f4816 Mon Sep 17 00:00:00 2001 From: "jenny.q.cao" Date: Tue, 29 May 2018 17:51:59 -0700 Subject: [PATCH 207/220] ignore some compile warning on android. 
Signed-off-by: Qiming Shi Signed-off-by: Mingwei Wang Signed-off-by: jenny.q.cao --- src/compiler/Android.glsl.mk | 2 +- src/compiler/Android.nir.mk | 3 +++ src/egl/Android.mk | 3 ++- src/intel/Android.compiler.mk | 2 +- src/intel/Android.dev.mk | 3 +++ src/intel/Android.vulkan.mk | 2 +- src/mesa/drivers/dri/i965/Android.mk | 2 ++ src/mesa/program/Android.mk | 2 +- 8 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk index 0aabafa2673..37b3cb80251 100644 --- a/src/compiler/Android.glsl.mk +++ b/src/compiler/Android.glsl.mk @@ -48,7 +48,7 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_nir LOCAL_MODULE := libmesa_glsl - +LOCAL_CFLAGS += -Wno-error include $(LOCAL_PATH)/Android.glsl.gen.mk include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/compiler/Android.nir.mk b/src/compiler/Android.nir.mk index 75a247a245d..59da5dbdc1c 100644 --- a/src/compiler/Android.nir.mk +++ b/src/compiler/Android.nir.mk @@ -41,6 +41,9 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +LOCAL_CFLAGS := \ + -Wno-missing-braces + LOCAL_STATIC_LIBRARIES := libmesa_compiler LOCAL_MODULE := libmesa_nir diff --git a/src/egl/Android.mk b/src/egl/Android.mk index 7e149b4a4d2..bbc7df2aff8 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -46,6 +46,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/egl/main \ $(MESA_TOP)/src/egl/drivers/dri2 \ + frameworks/native/libs/nativebase/include \ frameworks/native/libs/nativewindow/include \ frameworks/native/libs/arect/include @@ -91,6 +92,6 @@ endif LOCAL_MODULE := libGLES_mesa LOCAL_MODULE_RELATIVE_PATH := egl - +LOCAL_CFLAGS += -Wno-error include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/intel/Android.compiler.mk b/src/intel/Android.compiler.mk index c2b01221dfc..41af7b20b9c 100644 --- a/src/intel/Android.compiler.mk +++ b/src/intel/Android.compiler.mk @@ -28,7 +28,7 @@ # 
--------------------------------------- include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_intel_compiler LOCAL_MODULE_CLASS := STATIC_LIBRARIES diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk index cd2ed66a176..3011ee232ed 100644 --- a/src/intel/Android.dev.mk +++ b/src/intel/Android.dev.mk @@ -33,5 +33,8 @@ LOCAL_C_INCLUDES := $(MESA_TOP)/include/drm-uapi LOCAL_SRC_FILES := $(DEV_FILES) +LOCAL_CFLAGS := \ + -Wno-gnu-variable-sized-type-not-at-end + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index 1c5ee2598ca..03120cf48a0 100644 --- a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -234,7 +234,7 @@ include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libmesa_vulkan_common LOCAL_MODULE_CLASS := STATIC_LIBRARIES - +LOCAL_CFLAGS += -Wno-error intermediates := $(call local-generated-sources-dir) LOCAL_SRC_FILES := $(VULKAN_FILES) diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index fbad63a0824..580b5443965 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -310,6 +310,8 @@ LOCAL_LDFLAGS += $(MESA_DRI_LDFLAGS) LOCAL_CFLAGS := \ $(MESA_DRI_CFLAGS) +LOCAL_CFLAGS += -Wno-error + LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) \ $(MESA_TOP)/include/drm-uapi diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index c6470e6289e..13d0da85882 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -41,7 +41,7 @@ endef include $(MESA_TOP)/src/mesa/Makefile.sources include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_program LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_STATIC_LIBRARIES := libmesa_nir \ From 89b9b856f235f27d783d759bb7db6b22959dd412 Mon Sep 17 00:00:00 2001 From: Kevin Strasser Date: Thu, 15 Mar 2018 14:00:01 +0530 Subject: [PATCH 208/220] Disable 
EXT_pixel_format_float for Android CtsViewTestCases Jira: None. Test: Android CTS should PASS for below android.view.cts.PixelCopyTest#testWideGamutWindowProducerCopyToRGBA16F Signed-off-by: Kevin Strasser Signed-off-by: Kishore Kadiyala --- src/egl/drivers/dri2/egl_dri2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 3958e6d94e5..81d4ea456b3 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -705,7 +705,7 @@ dri2_setup_screen(_EGLDisplay *disp) dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_CONTEXT_PRIORITY); - disp->Extensions.EXT_pixel_format_float = EGL_TRUE; + disp->Extensions.EXT_pixel_format_float = EGL_FALSE; if (dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB)) From 091eab6f8ab51294bde74ca3387ca484537b7591 Mon Sep 17 00:00:00 2001 From: renchenglei Date: Wed, 6 Jun 2018 13:35:38 +0800 Subject: [PATCH 209/220] Revert "Revert "mesa: add GL_HALF_FLOAT as supported type to readpixels"" This reverts commit 6b2139172969e68295c22fda92438637c7a6e6d5. Android asks for GL_HALF_FLOAT as a supported type for readpixels. This patch helps fix the CTS test android.view.cts.PixelCopyTest. However, it may cause a KHR-GLES3.packed_pixels.* regression on Linux. As the CTS test is "Must Pass" on Android, let's keep this patch as an Android-specific fix. 
Tracked-On: https://jira01.devtools.intel.com/browse/OAM-63305 --- src/mesa/main/readpix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 556c860d393..d9a12d2387f 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -922,6 +922,8 @@ read_pixels_es3_error_check(struct gl_context *ctx, GLenum format, GLenum type, case GL_RGBA: if (type == GL_FLOAT && data_type == GL_FLOAT) return GL_NO_ERROR; /* EXT_color_buffer_float */ + if (type == GL_HALF_FLOAT && data_type == GL_FLOAT) + return GL_NO_ERROR; if (type == GL_UNSIGNED_BYTE && data_type == GL_UNSIGNED_NORMALIZED) return GL_NO_ERROR; if (internalFormat == GL_RGB10_A2 && From 78734ef2fc7b621ced035816b1d4d7cdd429ee68 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 4 Oct 2017 10:32:26 +0300 Subject: [PATCH 210/220] AndroidIA: glcpp: Hack to handle expressions in #line directives. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GLSL ES 320 technically allows #line to have arbitrary expression trees rather than integer literal constants, unlike the C and C++ preprocessor. This is likely a completely unused feature that does not make sense. However, Android irritatingly mandates this useless behavior, so this patch implements a hack to try and support it. We handle a single expression: #line <expression> but we avoid handling the double expression: #line <expression> <expression> because this is an ambiguous grammar. 
Instead, we handle the case that wraps both in parentheses, which is actually well defined: #line (<expression>) (<expression>) With this change the following tests pass: dEQP-GLES3.functional.shaders.preprocessor.builtin.line_expression_vertex dEQP-GLES3.functional.shaders.preprocessor.builtin.line_expression_fragment dEQP-GLES3.functional.shaders.preprocessor.builtin.line_and_file_expression_vertex dEQP-GLES3.functional.shaders.preprocessor.builtin.line_and_file_expression_fragment Signed-off-by: Tapani Pälli Signed-off-by: Kenneth Graunke BUG=b:33352633 BUG=b:33247335 TEST=affected tests passing on CTS 7.1_r1 sentry Change-Id: I7afbbb386bd4a582e3f241014a83eaccad1d50d9 Reviewed-on: https://chromium-review.googlesource.com/427305 Tested-by: Haixia Shi Reviewed-by: Ilja H. Friedel Commit-Queue: Haixia Shi Trybot-Ready: Haixia Shi --- src/compiler/glsl/glcpp/glcpp-parse.y | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 1c095cb66f9..c951d9526ac 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -224,10 +224,12 @@ expanded_line: glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); _glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value); } -| LINE_EXPANDED integer_constant NEWLINE { +| LINE_EXPANDED expression NEWLINE { + if (parser->is_gles && $2.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); parser->has_new_line_number = 1; - parser->new_line_number = $2; - _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2); + parser->new_line_number = $2.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value); } | LINE_EXPANDED integer_constant integer_constant NEWLINE { parser->has_new_line_number = 1; @@ -238,6 +240,17 @@ expanded_line: "#line %" 
PRIiMAX "\n", $2, $3); } +| LINE_EXPANDED '(' expression ')' '(' expression ')' NEWLINE { + if (parser->is_gles && $3.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro); + if (parser->is_gles && $6.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro); + parser->has_new_line_number = 1; + parser->new_line_number = $3.value; + parser->has_new_source_number = 1; + parser->new_source_number = $6.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX " %" PRIiMAX "\n", $3.value, $6.value); + } ; define: From b0b078e2d8f690724a6447b1db926fcc8ea86cdc Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 8 Feb 2018 10:45:56 +0000 Subject: [PATCH 211/220] REVERTME: i965: perf: implement GPA's null renderer GPA requires a null renderer query which disables all rendering. This feels fairly at odds with the spirit of the INTEL_performance_query extension. Note: Since the INTEL_blackhole_render implementation (https://www.khronos.org/registry/OpenGL/extensions/INTEL/INTEL_blackhole_render.txt, https://patchwork.freedesktop.org/series/40035/) needs test case changes and also needs time for upstream review, we keep this patch for now to meet an urgent project milestone. 
Test: Pass mdapi test_GfxDrv_DriverAcceptance test case GfxDrv_DriverAcceptanceQuery.GL_NULL_HARDWARE and has no reg issue Signed-off-by: Landwerlin, Lionel --- src/mesa/drivers/dri/i965/brw_context.h | 2 + src/mesa/drivers/dri/i965/brw_defines.h | 8 ++- .../drivers/dri/i965/brw_performance_query.c | 69 ++++++++++++++++++- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7fd15669eb9..9ed56b69e59 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -686,6 +686,7 @@ enum brw_query_kind { OA_COUNTERS, OA_COUNTERS_RAW, PIPELINE_STATS, + NULL_RENDERER, }; struct brw_perf_query_register_prog { @@ -1242,6 +1243,7 @@ struct brw_context int n_active_oa_queries; int n_active_pipeline_stats_queries; + int n_active_null_renderers; /* The number of queries depending on running OA counters which * extends beyond brw_end_perf_query() since we need to wait until diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 97a787a2ab3..0264f3f79d0 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode { #define GEN10_CACHE_MODE_SS 0x0e420 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) -#define INSTPM 0x20c0 +#define INSTPM 0x20c0 /* Gen6-8 */ # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) +# define INSTPM_GLOBAL_DEBUG_ENABLE (1 << 4) +# define INSTPM_MEDIA_INSTRUCTION_DISABLE (1 << 3) +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE (1 << 2) +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE (1 << 1) #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE (1 << 1) +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE (1 << 0) #define GEN7_RPSTAT1 0xA01C #define 
GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 10e3d024f17..85d14a83c7e 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -330,6 +330,12 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"), obj->pipeline_stats.bo ? "yes" : "no"); break; + case NULL_RENDERER: + DBG("%4d: %-6s %-8s NULL_RENDERER\n", + id, + o->Used ? "Dirty," : "New,", + o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,")); + break; default: unreachable("Unknown query type"); break; @@ -431,6 +437,10 @@ brw_get_perf_query_info(struct gl_context *ctx, *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + *n_active = brw->perfquery.n_active_null_renderers; + break; + default: unreachable("Unknown query type"); break; @@ -1020,6 +1030,7 @@ brw_begin_perf_query(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); const struct brw_perf_query_info *query = obj->query; + const struct gen_device_info *devinfo = &brw->screen->devinfo; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -1104,7 +1115,6 @@ brw_begin_perf_query(struct gl_context *ctx, /* If the OA counters aren't already on, enable them. 
*/ if (brw->perfquery.oa_stream_fd == -1) { __DRIscreen *screen = brw->screen->driScrnPriv; - const struct gen_device_info *devinfo = &brw->screen->devinfo; /* The period_exponent gives a sampling period as follows: * sample_period = timestamp_period * 2^(period_exponent + 1) @@ -1250,6 +1260,23 @@ brw_begin_perf_query(struct gl_context *ctx, ++brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + ++brw->perfquery.n_active_null_renderers; + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE) | + CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE) | + INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE); + } + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_LRI_WRITE_IMMEDIATE); + break; + default: unreachable("Unknown query type"); break; @@ -1270,6 +1297,7 @@ brw_end_perf_query(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); + const struct gen_device_info *devinfo = &brw->screen->devinfo; DBG("End(%d)\n", o->Id); @@ -1312,6 +1340,21 @@ brw_end_perf_query(struct gl_context *ctx, --brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + if (--brw->perfquery.n_active_null_renderers == 0) { + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE)); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE)); + } + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_LRI_WRITE_IMMEDIATE); + } + break; + default: unreachable("Unknown query type"); break; @@ -1337,6 +1380,9 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) bo = obj->pipeline_stats.bo; break; + 
case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -1387,6 +1433,8 @@ brw_is_perf_query_ready(struct gl_context *ctx, return (obj->pipeline_stats.bo && !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); + case NULL_RENDERER: + return true; default: unreachable("Unknown query type"); @@ -1602,6 +1650,9 @@ brw_get_perf_query_data(struct gl_context *ctx, written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; + case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -1672,6 +1723,9 @@ brw_delete_perf_query(struct gl_context *ctx, } break; + case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -2152,6 +2206,15 @@ get_register_queries_function(const struct gen_device_info *devinfo) return NULL; } +static void +fill_null_renderer_perf_query_info(struct brw_context *brw, + struct brw_perf_query_info *query) +{ + query->kind = NULL_RENDERER; + query->name = "Intel_Null_Hardware_Query"; + query->n_counters = 0; +} + static unsigned brw_init_perf_query_info(struct gl_context *ctx) { @@ -2210,6 +2273,10 @@ brw_init_perf_query_info(struct gl_context *ctx) enumerate_sysfs_metrics(brw); brw_perf_query_register_mdapi_oa_query(brw); + + struct brw_perf_query_info *null_query = + brw_perf_query_append_query_info(brw); + fill_null_renderer_perf_query_info(brw, null_query); } brw->perfquery.unaccumulated = From c9a5ddb0e88b8be24f523a1ca6fae61637e0ffe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Mon, 27 Aug 2018 12:31:37 +0300 Subject: [PATCH 212/220] REVERTME: anv/android: disable couple of KHR extensions for Android CTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes following test pass: dEQP-VK.api.info.device.extensions Test: dEQP-VK.api.info.device.extensions Signed-off-by: Tapani Pälli [strassek: carry this patch until the 
extensions are whitelisted in CTS] --- src/intel/vulkan/anv_extensions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index e5b6c77a6d0..dd7111b58e1 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -71,9 +71,9 @@ def __init__(self, version, enable): EXTENSIONS = [ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), - Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8 && !ANDROID'), Extension('VK_KHR_bind_memory2', 1, True), - Extension('VK_KHR_create_renderpass2', 1, True), + Extension('VK_KHR_create_renderpass2', 1, '!ANDROID'), Extension('VK_KHR_dedicated_allocation', 1, True), Extension('VK_KHR_descriptor_update_template', 1, True), Extension('VK_KHR_device_group', 1, True), From 9e828ce9f7dc662ab90cc8da16bad06f8657f1d4 Mon Sep 17 00:00:00 2001 From: Kevin Strasser Date: Thu, 6 Sep 2018 15:54:09 -0700 Subject: [PATCH 213/220] FROMLIST: anv/android: handle storage images in vkGetSwapchainGrallocUsageANDROID Android P and earlier expect that the surface supports storage images, and so many of the tests fail when the framework checks for that support. The framework also includes various image format and usage combinations that are invalid for the hardware. Drop the STORAGE restriction from the HAL and whitelist a pair of formats so that existing versions of Android can pass these tests. 
Fixes: dEQP-VK.wsi.android.* Signed-off-by: Kevin Strasser (am from https://patchwork.freedesktop.org/patch/247681/) --- src/intel/vulkan/anv_android.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index cdca9f95caa..4720095c6cd 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. */ - const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { + VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, .format = format, .type = VK_IMAGE_TYPE_2D, @@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( .usage = imageUsage, }; + /* Android P and earlier doesn't check if the physical device supports a + * given format and usage combination before calling this function. Omit the + * storage requirement to make the tests pass. + */ +#if ANDROID_API_LEVEL <= 28 + if (format == VK_FORMAT_R8G8B8A8_SRGB || + format == VK_FORMAT_R5G6B5_UNORM_PACK16) { + image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT; + } +#endif + VkImageFormatProperties2KHR image_format_props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, }; From 91685d0001da5deb273cc008aad48a52ca0a5751 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Thu, 15 Mar 2018 15:22:20 -0700 Subject: [PATCH 214/220] FROMLIST: intel/genxml: Add register for object preemption. 
Signed-off-by: Rafael Antognolli (am from https://patchwork.freedesktop.org/patch/210951/) --- src/intel/genxml/gen10.xml | 8 ++++++++ src/intel/genxml/gen11.xml | 8 ++++++++ src/intel/genxml/gen9.xml | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml index abd5da297d6..acded759335 100644 --- a/src/intel/genxml/gen10.xml +++ b/src/intel/genxml/gen10.xml @@ -3553,6 +3553,14 @@ + + + + + + + + diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml index c69d7dc89c2..d39bf09a5d7 100644 --- a/src/intel/genxml/gen11.xml +++ b/src/intel/genxml/gen11.xml @@ -3551,6 +3551,14 @@ + + + + + + + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index ca268254503..b7ce3095ab4 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -3491,6 +3491,14 @@ + + + + + + + + From 3d6ca362131f084735747b522c8941fa587ae191 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Thu, 15 Mar 2018 15:22:21 -0700 Subject: [PATCH 215/220] FROMLIST: anv/gen10: Enable object level preemption. Set bit when initializing a device. 
Signed-off-by: Rafael Antognolli (am from https://patchwork.freedesktop.org/patch/210949/) --- src/intel/vulkan/genX_state.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 42800a2581e..99b86f68b3a 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -231,6 +231,22 @@ genX(init_device_state)(struct anv_device *device) #endif } +#if GEN_GEN >= 10 + /* A fixed function pipe flush is required before modifying this field */ + anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) { + pipe.PipeControlFlushEnable = true; + } + /* enable object level preemption */ + uint32_t csc1; + anv_pack_struct(&csc1, GENX(CS_CHICKEN1), + .ReplayMode = ObjectLevelPreemption, + .ReplayModeMask = 1); + anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = GENX(CS_CHICKEN1_num); + lri.DataDWord = csc1; + } +#endif + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe); assert(batch.next <= batch.end); From d4b8ff4c67296cd6895de98d0c967d37821cb75f Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Thu, 15 Mar 2018 15:22:22 -0700 Subject: [PATCH 216/220] FROMLIST: i965/gen10+: Enable object level preemption. Set bit when initializing context. Signed-off-by: Rafael Antognolli (am from https://patchwork.freedesktop.org/patch/210950/) --- src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_defines.h | 5 ++++ src/mesa/drivers/dri/i965/brw_state.h | 3 ++- src/mesa/drivers/dri/i965/brw_state_upload.c | 25 ++++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 9ed56b69e59..47183da66bc 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -843,6 +843,8 @@ struct brw_context GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. 
*/ + bool object_preemption; /**< Object level preemption enabled. */ + GLenum reduced_primitive; /** diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0264f3f79d0..0ec50e1d27a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1687,4 +1687,9 @@ enum brw_pixel_shader_coverage_mask_mode { # define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5) # define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5) +#define CS_CHICKEN1 0x2580 /* Gen9+ */ +# define GEN9_REPLAY_MODE_MIDBUFFER (0 << 0) +# define GEN9_REPLAY_MODE_MIDOBJECT (1 << 0) +# define GEN9_REPLAY_MODE_MASK REG_MASK(1 << 0) + #endif diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index f6acf81b899..546d103d1a4 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -128,7 +128,7 @@ void brw_disk_cache_write_compute_program(struct brw_context *brw); void brw_disk_cache_write_render_programs(struct brw_context *brw); /*********************************************************************** - * brw_state.c + * brw_state_upload.c */ void brw_upload_render_state(struct brw_context *brw); void brw_render_state_finished(struct brw_context *brw); @@ -138,6 +138,7 @@ void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); void brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline); +void brw_enable_obj_preemption(struct brw_context *brw, bool enable); static inline void brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7f20579fb87..2e42dfb36d6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -45,6 +45,28 @@ #include "brw_cs.h" #include "main/framebuffer.h" 
+void +brw_enable_obj_preemption(struct brw_context *brw, bool enable) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + assert(devinfo->gen >= 9); + + if (enable == brw->object_preemption) + return; + + /* A fixed function pipe flush is required before modifying this field */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); + + bool replay_mode = enable ? + GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER; + + /* enable object level preemption */ + brw_load_register_imm32(brw, CS_CHICKEN1, + replay_mode | GEN9_REPLAY_MODE_MASK); + + brw->object_preemption = enable; +} + static void brw_upload_initial_gpu_state(struct brw_context *brw) { @@ -153,6 +175,9 @@ brw_upload_initial_gpu_state(struct brw_context *brw) ADVANCE_BATCH(); } } + + if (devinfo->gen >= 10) + brw_enable_obj_preemption(brw, true); } static inline const struct brw_tracked_state * From f9682c05dae9b3a97c581f0cac6bce86c2283912 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Thu, 15 Mar 2018 15:22:23 -0700 Subject: [PATCH 217/220] FROMLIST: i965/gen9: Add workarounds for object preemption. Gen9 hardware requires some workarounds to disable preemption depending on the type of primitive being emitted. We implement this by adding a new atom that tracks BRW_NEW_PRIMITIVE. Whenever it happens, we check the current type of primitive and enable/disable object preemption. For now, we just ignore blorp. The only primitive it emits is 3DPRIM_RECTLIST, and since it's not listed in the workarounds, we can safely leave preemption enabled when it happens. Or it will be disabled by a previous 3DPRIMITIVE, which should be fine too. 
Signed-off-by: Rafael Antognolli Cc: Kenneth Graunke (am from https://patchwork.freedesktop.org/patch/210952/) --- src/mesa/drivers/dri/i965/genX_state_upload.c | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index ce9a3adcfc1..5d2572cb4dc 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -5615,6 +5615,50 @@ static const struct brw_tracked_state genX(blend_constant_color) = { /* ---------------------------------------------------------------------- */ +#if GEN_GEN == 9 + +/** + * Implement workarounds for preemption: + * - WaDisableMidObjectPreemptionForGSLineStripAdj + * - WaDisableMidObjectPreemptionForTrifanOrPolygon + */ +static void +gen9_emit_preempt_wa(struct brw_context *brw) +{ + /* WaDisableMidObjectPreemptionForGSLineStripAdj + * + * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and + * GS is enabled. + */ + bool object_preemption = + !(brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled); + + /* WaDisableMidObjectPreemptionForTrifanOrPolygon + * + * TriFan miscompare in Execlist Preemption test. Cut index that is on a + * previous context. End the previous, then resume another context with a + * tri-fan or polygon, and the vertex count is corrupted. If we preempt + * again we will cause corruption. + * + * WA: Disable mid-draw preemption when draw-call has a tri-fan. 
+ */ + object_preemption = + object_preemption && !(brw->primitive == _3DPRIM_TRIFAN); + + brw_enable_obj_preemption(brw, object_preemption); +} + +static const struct brw_tracked_state gen9_preempt_wa = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM, + }, + .emit = gen9_emit_preempt_wa, +}; +#endif + +/* ---------------------------------------------------------------------- */ + void genX(init_atoms)(struct brw_context *brw) { @@ -5919,6 +5963,9 @@ genX(init_atoms)(struct brw_context *brw) &genX(cut_index), &gen8_pma_fix, +#if GEN_GEN == 9 + &gen9_preempt_wa, +#endif }; #endif From f372bc8f6dd9518fe8f90741f871040a63a1c940 Mon Sep 17 00:00:00 2001 From: Aditya Swarup Date: Wed, 31 Oct 2018 17:12:40 -0700 Subject: [PATCH 218/220] UPSTREAM: i965: Lift restriction in external textures for EGLImage support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes Skqp's unitTest_EGLImageTest test. For Intel platforms, we support external textures only for EGLImages created with EGL_EXT_image_dma_buf_import. This restriction seems to be Intel specific and not present for other platforms. While running SKQP test - unitTest_EGLImageTest, GL_INVALID_OPERATION is sent to the test because of this restriction. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105301 Signed-off-by: Aditya Swarup Reviewed-by: Tapani Pälli Reviewed-by: Chad Versace (cherry picked from commit a5c39ed974402c6a40d51c6189547d1f29581fbe) --- src/mesa/drivers/dri/i965/intel_image.h | 3 --- src/mesa/drivers/dri/i965/intel_screen.c | 2 -- src/mesa/drivers/dri/i965/intel_tex_image.c | 10 ---------- 3 files changed, 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h index a8193c6def9..ca604159dc2 100644 --- a/src/mesa/drivers/dri/i965/intel_image.h +++ b/src/mesa/drivers/dri/i965/intel_image.h @@ -89,9 +89,6 @@ struct __DRIimageRec { GLuint tile_y; bool has_depthstencil; - /** The image was created with EGL_EXT_image_dma_buf_import. */ - bool dma_buf_imported; - /** Offset of the auxiliary compression surface in the bo. */ uint32_t aux_offset; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index f2eea4e14dc..8a091590269 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -969,7 +969,6 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) image->tile_y = orig_image->tile_y; image->has_depthstencil = orig_image->has_depthstencil; image->data = loaderPrivate; - image->dma_buf_imported = orig_image->dma_buf_imported; image->aux_offset = orig_image->aux_offset; image->aux_pitch = orig_image->aux_pitch; @@ -1249,7 +1248,6 @@ intel_create_image_from_dma_bufs2(__DRIscreen *dri_screen, return NULL; } - image->dma_buf_imported = true; image->yuv_color_space = yuv_color_space; image->sample_range = sample_range; image->horizontal_siting = horizontal_siting; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index bdcdb7736e6..674fa1c6fbf 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -614,16 +614,6 @@ 
intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, if (image == NULL) return; - /* We support external textures only for EGLImages created with - * EGL_EXT_image_dma_buf_import. We may lift that restriction in the future. - */ - if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetTexture2DOES(external target is enabled only " - "for images created with EGL_EXT_image_dma_buf_import"); - return; - } - /* Disallow depth/stencil textures: we don't have a way to pass the * separate stencil miptree of a GL_DEPTH_STENCIL texture through. */ From cbac9f27f1ae84d47f40c89e15097f4dc77d9ed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toni=20L=C3=B6nnberg?= Date: Mon, 15 Oct 2018 16:19:52 +0300 Subject: [PATCH 219/220] FROMLIST: SQUASH: i965: SIMD32 selection heuristics (cover letter https://patchwork.freedesktop.org/series/51006/) FROMLIST: i965: SIMD32 heuristics debug flag Added a new DEBUG_HEUR32 flag to INTEL_DEBUG flags for enabling SIMD32 selection heuristics. (am from https://patchwork.freedesktop.org/patch/256764/) FROMLIST: i965: SIMD32 heuristics control data Added a new structure for holding SIMD32 heuristics control data. The control data itself will be fetched from drirc. (am from https://patchwork.freedesktop.org/patch/256806/) FROMLIST: i965: SIMD32 heuristics control data from drirc To be able to test the heuristics with different parameters, they can be controlled via environment variables through drirc. (am from https://patchwork.freedesktop.org/patch/256788/) FROMLIST: mesa: Helper functions for counting set bits in a mask (am from https://patchwork.freedesktop.org/patch/256765/) FROMLIST: i965/fs: Save the instruction count of each dispatch width The SIMD32 selection heuristics will use this information for deciding whether SIMD32 shaders should be used. 
(am from https://patchwork.freedesktop.org/patch/256793/) FROMLIST: i965/fs: SIMD32 selection heuristic based on grouped texture fetches The function goes through the compiled shader and checks how many grouped texture fetches there are. This is a simple heuristic which gets rid of most of the regressions when enabling SIMD32 shaders but still retains some of the benefits. (am from https://patchwork.freedesktop.org/patch/256798/) FROMLIST: i965/fs: Enable all SIMD32 heuristics There are three simple heuristics for SIMD32 shader enabling: - How many MRTs does the shader write into? - How many grouped texture fetches does the shader have? - How many instructions does the SIMD32 shader have compared to the SIMD16 shader? For testing purposes, the heuristics can be controlled via these environment variables: simd32_heuristic_mrt_check - Enables MRT write check - Default: true simd32_heuristic_max_mrts - How many MRT writes the heuristic allows - Default: 1 simd32_heuristic_grouped_check - Enables grouped texture fetch check - Default: true simd32_heuristic_grouped_sends - How many grouped texture fetches the heuristic allows - Default: 6 simd32_heuristic_inst_check - Enables SIMD32 vs. SIMD16 instruction count check - Default: true simd32_heuristic_inst_ratio - SIMD32 vs. SIMD16 instruction count ratio the heuristic allows - Default: 2.3 SIMD32 shaders will also not be compiled when SIMD16 compilation fails or spills. 
(am from https://patchwork.freedesktop.org/patch/256766/) --- src/intel/common/gen_debug.c | 1 + src/intel/common/gen_debug.h | 3 +- src/intel/compiler/brw_compiler.h | 11 +++++ src/intel/compiler/brw_fs.cpp | 63 +++++++++++++++++++++--- src/intel/compiler/brw_fs.h | 4 ++ src/intel/compiler/brw_fs_generator.cpp | 12 +++++ src/mesa/drivers/dri/i965/brw_context.c | 13 +++++ src/mesa/drivers/dri/i965/intel_screen.c | 27 ++++++++++ src/util/bitscan.h | 25 ++++++++++ 9 files changed, 152 insertions(+), 7 deletions(-) diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c index a978f2f5818..8990d208207 100644 --- a/src/intel/common/gen_debug.c +++ b/src/intel/common/gen_debug.c @@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = { { "nohiz", DEBUG_NO_HIZ }, { "color", DEBUG_COLOR }, { "reemit", DEBUG_REEMIT }, + { "heur32", DEBUG_HEUR32 }, { NULL, 0 } }; diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h index 72d7ca20a39..c2ca2e2ebd6 100644 --- a/src/intel/common/gen_debug.h +++ b/src/intel/common/gen_debug.h @@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_NO_HIZ (1ull << 39) #define DEBUG_COLOR (1ull << 40) #define DEBUG_REEMIT (1ull << 41) +#define DEBUG_HEUR32 (1ull << 42) /* These flags are not compatible with the disk shader cache */ #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME @@ -90,7 +91,7 @@ extern uint64_t INTEL_DEBUG; /* These flags may affect program generation */ #define DEBUG_DISK_CACHE_MASK \ (DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \ - DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32) + DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_HEUR32) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index d8c9499065f..785acdb3343 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -38,6 +38,15 @@ struct ra_regs; 
struct nir_shader; struct brw_program; +struct brw_simd32_heuristics_control { + bool grouped_sends_check; + int max_grouped_sends; + bool inst_count_check; + float inst_count_ratio; + bool mrt_check; + int max_mrts; +}; + struct brw_compiler { const struct gen_device_info *devinfo; @@ -118,6 +127,8 @@ struct brw_compiler { * whether nir_opt_large_constants will be run. */ bool supports_shader_constants; + + struct brw_simd32_heuristics_control simd32_heuristics_control; }; /** diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index f68c667a159..6826226e209 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7169,6 +7169,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, char **error_str) { const struct gen_device_info *devinfo = compiler->devinfo; + bool simd16_failed = false; + bool simd16_spilled = false; nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); @@ -7236,10 +7238,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, shader_time_index16); v16.import_uniforms(&v8); if (!v16.run_fs(allow_spilling, use_rep_send)) { + simd16_failed = true; compiler->shader_perf_log(log_data, "SIMD16 shader failed to compile: %s", v16.fail_msg); } else { + simd16_spilled = v16.spilled_any_registers; simd16_cfg = v16.cfg; prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used); @@ -7247,9 +7251,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } /* Currently, the compiler only supports SIMD32 on SNB+ */ + const brw_simd32_heuristics_control *ctrl = &compiler->simd32_heuristics_control; + uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0; + if (v8.max_dispatch_width >= 32 && !use_rep_send && compiler->devinfo->gen >= 6 && - unlikely(INTEL_DEBUG & DEBUG_DO32)) { + (unlikely(INTEL_DEBUG & DEBUG_DO32) 
|| + (unlikely(INTEL_DEBUG & DEBUG_HEUR32) && + !simd16_failed && !simd16_spilled && + (!ctrl->mrt_check || + (ctrl->mrt_check && + u_count_bits64(&mrts) <= ctrl->max_mrts))))) { /* Try a SIMD32 compile */ fs_visitor v32(compiler, log_data, mem_ctx, key, &prog_data->base, prog, shader, 32, @@ -7260,9 +7272,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, "SIMD32 shader failed to compile: %s", v32.fail_msg); } else { - simd32_cfg = v32.cfg; - prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs; - prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used); + if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) || + v32.run_heuristic(ctrl)) { + simd32_cfg = v32.cfg; + prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs; + prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used); + } } } @@ -7341,13 +7356,49 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } if (simd32_cfg) { - prog_data->dispatch_32 = true; - prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32); + uint32_t offset = g.generate_code(simd32_cfg, 32); + + if (unlikely(INTEL_DEBUG & DEBUG_DO32) || + (unlikely(INTEL_DEBUG & DEBUG_HEUR32) && + (!simd16_cfg || + (simd16_cfg && + (!ctrl->inst_count_check || + (ctrl->inst_count_check && + (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= ctrl->inst_count_ratio)))))) { + prog_data->dispatch_32 = true; + prog_data->prog_offset_32 = offset; + } } return g.get_assembly(); } +bool +fs_visitor::run_heuristic(const struct brw_simd32_heuristics_control *ctrl) { + int grouped_sends = 0; + int max_grouped_sends = 0; + bool pass = true; + + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->opcode >= SHADER_OPCODE_TEX && inst->opcode <= SHADER_OPCODE_SAMPLEINFO_LOGICAL) { + ++grouped_sends; + } else if (grouped_sends > 0) { + if (grouped_sends > max_grouped_sends) { + max_grouped_sends = grouped_sends; + } + grouped_sends = 0; + } + } + + if (ctrl->grouped_sends_check) { + if 
(max_grouped_sends > ctrl->max_grouped_sends) { + pass = false; + } + } + + return pass; +} + fs_reg * fs_visitor::emit_cs_work_group_id_setup() { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 6e68c7a2836..26868c10107 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -286,6 +286,8 @@ class fs_visitor : public backend_shader void dump_instruction(backend_instruction *inst); void dump_instruction(backend_instruction *inst, FILE *file); + bool run_heuristic(const struct brw_simd32_heuristics_control *ctrl); + const void *const key; const struct brw_sampler_prog_key_data *key_tex; @@ -397,6 +399,7 @@ class fs_generator void enable_debug(const char *shader_name); int generate_code(const cfg_t *cfg, int dispatch_width); + int get_inst_count(int dispatch_width); const unsigned *get_assembly(); private: @@ -493,6 +496,7 @@ class fs_generator struct brw_stage_prog_data * const prog_data; unsigned dispatch_width; /**< 8, 16 or 32 */ + int inst_count[3]; /* for 8, 16 and 32 */ exec_list discard_halt_patches; unsigned promoted_constants; diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 84627e83132..4c452e1c38a 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2538,6 +2538,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count, promoted_constants, before_size, after_size); + inst_count[ffs(dispatch_width) - 4] = before_size / 16; + return start_offset; } @@ -2546,3 +2548,13 @@ fs_generator::get_assembly() { return brw_get_program(p, &prog_data->program_size); } + +int +fs_generator::get_inst_count(int dispatch_width) +{ + if (dispatch_width == 8 || dispatch_width == 16 || dispatch_width == 32) { + return inst_count[ffs(dispatch_width) - 4]; + } else { + return 0; + } +} \ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c 
index 6ba64e4e06d..8cc0529d7e8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -893,6 +893,19 @@ brw_process_driconf_options(struct brw_context *brw) ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20); driComputeOptionsSha1(&brw->screen->optionCache, ctx->Const.dri_config_options_sha1); + + brw->screen->compiler->simd32_heuristics_control.grouped_sends_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_grouped_check"); + brw->screen->compiler->simd32_heuristics_control.max_grouped_sends = + driQueryOptioni(&brw->optionCache, "simd32_heuristic_grouped_sends"); + brw->screen->compiler->simd32_heuristics_control.inst_count_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_inst_check"); + brw->screen->compiler->simd32_heuristics_control.inst_count_ratio = + driQueryOptionf(&brw->optionCache, "simd32_heuristic_inst_ratio"); + brw->screen->compiler->simd32_heuristics_control.mrt_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_mrt_check"); + brw->screen->compiler->simd32_heuristics_control.max_mrts = + driQueryOptioni(&brw->optionCache, "simd32_heuristic_max_mrts"); } GLboolean diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 8a091590269..89110e60a8d 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -61,6 +61,33 @@ DRI_CONF_BEGIN DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") DRI_CONF_DESC_END DRI_CONF_OPT_END + + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_grouped_check, "true") + DRI_CONF_DESC(en, "Enable/disable grouped texture fetch " + "check in the SIMD32 selection heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_grouped_sends, int, 6, "1:999") + DRI_CONF_DESC(en, "How many grouped texture fetches should " + "the SIMD32 selection heuristic allow.") + DRI_CONF_OPT_END + 
DRI_CONF_OPT_BEGIN_B(simd32_heuristic_inst_check, "true") + DRI_CONF_DESC(en, "Enable/disable SIMD32/SIMD16 instruction " + "count ratio check in the SIMD32 selection " + "heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_inst_ratio, float, 2.3, "1:999") + DRI_CONF_DESC(en, "SIMD32/SIMD16 instruction count ratio " + "the SIMD32 selection heuristic should allow.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_mrt_check, "true") + DRI_CONF_DESC(en, "Enable/disable MRT write check in the " + "SIMD32 selection heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_max_mrts, int, 1, "1:8") + DRI_CONF_DESC(en, "How many MRT writes should the SIMD32 " + "selection heuristic allow.") + DRI_CONF_OPT_END + DRI_CONF_MESA_NO_ERROR("false") DRI_CONF_SECTION_END diff --git a/src/util/bitscan.h b/src/util/bitscan.h index dc89ac93f28..cdfecafaf01 100644 --- a/src/util/bitscan.h +++ b/src/util/bitscan.h @@ -112,6 +112,31 @@ u_bit_scan64(uint64_t *mask) return i; } +/* Count bits set in mask */ +static inline int +u_count_bits(unsigned *mask) +{ + unsigned v = *mask; + int c; + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + v = (v + (v >> 4)) & 0xF0F0F0F; + c = (int)((v * 0x1010101) >> 24); + return c; +} + +static inline int +u_count_bits64(uint64_t *mask) +{ + uint64_t v = *mask; + int c; + v = v - ((v >> 1) & 0x5555555555555555ull); + v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull); + v = (v + (v >> 4)) & 0xF0F0F0F0F0F0F0Full; + c = (int)((v * 0x101010101010101ull) >> 56); + return c; +} + /* Determine if an unsigned value is a power of two. 
* * \note From dd09d15cfdd5a051eb07af49df896190c71f5bd7 Mon Sep 17 00:00:00 2001 From: renchenglei Date: Wed, 2 Jan 2019 14:31:15 +0800 Subject: [PATCH 220/220] WIP: use the private drm lib name Change-Id: I567da1c48e588b1a65c883323e92c2a34822b5d8 Signed-off-by: Yong Yao Signed-off-by: Ren Chenglei --- Android.common.mk | 2 +- src/gallium/winsys/i915/drm/Android.mk | 2 +- src/intel/Android.vulkan.mk | 4 ++-- src/mesa/drivers/dri/i915/Android.mk | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Android.common.mk b/Android.common.mk index 05a9e5917de..d7c5f20fabc 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -122,7 +122,7 @@ LOCAL_CFLAGS_arm64 += -DUSE_AARCH64_ASM ifneq ($(LOCAL_IS_HOST_MODULE),true) LOCAL_CFLAGS += -DHAVE_LIBDRM -LOCAL_SHARED_LIBRARIES += libdrm +LOCAL_SHARED_LIBRARIES += libdrm_pri endif LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib/$(MESA_DRI_MODULE_REL_PATH)\" diff --git a/src/gallium/winsys/i915/drm/Android.mk b/src/gallium/winsys/i915/drm/Android.mk index bab3e85c5dd..bc8cd0ebe2e 100644 --- a/src/gallium/winsys/i915/drm/Android.mk +++ b/src/gallium/winsys/i915/drm/Android.mk @@ -30,7 +30,7 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(C_SOURCES) -LOCAL_SHARED_LIBRARIES := libdrm_intel +LOCAL_SHARED_LIBRARIES := libdrm_intel_pri LOCAL_MODULE := libmesa_winsys_i915 include $(GALLIUM_COMMON_MK) diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index 03120cf48a0..f4fda3f86ce 100644 --- a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -75,7 +75,7 @@ $(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(intermediates) -LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_SHARED_LIBRARIES := libdrm_pri LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers @@ -89,7 +89,7 @@ ANV_INCLUDES := \ $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_common,,)/vulkan \ $(call 
generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util -ANV_SHARED_LIBRARIES := libdrm +ANV_SHARED_LIBRARIES := libdrm_pri ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),) ANV_SHARED_LIBRARIES += libnativewindow diff --git a/src/mesa/drivers/dri/i915/Android.mk b/src/mesa/drivers/dri/i915/Android.mk index b1054aa6e28..7c9c8210dff 100644 --- a/src/mesa/drivers/dri/i915/Android.mk +++ b/src/mesa/drivers/dri/i915/Android.mk @@ -47,7 +47,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ LOCAL_SHARED_LIBRARIES := \ $(MESA_DRI_SHARED_LIBRARIES) \ - libdrm_intel + libdrm_intel_pri LOCAL_GENERATED_SOURCES := \ $(MESA_DRI_OPTIONS_H) \