date:20181015

[Mesa-dev] [Bug 108355] Civilization VI - Artifacts in mouse cursor

2018-10-15 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=108355

Hadrien Nilsson  changed:

   What|Removed |Added

  Component|Drivers/Gallium/radeonsi|Drivers/Gallium/softpipe
   Assignee|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org
 QA Contact|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org

--- Comment #1 from Hadrien Nilsson  ---
I am starting to think I'm not in the right Bugzilla section, as OpenGL does not
handle mouse cursors. Sorry if that is the case; I would love to know the exact
faulting component in the graphics stack.

I do not know if this is an SDL, X11, drm, amdgpu or hardware problem, or the
game itself (some kind of surface corruption). But the GNOME screenshot program
is able to correctly retrieve the cursor image, as shown in the attachment.

I wrote a small SDL program that changes my mouse cursor, as this is what Civ6
seems to use, but everything works fine.

I contacted Aspyr support but their response was a dead end: "AMD and Intel
GPUs aren't supported".

I tried to make the game use my own compiled SDL version with no luck. Steam
games seem to use some kind of sandbox, LD_PRELOAD seems to be ignored. Or the
game may not actually use SDL for the mouse cursor though the related symbols
are referenced in the executable.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v4]

2018-10-15 Thread Keith Packard

Offers three clocks, device, clock monotonic and clock monotonic
raw. Could use some kernel support to reduce the deviation between
clock values.

v2:
Ensure deviation is at least as big as the GPU time interval.

v3:
Set device->lost when returning DEVICE_LOST.
Use MAX2 and DIV_ROUND_UP instead of open coding these.
Delete spurious TIMESTAMP in radv version.
Suggested-by: Jason Ekstrand 
Suggested-by: Lionel Landwerlin 

v4:
Add anv_gem_reg_read to anv_gem_stubs.c
Suggested-by: Jason Ekstrand 

Signed-off-by: Keith Packard 
---
 src/amd/vulkan/radv_device.c   | 81 +++
 src/amd/vulkan/radv_extensions.py  |  1 +
 src/intel/vulkan/anv_device.c  | 89 ++
 src/intel/vulkan/anv_extensions.py |  1 +
 src/intel/vulkan/anv_gem.c | 13 +
 src/intel/vulkan/anv_gem_stubs.c   |  7 +++
 src/intel/vulkan/anv_private.h |  2 +
 7 files changed, 194 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 174922780fc..80050485e54 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -4955,3 +4955,84 @@ radv_GetDeviceGroupPeerMemoryFeatures(
   VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
   VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
 }
+
+static const VkTimeDomainEXT radv_time_domains[] = {
+   VK_TIME_DOMAIN_DEVICE_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+   VkPhysicalDevice physicalDevice,
+   uint32_t *pTimeDomainCount,
+   VkTimeDomainEXT  *pTimeDomains)
+{
+   int d;
+   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+   vk_outarray_append(, i) {
+   *i = radv_time_domains[d];
+   }
+   }
+
+   return vk_outarray_status();
+}
+
+static uint64_t
+radv_clock_gettime(clockid_t clock_id)
+{
+   struct timespec current;
+   int ret;
+
+   ret = clock_gettime(clock_id, );
+   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+   ret = clock_gettime(CLOCK_MONOTONIC, );
+   if (ret < 0)
+   return 0;
+
+   return (uint64_t) current.tv_sec * 10ULL + current.tv_nsec;
+}
+
+VkResult radv_GetCalibratedTimestampsEXT(
+   VkDevice _device,
+   uint32_t timestampCount,
+   const VkCalibratedTimestampInfoEXT   *pTimestampInfos,
+   uint64_t *pTimestamps,
+   uint64_t *pMaxDeviation)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+   uint32_t clock_crystal_freq = 
device->physical_device->rad_info.clock_crystal_freq;
+   int d;
+   uint64_t begin, end;
+
+   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+   for (d = 0; d < timestampCount; d++) {
+   switch (pTimestampInfos[d].timeDomain) {
+   case VK_TIME_DOMAIN_DEVICE_EXT:
+   pTimestamps[d] = device->ws->query_value(device->ws,
+
RADEON_TIMESTAMP);
+   break;
+   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+   pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+   break;
+
+   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+   pTimestamps[d] = begin;
+   break;
+   default:
+   pTimestamps[d] = 0;
+   break;
+   }
+   }
+
+   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+   uint64_t clock_period = end - begin;
+   uint64_t device_period = DIV_ROUND_UP(100, clock_crystal_freq);
+
+   *pMaxDeviation = MAX2(clock_period, device_period);
+
+   return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 5dcedae1c63..4c81d3f0068 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -92,6 +92,7 @@ EXTENSIONS = [
 Extension('VK_KHR_display',  23, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_direct_mode_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_acquire_xlib_display',  1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+Extension('VK_EXT_calibrated_timestamps', 1, True),
 Extension('VK_EXT_conditional_rendering', 1, True),
 Extension('VK_EXT_conservative_rasterization',1, 
'device->rad_info.chip_class

Re: [Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v3]

2018-10-15 Thread Keith Packard

Jason Ekstrand  writes:

> You need to add this to anv_gem_stubs.c as well or else the unit tests
> won't build.  Sorry for not catching it earlier.  I'm always missing this
> too.

Well, that's a bit hard to test as -Dbuild-tests=true fails in a bunch
of glx tests, but I think I've got it.

> With that fixed, the anv bits are
>
> Reviewed-by: Jason Ekstrand 

Thanks. I haven't heard from any radv developers, so I can either split
the patch apart or wait for another day or two. In any case, I'll post
v4 of the patch here with the anv_gem_reg_read addition made.

-- 
-keith

signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] meson: Add -Werror=return-type when supported.

2018-10-15 Thread Kenneth Graunke

This warning detects non-void functions with a missing return statement,
return statements with a value in void functions, and functions with a
bogus return type that ends up defaulting to int.  It's already enabled
by default with -Wall.  Generally, these are fairly serious bugs in the
code, which developers would like to notice and fix immediately.  This
patch promotes it from a warning to an error, to help developers catch
such mistakes early.

I would not expect this warning to change much based on the compiler
version, so hopefully it won't become a problem for packagers/builders.

See the GCC documentation or 'man gcc' for more details:
https://gcc.gnu.org/onlinedocs/gcc-7.3.0/gcc/Warning-Options.html#index-Wreturn-type
---
 meson.build | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 002ce35a608..11e0ea2c08e 100644
--- a/meson.build
+++ b/meson.build
@@ -788,7 +788,8 @@ endif
 # Check for generic C arguments
 c_args = []
 foreach a : ['-Wall', '-Werror=implicit-function-declaration',
- '-Werror=missing-prototypes', '-fno-math-errno',
+ '-Werror=missing-prototypes', '-Werror=return-type',
+ '-fno-math-errno',
  '-fno-trapping-math', '-Qunused-arguments']
   if cc.has_argument(a)
 c_args += a
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v3]

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 6:05 PM Keith Packard  wrote:

> Offers three clocks, device, clock monotonic and clock monotonic
> raw. Could use some kernel support to reduce the deviation between
> clock values.
>
> v2:
> Ensure deviation is at least as big as the GPU time interval.
>
> v3:
> Set device->lost when returning DEVICE_LOST.
> Use MAX2 and DIV_ROUND_UP instead of open coding these.
> Delete spurious TIMESTAMP in radv version.
> Suggested-by: Jason Ekstrand 
> Suggested-by: Lionel Landwerlin 
>
> Signed-off-by: Keith Packard 
> ---
>  src/amd/vulkan/radv_device.c   | 81 +++
>  src/amd/vulkan/radv_extensions.py  |  1 +
>  src/intel/vulkan/anv_device.c  | 89 ++
>  src/intel/vulkan/anv_extensions.py |  1 +
>  src/intel/vulkan/anv_gem.c | 13 +
>  src/intel/vulkan/anv_private.h |  2 +
>  6 files changed, 187 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 174922780fc..80050485e54 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -4955,3 +4955,84 @@ radv_GetDeviceGroupPeerMemoryFeatures(
>VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
>VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
>  }
> +
> +static const VkTimeDomainEXT radv_time_domains[] = {
> +   VK_TIME_DOMAIN_DEVICE_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +   VkPhysicalDevice physicalDevice,
> +   uint32_t *pTimeDomainCount,
> +   VkTimeDomainEXT  *pTimeDomains)
> +{
> +   int d;
> +   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
> +   vk_outarray_append(, i) {
> +   *i = radv_time_domains[d];
> +   }
> +   }
> +
> +   return vk_outarray_status();
> +}
> +
> +static uint64_t
> +radv_clock_gettime(clockid_t clock_id)
> +{
> +   struct timespec current;
> +   int ret;
> +
> +   ret = clock_gettime(clock_id, );
> +   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +   ret = clock_gettime(CLOCK_MONOTONIC, );
> +   if (ret < 0)
> +   return 0;
> +
> +   return (uint64_t) current.tv_sec * 10ULL + current.tv_nsec;
> +}
> +
> +VkResult radv_GetCalibratedTimestampsEXT(
> +   VkDevice _device,
> +   uint32_t timestampCount,
> +   const VkCalibratedTimestampInfoEXT   *pTimestampInfos,
> +   uint64_t *pTimestamps,
> +   uint64_t *pMaxDeviation)
> +{
> +   RADV_FROM_HANDLE(radv_device, device, _device);
> +   uint32_t clock_crystal_freq =
> device->physical_device->rad_info.clock_crystal_freq;
> +   int d;
> +   uint64_t begin, end;
> +
> +   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +   for (d = 0; d < timestampCount; d++) {
> +   switch (pTimestampInfos[d].timeDomain) {
> +   case VK_TIME_DOMAIN_DEVICE_EXT:
> +   pTimestamps[d] =
> device->ws->query_value(device->ws,
> +
> RADEON_TIMESTAMP);
> +   break;
> +   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +   pTimestamps[d] =
> radv_clock_gettime(CLOCK_MONOTONIC);
> +   break;
> +
> +   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +   pTimestamps[d] = begin;
> +   break;
> +   default:
> +   pTimestamps[d] = 0;
> +   break;
> +   }
> +   }
> +
> +   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +   uint64_t clock_period = end - begin;
> +   uint64_t device_period = DIV_ROUND_UP(100, clock_crystal_freq);
> +
> +   *pMaxDeviation = MAX2(clock_period, device_period);
> +
> +   return VK_SUCCESS;
> +}
> diff --git a/src/amd/vulkan/radv_extensions.py
> b/src/amd/vulkan/radv_extensions.py
> index 5dcedae1c63..4c81d3f0068 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -92,6 +92,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_display',  23,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>  Extension('VK_EXT_direct_mode_display',   1,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>  Extension('VK_EXT_acquire_xlib_display',  1,
> 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
> +Extension('VK_EXT_calibrated_timestamps', 1, True),
>  Extension('VK_EXT_conditional_rendering',

[Mesa-dev] [PATCH] nir: Allow using nir_lower_io_to_scalar_early on VS input vars.

2018-10-15 Thread Eric Anholt

This will be used on V3D to cut down the size of the VS inputs in the VPM
(memory area for sharing data between shader stages).
---
 src/compiler/nir/nir_lower_io_to_scalar.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_io_to_scalar.c 
b/src/compiler/nir/nir_lower_io_to_scalar.c
index f0c2a6a95d6d..c64f641a0ae4 100644
--- a/src/compiler/nir/nir_lower_io_to_scalar.c
+++ b/src/compiler/nir/nir_lower_io_to_scalar.c
@@ -320,7 +320,9 @@ nir_lower_io_to_scalar_early(nir_shader *shader, 
nir_variable_mode mask)
if (glsl_type_is_64bit(glsl_without_array(var->type)))
   continue;
 
-   if (var->data.location < VARYING_SLOT_VAR0 &&
+   if (!(shader->info.stage == MESA_SHADER_VERTEX &&
+ mode == nir_var_shader_in) &&
+   var->data.location < VARYING_SLOT_VAR0 &&
var->data.location >= 0)
   continue;
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Add PCI IDs for new Amberlake parts that are Coffeelake based

2018-10-15 Thread Rodrigo Vivi

On Mon, Oct 15, 2018 at 04:05:39PM -0700, Kenneth Graunke wrote:
> See commit c0c46ca461f136a0ae1ed69da6c874e850aeeb53 in the Linux kernel,
> where José Roberto de Souza added this new PCI ID there.


Reviewed-by: Rodrigo Vivi 



> ---
>  include/pci_ids/i965_pci_ids.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
> index cb33bea7d4d..7201562d824 100644
> --- a/include/pci_ids/i965_pci_ids.h
> +++ b/include/pci_ids/i965_pci_ids.h
> @@ -163,8 +163,9 @@ CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
>  CHIPSET(0x5926, kbl_gt3, "Intel(R) Iris Plus Graphics 640 (Kaby Lake GT3e)")
>  CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3e)")
>  CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
> -CHIPSET(0x591C, kbl_gt2, "Intel(R) Amber Lake GT2")
> -CHIPSET(0x87C0, kbl_gt2, "Intel(R) Amber Lake GT2")
> +CHIPSET(0x591C, kbl_gt2, "Intel(R) Amber Lake (Kabylake) GT2")
> +CHIPSET(0x87C0, kbl_gt2, "Intel(R) Amber Lake (Kabylake) GT2")
> +CHIPSET(0x87CA, cfl_gt2, "Intel(R) Amber Lake (Coffeelake) GT2")
>  CHIPSET(0x3184, glk, "Intel(R) UHD Graphics 605 (Geminilake)")
>  CHIPSET(0x3185, glk_2x6, "Intel(R) UHD Graphics 600 (Geminilake 2x6)")
>  CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
> -- 
> 2.19.0
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Add PCI IDs for new Amberlake parts that are Coffeelake based

2018-10-15 Thread Jordan Justen

Reviewed-by: Jordan Justen 

On 2018-10-15 16:05:39, Kenneth Graunke wrote:
> See commit c0c46ca461f136a0ae1ed69da6c874e850aeeb53 in the Linux kernel,
> where José Roberto de Souza added this new PCI ID there.
> ---
>  include/pci_ids/i965_pci_ids.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
> index cb33bea7d4d..7201562d824 100644
> --- a/include/pci_ids/i965_pci_ids.h
> +++ b/include/pci_ids/i965_pci_ids.h
> @@ -163,8 +163,9 @@ CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
>  CHIPSET(0x5926, kbl_gt3, "Intel(R) Iris Plus Graphics 640 (Kaby Lake GT3e)")
>  CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3e)")
>  CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
> -CHIPSET(0x591C, kbl_gt2, "Intel(R) Amber Lake GT2")
> -CHIPSET(0x87C0, kbl_gt2, "Intel(R) Amber Lake GT2")
> +CHIPSET(0x591C, kbl_gt2, "Intel(R) Amber Lake (Kabylake) GT2")
> +CHIPSET(0x87C0, kbl_gt2, "Intel(R) Amber Lake (Kabylake) GT2")
> +CHIPSET(0x87CA, cfl_gt2, "Intel(R) Amber Lake (Coffeelake) GT2")
>  CHIPSET(0x3184, glk, "Intel(R) UHD Graphics 605 (Geminilake)")
>  CHIPSET(0x3185, glk_2x6, "Intel(R) UHD Graphics 600 (Geminilake 2x6)")
>  CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
> -- 
> 2.19.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] i965: Add PCI IDs for new Amberlake parts that are Coffeelake based

2018-10-15 Thread Kenneth Graunke

See commit c0c46ca461f136a0ae1ed69da6c874e850aeeb53 in the Linux kernel,
where José Roberto de Souza added this new PCI ID there.
---
 include/pci_ids/i965_pci_ids.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index cb33bea7d4d..7201562d824 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -163,8 +163,9 @@ CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5926, kbl_gt3, "Intel(R) Iris Plus Graphics 640 (Kaby Lake GT3e)")
 CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3e)")
 CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x591C, kbl_gt2, "Intel(R) Amber Lake GT2")
-CHIPSET(0x87C0, kbl_gt2, "Intel(R) Amber Lake GT2")
+CHIPSET(0x591C, kbl_gt2, "Intel(R) Amber Lake (Kabylake) GT2")
+CHIPSET(0x87C0, kbl_gt2, "Intel(R) Amber Lake (Kabylake) GT2")
+CHIPSET(0x87CA, cfl_gt2, "Intel(R) Amber Lake (Coffeelake) GT2")
 CHIPSET(0x3184, glk, "Intel(R) UHD Graphics 605 (Geminilake)")
 CHIPSET(0x3185, glk_2x6, "Intel(R) UHD Graphics 600 (Geminilake 2x6)")
 CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v3]

2018-10-15 Thread Keith Packard

Offers three clocks, device, clock monotonic and clock monotonic
raw. Could use some kernel support to reduce the deviation between
clock values.

v2:
Ensure deviation is at least as big as the GPU time interval.

v3:
Set device->lost when returning DEVICE_LOST.
Use MAX2 and DIV_ROUND_UP instead of open coding these.
Delete spurious TIMESTAMP in radv version.
Suggested-by: Jason Ekstrand 
Suggested-by: Lionel Landwerlin 

Signed-off-by: Keith Packard 
---
 src/amd/vulkan/radv_device.c   | 81 +++
 src/amd/vulkan/radv_extensions.py  |  1 +
 src/intel/vulkan/anv_device.c  | 89 ++
 src/intel/vulkan/anv_extensions.py |  1 +
 src/intel/vulkan/anv_gem.c | 13 +
 src/intel/vulkan/anv_private.h |  2 +
 6 files changed, 187 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 174922780fc..80050485e54 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -4955,3 +4955,84 @@ radv_GetDeviceGroupPeerMemoryFeatures(
   VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
   VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
 }
+
+static const VkTimeDomainEXT radv_time_domains[] = {
+   VK_TIME_DOMAIN_DEVICE_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+   VkPhysicalDevice physicalDevice,
+   uint32_t *pTimeDomainCount,
+   VkTimeDomainEXT  *pTimeDomains)
+{
+   int d;
+   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+   vk_outarray_append(, i) {
+   *i = radv_time_domains[d];
+   }
+   }
+
+   return vk_outarray_status();
+}
+
+static uint64_t
+radv_clock_gettime(clockid_t clock_id)
+{
+   struct timespec current;
+   int ret;
+
+   ret = clock_gettime(clock_id, );
+   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+   ret = clock_gettime(CLOCK_MONOTONIC, );
+   if (ret < 0)
+   return 0;
+
+   return (uint64_t) current.tv_sec * 10ULL + current.tv_nsec;
+}
+
+VkResult radv_GetCalibratedTimestampsEXT(
+   VkDevice _device,
+   uint32_t timestampCount,
+   const VkCalibratedTimestampInfoEXT   *pTimestampInfos,
+   uint64_t *pTimestamps,
+   uint64_t *pMaxDeviation)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+   uint32_t clock_crystal_freq = 
device->physical_device->rad_info.clock_crystal_freq;
+   int d;
+   uint64_t begin, end;
+
+   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+   for (d = 0; d < timestampCount; d++) {
+   switch (pTimestampInfos[d].timeDomain) {
+   case VK_TIME_DOMAIN_DEVICE_EXT:
+   pTimestamps[d] = device->ws->query_value(device->ws,
+
RADEON_TIMESTAMP);
+   break;
+   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+   pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+   break;
+
+   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+   pTimestamps[d] = begin;
+   break;
+   default:
+   pTimestamps[d] = 0;
+   break;
+   }
+   }
+
+   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+   uint64_t clock_period = end - begin;
+   uint64_t device_period = DIV_ROUND_UP(100, clock_crystal_freq);
+
+   *pMaxDeviation = MAX2(clock_period, device_period);
+
+   return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 5dcedae1c63..4c81d3f0068 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -92,6 +92,7 @@ EXTENSIONS = [
 Extension('VK_KHR_display',  23, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_direct_mode_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_acquire_xlib_display',  1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+Extension('VK_EXT_calibrated_timestamps', 1, True),
 Extension('VK_EXT_conditional_rendering', 1, True),
 Extension('VK_EXT_conservative_rasterization',1, 
'device->rad_info.chip_class >= GFX9'),
 Extension('VK_EXT_display_surface_counter',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
diff --git

Re: [Mesa-dev] [PATCH] intel: disable FS IR validation in release mode.

2018-10-15 Thread Matt Turner

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st/mesa: Record shader access qualifiers for images

2018-10-15 Thread Kenneth Graunke

From: Jason Ekstrand 

They're not required to be the same as the access flag on the image
unit.  For hardware that does shader image lowering based on the
qualifier (Intel), it may be required for state setup.
---
 src/gallium/include/pipe/p_state.h |  1 +
 src/mesa/state_tracker/st_atom_image.c | 27 ++
 src/mesa/state_tracker/st_cb_texture.c |  2 +-
 src/mesa/state_tracker/st_texture.c|  2 +-
 src/mesa/state_tracker/st_texture.h|  5 +++--
 5 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h
index a58d91fb3dd..331417b1d7f 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -485,6 +485,7 @@ struct pipe_image_view
struct pipe_resource *resource; /**< resource into which this is a view  */
enum pipe_format format;  /**< typed PIPE_FORMAT_x */
unsigned access;  /**< PIPE_IMAGE_ACCESS_x */
+   unsigned shader_access;   /**< PIPE_IMAGE_ACCESS_x */
 
union {
   struct {
diff --git a/src/mesa/state_tracker/st_atom_image.c 
b/src/mesa/state_tracker/st_atom_image.c
index 421c926cf04..db3539259ce 100644
--- a/src/mesa/state_tracker/st_atom_image.c
+++ b/src/mesa/state_tracker/st_atom_image.c
@@ -50,7 +50,7 @@
  */
 void
 st_convert_image(const struct st_context *st, const struct gl_image_unit *u,
- struct pipe_image_view *img)
+ struct pipe_image_view *img, unsigned shader_access)
 {
struct st_texture_object *stObj = st_texture_object(u->TexObj);
 
@@ -70,6 +70,23 @@ st_convert_image(const struct st_context *st, const struct 
gl_image_unit *u,
   unreachable("bad gl_image_unit::Access");
}
 
+   switch (shader_access) {
+   case GL_NONE:
+  img->shader_access = 0;
+  break;
+   case GL_READ_ONLY:
+  img->shader_access = PIPE_IMAGE_ACCESS_READ;
+  break;
+   case GL_WRITE_ONLY:
+  img->shader_access = PIPE_IMAGE_ACCESS_WRITE;
+  break;
+   case GL_READ_WRITE:
+  img->shader_access = PIPE_IMAGE_ACCESS_READ_WRITE;
+  break;
+   default:
+  unreachable("bad gl_image_unit::Access");
+   }
+
if (stObj->base.Target == GL_TEXTURE_BUFFER) {
   struct st_buffer_object *stbuf =
  st_buffer_object(stObj->base.BufferObject);
@@ -125,7 +142,8 @@ st_convert_image(const struct st_context *st, const struct 
gl_image_unit *u,
 void
 st_convert_image_from_unit(const struct st_context *st,
struct pipe_image_view *img,
-   GLuint imgUnit)
+   GLuint imgUnit,
+   unsigned shader_access)
 {
struct gl_image_unit *u = >ctx->ImageUnits[imgUnit];
 
@@ -134,7 +152,7 @@ st_convert_image_from_unit(const struct st_context *st,
   return;
}
 
-   st_convert_image(st, u, img);
+   st_convert_image(st, u, img, shader_access);
 }
 
 static void
@@ -153,7 +171,8 @@ st_bind_images(struct st_context *st, struct gl_program 
*prog,
for (i = 0; i < prog->info.num_images; i++) {
   struct pipe_image_view *img = [i];
 
-  st_convert_image_from_unit(st, img, prog->sh.ImageUnits[i]);
+  st_convert_image_from_unit(st, img, prog->sh.ImageUnits[i],
+ prog->sh.ImageAccess[i]);
}
cso_set_shader_images(st->cso_context, shader_type, 0,
  prog->info.num_images, images);
diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index e6e27a852f5..b8cc616d8f2 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -3237,7 +3237,7 @@ st_NewImageHandle(struct gl_context *ctx, struct 
gl_image_unit *imgObj)
struct pipe_context *pipe = st->pipe;
struct pipe_image_view image;
 
-   st_convert_image(st, imgObj, );
+   st_convert_image(st, imgObj, , GL_READ_WRITE);
 
return pipe->create_image_handle(pipe, );
 }
diff --git a/src/mesa/state_tracker/st_texture.c 
b/src/mesa/state_tracker/st_texture.c
index 9655eede5fe..56d01d39bf0 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -540,7 +540,7 @@ st_create_image_handle_from_unit(struct st_context *st,
struct pipe_context *pipe = st->pipe;
struct pipe_image_view img;
 
-   st_convert_image_from_unit(st, , imgUnit);
+   st_convert_image_from_unit(st, , imgUnit, GL_READ_WRITE);
 
return pipe->create_image_handle(pipe, );
 }
diff --git a/src/mesa/state_tracker/st_texture.h 
b/src/mesa/state_tracker/st_texture.h
index 726ab78dad4..7fb3f09a1c2 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -320,12 +320,13 @@ st_compressed_format_fallback(struct st_context *st, 
mesa_format format);
 
 void
 st_convert_image(const struct st_context *st, const struct gl_image_unit *u,
- struct pipe_image_view *img);
+ struct pipe_image_view *img,

[Mesa-dev] [PATCH] intel: disable FS IR validation in release mode.

2018-10-15 Thread Kenneth Graunke

We probably don't need to iterate, fprintf, and abort in release mode.
---
 src/intel/compiler/brw_fs_validate.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/intel/compiler/brw_fs_validate.cpp 
b/src/intel/compiler/brw_fs_validate.cpp
index 676942c19c0..75a794fd794 100644
--- a/src/intel/compiler/brw_fs_validate.cpp
+++ b/src/intel/compiler/brw_fs_validate.cpp
@@ -41,6 +41,7 @@
 void
 fs_visitor::validate()
 {
+#ifndef NDEBUG
foreach_block_and_inst (block, fs_inst, inst, cfg) {
   if (inst->dst.file == VGRF) {
  fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <=
@@ -54,4 +55,5 @@ fs_visitor::validate()
  }
   }
}
+#endif
 }
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v2]

2018-10-15 Thread Lionel Landwerlin


On 15/10/2018 22:22, Keith Packard wrote:

+#define TIMESTAMP 0x2358
+
+VkResult radv_GetCalibratedTimestampsEXT(


Heh, I think you copied that define over from Anv ;)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v2]

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 4:22 PM Keith Packard  wrote:

> Offers three clocks, device, clock monotonic and clock monotonic
> raw. Could use some kernel support to reduce the deviation between
> clock values.
>
> v2:
> Ensure deviation is at least as big as the GPU time interval.
>
> Signed-off-by: Keith Packard 
> ---
>  src/amd/vulkan/radv_device.c   | 84 
>  src/amd/vulkan/radv_extensions.py  |  1 +
>  src/intel/vulkan/anv_device.c  | 88 ++
>  src/intel/vulkan/anv_extensions.py |  1 +
>  src/intel/vulkan/anv_gem.c | 13 +
>  src/intel/vulkan/anv_private.h |  2 +
>  6 files changed, 189 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 174922780fc..29f0afbc69b 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -4955,3 +4955,87 @@ radv_GetDeviceGroupPeerMemoryFeatures(
>VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
>VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
>  }
> +
> +static const VkTimeDomainEXT radv_time_domains[] = {
> +   VK_TIME_DOMAIN_DEVICE_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +   VkPhysicalDevice physicalDevice,
> +   uint32_t *pTimeDomainCount,
> +   VkTimeDomainEXT  *pTimeDomains)
> +{
> +   int d;
> +   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
> +   vk_outarray_append(, i) {
> +   *i = radv_time_domains[d];
> +   }
> +   }
> +
> +   return vk_outarray_status();
> +}
> +
> +static uint64_t
> +radv_clock_gettime(clockid_t clock_id)
> +{
> +   struct timespec current;
> +   int ret;
> +
> +   ret = clock_gettime(clock_id, );
> +   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +   ret = clock_gettime(CLOCK_MONOTONIC, );
> +   if (ret < 0)
> +   return 0;
> +
> +   return (uint64_t) current.tv_sec * 10ULL + current.tv_nsec;
> +}
> +
> +#define TIMESTAMP 0x2358
> +
> +VkResult radv_GetCalibratedTimestampsEXT(
> +   VkDevice _device,
> +   uint32_t timestampCount,
> +   const VkCalibratedTimestampInfoEXT   *pTimestampInfos,
> +   uint64_t *pTimestamps,
> +   uint64_t *pMaxDeviation)
> +{
> +   RADV_FROM_HANDLE(radv_device, device, _device);
> +   uint32_t clock_crystal_freq =
> device->physical_device->rad_info.clock_crystal_freq;
> +   int d;
> +   uint64_t begin, end;
> +
> +   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +   for (d = 0; d < timestampCount; d++) {
> +   switch (pTimestampInfos[d].timeDomain) {
> +   case VK_TIME_DOMAIN_DEVICE_EXT:
> +   /* XXX older kernels don't support this interface.
> */
> +   pTimestamps[d] =
> device->ws->query_value(device->ws,
> +
> RADEON_TIMESTAMP);
> +   break;
> +   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +   pTimestamps[d] =
> radv_clock_gettime(CLOCK_MONOTONIC);
> +   break;
> +
> +   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +   pTimestamps[d] = begin;
> +   break;
> +   default:
> +   pTimestamps[d] = 0;
> +   break;
> +   }
> +   }
> +
> +   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +   uint64_t clock_period = end - begin;
> +   uint64_t device_period = (100 + clock_crystal_freq - 1) /
> clock_crystal_freq;
> +
> +   *pMaxDeviation = clock_period > device_period ? clock_period :
> device_period;
> +
> +   return VK_SUCCESS;
> +}
> diff --git a/src/amd/vulkan/radv_extensions.py
> b/src/amd/vulkan/radv_extensions.py
> index 5dcedae1c63..4c81d3f0068 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -92,6 +92,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_display',  23,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>  Extension('VK_EXT_direct_mode_display',   1,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>  Extension('VK_EXT_acquire_xlib_display',  1,
> 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
> +Extension('VK_EXT_calibrated_timestamps', 1, True),
>  Extension('VK_EXT_conditional_rendering', 1, True),
>  Extension('VK_EXT_conservative_rasterization',1,
>

Re: [Mesa-dev] [PATCH] anv: Don't advertise ASTC support on BSW

2018-10-15 Thread Nanley Chery

On Mon, Oct 15, 2018 at 01:07:12PM -0500, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/anv_formats.c | 8 
>  1 file changed, 8 insertions(+)
> 

This patch is
Reviewed-by: Nanley Chery 

> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
> index 33faf7cc37f..9199567f445 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -521,6 +521,14 @@ get_image_format_features(const struct gen_device_info 
> *devinfo,
> isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
>return 0;
>  
> +   /* ASTC requires nasty workarounds on BSW so we just disable it for now.
> +*
> +* TODO: Figure out the ASTC workarounds and re-enable on BSW.
> +*/
> +   if (devinfo->gen < 9 &&
> +   isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
> +  return 0;
> +
> if (isl_format_supports_sampling(devinfo, plane_format.isl_format)) {
>flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
>  
> -- 
> 2.19.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Add tests for VK_EXT_calibrated_timestamps [v2]

2018-10-15 Thread Keith Packard

Jason Ekstrand  writes:

> We're using MRs for crucible.  Please create one and make sure you check
> the "Allow commits from members who can merge to the target branch" so it
> can be rebased through the UI by someone other than yourself.

OOo. Shiny!

-- 
-keith


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v2]

2018-10-15 Thread Keith Packard

Offers three clocks, device, clock monotonic and clock monotonic
raw. Could use some kernel support to reduce the deviation between
clock values.

v2:
Ensure deviation is at least as big as the GPU time interval.

Signed-off-by: Keith Packard 
---
 src/amd/vulkan/radv_device.c   | 84 
 src/amd/vulkan/radv_extensions.py  |  1 +
 src/intel/vulkan/anv_device.c  | 88 ++
 src/intel/vulkan/anv_extensions.py |  1 +
 src/intel/vulkan/anv_gem.c | 13 +
 src/intel/vulkan/anv_private.h |  2 +
 6 files changed, 189 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 174922780fc..29f0afbc69b 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -4955,3 +4955,87 @@ radv_GetDeviceGroupPeerMemoryFeatures(
   VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
   VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
 }
+
+static const VkTimeDomainEXT radv_time_domains[] = {
+   VK_TIME_DOMAIN_DEVICE_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+   VkPhysicalDevice physicalDevice,
+   uint32_t *pTimeDomainCount,
+   VkTimeDomainEXT  *pTimeDomains)
+{
+   int d;
+   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+   vk_outarray_append(&out, i) {
+   *i = radv_time_domains[d];
+   }
+   }
+
+   return vk_outarray_status(&out);
+}
+
+static uint64_t
+radv_clock_gettime(clockid_t clock_id)
+{
+   struct timespec current;
+   int ret;
+
+   ret = clock_gettime(clock_id, &current);
+   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+   ret = clock_gettime(CLOCK_MONOTONIC, &current);
+   if (ret < 0)
+   return 0;
+
+   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+#define TIMESTAMP 0x2358
+
+VkResult radv_GetCalibratedTimestampsEXT(
+   VkDevice _device,
+   uint32_t timestampCount,
+   const VkCalibratedTimestampInfoEXT   *pTimestampInfos,
+   uint64_t *pTimestamps,
+   uint64_t *pMaxDeviation)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+   uint32_t clock_crystal_freq = 
device->physical_device->rad_info.clock_crystal_freq;
+   int d;
+   uint64_t begin, end;
+
+   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+   for (d = 0; d < timestampCount; d++) {
+   switch (pTimestampInfos[d].timeDomain) {
+   case VK_TIME_DOMAIN_DEVICE_EXT:
+   /* XXX older kernels don't support this interface. */
+   pTimestamps[d] = device->ws->query_value(device->ws,
+
RADEON_TIMESTAMP);
+   break;
+   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+   pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+   break;
+
+   case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+   pTimestamps[d] = begin;
+   break;
+   default:
+   pTimestamps[d] = 0;
+   break;
+   }
+   }
+
+   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+   uint64_t clock_period = end - begin;
+   uint64_t device_period = (1000000 + clock_crystal_freq - 1) / 
clock_crystal_freq;
+
+   *pMaxDeviation = clock_period > device_period ? clock_period : 
device_period;
+
+   return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 5dcedae1c63..4c81d3f0068 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -92,6 +92,7 @@ EXTENSIONS = [
 Extension('VK_KHR_display',  23, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_direct_mode_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_acquire_xlib_display',  1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+Extension('VK_EXT_calibrated_timestamps', 1, True),
 Extension('VK_EXT_conditional_rendering', 1, True),
 Extension('VK_EXT_conservative_rasterization',1, 
'device->rad_info.chip_class >= GFX9'),
 Extension('VK_EXT_display_surface_counter',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a2551452eb1..6a6539c9685 100644
---

Re: [Mesa-dev] [PATCH] Add tests for VK_EXT_calibrated_timestamps [v2]

2018-10-15 Thread Jason Ekstrand

We're using MRs for crucible.  Please create one and make sure you check
the "Allow commits from members who can merge to the target branch" so it
can be rebased through the UI by someone other than yourself.

--Jason

On Mon, Oct 15, 2018 at 4:15 PM Keith Packard  wrote:

> Five tests:
>
>  1) Check for non-null function pointers
>  2) Check for in-range time domains
>  3) Check monotonic domains for correct values
>  4) Check correlation between monotonic and device domains
>  5) Check to make sure times in device domain match queue times
>
> Signed-off-by: Keith Packard 
> ---
>  Makefile.am|   1 +
>  src/tests/func/calibrated-timestamps.c | 442 +
>  2 files changed, 443 insertions(+)
>  create mode 100644 src/tests/func/calibrated-timestamps.c
>
> diff --git a/Makefile.am b/Makefile.am
> index 0ca35bd..ba98c60 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -113,6 +113,7 @@ bin_crucible_SOURCES = \
> src/tests/stress/lots-of-surface-state.c \
> src/tests/stress/buffer_limit.c \
> src/tests/self/concurrent-output.c \
> +   src/tests/func/calibrated-timestamps.c \
> src/util/cru_cleanup.c \
> src/util/cru_format.c \
> src/util/cru_image.c \
> diff --git a/src/tests/func/calibrated-timestamps.c
> b/src/tests/func/calibrated-timestamps.c
> new file mode 100644
> index 0000000..a98150b
> --- /dev/null
> +++ b/src/tests/func/calibrated-timestamps.c
> @@ -0,0 +1,442 @@
> +/*
> + * Copyright © 2018 Keith Packard 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +
> +#include "tapi/t.h"
> +#include 
> +#include 
> +#include 
> +
> +#define GET_DEVICE_FUNCTION_PTR(name) \
> +PFN_vk##name name = (PFN_vk##name)vkGetDeviceProcAddr(t_device,
> "vk"#name)
> +
> +#define GET_INSTANCE_FUNCTION_PTR(name) \
> +PFN_vk##name name = (PFN_vk##name)vkGetInstanceProcAddr(t_instance,
> "vk"#name)
> +
> +/* Test 1: Make sure the function pointers promised by the extension
> + * are valid
> + */
> +static void
> +test_funcs(void)
> +{
> +t_require_ext("VK_EXT_calibrated_timestamps");
> +
> +
> GET_INSTANCE_FUNCTION_PTR(GetPhysicalDeviceCalibrateableTimeDomainsEXT);
> +GET_DEVICE_FUNCTION_PTR(GetCalibratedTimestampsEXT);
> +
> +t_assert(GetPhysicalDeviceCalibrateableTimeDomainsEXT != NULL);
> +t_assert(GetCalibratedTimestampsEXT != NULL);
> +}
> +
> +test_define {
> +.name = "func.calibrated-timestamps.funcs",
> +.start = test_funcs,
> +.no_image = true,
> +};
> +
> +/* Test 2: Make sure all of the domains offered by the driver are in range
> + */
> +static void
> +test_domains(void)
> +{
> +t_require_ext("VK_EXT_calibrated_timestamps");
> +
> +
> GET_INSTANCE_FUNCTION_PTR(GetPhysicalDeviceCalibrateableTimeDomainsEXT);
> +GET_DEVICE_FUNCTION_PTR(GetCalibratedTimestampsEXT);
> +
> +t_assert(GetPhysicalDeviceCalibrateableTimeDomainsEXT != NULL);
> +t_assert(GetCalibratedTimestampsEXT != NULL);
> +
> +VkResult result;
> +
> +uint32_t timeDomainCount;
> +result = GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +t_physical_dev,
> +&timeDomainCount,
> +NULL);
> +t_assert(result == VK_SUCCESS);
> +t_assert(timeDomainCount > 0);
> +
> +VkTimeDomainEXT *timeDomains = calloc(timeDomainCount, sizeof
> (VkTimeDomainEXT));
> +t_assert(timeDomains != NULL);
> +
> +result = GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +t_physical_dev,
> +&timeDomainCount,
> +timeDomains);
> +
> +t_assert(result == VK_SUCCESS);
> +
> +/* Make sure all reported domains are valid */
> +for (uint32_t d = 0; d < timeDomainCount; d++) {
> +t_assert(VK_TIME_DOMAIN_BEGIN_RANGE_EXT <= timeDomains[d] &&
> + timeDomains[d] <= VK_TIME_DOMAIN_END_RANGE_EXT);
> +}
> +}
> +
> +test_define {
> +.name = "func.calibrated-timestamps.domains",
> +.start = test_domains,
> +.no_image = true,
> +};
> +
> +static uint64_t
> +crucible_clock_gettime(VkTimeDomainEXT domain)
> +{
> +struct timespec current;
> +int ret;
> +clockid_t clock_id;
> +
> +switch (domain) {
> +case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +clock_id = CLOCK_MONOTONIC;
> +break;
> +case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +clock_id = CLOCK_MONOTONIC_RAW;
> +break;
> +default:
> +t_assert(0);
> +return 0;
> +}
> +
> +ret = clock_gettime(clock_id, &current);
> +t_assert (ret >= 0);
> +if

[Mesa-dev] [PATCH] Add tests for VK_EXT_calibrated_timestamps [v2]

2018-10-15 Thread Keith Packard

Five tests:

 1) Check for non-null function pointers
 2) Check for in-range time domains
 3) Check monotonic domains for correct values
 4) Check correlation between monotonic and device domains
 5) Check to make sure times in device domain match queue times

Signed-off-by: Keith Packard 
---
 Makefile.am|   1 +
 src/tests/func/calibrated-timestamps.c | 442 +
 2 files changed, 443 insertions(+)
 create mode 100644 src/tests/func/calibrated-timestamps.c

diff --git a/Makefile.am b/Makefile.am
index 0ca35bd..ba98c60 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -113,6 +113,7 @@ bin_crucible_SOURCES = \
src/tests/stress/lots-of-surface-state.c \
src/tests/stress/buffer_limit.c \
src/tests/self/concurrent-output.c \
+   src/tests/func/calibrated-timestamps.c \
src/util/cru_cleanup.c \
src/util/cru_format.c \
src/util/cru_image.c \
diff --git a/src/tests/func/calibrated-timestamps.c 
b/src/tests/func/calibrated-timestamps.c
new file mode 100644
index 0000000..a98150b
--- /dev/null
+++ b/src/tests/func/calibrated-timestamps.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright © 2018 Keith Packard 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "tapi/t.h"
+#include 
+#include 
+#include 
+
+#define GET_DEVICE_FUNCTION_PTR(name) \
+PFN_vk##name name = (PFN_vk##name)vkGetDeviceProcAddr(t_device, "vk"#name)
+
+#define GET_INSTANCE_FUNCTION_PTR(name) \
+PFN_vk##name name = (PFN_vk##name)vkGetInstanceProcAddr(t_instance, 
"vk"#name)
+
+/* Test 1: Make sure the function pointers promised by the extension
+ * are valid
+ */
+static void
+test_funcs(void)
+{
+t_require_ext("VK_EXT_calibrated_timestamps");
+
+GET_INSTANCE_FUNCTION_PTR(GetPhysicalDeviceCalibrateableTimeDomainsEXT);
+GET_DEVICE_FUNCTION_PTR(GetCalibratedTimestampsEXT);
+
+t_assert(GetPhysicalDeviceCalibrateableTimeDomainsEXT != NULL);
+t_assert(GetCalibratedTimestampsEXT != NULL);
+}
+
+test_define {
+.name = "func.calibrated-timestamps.funcs",
+.start = test_funcs,
+.no_image = true,
+};
+
+/* Test 2: Make sure all of the domains offered by the driver are in range
+ */
+static void
+test_domains(void)
+{
+t_require_ext("VK_EXT_calibrated_timestamps");
+
+GET_INSTANCE_FUNCTION_PTR(GetPhysicalDeviceCalibrateableTimeDomainsEXT);
+GET_DEVICE_FUNCTION_PTR(GetCalibratedTimestampsEXT);
+
+t_assert(GetPhysicalDeviceCalibrateableTimeDomainsEXT != NULL);
+t_assert(GetCalibratedTimestampsEXT != NULL);
+
+VkResult result;
+
+uint32_t timeDomainCount;
+result = GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+t_physical_dev,
+&timeDomainCount,
+NULL);
+t_assert(result == VK_SUCCESS);
+t_assert(timeDomainCount > 0);
+
+VkTimeDomainEXT *timeDomains = calloc(timeDomainCount, sizeof 
(VkTimeDomainEXT));
+t_assert(timeDomains != NULL);
+
+result = GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+t_physical_dev,
+&timeDomainCount,
+timeDomains);
+
+t_assert(result == VK_SUCCESS);
+
+/* Make sure all reported domains are valid */
+for (uint32_t d = 0; d < timeDomainCount; d++) {
+t_assert(VK_TIME_DOMAIN_BEGIN_RANGE_EXT <= timeDomains[d] &&
+ timeDomains[d] <= VK_TIME_DOMAIN_END_RANGE_EXT);
+}
+}
+
+test_define {
+.name = "func.calibrated-timestamps.domains",
+.start = test_domains,
+.no_image = true,
+};
+
+static uint64_t
+crucible_clock_gettime(VkTimeDomainEXT domain)
+{
+struct timespec current;
+int ret;
+clockid_t clock_id;
+
+switch (domain) {
+case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+clock_id = CLOCK_MONOTONIC;
+break;
+case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+clock_id = CLOCK_MONOTONIC_RAW;
+break;
+default:
+t_assert(0);
+return 0;
+}
+
+ret = clock_gettime(clock_id, &current);
+t_assert (ret >= 0);
+if (ret < 0)
+return 0;
+
+return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+/* Test 3: Make sure any monotonic domains return accurate data
+ */
+static void
+test_monotonic(void)
+{
+t_require_ext("VK_EXT_calibrated_timestamps");
+
+GET_INSTANCE_FUNCTION_PTR(GetPhysicalDeviceCalibrateableTimeDomainsEXT);
+GET_DEVICE_FUNCTION_PTR(GetCalibratedTimestampsEXT);
+
+t_assert(GetPhysicalDeviceCalibrateableTimeDomainsEXT != NULL);
+t_assert(GetCalibratedTimestampsEXT != NULL);
+
+VkResult result;
+
+uint32_t

[Mesa-dev] [Bug 107971] SPV_GOOGLE_hlsl_functionality1 / SPV_GOOGLE_decorate_string

2018-10-15 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=107971

--- Comment #5 from Jason Ekstrand  ---
If you want to give me a "Tested-by" tag, I'll happily add it to the patch
before pushing.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3] nir: Copy propagation between blocks

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 3:07 PM Caio Marcelo de Oliveira Filho <
caio.olive...@intel.com> wrote:

> Extend the pass to propagate the copies information along the control
> flow graph.  It performs two walks, first it collects the vars
> that were written inside each node. Then it walks applying the copy
> propagation using a list of copies previously available.  At each node
> the list is invalidated according to results from the first walk.
>
> This approach is simpler than a full data-flow analysis, but covers
> various cases.  If derefs are used for operating on more memory
> resources (e.g. SSBOs), the difference from a regular pass is expected
> to be more visible -- as the SSA copy propagation pass won't apply to
> those.
>
> A full data-flow analysis would handle more scenarios: conditional
> breaks in the control flow and merge equivalent effects from multiple
> branches (e.g. using a phi node to merge the source for writes to the
> same deref).  However, as previous commentary in the code stated, its
> complexity 'rapidly get out of hand'.  The current patch is a good
> intermediate step towards more complex analysis.
>
> The 'copies' linked list was modified to use util_dynarray to make it
> more convenient to clone it (to handle ifs/loops).
>
> Annotated shader-db results for Skylake:
>
> total instructions in shared programs: 15105796 -> 15105451 (<.01%)
> instructions in affected programs: 152293 -> 151948 (-0.23%)
> helped: 96
> HURT: 17
>
> All the HURTs and many HELPs are one instruction.  Looking
> at pass by pass outputs, the copy prop kicks in removing a
> bunch of loads correctly, which ends up altering what other
> optimizations kick in.  In those cases the copies would be
> propagated after lowering to SSA.
>
> In few HELPs we are actually helping doing more than was
> possible previously, e.g. consolidating load_uniforms from
> different blocks.  Most of those are from
> shaders/dolphin/ubershaders/.
>
> total cycles in shared programs: 566048861 -> 565954876 (-0.02%)
> cycles in affected programs: 151461830 -> 151367845 (-0.06%)
> helped: 2933
> HURT: 2950
>
> A lot of noise on both sides.
>
> total loops in shared programs: 4603 -> 4603 (0.00%)
> loops in affected programs: 0 -> 0
> helped: 0
> HURT: 0
>
> total spills in shared programs: 11085 -> 11073 (-0.11%)
> spills in affected programs: 23 -> 11 (-52.17%)
> helped: 1
> HURT: 0
>
> The shaders/dolphin/ubershaders/12.shader_test was able to
> pull a couple of loads from inside if statements and reuse
> them.
>
> total fills in shared programs: 23143 -> 23089 (-0.23%)
> fills in affected programs: 2718 -> 2664 (-1.99%)
> helped: 27
> HURT: 0
>
> All from shaders/dolphin/ubershaders/.
>
> LOST:   0
> GAINED: 0
>
> The other generations follow the same overall shape.  The spills and
> fills HURTs are all from the same game.
>
> shader-db results for Broadwell.
>
> total instructions in shared programs: 15402037 -> 15401841 (<.01%)
> instructions in affected programs: 144386 -> 144190 (-0.14%)
> helped: 86
> HURT: 9
>
> total cycles in shared programs: 600912755 -> 600902486 (<.01%)
> cycles in affected programs: 185662820 -> 185652551 (<.01%)
> helped: 2598
> HURT: 3053
>
> total loops in shared programs: 4579 -> 4579 (0.00%)
> loops in affected programs: 0 -> 0
> helped: 0
> HURT: 0
>
> total spills in shared programs: 80929 -> 80924 (<.01%)
> spills in affected programs: 720 -> 715 (-0.69%)
> helped: 1
> HURT: 5
>
> total fills in shared programs: 93057 -> 93013 (-0.05%)
> fills in affected programs: 3398 -> 3354 (-1.29%)
> helped: 27
> HURT: 5
>
> LOST:   0
> GAINED: 2
>
> shader-db results for Haswell:
>
> total instructions in shared programs: 9231975 -> 9230357 (-0.02%)
> instructions in affected programs: 44992 -> 43374 (-3.60%)
> helped: 27
> HURT: 69
>
> total cycles in shared programs: 87760587 -> 87727502 (-0.04%)
> cycles in affected programs: 7720673 -> 7687588 (-0.43%)
> helped: 1609
> HURT: 1416
>
> total loops in shared programs: 1830 -> 1830 (0.00%)
> loops in affected programs: 0 -> 0
> helped: 0
> HURT: 0
>
> total spills in shared programs: 1988 -> 1692 (-14.89%)
> spills in affected programs: 296 -> 0
> helped: 1
> HURT: 0
>
> total fills in shared programs: 2103 -> 1668 (-20.68%)
> fills in affected programs: 438 -> 3 (-99.32%)
> helped: 4
> HURT: 0
>
> LOST:   0
> GAINED: 1
>
> v2: Remove the DISABLE prefix from tests we now pass.
>
> v3: Add comments about missing write_mask handling. (Caio)
> Add unreachable when switching on cf_node type. (Jason)
> Properly merge the component information in written map
> instead of

[Mesa-dev] [Bug 107971] SPV_GOOGLE_hlsl_functionality1 / SPV_GOOGLE_decorate_string

2018-10-15 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=107971

--- Comment #4 from xbx  ---

thanks.

the patches work fine.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 3:13 PM Connor Abbott  wrote:

> On Mon, Oct 15, 2018 at 8:41 PM Jason Ekstrand 
> wrote:
> >
> > On Mon, Oct 15, 2018 at 1:39 PM Ian Romanick 
> wrote:
> >>
> >> On 10/14/2018 03:58 PM, Jason Ekstrand wrote:
> >> > On October 14, 2018 17:12:34 Matt Turner  wrote:
> >> >> +static nir_ssa_def *
> >> >> +lower_iabs64(nir_builder *b, nir_ssa_def *x)
> >> >> +{
> >> >> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> >> >> +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> >> >> +   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
> >> >
> >> > lower_bcsel?  Or, since we're depending on this running multiple
> times,
> >> > just nir_ineg?  I go back and forth on whether a pass like this should
> >> > run in a loop or be smart enough to lower intermediate bits on the
> fly.
> >> > We should probably pick one.
> >>
> >> In principle, I agree.  I've been bitten a couple times by lowering
> >> passes that generate other things that need to be lowered on some
> >> platforms (that I didn't test).  In this case, I think the loop is the
> >> right answer since each operation is lowered by a separate flag.
> >
> >
> > That's the easy answer, certainly.  The other option is to have every
> lowered thing builder check the flag and conditionally do the lowering.
> That's annoying and hard to get right so a loop is probably best for now.
>
> Couldn't you just have the builder be right after the instruction,
> instead of before it, and make the outer loop use a non-safe iterator
> so that it will immediately run over the instructions generated? Doing
> another pass over the whole shader is usually a little expensive.
>

That's sneaky and also a really good idea!  We should totally just do that.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

2018-10-15 Thread Connor Abbott

On Mon, Oct 15, 2018 at 8:41 PM Jason Ekstrand  wrote:
>
> On Mon, Oct 15, 2018 at 1:39 PM Ian Romanick  wrote:
>>
>> On 10/14/2018 03:58 PM, Jason Ekstrand wrote:
>> > On October 14, 2018 17:12:34 Matt Turner  wrote:
>> >> +static nir_ssa_def *
>> >> +lower_iabs64(nir_builder *b, nir_ssa_def *x)
>> >> +{
>> >> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
>> >> +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
>> >> +   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
>> >
>> > lower_bcsel?  Or, since we're depending on this running multiple times,
>> > just nir_ineg?  I go back and forth on whether a pass like this should
>> > run in a loop or be smart enough to lower intermediate bits on the fly.
>> > We should probably pick one.
>>
>> In principle, I agree.  I've been bitten a couple times by lowering
>> passes that generate other things that need to be lowered on some
>> platforms (that I didn't test).  In this case, I think the loop is the
>> right answer since each operation is lowered by a separate flag.
>
>
> That's the easy answer, certainly.  The other option is to have every lowered 
> thing builder check the flag and conditionally do the lowering.  That's 
> annoying and hard to get right so a loop is probably best for now.

Couldn't you just have the builder be right after the instruction,
instead of before it, and make the outer loop use a non-safe iterator
so that it will immediately run over the instructions generated? Doing
another pass over the whole shader is usually a little expensive.

>
> --Jason
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3] nir: Copy propagation between blocks

2018-10-15 Thread Caio Marcelo de Oliveira Filho

Extend the pass to propagate the copies information along the control
flow graph.  It performs two walks, first it collects the vars
that were written inside each node. Then it walks applying the copy
propagation using a list of copies previously available.  At each node
the list is invalidated according to results from the first walk.

This approach is simpler than a full data-flow analysis, but covers
various cases.  If derefs are used for operating on more memory
resources (e.g. SSBOs), the difference from a regular pass is expected
to be more visible -- as the SSA copy propagation pass won't apply to
those.

A full data-flow analysis would handle more scenarios: conditional
breaks in the control flow and merge equivalent effects from multiple
branches (e.g. using a phi node to merge the source for writes to the
same deref).  However, as previous commentary in the code stated, its
complexity 'rapidly get out of hand'.  The current patch is a good
intermediate step towards more complex analysis.

The 'copies' linked list was modified to use util_dynarray to make it
more convenient to clone it (to handle ifs/loops).

Annotated shader-db results for Skylake:

total instructions in shared programs: 15105796 -> 15105451 (<.01%)
instructions in affected programs: 152293 -> 151948 (-0.23%)
helped: 96
HURT: 17

All the HURTs and many HELPs are one instruction.  Looking
at pass by pass outputs, the copy prop kicks in removing a
bunch of loads correctly, which ends up altering what other
optimizations kick in.  In those cases the copies would be
propagated after lowering to SSA.

In few HELPs we are actually helping doing more than was
possible previously, e.g. consolidating load_uniforms from
different blocks.  Most of those are from
shaders/dolphin/ubershaders/.

total cycles in shared programs: 566048861 -> 565954876 (-0.02%)
cycles in affected programs: 151461830 -> 151367845 (-0.06%)
helped: 2933
HURT: 2950

A lot of noise on both sides.

total loops in shared programs: 4603 -> 4603 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 11085 -> 11073 (-0.11%)
spills in affected programs: 23 -> 11 (-52.17%)
helped: 1
HURT: 0

The shaders/dolphin/ubershaders/12.shader_test was able to
pull a couple of loads from inside if statements and reuse
them.

total fills in shared programs: 23143 -> 23089 (-0.23%)
fills in affected programs: 2718 -> 2664 (-1.99%)
helped: 27
HURT: 0

All from shaders/dolphin/ubershaders/.

LOST:   0
GAINED: 0

The other generations follow the same overall shape.  The spills and
fills HURTs are all from the same game.

shader-db results for Broadwell.

total instructions in shared programs: 15402037 -> 15401841 (<.01%)
instructions in affected programs: 144386 -> 144190 (-0.14%)
helped: 86
HURT: 9

total cycles in shared programs: 600912755 -> 600902486 (<.01%)
cycles in affected programs: 185662820 -> 185652551 (<.01%)
helped: 2598
HURT: 3053

total loops in shared programs: 4579 -> 4579 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 80929 -> 80924 (<.01%)
spills in affected programs: 720 -> 715 (-0.69%)
helped: 1
HURT: 5

total fills in shared programs: 93057 -> 93013 (-0.05%)
fills in affected programs: 3398 -> 3354 (-1.29%)
helped: 27
HURT: 5

LOST:   0
GAINED: 2

shader-db results for Haswell:

total instructions in shared programs: 9231975 -> 9230357 (-0.02%)
instructions in affected programs: 44992 -> 43374 (-3.60%)
helped: 27
HURT: 69

total cycles in shared programs: 87760587 -> 87727502 (-0.04%)
cycles in affected programs: 7720673 -> 7687588 (-0.43%)
helped: 1609
HURT: 1416

total loops in shared programs: 1830 -> 1830 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 1988 -> 1692 (-14.89%)
spills in affected programs: 296 -> 0
helped: 1
HURT: 0

total fills in shared programs: 2103 -> 1668 (-20.68%)
fills in affected programs: 438 -> 3 (-99.32%)
helped: 4
HURT: 0

LOST:   0
GAINED: 1

v2: Remove the DISABLE prefix from tests we now pass.

v3: Add comments about missing write_mask handling. (Caio)
Add unreachable when switching on cf_node type. (Jason)
Properly merge the component information in written map
instead of replacing. (Jason)
Explain how removal from written arrays works. (Jason)
Use mode directly from deref instead of getting the var. (Jason)
---
 src/compiler/nir/nir_opt_copy_prop_vars.c | 422 ++
 src/compiler/nir/tests/vars_tests.cpp |  10 +-
 2 files changed, 350 insertions(+), 82 deletions(-)

diff --git

Re: [Mesa-dev] [RFC 4/7] mesa: Helper functions for counting set bits in a mask

2018-10-15 Thread Roland Scheidegger

Am 15.10.18 um 15:19 schrieb Toni Lönnberg:
> ---
>  src/util/bitscan.h | 25 +
>  1 file changed, 25 insertions(+)
> 
> diff --git a/src/util/bitscan.h b/src/util/bitscan.h
> index dc89ac9..cdfecaf 100644
> --- a/src/util/bitscan.h
> +++ b/src/util/bitscan.h
> @@ -112,6 +112,31 @@ u_bit_scan64(uint64_t *mask)
> return i;
>  }
>  
> +/* Count bits set in mask */
> +static inline int
> +u_count_bits(unsigned *mask)
I don't think you'd want to pass a pointer.

Besides, I don't think we need another set of functions for this.
src/util/u_math.h already has util_bitcount64 and util_bitcount which do
the same thing.
(Although I don't know which one is better, util_bitcount looks like it
would be potentially faster with just very few bits set, but with
"random" uint/uint64 it certainly would seem the new one is better. But
in any case, can't beat the cpu popcount instruction...)

Roland


> +{
> +   unsigned v = *mask;
> +   int c;
> +   v = v - ((v >> 1) & 0x55555555);
> +   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
> +   v = (v + (v >> 4)) & 0xF0F0F0F;
> +   c = (int)((v * 0x1010101) >> 24);
> +   return c;
> +}
> +
> +static inline int
> +u_count_bits64(uint64_t *mask)
> +{
> +   uint64_t v = *mask;
> +   int c;
> +   v = v - ((v >> 1) & 0x5555555555555555ull);
> +   v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull);
> +   v = (v + (v >> 4)) & 0xF0F0F0F0F0F0F0Full;
> +   c = (int)((v * 0x101010101010101ull) >> 56);
> +   return c;
> +}
> +
>  /* Determine if an unsigned value is a power of two.
>   *
>   * \note
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv: Don't advertise ASTC support on BSW

2018-10-15 Thread Mark Janes

Tested-by: Mark Janes 

Jason Ekstrand  writes:

> ---
>  src/intel/vulkan/anv_formats.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
> index 33faf7cc37f..9199567f445 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -521,6 +521,14 @@ get_image_format_features(const struct gen_device_info 
> *devinfo,
> isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
>return 0;
>  
> +   /* ASTC requires nasty workarounds on BSW so we just disable it for now.
> +*
> +* TODO: Figure out the ASTC workarounds and re-enable on BSW.
> +*/
> +   if (devinfo->gen < 9 &&
> +   isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
> +  return 0;
> +
> if (isl_format_supports_sampling(devinfo, plane_format.isl_format)) {
>flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
>  
> -- 
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/7] nir/int64: Call nir_lower_int64() in a loop

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 1:48 PM Ian Romanick  wrote:

> On 10/14/2018 03:11 PM, Matt Turner wrote:
> > Unfortunately some int64 lowerings generate more int64 operations, so we
> > need to call this function a few times. Also call
> > nir_lower_alu_to_scalar() beforehand to make more int64 operations
> > available for lowering.
> > ---
> >  src/intel/compiler/brw_nir.c | 10 +++---
> >  1 file changed, 7 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> > index f61baee230a..066724c58a6 100644
> > --- a/src/intel/compiler/brw_nir.c
> > +++ b/src/intel/compiler/brw_nir.c
> > @@ -670,12 +670,16 @@ brw_preprocess_nir(const struct brw_compiler
> *compiler, nir_shader *nir)
> >  */
> > OPT(nir_opt_algebraic);
> >
> > +   if (is_scalar) {
> > +  OPT(nir_lower_alu_to_scalar);
> > +   }
> > +
> > /* Lower int64 instructions before nir_optimize so that loop
> unrolling
> >  * sees their actual cost.
> >  */
> > -   nir_lower_int64(nir, nir_lower_imul64 |
> > -nir_lower_isign64 |
> > -nir_lower_divmod64);
> > +   while (nir_lower_int64(nir, nir_lower_imul64 |
> > +   nir_lower_isign64 |
> > +   nir_lower_divmod64));
>
> I don't know that we have a specific coding standard about this, but I
> always like
>
> while (foo)
> /* empty */ ;
>
> just to make it obvious.
>

I think I'd opt for "continue" but yeah, I think one or the other is better
than silently putting the semicolon at the end.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/11] nir: Copy propagation between blocks

2018-10-15 Thread Jason Ekstrand

Can you please re-send the latest version of this patch?  It's easier to
comment on the ML.

On Mon, Oct 15, 2018 at 12:44 PM Caio Marcelo de Oliveira Filho <
caio.olive...@intel.com> wrote:

> Hi,
>
> > > +   }
> > > +
> > > +   if (new_written) {
> > > +  /* Merge new information to the parent control flow node. */
> > > +  if (written) {
> > > + written->modes |= new_written->modes;
> > > + struct hash_entry *ht_entry;
> > > + hash_table_foreach(new_written->derefs, ht_entry) {
> > > +_mesa_hash_table_insert_pre_hashed(written->derefs,
> > > ht_entry->hash,
> > > +   ht_entry->key,
> > > ht_entry->data);
> > >
> >
> > Do you want to somehow OR masks together?  This is just picking one of
> the
> > two masks.
>
> You are correct.  Fixed.
>
> Turns out the way the local code we are reusing here is structured, we
> don't take much advantage of the fine-grained tracking here.  Added a
> TODO about this.
>
>
>
>
> > >  static void
> > > -copy_entry_remove(struct copy_prop_var_state *state, struct copy_entry
> > > *entry)
> > > +copy_entry_remove(struct util_dynarray *copies,
> > > +  struct copy_entry *entry)
> > >  {
> > > -   list_del(&entry->link);
> > > -   list_add(&entry->link, &state->copy_free_list);
> > > +   *entry = util_dynarray_pop(copies, struct copy_entry);
> > >
> >
> > It might be worth a quick comment to justify that this works.  It took
> me a
> > minute to figure out that you were re-ordering the array in the process.
>
> Added a function comment describing what this does and stating it is
> safe to use during a reverse iteration.  And also added a comment
> highlighting how this works when it is the last element.
>
> (...)
>
> >
> > > +lookup_entry_and_kill_aliases(struct util_dynarray *copies,
> > > +  nir_deref_instr *deref,
> > > +  unsigned write_mask)
> > >  {
> > > struct copy_entry *entry = NULL;
> > > -   list_for_each_entry_safe(struct copy_entry, iter, &state->copies,
> > > link) {
> > > +   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
> > >
> >
> > Also might be worth commenting why it's safe to remove elements while
> > walking the array.
>
> I think the comments to the copy_entry_remove suffice, but can add it
> here before landing if you prefer.
>
> The latest code is in
>
> https://gitlab.freedesktop.org/cmarcelo/mesa/commits/copy-prop
>
> and all the issues I haven't commented on are supposed to be fixed
> according to your comments (added new version to each patch that changed).
> Merged the use patches into a single one
>
> intel/nir, freedreno/ir3: Use the separated dead write vars pass
>
> Patches that still need R-b:
>
>nir: Copy propagation between blocks
>nir: Separate dead write removal into its own pass
>
>
> Caio
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/11] nir: Separate dead write removal into its own pass

2018-10-15 Thread Jason Ekstrand

Latest version is

Reviewed-by: Jason Ekstrand 

On Mon, Oct 15, 2018 at 2:41 PM Jason Ekstrand  wrote:

> Cool, thanks!
>
> On Mon, Oct 15, 2018 at 2:38 PM Caio Marcelo de Oliveira Filho <
> caio.olive...@intel.com> wrote:
>
>> > > > > +{
>> > > > > +   bool progress = false;
>> > > > > +
>> > > > > +   /* Find writes that are unused and can be removed. */
>> > > > > +   util_dynarray_foreach_reverse(unused_writes, struct
>> write_entry,
>> > > > > entry) {
>> > > > > +  nir_deref_compare_result comp = nir_compare_derefs(dst,
>> > > entry->dst);
>> > > > > +  if (comp & nir_derefs_a_contains_b_bit) {
>> > > > >
>> > > >
>> > > > Mind throwing an assert in here:
>> > > >
>> > > > assert((comp & nir_derefs_equal_bit) || mask ==
>> > > ~(nir_component_mask_t)0);
>> > >
>> > > We can assert that.  We can have an entry for a copy between arrays a
>> > > and b, and see a store a[1].x that will invalidate the 'x' component
>> > > of the copy.
>> > >
>> >
>> > Do you mean, "we can't assert that"?
>>
>> Correct. I meant "we can't".
>>
>>
>> > I'm trying to think about whether or not the type of per-component
>> > invalidation you're talking about there is valid or not.  If we can
>> assume
>> > that all struct copies are split and that all copies are fully qualified
>> > (i.e., they end in a vector or scalar with wildcards for all the
>> arrays),
>> > then I think such inference is fine.  Maybe worth a comment that such is
>> > intentional?
>>
>> I've added the following comment and assert to update_unused_writes()
>>
>>/* This pass assumes that destination of copies and stores are derefs
>> that
>> * end in a vector or scalar (it is OK to have wildcards or indirects
>> for
>> * arrays).
>> */
>>assert(glsl_type_is_vector_or_scalar(dst->type));
>>
>> My understanding is that in this context this always is true, but in
>> the future might not be if we do things like: "copy a b" instead of
>> "copy a[*] b[*]" when a and b are arrays (similar to structs).
>>
>> Updated my branch with that too.
>>
>>
>>
>> Caio
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: Pass index to pipe->create_query() for statistics queries.

2018-10-15 Thread Roland Scheidegger

FWIW the gallium pipeline stats query has existed for way longer than the GL
ARB spec for it, and at least llvmpipe has implemented it for ages.
So the reason for it being like that is due to dx10 (which always
queries these together), when gl couldn't do it at all.

To make it a bit nicer you could use new defines instead of just numbers
for indices, without making the change more intrusive.
I'm not sure it's really worth the trouble of splitting it up (well it
would mean we'd have to emit a boatload of queries for dx10, unless you
just add additional ones, but then the drivers would need to support
both...), since I don't think it's really something which gets used a lot.

Some comment inline.


Am 15.10.18 um 08:29 schrieb Kenneth Graunke:
> GL exposes separate queries for each pipeline statistics counter.
> For some reason, Gallium chose to map them all to a single target,
> PIPE_QUERY_PIPELINE_STATISTICS.  Radeon hardware appears to query
> them all as a group.  pipe->get_query_result_resource() takes an
> index, indicating which to write to the buffer.  The CPU-side hook,
> pipe->get_query_result(), simply writes them all, and st/mesa returns
> the one that was actually desired.
> 
> On Intel hardware, each individual pipeline statistics value is handled
> as a separate counter and query.  We can query each individually, and
> that is more efficient than querying all 11 counters each time.  But,
> we need pipe->get_query_result() to know which one to return.
> 
> To handle this, we pass the index into pipe->create_query(), which
> was previously always 0 for these queries.  Drivers which return all
> of the counters as a group can simply ignore it; drivers querying one
> at a time can use it to distinguish between the counters.
> 
> This is the least invasive fix, but it is kind of ugly, and I wonder
> whether we'd be better off just adding PIPE_QUERY_IA_VERTICES (etc.)
> targets...
> ---
>  src/mesa/state_tracker/st_cb_queryobj.c | 76 -
>  1 file changed, 36 insertions(+), 40 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_cb_queryobj.c 
> b/src/mesa/state_tracker/st_cb_queryobj.c
> index 69e6004c3f1..0dc06ceb574 100644
> --- a/src/mesa/state_tracker/st_cb_queryobj.c
> +++ b/src/mesa/state_tracker/st_cb_queryobj.c
> @@ -88,6 +88,40 @@ st_DeleteQuery(struct gl_context *ctx, struct 
> gl_query_object *q)
> free(stq);
>  }
>  
> +static int
> +target_to_index(const struct gl_query_object *q)
> +{
> +   switch (q->Target) {
> +   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> +   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
The last one here doesn't actually have an index (as it is used for
querying all streams) - albeit I suppose q->Stream should be 0 anyway.
GL_PRIMITIVES_GENERATED though can have an index.

Otherwise looks reasonable to me.

Roland


> +  return q->Stream;
> +   case GL_VERTICES_SUBMITTED_ARB:
> +  return 0;
> +   case GL_PRIMITIVES_SUBMITTED_ARB:
> +  return 1;
> +   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
> +  return 2;
> +   case GL_GEOMETRY_SHADER_INVOCATIONS:
> +  return 3;
> +   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
> +  return 4;
> +   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
> +  return 5;
> +   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
> +  return 6;
> +   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
> +  return 7;
> +   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
> +  return 8;
> +   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
> +  return 9;
> +   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
> +  return 10;
> +   default:
> +  return 0;
> +   }
> +}
>  
>  static void
>  st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)
> @@ -164,7 +198,7 @@ st_BeginQuery(struct gl_context *ctx, struct 
> gl_query_object *q)
>   ret = pipe->end_query(pipe, stq->pq_begin);
> } else {
>if (!stq->pq) {
> - stq->pq = pipe->create_query(pipe, type, q->Stream);
> + stq->pq = pipe->create_query(pipe, type, target_to_index(q));
>   stq->type = type;
>}
>if (stq->pq)
> @@ -383,46 +417,8 @@ st_StoreQueryResult(struct gl_context *ctx, struct 
> gl_query_object *q,
>  
> if (pname == GL_QUERY_RESULT_AVAILABLE) {
>index = -1;
> -   } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
> -  switch (q->Target) {
> -  case GL_VERTICES_SUBMITTED_ARB:
> - index = 0;
> - break;
> -  case GL_PRIMITIVES_SUBMITTED_ARB:
> - index = 1;
> - break;
> -  case GL_VERTEX_SHADER_INVOCATIONS_ARB:
> - index = 2;
> - break;
> -  case GL_GEOMETRY_SHADER_INVOCATIONS:
> - index = 3;
> - break;
> -  case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
> - index = 4;
> - break;
> -  case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
> - index = 5;
> - break;
> -  case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:

Re: [Mesa-dev] [PATCH 05/11] nir: Separate dead write removal into its own pass

2018-10-15 Thread Jason Ekstrand

Cool, thanks!

On Mon, Oct 15, 2018 at 2:38 PM Caio Marcelo de Oliveira Filho <
caio.olive...@intel.com> wrote:

> > > > > +{
> > > > > +   bool progress = false;
> > > > > +
> > > > > +   /* Find writes that are unused and can be removed. */
> > > > > +   util_dynarray_foreach_reverse(unused_writes, struct
> write_entry,
> > > > > entry) {
> > > > > +  nir_deref_compare_result comp = nir_compare_derefs(dst,
> > > entry->dst);
> > > > > +  if (comp & nir_derefs_a_contains_b_bit) {
> > > > >
> > > >
> > > > Mind throwing an assert in here:
> > > >
> > > > assert((comp & nir_derefs_equal_bit) || mask ==
> > > ~(nir_component_mask_t)0);
> > >
> > > We can assert that.  We can have an entry for a copy between arrays a
> > > and b, and see a store a[1].x that will invalidate the 'x' component
> > > of the copy.
> > >
> >
> > Do you mean, "we can't assert that"?
>
> Correct. I meant "we can't".
>
>
> > I'm trying to think about whether or not the type of per-component
> > invalidation you're talking about there is valid or not.  If we can
> assume
> > that all struct copies are split and that all copies are fully qualified
> > (i.e., they end in a vector or scalar with wildcards for all the arrays),
> > then I think such inference is fine.  Maybe worth a comment that such is
> > intentional?
>
> I've added the following comment and assert to update_unused_writes()
>
>/* This pass assumes that destination of copies and stores are derefs
> that
> * end in a vector or scalar (it is OK to have wildcards or indirects
> for
> * arrays).
> */
>assert(glsl_type_is_vector_or_scalar(dst->type));
>
> My understanding is that in this context this always is true, but in
> the future might not be if we do things like: "copy a b" instead of
> "copy a[*] b[*]" when a and b are arrays (similar to structs).
>
> Updated my branch with that too.
>
>
>
> Caio
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/11] nir: Separate dead write removal into its own pass

2018-10-15 Thread Caio Marcelo de Oliveira Filho

> > > > +{
> > > > +   bool progress = false;
> > > > +
> > > > +   /* Find writes that are unused and can be removed. */
> > > > +   util_dynarray_foreach_reverse(unused_writes, struct write_entry,
> > > > entry) {
> > > > +  nir_deref_compare_result comp = nir_compare_derefs(dst,
> > entry->dst);
> > > > +  if (comp & nir_derefs_a_contains_b_bit) {
> > > >
> > >
> > > Mind throwing an assert in here:
> > >
> > > assert((comp & nir_derefs_equal_bit) || mask ==
> > ~(nir_component_mask_t)0);
> >
> > We can assert that.  We can have an entry for a copy between arrays a
> > and b, and see a store a[1].x that will invalidate the 'x' component
> > of the copy.
> >
> 
> Do you mean, "we can't assert that"?

Correct. I meant "we can't".


> I'm trying to think about whether or not the type of per-component
> invalidation you're talking about there is valid or not.  If we can assume
> that all struct copies are split and that all copies are fully qualified
> (i.e., they end in a vector or scalar with wildcards for all the arrays),
> then I think such inference is fine.  Maybe worth a comment that such is
> intentional?

I've added the following comment and assert to update_unused_writes()

   /* This pass assumes that destination of copies and stores are derefs that
* end in a vector or scalar (it is OK to have wildcards or indirects for
* arrays).
*/
   assert(glsl_type_is_vector_or_scalar(dst->type));

My understanding is that in this context this always is true, but in
the future might not be if we do things like: "copy a b" instead of
"copy a[*] b[*]" when a and b are arrays (similar to structs).

Updated my branch with that too.



Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/7] nir/int64: Call nir_lower_int64() in a loop

2018-10-15 Thread Ian Romanick

On 10/14/2018 03:11 PM, Matt Turner wrote:
> Unfortunately some int64 lowerings generate more int64 operations, so we
> need to call this function a few times. Also call
> nir_lower_alu_to_scalar() beforehand to make more int64 operations
> available for lowering.
> ---
>  src/intel/compiler/brw_nir.c | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index f61baee230a..066724c58a6 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -670,12 +670,16 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
> nir_shader *nir)
>  */
> OPT(nir_opt_algebraic);
>  
> +   if (is_scalar) {
> +  OPT(nir_lower_alu_to_scalar);
> +   }
> +
> /* Lower int64 instructions before nir_optimize so that loop unrolling
>  * sees their actual cost.
>  */
> -   nir_lower_int64(nir, nir_lower_imul64 |
> -nir_lower_isign64 |
> -nir_lower_divmod64);
> +   while (nir_lower_int64(nir, nir_lower_imul64 |
> +   nir_lower_isign64 |
> +   nir_lower_divmod64));

I don't know that we have a specific coding standard about this, but I
always like

while (foo)
/* empty */ ;

just to make it obvious.

>  
> nir = brw_nir_optimize(nir, compiler, is_scalar, true);
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/7] nir/int64: Implement lowering of shift operations

2018-10-15 Thread Ian Romanick

On 10/14/2018 07:16 PM, Jason Ekstrand wrote:
> On Sun, Oct 14, 2018 at 5:12 PM Matt Turner wrote:
> 
> ---
>  src/compiler/nir/nir.h             |   1 +
>  src/compiler/nir/nir_lower_int64.c | 142
> +
>  2 files changed, 143 insertions(+)
> 
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 12cbd030e21..2c477126acc 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3001,6 +3001,7 @@ typedef enum {
>     nir_lower_ineg64    = (1 << 7),
>     nir_lower_logic64   = (1 << 8),
>     nir_lower_minmax64  = (1 << 9),
> +   nir_lower_shift64   = (1 << 10),
>  } nir_lower_int64_options;
> 
>  bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options
> options);
> diff --git a/src/compiler/nir/nir_lower_int64.c
> b/src/compiler/nir/nir_lower_int64.c
> index 9cdc8a9d592..25882d3a858 100644
> --- a/src/compiler/nir/nir_lower_int64.c
> +++ b/src/compiler/nir/nir_lower_int64.c
> @@ -90,6 +90,138 @@ lower_ixor64(nir_builder *b, nir_ssa_def *x,
> nir_ssa_def *y)
>                                      nir_ixor(b, x_hi, y_hi));
>  }
> 
> +static nir_ssa_def *
> +lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   /* Implemented as
> +    *
> +    * uint64_t lshift(uint64_t x, int c)
> +    * {
> +    *    if (c == 0) return x;
> +    *
> +    *    uint32_t lo = LO(x), hi = HI(x);
> +    *
> +    *    if (c < 32) {
> +    *       uint32_t lo_shifted = lo << (c & 0x1f);
> +    *       uint32_t hi_shifted = hi << (c & 0x1f);
> +    *       uint32_t lo_shifted_hi = lo >> (abs(32 - c) & 0x1f);
> 
> 
> Why the abs and the &?  it's already predicated on c < 32 and negative
> or OOB shifts already have undefined results.

I think the & is unnecessary, and I tend towards removing them.  The
abs() is there so that it's the same expression as the else case.  This
is useful because the NIR code he generates uses a bcsel instead.

Since the NIR code uses bcsel, I feel like the C pseudo-code should use ?:.

> +    *       return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
> +    *    } else {
> +    *       uint32_t lo_shifted_hi = lo << (abs(32 - c) & 0x1f);
> +    *       return pack_64(0, lo_shifted_hi);
> +    *    }
> +    * }
> +    */
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> +   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y,
> nir_imm_int(b, -32)));
> 
> 
> This is iabs(c - 32) (which yields the same result but isn't the same
> expression) and doesn't have the & 0x1f.
>  
> 
> +   nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
> +   nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
> 
> 
> In general, all of the 0x1f are missing.  While not having them works on
> i965, there's no guarantee it works in general.  Maybe we should add
> them in and have an i965-specific optimization to delete them again? 
> Maybe it's ok to just not have them.  In any case, the code down here
> should match the code above or there should be a very good comment
> saying why it doesn't.

As long as shifting with a >32 value doesn't make the GPU crash, it
should be fine.  The values produced from those shifts aren't used in
the final result.  Right?  Explaining that in the comment is a good idea.

> +   nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
> +
> +   nir_ssa_def *res_if_lt_32 =
> +      nir_pack_64_2x32_split(b, lo_shifted,
> +                                nir_ior(b, hi_shifted, lo_shifted_hi));
> +   nir_ssa_def *res_if_ge_32 =
> +      nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
> +                                nir_ishl(b, x_lo, reverse_count));
> +
> +   return nir_bcsel(b,
> +                    nir_ieq(b, y, nir_imm_int(b, 0)), x,
> +                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
> +                                 res_if_ge_32, res_if_lt_32));
> +}
> +
> +static nir_ssa_def *
> +lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   /* Implemented as
> +    *
> +    * uint64_t arshift(uint64_t x, int c)
> +    * {
> +    *    if (c == 0) return x;
> +    *
> +    *    uint32_t lo = LO(x);
> +    *    int32_t  hi = HI(x);
> +    *
> +    *    if (c < 32) {
> +    *       uint32_t lo_shifted = lo >> (c & 0x1f);
> +    *       uint32_t hi_shifted = hi >> (c & 0x1f);
> +    *       uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
> +    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
> +    *    } else {
> +    *       uint32_t hi_shifted = hi >> 31;
> +    *       uint32_t

Re: [Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 1:39 PM Ian Romanick  wrote:

> On 10/14/2018 03:58 PM, Jason Ekstrand wrote:
> > On October 14, 2018 17:12:34 Matt Turner  wrote:
> >> +static nir_ssa_def *
> >> +lower_iabs64(nir_builder *b, nir_ssa_def *x)
> >> +{
> >> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> >> +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> >> +   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
> >
> > lower_bcsel?  Or, since we're depending on this running multiple times,
> > just nir_ineg?  I go back and forth on whether a pass like this should
> > run in a loop or be smart enough to lower intermediate bits on the fly.
> > We should probably pick one.
>
> In principle, I agree.  I've been bitten a couple times by lowering
> passes that generate other things that need to be lowered on some
> platforms (that I didn't test).  In this case, I think the loop is the
> right answer since each operation is lowered by a separate flag.
>

That's the easy answer, certainly.  The other option is to have every
lowered thing builder check the flag and conditionally do the lowering.
That's annoying and hard to get right so a loop is probably best for now.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

2018-10-15 Thread Ian Romanick

On 10/14/2018 03:58 PM, Jason Ekstrand wrote:
> On October 14, 2018 17:12:34 Matt Turner  wrote:
>> +static nir_ssa_def *
>> +lower_iabs64(nir_builder *b, nir_ssa_def *x)
>> +{
>> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
>> +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
>> +   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
> 
> lower_bcsel?  Or, since we're depending on this running multiple times,
> just nir_ineg?  I go back and forth on whether a pass like this should
> run in a loop or be smart enough to lower intermediate bits on the fly. 
> We should probably pick one.

In principle, I agree.  I've been bitten a couple times by lowering
passes that generate other things that need to be lowered on some
platforms (that I didn't test).  In this case, I think the loop is the
right answer since each operation is lowered by a separate flag.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] appveyor: Cache pip's cache files.

2018-10-15 Thread Roland Scheidegger

Am 12.10.18 um 17:27 schrieb Jose Fonseca:
> It should speed up the Python packages installation.
> ---
>  appveyor.yml | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/appveyor.yml b/appveyor.yml
> index a4e942c14ca..ccb84fd3403 100644
> --- a/appveyor.yml
> +++ b/appveyor.yml
> @@ -33,7 +33,9 @@ branches:
>  # - 
> https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories
>  clone_depth: 100
>  
> +# https://www.appveyor.com/docs/build-cache/
>  cache:
> +- '%LOCALAPPDATA%\pip\Cache -> appveyor.yml'
>  - win_flex_bison-2.5.15.zip
>  - llvm-5.0.1-msvc2017-mtd.7z
>  
> 

Series looks good to me.
Reviewed-by: Roland Scheidegger 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] anv: Don't advertise ASTC support on BSW

2018-10-15 Thread Jason Ekstrand

---
 src/intel/vulkan/anv_formats.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
index 33faf7cc37f..9199567f445 100644
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -521,6 +521,14 @@ get_image_format_features(const struct gen_device_info 
*devinfo,
isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
   return 0;
 
+   /* ASTC requires nasty workarounds on BSW so we just disable it for now.
+*
+* TODO: Figure out the ASTC workarounds and re-enable on BSW.
+*/
+   if (devinfo->gen < 9 &&
+   isl_format_get_layout(plane_format.isl_format)->txc == ISL_TXC_ASTC)
+  return 0;
+
if (isl_format_supports_sampling(devinfo, plane_format.isl_format)) {
   flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Drop assert about number of uniforms in ARB handling.

2018-10-15 Thread Jason Ekstrand

Reviewed-by: Jason Ekstrand 

On Mon, Oct 15, 2018 at 12:49 PM Kenneth Graunke 
wrote:

> My recent prog_to_nir patch started making new sampler uniforms, which
> apparently increased the number of parameters.  We used to poke at the
> one parameter directly, making it important that there was only one,
> but we haven't done that in a while.  It should be safe to just delete
> the assertion.
>
> Fixes: 1c0f92d8a8c "nir: Create sampler variables in prog_to_nir."
> ---
>  src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> index 35416a74b9c..66cdc1a10b6 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
> @@ -246,10 +246,9 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader
> *shader,
> stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
>
> /* For ARB programs, prog_to_nir generates a single "parameters"
> variable
> -* for all uniform data.  nir_lower_wpos_ytransform may also create an
> -* additional variable.
> +* for all uniform data.  There may be additional sampler variables,
> and
> +* an extra uniform from nir_lower_wpos_ytransform.
>  */
> -   assert(shader->uniforms.length() <= 2);
>
> for (unsigned p = 0; p < plist->NumParameters; p++) {
>/* Parameters should be either vec4 uniforms or single component
> --
> 2.19.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] i965: Drop assert about number of uniforms in ARB handling.

2018-10-15 Thread Kenneth Graunke

My recent prog_to_nir patch started making new sampler uniforms, which
apparently increased the number of parameters.  We used to poke at the
one parameter directly, making it important that there was only one,
but we haven't done that in a while.  It should be safe to just delete
the assertion.

Fixes: 1c0f92d8a8c "nir: Create sampler variables in prog_to_nir."
---
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 
b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 35416a74b9c..66cdc1a10b6 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -246,10 +246,9 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader 
*shader,
stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
 
/* For ARB programs, prog_to_nir generates a single "parameters" variable
-* for all uniform data.  nir_lower_wpos_ytransform may also create an
-* additional variable.
+* for all uniform data.  There may be additional sampler variables, and
+* an extra uniform from nir_lower_wpos_ytransform.
 */
-   assert(shader->uniforms.length() <= 2);
 
for (unsigned p = 0; p < plist->NumParameters; p++) {
   /* Parameters should be either vec4 uniforms or single component
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/11] nir: Copy propagation between blocks

2018-10-15 Thread Caio Marcelo de Oliveira Filho

Hi,

> > +   }
> > +
> > +   if (new_written) {
> > +  /* Merge new information to the parent control flow node. */
> > +  if (written) {
> > + written->modes |= new_written->modes;
> > + struct hash_entry *ht_entry;
> > + hash_table_foreach(new_written->derefs, ht_entry) {
> > +_mesa_hash_table_insert_pre_hashed(written->derefs,
> > ht_entry->hash,
> > +   ht_entry->key,
> > ht_entry->data);
> >
> 
> Do you want to somehow OR masks together?  This is just picking one of the
> two masks.

You are correct.  Fixed.

Turns out the way the local code we are reusing here is structured, we
don't take much advantage of the fine-grained tracking here.  Added a
TODO about this.




> >  static void
> > -copy_entry_remove(struct copy_prop_var_state *state, struct copy_entry
> > *entry)
> > +copy_entry_remove(struct util_dynarray *copies,
> > +  struct copy_entry *entry)
> >  {
> > -   list_del(&entry->link);
> > -   list_add(&entry->link, &state->copy_free_list);
> > +   *entry = util_dynarray_pop(copies, struct copy_entry);
> >
> 
> It might be worth a quick comment to justify that this works.  It took me a
> minute to figure out that you were re-ordering the array in the process.

Added a function comment describing what this does and stating it is
safe to use during a reverse iteration.  And also added a comment
highlighting how this works when it is the last element.

(...)

> 
> > +lookup_entry_and_kill_aliases(struct util_dynarray *copies,
> > +  nir_deref_instr *deref,
> > +  unsigned write_mask)
> >  {
> > struct copy_entry *entry = NULL;
> > -   list_for_each_entry_safe(struct copy_entry, iter, &state->copies,
> > link) {
> > +   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
> >
> 
> Also might be worth commenting why it's safe to remove elements while
> walking the array.

I think the comments to the copy_entry_remove suffice, but can add it
here before landing if you prefer.

The latest code is in

https://gitlab.freedesktop.org/cmarcelo/mesa/commits/copy-prop

and all the issues I haven't commented on are supposed to be fixed
according to your comments (added new version to each patch that changed).
Merged the use patches into a single one

intel/nir, freedreno/ir3: Use the separated dead write vars pass

Patches that still need R-b:

   nir: Copy propagation between blocks
   nir: Separate dead write removal into its own pass


Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/11] nir: Separate dead write removal into its own pass

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 12:37 PM Caio Marcelo de Oliveira Filho <
caio.olive...@intel.com> wrote:

> Hi,
>
> > > +{
> > > +   bool progress = false;
> > > +
> > > +   /* Find writes that are unused and can be removed. */
> > > +   util_dynarray_foreach_reverse(unused_writes, struct write_entry,
> > > entry) {
> > > +  nir_deref_compare_result comp = nir_compare_derefs(dst,
> entry->dst);
> > > +  if (comp & nir_derefs_a_contains_b_bit) {
> > >
> >
> > Mind throwing an assert in here:
> >
> > assert((comp & nir_derefs_equal_bit) || mask ==
> ~(nir_component_mask_t)0);
>
> We can assert that.  We can have an entry for a copy between arrays a
> and b, and see a store a[1].x that will invalidate the 'x' component
> of the copy.
>

Do you mean, "we can't assert that"?

I'm trying to think about whether or not the type of per-component
invalidation you're talking about there is valid or not.  If we can assume
that all struct copies are split and that all copies are fully qualified
(i.e., they end in a vector or scalar with wildcards for all the arrays),
then I think such inference is fine.  Maybe worth a comment that such is
intentional?

(...)
>
> > > +  case nir_intrinsic_copy_deref: {
> > > + nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
> > > + nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
> > > +
> > > + /* Self-copy is removed. */
> > > + if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) {
> > > +nir_instr_remove(instr);
> > > +progress = true;
> > > +break;
> > > + }
> > > +
> > > + uintptr_t mask = ~(1 << NIR_MAX_VEC_COMPONENTS);
> > >
> >
> > I don't think this does quite what you want.  Perhaps
> >
> > nir_component_mask_t mask = ~(nir_component_mask_t)0;
>
> I'm going with
>
> nir_component_mask_t mask = (1 << glsl_get_vector_elements(dst->type)) - 1;
>
>
> The idea is that we only fill bits that are valid, so we can detect
> the condition that no bits are set and remove the entry.  Sounds good?
>

Seems reasonable.  Again, this assumes that dst->type is a vector or scalar
and not a struct, array, or other odd type.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/11] nir: Separate dead write removal into its own pass

2018-10-15 Thread Caio Marcelo de Oliveira Filho

Hi,

> > +{
> > +   bool progress = false;
> > +
> > +   /* Find writes that are unused and can be removed. */
> > +   util_dynarray_foreach_reverse(unused_writes, struct write_entry,
> > entry) {
> > +  nir_deref_compare_result comp = nir_compare_derefs(dst, entry->dst);
> > +  if (comp & nir_derefs_a_contains_b_bit) {
> >
> 
> Mind throwing an assert in here:
> 
> assert((comp & nir_derefs_equal_bit) || mask == ~(nir_component_mask_t)0);

We can assert that.  We can have an entry for a copy between arrays a
and b, and see a store a[1].x that will invalidate the 'x' component
of the copy.

(...)

> > +  case nir_intrinsic_copy_deref: {
> > + nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
> > + nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
> > +
> > + /* Self-copy is removed. */
> > + if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) {
> > +nir_instr_remove(instr);
> > +progress = true;
> > +break;
> > + }
> > +
> > + uintptr_t mask = ~(1 << NIR_MAX_VEC_COMPONENTS);
> >
> 
> I don't think this does quite what you want.  Perhaps
> 
> nir_component_mask_t mask = ~(nir_component_mask_t)0;

I'm going with

nir_component_mask_t mask = (1 << glsl_get_vector_elements(dst->type)) - 1;


The idea is that we only fill bits that are valid, so we can detect
the condition that no bits are set and remove the entry.  Sounds good?


> 
> All of the comments were fairly trivial and nit-picky.  Assuming you're ok
> with the changes,
> 
> Reviewed-by: Jason Ekstrand 



Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] intel/tools: fix error_decode batch length

2018-10-15 Thread Lionel Landwerlin

count field is in dwords, the argument to the decoder is in bytes...

Signed-off-by: Lionel Landwerlin 
Fixes: d374423eabbfe3 ("intel/tools: Switch aubinator_error_decode over to the 
gen_print_batch")
---
 src/intel/tools/aubinator_error_decode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/tools/aubinator_error_decode.c 
b/src/intel/tools/aubinator_error_decode.c
index 735d3552722..2d918f0ec0f 100644
--- a/src/intel/tools/aubinator_error_decode.c
+++ b/src/intel/tools/aubinator_error_decode.c
@@ -610,7 +610,8 @@ read_data_file(FILE *file)
   strcmp(sections[s].buffer_name, "batch buffer") == 0 ||
   strcmp(sections[s].buffer_name, "ring buffer") == 0 ||
   strcmp(sections[s].buffer_name, "HW Context") == 0) {
- gen_print_batch(_ctx, sections[s].data, sections[s].count,
+ gen_print_batch(_ctx, sections[s].data,
+ sections[s].count * 4,
  sections[s].gtt_offset, false);
   }
}
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] Allow fd.o to join forces with X.Org

2018-10-15 Thread Eric Engestrom

On October 15, 2018 2:50:13 PM UTC, Harry Wentland  
wrote:
> The leadership of freedesktop.org (fd.o) has recently expressed
> interest
> in having an elected governing body. Given the tight connection
> between
> fd.o and X.Org and the fact that X.Org has such a governing body it
> seemed obvious to consider extending X.Org's mandate to fd.o.
> 
> Quite a bit of background on fd.o leading up to this has been covered
> by
> Daniel Stone at XDC 2018 and was covered really well by Jake Edge of
> LWN [1].

If you'd like to watch Daniel's presentation, the recording is available on 
YouTube:
https://youtu.be/s22B3E7rUTs

The slides are linked in the description.

> 
> One question that is briefly addressed in the LWN article and was
> thoroughly discussed by members of the X.Org boards, Daniel Stone, and
> others in hallway discussions is the question of whether to extend the
> X.Org membership to projects hosted on fd.o but outside the purpose of
> the X.Org foundation as enacted in its bylaws.
> 
> Most people I talked to would prefer not to dilute X.Org's mission and
> extend membership only to contributors of projects that follow X.Org's
> purpose as enacted in its bylaws. Other projects can continue to be
> hosted on fd.o but won't receive X.Org membership for the mere reason
> of
> being hosted on fd.o.

With my member hat on, I think this is the best choice.
Acked-by: Eric Engestrom 

> 
> [1] https://lwn.net/Articles/767258/
> 
> v2:
>  - Subject line that better describes the intention
>  - Briefly describe reasons behind this change
>  - Drop expanding membership eligibility
> ---
> 
> We're looking for feedback and comments on this patch. If it's not
> widely controversial the final version of the patch will be put to a
> vote at the 2019 X.Org elections.
> 
> The patch applies to the X.Org bylaws git repo, which can be found at
> https://gitlab.freedesktop.org/xorgfoundation/bylaws
> 
> Happy commenting.
> 
> Harry
> 
> bylaws.tex | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/bylaws.tex b/bylaws.tex
> index 4ab35a4f7745..44ff4745963b 100644
> --- a/bylaws.tex
> +++ b/bylaws.tex
> @@ -14,7 +14,7 @@ BE IT ENACTED AND IT IS HEREBY ENACTED as a By-law
> of the X.Org Foundation
>  
>  The purpose of the X.Org Foundation shall be to:
>  \begin{enumerate}[(i)\hspace{.2cm}]
> - \item Research, develop, support, organize, administrate,
> standardize,
> + \item \label{1} Research, develop, support, organize, administrate,
> standardize,
>   promote, and defend a free and open accelerated graphics stack. This
>   includes, but is not limited to, the following projects: DRM, Mesa,
>   Wayland and the X Window System,
> @@ -24,6 +24,11 @@ The purpose of the X.Org Foundation shall be to:
>  
>   \item Support and educate the general community of users of this
>   graphics stack.
> +
> + \item Support free and open source projects through the
> freedesktop.org
> + infrastructure. For projects outside the scope of item (\ref{1})
> support
> + extends to project hosting only.
> +
>  \end{enumerate}
>  
>  \article{INTERPRETATION}
> -- 
> 2.19.1
> 
> ___
> memb...@foundation.x.org: X.Org Foundation Members
> Archives: https://foundation.x.org/cgi-bin/mailman/private/members
> Info: https://foundation.x.org/cgi-bin/mailman/listinfo/members
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v5 4/5] loader/dri3: Enable adaptive_sync via _VARIABLE_REFRESH property

2018-10-15 Thread Kazlauskas, Nicholas


On 10/15/2018 11:18 AM, Michel Dänzer wrote:

On 2018-10-12 6:48 p.m., Nicholas Kazlauskas wrote:

The DDX driver can be notified of adaptive sync suitability by
flagging the application's window with the _VARIABLE_REFRESH property.

This property is set on the first swap the application performs
when adaptive_sync is set to true in the drirc.

It's performed here instead of when the loader is initialized for
two reasons:

(1) The window's drawable can be missing during loader init.
 This can be observed during the Unigine Superposition benchmark.

(2) Adaptive sync will only be enabled closer to when the application
 actually begins rendering.

If adaptive_sync is false then the _VARIABLE_REFRESH property
is deleted on loader init.

The property is only managed on the glx DRI3 backend for now. This
should cover most common applications and games on modern hardware.

Vulkan support can be implemented in a similar manner but would likely
require splitting the function out into a common helper function.

Signed-off-by: Nicholas Kazlauskas 

[...]
  
@@ -331,16 +358,28 @@ loader_dri3_drawable_init(xcb_connection_t *conn,

 draw->have_back = 0;
 draw->have_fake_front = 0;
 draw->first_init = true;
+   draw->adaptive_sync = false;
+   draw->adaptive_sync_active = false;
  
 draw->cur_blit_source = -1;

 draw->back_format = __DRI_IMAGE_FORMAT_NONE;
 mtx_init(>mtx, mtx_plain);
 cnd_init(>event_cnd);
  
-   if (draw->ext->config)

+   if (draw->ext->config) {
draw->ext->config->configQueryi(draw->dri_screen,
"vblank_mode", _mode);
  
+  draw->ext->config->configQueryb(draw->dri_screen,

+  "adaptive_sync",
+  _sync);
+
+  draw->adaptive_sync = adaptive_sync;
+
+  if (!adaptive_sync)
+ set_adaptive_sync_property(conn, draw->drawable, false);
+   }


The set_adaptive_sync_property call should be after the if
(draw->ext->config) block here, so that the property is deleted even if
draw->ext->config is NULL.



Makes sense to me, I don't mind fixing this.

I have to wonder when this is actually ever NULL, though. There's 
support for driver default config values so I don't think it's the absence 
of a config file. Maybe just a memory allocation failure, then?


Nicholas Kazlauskas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 09/13] anv: Implement vkCmdDrawIndirectByteCountEXT

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 7:06 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> On 13/10/2018 14:09, Jason Ekstrand wrote:
> > Annoyingly, this requires that we implement integer division on the
> > command streamer.  Fortunately, we're only ever dividing by constants so
> > we can use the mulh+add+shift trick and it's not as bad as it sounds.
> > ---
> >   src/intel/vulkan/anv_device.c  |   2 +-
> >   src/intel/vulkan/genX_cmd_buffer.c | 150 +
> >   2 files changed, 151 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/intel/vulkan/anv_device.c
> b/src/intel/vulkan/anv_device.c
> > index 6395656dfd0..acca97b53d1 100644
> > --- a/src/intel/vulkan/anv_device.c
> > +++ b/src/intel/vulkan/anv_device.c
> > @@ -1226,7 +1226,7 @@ void anv_GetPhysicalDeviceProperties2(
> >props->transformFeedbackQueries = VK_FALSE;
> >props->transformFeedbackStreamsLinesTriangles = VK_FALSE;
> >props->transformFeedbackRasterizationStreamSelect = VK_TRUE;
> > - props->transformFeedbackDraw = VK_FALSE;
> > + props->transformFeedbackDraw = VK_TRUE;
> >break;
> > }
> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index 90469abbf21..1fbcfaf4264 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -27,6 +27,7 @@
> >   #include "anv_private.h"
> >   #include "vk_format_info.h"
> >   #include "vk_util.h"
> > +#include "util/fast_idiv_by_const.h"
> >
> >   #include "common/gen_l3_config.h"
> >   #include "genxml/gen_macros.h"
> > @@ -2880,7 +2881,156 @@ emit_mul_gpr0(struct anv_batch *batch, uint32_t
> N)
> >  build_alu_multiply_gpr0(dw + 1, _dwords, N);
> >   }
> >
> > +static void
> > +emit_alu_add(struct anv_batch *batch, unsigned dst_reg,
> > + unsigned reg_a, unsigned reg_b)
> > +{
> > +   uint32_t *dw = anv_batch_emitn(batch, 1 + 4, GENX(MI_MATH));
> > +   dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, reg_a);
> > +   dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, reg_b);
> > +   dw[3] = mi_alu(MI_ALU_ADD, 0, 0);
> > +   dw[4] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU);
> > +}
> > +
> > +static void
> > +emit_add32_gpr0(struct anv_batch *batch, uint32_t N)
> > +{
> > +   emit_lri(batch, CS_GPR(1), N);
> > +   emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1);
> > +}
> > +
> > +static void
> > +emit_alu_shl(struct anv_batch *batch, unsigned dst_reg,
> > + unsigned src_reg, unsigned shift)
> > +{
> > +   assert(shift > 0);
> > +
> > +   uint32_t *dw = anv_batch_emitn(batch, 1 + 4 * shift, GENX(MI_MATH));
> > +   for (unsigned i = 0; i < shift; i++) {
> > +  unsigned add_src = (i == 0) ? src_reg : dst_reg;
> > +  dw[1 + (i * 4) + 0] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, add_src);
> > +  dw[1 + (i * 4) + 1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, add_src);
> > +  dw[1 + (i * 4) + 2] = mi_alu(MI_ALU_ADD, 0, 0);
> > +  dw[1 + (i * 4) + 3] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU);
> > +   }
> > +}
> > +
> > +static void
> > +emit_div32_gpr0(struct anv_batch *batch, uint32_t D)
> > +{
> > +   /* Zero out the top of GPR0 */
> > +   emit_lri(batch, CS_GPR(0) + 4, 0);
> > +
> > +   if (D == 0) {
> > +  /* This is invalid, but we should do something so we set GPR0 to 0.
> */
> > +  emit_lri(batch, CS_GPR(0), 0);
> > +   } else if (util_is_power_of_two_or_zero(D)) {
> > +  unsigned log2_D = util_logbase2(D);
> > +  assert(log2_D < 32);
> > +  /* We right-shift by log2(D) by left-shifting by 32 - log2(D) and
> taking
> > +   * the top 32 bits of the result.
> > +   */
> > +  emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - log2_D);
> > +  emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
> > +  emit_lri(batch, CS_GPR(0) + 4, 0);
> > +   } else {
> > +  struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32,
> 32);
> > +  assert(m.multiplier <= UINT32_MAX);
> > +
> > +  if (m.pre_shift) {
> > + /* We right-shift by L by left-shifting by 32 - l and taking
> the top
> > +  * 32 bits of the result.
> > +  */
> > + if (m.pre_shift < 32)
> > +emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 -
> m.pre_shift);
> > + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
> > + emit_lri(batch, CS_GPR(0) + 4, 0);
> > +  }
> > +
> > +  /* Do the 32x32 multiply  into gpr0 */
> > +  emit_mul_gpr0(batch, m.multiplier);
> > +
> > +  if (m.increment) {
> > + /* If we need to increment, save off a copy of GPR0 */
> > + emit_lri(batch, CS_GPR(1) + 0, m.multiplier);
> > + emit_lri(batch, CS_GPR(1) + 4, 0);
> > + emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1);
> > +  }
> > +
> > +  /* Shift by 32 */
> > +  emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
> > +  emit_lri(batch, CS_GPR(0) + 4, 0);
> > +
> > +  if

Re: [Mesa-dev] [PATCH 08/13] anv: Implement the basic form of VK_EXT_transform_feedback

2018-10-15 Thread Jason Ekstrand

On Mon, Oct 15, 2018 at 8:34 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> On 13/10/2018 14:09, Jason Ekstrand wrote:
> > ---
> >   src/intel/vulkan/anv_cmd_buffer.c  |  29 +++
> >   src/intel/vulkan/anv_device.c  |  24 ++
> >   src/intel/vulkan/anv_extensions.py |   2 +-
> >   src/intel/vulkan/anv_pipeline.c|  10 ++-
> >   src/intel/vulkan/anv_private.h |  13 +++
> >   src/intel/vulkan/genX_cmd_buffer.c | 125 +
> >   src/intel/vulkan/genX_pipeline.c   | 122 
> >   7 files changed, 323 insertions(+), 2 deletions(-)
>
>
> ...
>
>
> >  uint32_t topology;
> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index c3a7e5c83c3..90469abbf21 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -2571,6 +2571,30 @@ genX(cmd_buffer_flush_state)(struct
> anv_cmd_buffer *cmd_buffer)
> >
> >  cmd_buffer->state.gfx.vb_dirty &= ~vb_emit;
> >
> > +#if GEN_GEN >= 8
> > +   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) {
> > +  /* We don't need any per-buffer dirty tracking because you're not
> > +   * allowed to bind different XFB buffers while XFB is enabled.
> > +   */
> > +  for (unsigned idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
> > + struct anv_xfb_binding *xfb =
> _buffer->state.xfb_bindings[idx];
> > + anv_batch_emit(_buffer->batch, GENX(3DSTATE_SO_BUFFER),
> sob) {
> > +sob.SOBufferIndex = idx;
> > +
> > +if (cmd_buffer->state.xfb_enabled && xfb->buffer) {
> > +   sob.SOBufferEnable = true;
> > +   sob.SOBufferMOCS = cmd_buffer->device->default_mocs,
> > +   sob.StreamOffsetWriteEnable = false;
> > +   sob.SurfaceBaseAddress =
> anv_address_add(xfb->buffer->address,
> > +xfb->offset);
> > +   /* Size is in DWords - 1 */
> > +   sob.SurfaceSize = xfb->size / 4 - 1;
> > +}
> > + }
>
>
> Apparently documentation says we need a PIPE_CONTROL with CS Stall bit
> set after 3DSTATE_SO_BUFFER.
>

So it does.  I've added it for GEN_GEN >= 10.

--Jason


> > +  }
> > +   }
> > +#endif
> > +
> >  if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
> > anv_batch_emit_batch(_buffer->batch, >batch);
> >
> > @@ -2970,6 +2994,107 @@ void genX(CmdDrawIndexedIndirect)(
> >  }
> >   }
> >
> > +void genX(CmdBeginTransformFeedbackEXT)(
> > +VkCommandBuffer commandBuffer,
> > +uint32_tfirstCounterBuffer,
> > +uint32_tcounterBufferCount,
> > +const VkBuffer* pCounterBuffers,
> > +const VkDeviceSize* pCounterBufferOffsets)
> > +{
> > +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> > +
> > +   assert(firstCounterBuffer < MAX_XFB_BUFFERS);
> > +   assert(counterBufferCount < MAX_XFB_BUFFERS);
> > +   assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS);
> > +
> > +   /* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET:
> > +*
> > +*"Software must ensure that no HW stream output operations can
> be in
> > +*process or otherwise pending at the point that the
> MI_LOAD/STORE
> > +*commands are processed. This will likely require a pipeline
> flush."
> > +*/
> > +   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
> > +   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
> > +
> > +   for (uint32_t idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
> > +  /* If we have a counter buffer, this is a resume so we need to
> load the
> > +   * value into the streamout offset register.  Otherwise, this is
> a begin
> > +   * and we need to reset it to zero.
> > +   */
> > +  if (pCounterBuffers &&
> > +  idx >= firstCounterBuffer &&
> > +  idx - firstCounterBuffer < counterBufferCount &&
> > +  pCounterBuffers[idx - firstCounterBuffer] != VK_NULL_HANDLE) {
> > + uint32_t cb_idx = idx - firstCounterBuffer;
> > + ANV_FROM_HANDLE(anv_buffer, counter_buffer,
> pCounterBuffers[cb_idx]);
> > + uint64_t offset = pCounterBufferOffsets ?
> > +   pCounterBufferOffsets[cb_idx] : 0;
> > +
> > + anv_batch_emit(_buffer->batch, GENX(MI_LOAD_REGISTER_MEM),
> lrm) {
> > +lrm.RegisterAddress  = GENX(SO_WRITE_OFFSET0_num) + idx * 4;
> > +lrm.MemoryAddress=
> anv_address_add(counter_buffer->address,
> > +   offset);
> > + }
> > +  } else {
> > + anv_batch_emit(_buffer->batch, GENX(MI_LOAD_REGISTER_IMM),
> lri) {
> > +lri.RegisterOffset   = GENX(SO_WRITE_OFFSET0_num) +

Re: [Mesa-dev] [PATCH] ac/nir: Use context-specific LLVM types

2018-10-15 Thread Samuel Pitoiset


Reviewed-by: Samuel Pitoiset 

On 10/15/18 5:23 PM, Bas Nieuwenhuizen wrote:

Reviewed-by: Bas Nieuwenhuizen 

Thanks!
On Mon, Oct 15, 2018 at 5:19 PM Alex Smith  wrote:


LLVMInt*Type() return types from the global context and therefore are
not safe for use in other contexts. Use types from our own context
instead.

Fixes frequent crashes seen when doing multithreaded pipeline creation.

Fixes: 4d0b02bb5a "ac: add support for 16bit load_push_constant"
Fixes: 7e7ee82698 "ac: add support for 16bit buffer loads"
Cc: "18.2" 
Signed-off-by: Alex Smith 
---
  src/amd/common/ac_nir_to_llvm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e0a8e04cf3..402cf2d665 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1387,7 +1387,7 @@ static LLVMValueRef visit_load_push_constant(struct 
ac_nir_context *ctx,

 if (instr->dest.ssa.bit_size == 16) {
 unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
-   LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16Type(), 2 * 
load_dwords);
+   LLVMTypeRef vec_type = 
LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
 ptr = ac_cast_ptr(>ac, ptr, vec_type);
 LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
@@ -1671,7 +1671,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
 };
 results[idx] = ac_build_intrinsic(>ac, load_name, 
data_type, params, 5, 0);
 unsigned num_elems = ac_get_type_size(data_type) / 
elem_size_bytes;
-   LLVMTypeRef resTy = 
LLVMVectorType(LLVMIntType(instr->dest.ssa.bit_size), num_elems);
+   LLVMTypeRef resTy = 
LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), 
num_elems);
 results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], 
resTy, "");
 }
 }
--
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac/nir: Use context-specific LLVM types

2018-10-15 Thread Bas Nieuwenhuizen

Reviewed-by: Bas Nieuwenhuizen 

Thanks!
On Mon, Oct 15, 2018 at 5:19 PM Alex Smith  wrote:
>
> LLVMInt*Type() return types from the global context and therefore are
> not safe for use in other contexts. Use types from our own context
> instead.
>
> Fixes frequent crashes seen when doing multithreaded pipeline creation.
>
> Fixes: 4d0b02bb5a "ac: add support for 16bit load_push_constant"
> Fixes: 7e7ee82698 "ac: add support for 16bit buffer loads"
> Cc: "18.2" 
> Signed-off-by: Alex Smith 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index e0a8e04cf3..402cf2d665 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1387,7 +1387,7 @@ static LLVMValueRef visit_load_push_constant(struct 
> ac_nir_context *ctx,
>
> if (instr->dest.ssa.bit_size == 16) {
> unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
> -   LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16Type(), 2 * 
> load_dwords);
> +   LLVMTypeRef vec_type = 
> LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
> ptr = ac_cast_ptr(>ac, ptr, vec_type);
> LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
> res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
> @@ -1671,7 +1671,7 @@ static LLVMValueRef visit_load_buffer(struct 
> ac_nir_context *ctx,
> };
> results[idx] = ac_build_intrinsic(>ac, 
> load_name, data_type, params, 5, 0);
> unsigned num_elems = ac_get_type_size(data_type) / 
> elem_size_bytes;
> -   LLVMTypeRef resTy = 
> LLVMVectorType(LLVMIntType(instr->dest.ssa.bit_size), num_elems);
> +   LLVMTypeRef resTy = 
> LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, 
> instr->dest.ssa.bit_size), num_elems);
> results[idx] = LLVMBuildBitCast(ctx->ac.builder, 
> results[idx], resTy, "");
> }
> }
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] anv: Add a dummy implementation of GetPhysicalDevicePresentRectanglesKHR

2018-10-15 Thread Jason Ekstrand

And what size would that be on Wayland?  I've opened a spec bug about this
and we'll talk about it on the SI call this week.  This entrypoint seems
extraordinarily half-baked to me.  I don't see how it makes any sense
outside of VK_KHR_display or windows full-screen exclusive.

--Jason

On Mon, Oct 15, 2018 at 10:21 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> Argh :(
>
> Shouldn't we at least return the size of the surface?
>
> On 15/10/2018 16:08, Jason Ekstrand wrote:
>
> It's explicitly for a multi-device case where each device presents to part
> of the screen.  Since we don't do that, I'm not sure what we're supposed to
> do.
>
> On Mon, Oct 15, 2018 at 9:14 AM Lionel Landwerlin <
> lionel.g.landwer...@intel.com> wrote:
>
>> I'm guessing we should plug that into the number of screens on bare
>> DRM, for example, with each screen's native size reported in pRects.
>>
>> On 15/10/2018 04:47, Jason Ekstrand wrote:
>> > Not really sure what we're supposed to do with this one but we should do
>> > something.
>> > ---
>> >   src/intel/vulkan/anv_wsi.c | 11 +++
>> >   1 file changed, 11 insertions(+)
>> >
>> > diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
>> > index 5ed1d711689..4f3f3fc57af 100644
>> > --- a/src/intel/vulkan/anv_wsi.c
>> > +++ b/src/intel/vulkan/anv_wsi.c
>> > @@ -294,3 +294,14 @@ VkResult anv_GetDeviceGroupSurfacePresentModesKHR(
>> >
>> >  return VK_SUCCESS;
>> >   }
>> > +
>> > +VkResult anv_GetPhysicalDevicePresentRectanglesKHR(
>> > +VkPhysicalDevicephysicalDevice,
>> > +VkSurfaceKHRsurface,
>> > +uint32_t*   pRectCount,
>> > +VkRect2D*   pRects)
>> > +{
>> > +   /* TODO: What should I be doing here? */
>> > +   *pRectCount = 0;
>> > +   return VK_SUCCESS;
>> > +}
>>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] anv: Add a dummy implementation of GetPhysicalDevicePresentRectanglesKHR

2018-10-15 Thread Lionel Landwerlin


Argh :(

Shouldn't we at least return the size of the surface?

On 15/10/2018 16:08, Jason Ekstrand wrote:
It's explicitly for a multi-device case where each device presents to 
part of the screen.  Since we don't do that, I'm not sure what we're 
supposed to do.


On Mon, Oct 15, 2018 at 9:14 AM Lionel Landwerlin
<lionel.g.landwer...@intel.com> wrote:


I'm guessing we should plug that into the number of screens on bare
DRM, for example, with each screen's native size reported in pRects.

On 15/10/2018 04:47, Jason Ekstrand wrote:
> Not really sure what we're supposed to do with this one but we
should do
> something.
> ---
>   src/intel/vulkan/anv_wsi.c | 11 +++
>   1 file changed, 11 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
> index 5ed1d711689..4f3f3fc57af 100644
> --- a/src/intel/vulkan/anv_wsi.c
> +++ b/src/intel/vulkan/anv_wsi.c
> @@ -294,3 +294,14 @@ VkResult
anv_GetDeviceGroupSurfacePresentModesKHR(
>
>      return VK_SUCCESS;
>   }
> +
> +VkResult anv_GetPhysicalDevicePresentRectanglesKHR(
> +    VkPhysicalDevice physicalDevice,
> +    VkSurfaceKHR surface,
> +    uint32_t*  pRectCount,
> +    VkRect2D*  pRects)
> +{
> +   /* TODO: What should I be doing here? */
> +   *pRectCount = 0;
> +   return VK_SUCCESS;
> +}



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac/nir: Use context-specific LLVM types

2018-10-15 Thread Alex Smith

LLVMInt*Type() return types from the global context and therefore are
not safe for use in other contexts. Use types from our own context
instead.

Fixes frequent crashes seen when doing multithreaded pipeline creation.

Fixes: 4d0b02bb5a "ac: add support for 16bit load_push_constant"
Fixes: 7e7ee82698 "ac: add support for 16bit buffer loads"
Cc: "18.2" 
Signed-off-by: Alex Smith 
---
 src/amd/common/ac_nir_to_llvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e0a8e04cf3..402cf2d665 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1387,7 +1387,7 @@ static LLVMValueRef visit_load_push_constant(struct 
ac_nir_context *ctx,
 
if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
-   LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16Type(), 2 * 
load_dwords);
+   LLVMTypeRef vec_type = 
LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
ptr = ac_cast_ptr(>ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
@@ -1671,7 +1671,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
};
results[idx] = ac_build_intrinsic(>ac, load_name, 
data_type, params, 5, 0);
unsigned num_elems = ac_get_type_size(data_type) / 
elem_size_bytes;
-   LLVMTypeRef resTy = 
LLVMVectorType(LLVMIntType(instr->dest.ssa.bit_size), num_elems);
+   LLVMTypeRef resTy = 
LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), 
num_elems);
results[idx] = LLVMBuildBitCast(ctx->ac.builder, 
results[idx], resTy, "");
}
}
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v5 4/5] loader/dri3: Enable adaptive_sync via _VARIABLE_REFRESH property

2018-10-15 Thread Michel Dänzer

On 2018-10-12 6:48 p.m., Nicholas Kazlauskas wrote:
> The DDX driver can be notified of adaptive sync suitability by
> flagging the application's window with the _VARIABLE_REFRESH property.
> 
> This property is set on the first swap the application performs
> when adaptive_sync is set to true in the drirc.
> 
> It's performed here instead of when the loader is initialized for
> two reasons:
> 
> (1) The window's drawable can be missing during loader init.
> This can be observed during the Unigine Superposition benchmark.
> 
> (2) Adaptive sync will only be enabled closer to when the application
> actually begins rendering.
> 
> If adaptive_sync is false then the _VARIABLE_REFRESH property
> is deleted on loader init.
> 
> The property is only managed on the glx DRI3 backend for now. This
> should cover most common applications and games on modern hardware.
> 
> Vulkan support can be implemented in a similar manner but would likely
> require splitting the function out into a common helper function.
> 
> Signed-off-by: Nicholas Kazlauskas 
> 
> [...]
>  
> @@ -331,16 +358,28 @@ loader_dri3_drawable_init(xcb_connection_t *conn,
> draw->have_back = 0;
> draw->have_fake_front = 0;
> draw->first_init = true;
> +   draw->adaptive_sync = false;
> +   draw->adaptive_sync_active = false;
>  
> draw->cur_blit_source = -1;
> draw->back_format = __DRI_IMAGE_FORMAT_NONE;
> mtx_init(>mtx, mtx_plain);
> cnd_init(>event_cnd);
>  
> -   if (draw->ext->config)
> +   if (draw->ext->config) {
>draw->ext->config->configQueryi(draw->dri_screen,
>"vblank_mode", _mode);
>  
> +  draw->ext->config->configQueryb(draw->dri_screen,
> +  "adaptive_sync",
> +  _sync);
> +
> +  draw->adaptive_sync = adaptive_sync;
> +
> +  if (!adaptive_sync)
> + set_adaptive_sync_property(conn, draw->drawable, false);
> +   }

The set_adaptive_sync_property call should be after the if
(draw->ext->config) block here, so that the property is deleted even if
draw->ext->config is NULL.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] anv: Add a dummy implementation of GetPhysicalDevicePresentRectanglesKHR

2018-10-15 Thread Jason Ekstrand

It's explicitly for a multi-device case where each device presents to part
of the screen.  Since we don't do that, I'm not sure what we're supposed to
do.

On Mon, Oct 15, 2018 at 9:14 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> I'm guessing we should plug that into the number of screens on bare
> DRM, for example, with each screen's native size reported in pRects.
>
> On 15/10/2018 04:47, Jason Ekstrand wrote:
> > Not really sure what we're supposed to do with this one but we should do
> > something.
> > ---
> >   src/intel/vulkan/anv_wsi.c | 11 +++
> >   1 file changed, 11 insertions(+)
> >
> > diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
> > index 5ed1d711689..4f3f3fc57af 100644
> > --- a/src/intel/vulkan/anv_wsi.c
> > +++ b/src/intel/vulkan/anv_wsi.c
> > @@ -294,3 +294,14 @@ VkResult anv_GetDeviceGroupSurfacePresentModesKHR(
> >
> >  return VK_SUCCESS;
> >   }
> > +
> > +VkResult anv_GetPhysicalDevicePresentRectanglesKHR(
> > +VkPhysicalDevicephysicalDevice,
> > +VkSurfaceKHRsurface,
> > +uint32_t*   pRectCount,
> > +VkRect2D*   pRects)
> > +{
> > +   /* TODO: What should I be doing here? */
> > +   *pRectCount = 0;
> > +   return VK_SUCCESS;
> > +}
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] swr/rast: fix intrinsic/function for LLVM 7 compatibility

2018-10-15 Thread Alok Hota

Converted from x86 VFMADDPS intrinsic to generic LLVM intrinsic, and
removed createInstructionSimplifierPass, which were both removed in LLVM
7.0.0

These changes combine patches we received from the community and our own
internal patches
---
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py  |  2 +-
 .../drivers/swr/rasterizer/jitter/blend_jit.cpp   |  1 -
 .../drivers/swr/rasterizer/jitter/builder_misc.cpp| 11 ++-
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp   |  1 -
 .../rasterizer/jitter/functionpasses/lower_x86.cpp|  1 -
 .../drivers/swr/rasterizer/jitter/streamout_jit.cpp   |  1 -
 6 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 2e7f1a88a0..d34e88d1bc 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -57,7 +57,6 @@ intrinsics = [
 ['VHSUBPS', ['a', 'b'], 'a'],
 ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
 ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
-['VFMADDPS',['a', 'b', 'c'], 'a'],
 ['VPHADDD', ['a', 'b'], 'a'],
 ['PDEP32',  ['a', 'b'], 'a'],
 ['RDTSC',   [], 'mInt64Ty'],
@@ -71,6 +70,7 @@ llvm_intrinsics = [
 ['STACKRESTORE', 'stackrestore', ['a'], []],
 ['VMINPS', 'minnum', ['a', 'b'], ['a']],
 ['VMAXPS', 'maxnum', ['a', 'b'], ['a']],
+['VFMADDPS', 'fmuladd', ['a', 'b', 'c'], ['a']],
 ['DEBUGTRAP', 'debugtrap', [], []],
 ['POPCNT', 'ctpop', ['a'], ['a']],
 ['LOG2', 'log2', ['a'], ['a']],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
index f89c502db7..d5328c8e4e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -870,7 +870,6 @@ struct BlendJit : public Builder
 passes.add(createCFGSimplificationPass());
 passes.add(createEarlyCSEPass());
 passes.add(createInstructionCombiningPass());
-passes.add(createInstructionSimplifierPass());
 passes.add(createConstantPropagationPass());
 passes.add(createSCCPPass());
 passes.add(createAggressiveDCEPass());
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 4116dad443..26d8688f5e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -755,15 +755,8 @@ namespace SwrJit
 Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
 {
 Value* vOut;
-// use FMADs if available
-if (JM()->mArch.AVX2())
-{
-vOut = VFMADDPS(a, b, c);
-}
-else
-{
-vOut = FADD(FMUL(a, b), c);
-}
+// This maps to LLVM fmuladd intrinsic
+vOut = VFMADDPS(a, b, c);
 return vOut;
 }
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index b4d326ebdc..3ad0fabe81 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -294,7 +294,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& 
fetchState)
 optPasses.add(createCFGSimplificationPass());
 optPasses.add(createEarlyCSEPass());
 optPasses.add(createInstructionCombiningPass());
-optPasses.add(createInstructionSimplifierPass());
 optPasses.add(createConstantPropagationPass());
 optPasses.add(createSCCPPass());
 optPasses.add(createAggressiveDCEPass());
diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 7605823c04..c34959d35e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -76,7 +76,6 @@ namespace SwrJit
 {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
 {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
 {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
-{"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
 {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
 {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
 {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
index 8f86af2a4b..11ad36521b 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
@@ -306,7 +306,6 @@

[Mesa-dev] [RFC] Allow fd.o to join forces with X.Org

2018-10-15 Thread Harry Wentland

The leadership of freedesktop.org (fd.o) has recently expressed interest
in having an elected governing body. Given the tight connection between
fd.o and X.Org and the fact that X.Org has such a governing body it
seemed obvious to consider extending X.Org's mandate to fd.o.

Quite a bit of background on fd.o leading up to this has been covered by
Daniel Stone at XDC 2018 and was covered really well by Jake Edge of LWN [1].

One question that is briefly addressed in the LWN article and was
thoroughly discussed by members of the X.Org boards, Daniel Stone, and
others in hallway discussions is the question of whether to extend the
X.Org membership to projects hosted on fd.o but outside the purpose of
the X.Org foundation as enacted in its bylaws.

Most people I talked to would prefer not to dilute X.Org's mission and
extend membership only to contributors of projects that follow X.Org's
purpose as enacted in its bylaws. Other projects can continue to be
hosted on fd.o but won't receive X.Org membership for the mere reason of
being hosted on fd.o.

[1] https://lwn.net/Articles/767258/

v2:
 - Subject line that better describes the intention
 - Briefly describe reasons behind this change
 - Drop expanding membership eligibility
---

We're looking for feedback and comments on this patch. If it's not
widely controversial the final version of the patch will be put to a
vote at the 2019 X.Org elections.

The patch applies to the X.Org bylaws git repo, which can be found at
https://gitlab.freedesktop.org/xorgfoundation/bylaws

Happy commenting.

Harry

bylaws.tex | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/bylaws.tex b/bylaws.tex
index 4ab35a4f7745..44ff4745963b 100644
--- a/bylaws.tex
+++ b/bylaws.tex
@@ -14,7 +14,7 @@ BE IT ENACTED AND IT IS HEREBY ENACTED as a By-law of the 
X.Org Foundation
 
 The purpose of the X.Org Foundation shall be to:
 \begin{enumerate}[(i)\hspace{.2cm}]
-   \item Research, develop, support, organize, administrate, standardize,
+   \item \label{1} Research, develop, support, organize, administrate, 
standardize,
promote, and defend a free and open accelerated graphics stack. This
includes, but is not limited to, the following projects: DRM, Mesa,
Wayland and the X Window System,
@@ -24,6 +24,11 @@ The purpose of the X.Org Foundation shall be to:
 
\item Support and educate the general community of users of this
graphics stack.
+
+   \item Support free and open source projects through the freedesktop.org
+   infrastructure. For projects outside the scope of item (\ref{1}) support
+   extends to project hosting only.
+
 \end{enumerate}
 
 \article{INTERPRETATION}
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 108353] Request: Control Center for AMD GPU

2018-10-15 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=108353

--- Comment #2 from Alex Deucher  ---
The tricky part is making something that works across different desktop
environments.  Each desktop environment stores its display settings in a
different way.  Additionally, since most of the infrastructure and APIs on
Linux are common across GPUs, it would probably be better to make a generic
control center that each desktop environment could get on board with for the
common areas, and then we can add vendor/device specific knobs on top of that. 
That way we'd have a common interface across GPUs and it would work
consistently across desktops.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 108353] Request: Control Center for AMD GPU

2018-10-15 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=108353

Alex Deucher  changed:

   What|Removed |Added

   Severity|normal  |enhancement

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] anv: Add a dummy implementation of GetPhysicalDevicePresentRectanglesKHR

2018-10-15 Thread Lionel Landwerlin

I'm guessing we should plug that into the number of screens on bare 
DRM, for example, with each screen's native size reported in pRects.


On 15/10/2018 04:47, Jason Ekstrand wrote:

Not really sure what we're supposed to do with this one but we should do
something.
---
  src/intel/vulkan/anv_wsi.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 5ed1d711689..4f3f3fc57af 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -294,3 +294,14 @@ VkResult anv_GetDeviceGroupSurfacePresentModesKHR(
  
 return VK_SUCCESS;

  }
+
+VkResult anv_GetPhysicalDevicePresentRectanglesKHR(
+VkPhysicalDevicephysicalDevice,
+VkSurfaceKHRsurface,
+uint32_t*   pRectCount,
+VkRect2D*   pRects)
+{
+   /* TODO: What should I be doing here? */
+   *pRectCount = 0;
+   return VK_SUCCESS;
+}

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/13] anv: Add support for VK_EXT_transform_feedback

2018-10-15 Thread Lionel Landwerlin

I've left a couple of comments on patch 8 about a programming note for 
Gen10+ and a nit on patch 9.

Otherwise patches 7->13 are :

Reviewed-by: Lionel Landwerlin 

(Hopefully Samuel & you can review each other's nir changes :)

On 13/10/2018 14:09, Jason Ekstrand wrote:

This series adds support for the new VK_EXT_transform_feedback extension.
As far as I know, everything works but it's still all a bit experimental as
we don't have very good tests yet.  CTS tests are in-progress and we hope
to have decent testing soon.  We likely won't be landing them in master
until we get better testing as the testing is currently really sketchy.
However, the basics do work and Nvidia HairWorks now works in Witcher 3
with DXVK.

You may be asking the question, "What took you so long?" or, "Why are you doing
transform feedback?  I thought that was legacy stuff."  For those who are
interested in the history or the rationale behind the lack of transform
feedback in Vulkan and its sudden appearance, I've written a blog post
explaining it:

http://jason-blog.jlekstrand.net/2018/10/transform-feedback-is-terrible-so-why.html

For those wishing to try out the patches, or who prefer to review with a
branch, the series can be found on my personal gitlab:

https://gitlab.freedesktop.org/jekstrand/mesa/commits/wip/VK_EXT_transform_feedback

Happy reviewing and/or testing!

--Jason Ekstrand


Jason Ekstrand (11):
   vulkan: Update Vulkan XML and headers to 1.1.88
   anv: Add but do not enable VK_EXT_transform_feedback
   nir: Preserve offsets in lower_io_to_scalar_early
   nir: Add a pass for gathering transform feedback info
   anv: Add pipeline cache support for xfb_info
   anv: Implement the basic form of VK_EXT_transform_feedback
   anv: Implement vkCmdDrawIndirectByteCountEXT
   anv: Implement CmdBegin/EndQueryIndexed
   genxml: Add SO_PRIM_STORAGE_NEEDED and SO_NUM_PRIMS_WRITTEN
   anv: Implement transform feedback queries
   anv: Improve the asserts in anv_buffer_get_range

Samuel Pitoiset (2):
   nir: do not remove varyings used for transform feedback
   nir: fix lowering arrays to elements for TFB outputs

  include/vulkan/vulkan.h   |   4 +
  include/vulkan/vulkan_core.h  | 244 +++-
  include/vulkan/vulkan_fuchsia.h   |  58 
  src/compiler/Makefile.sources |   4 +-
  src/compiler/nir/meson.build  |   2 +
  src/compiler/nir/nir_gather_xfb_info.c| 150 ++
  src/compiler/nir/nir_linking_helpers.c|   3 +
  .../nir/nir_lower_io_arrays_to_elements.c |   3 +
  src/compiler/nir/nir_lower_io_to_scalar.c |   8 +
  src/compiler/nir/nir_xfb_info.h   |  59 
  src/intel/genxml/gen10.xml|  32 ++
  src/intel/genxml/gen11.xml|  32 ++
  src/intel/genxml/gen7.xml |  32 ++
  src/intel/genxml/gen75.xml|  32 ++
  src/intel/genxml/gen8.xml |  32 ++
  src/intel/genxml/gen9.xml |  32 ++
  src/intel/vulkan/anv_blorp.c  |   3 +-
  src/intel/vulkan/anv_cmd_buffer.c |  29 ++
  src/intel/vulkan/anv_device.c |  24 ++
  src/intel/vulkan/anv_extensions.py|   1 +
  src/intel/vulkan/anv_pipeline.c   |  12 +-
  src/intel/vulkan/anv_pipeline_cache.c |  48 ++-
  src/intel/vulkan/anv_private.h|  22 +-
  src/intel/vulkan/genX_cmd_buffer.c| 275 ++
  src/intel/vulkan/genX_pipeline.c  | 123 
  src/intel/vulkan/genX_query.c |  93 +-
  src/vulkan/registry/vk.xml| 253 ++--
  27 files changed, 1562 insertions(+), 48 deletions(-)
  create mode 100644 include/vulkan/vulkan_fuchsia.h
  create mode 100644 src/compiler/nir/nir_gather_xfb_info.c
  create mode 100644 src/compiler/nir/nir_xfb_info.h



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/13] anv: Implement the basic form of VK_EXT_transform_feedback

2018-10-15 Thread Lionel Landwerlin


On 13/10/2018 14:09, Jason Ekstrand wrote:

---
  src/intel/vulkan/anv_cmd_buffer.c  |  29 +++
  src/intel/vulkan/anv_device.c  |  24 ++
  src/intel/vulkan/anv_extensions.py |   2 +-
  src/intel/vulkan/anv_pipeline.c|  10 ++-
  src/intel/vulkan/anv_private.h |  13 +++
  src/intel/vulkan/genX_cmd_buffer.c | 125 +
  src/intel/vulkan/genX_pipeline.c   | 122 
  7 files changed, 323 insertions(+), 2 deletions(-)



...



 uint32_t topology;
  
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c

index c3a7e5c83c3..90469abbf21 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2571,6 +2571,30 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer 
*cmd_buffer)
  
 cmd_buffer->state.gfx.vb_dirty &= ~vb_emit;
  
+#if GEN_GEN >= 8

+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) {
+  /* We don't need any per-buffer dirty tracking because you're not
+   * allowed to bind different XFB buffers while XFB is enabled.
+   */
+  for (unsigned idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
+ struct anv_xfb_binding *xfb = _buffer->state.xfb_bindings[idx];
+ anv_batch_emit(_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
+sob.SOBufferIndex = idx;
+
+if (cmd_buffer->state.xfb_enabled && xfb->buffer) {
+   sob.SOBufferEnable = true;
+   sob.SOBufferMOCS = cmd_buffer->device->default_mocs,
+   sob.StreamOffsetWriteEnable = false;
+   sob.SurfaceBaseAddress = anv_address_add(xfb->buffer->address,
+xfb->offset);
+   /* Size is in DWords - 1 */
+   sob.SurfaceSize = xfb->size / 4 - 1;
+}
+ }



Apparently the documentation says we need a PIPE_CONTROL with the CS Stall 
bit set after 3DSTATE_SO_BUFFER.




+  }
+   }
+#endif
+
 if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
anv_batch_emit_batch(_buffer->batch, >batch);
  
@@ -2970,6 +2994,107 @@ void genX(CmdDrawIndexedIndirect)(

 }
  }
  
+void genX(CmdBeginTransformFeedbackEXT)(

+VkCommandBuffer commandBuffer,
+uint32_tfirstCounterBuffer,
+uint32_tcounterBufferCount,
+const VkBuffer* pCounterBuffers,
+const VkDeviceSize* pCounterBufferOffsets)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   assert(firstCounterBuffer < MAX_XFB_BUFFERS);
+   assert(counterBufferCount < MAX_XFB_BUFFERS);
+   assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS);
+
+   /* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET:
+*
+*"Ssoftware must ensure that no HW stream output operations can be in
+*process or otherwise pending at the point that the MI_LOAD/STORE
+*commands are processed. This will likely require a pipeline flush."
+*/
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
+   for (uint32_t idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
+  /* If we have a counter buffer, this is a resume so we need to load the
+   * value into the streamout offset register.  Otherwise, this is a begin
+   * and we need to reset it to zero.
+   */
+  if (pCounterBuffers &&
+  idx >= firstCounterBuffer &&
+  idx - firstCounterBuffer < counterBufferCount &&
+  pCounterBuffers[idx - firstCounterBuffer] != VK_NULL_HANDLE) {
+ uint32_t cb_idx = idx - firstCounterBuffer;
+ ANV_FROM_HANDLE(anv_buffer, counter_buffer, pCounterBuffers[cb_idx]);
+ uint64_t offset = pCounterBufferOffsets ?
+   pCounterBufferOffsets[cb_idx] : 0;
+
+ anv_batch_emit(_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+lrm.RegisterAddress  = GENX(SO_WRITE_OFFSET0_num) + idx * 4;
+lrm.MemoryAddress= anv_address_add(counter_buffer->address,
+   offset);
+ }
+  } else {
+ anv_batch_emit(_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+lri.RegisterOffset   = GENX(SO_WRITE_OFFSET0_num) + idx * 4;
+lri.DataDWord= 0;
+ }
+  }
+   }
+
+   cmd_buffer->state.xfb_enabled = true;
+   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE;
+}
+
+void genX(CmdEndTransformFeedbackEXT)(
+VkCommandBuffer commandBuffer,
+uint32_tfirstCounterBuffer,
+uint32_tcounterBufferCount,
+const VkBuffer* pCounterBuffers,
+const VkDeviceSize*

[Mesa-dev] [RFC 2/7] i965: SIMD32 heuristics control data

2018-10-15 Thread Toni Lönnberg

Added a new structure for holding SIMD32 heuristics control data. The
control data itself will be fetched from drirc.
---
 src/intel/compiler/brw_compiler.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index d8c9499..785acdb 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -38,6 +38,15 @@ struct ra_regs;
 struct nir_shader;
 struct brw_program;
 
+struct brw_simd32_heuristics_control {
+   bool grouped_sends_check;
+   int max_grouped_sends;
+   bool inst_count_check;
+   float inst_count_ratio;
+   bool mrt_check;
+   int max_mrts;
+};
+
 struct brw_compiler {
const struct gen_device_info *devinfo;
 
@@ -118,6 +127,8 @@ struct brw_compiler {
 * whether nir_opt_large_constants will be run.
 */
bool supports_shader_constants;
+
+   struct brw_simd32_heuristics_control simd32_heuristics_control;
 };
 
 /**
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC 6/7] i965/fs: SIMD32 selection heuristic based on grouped texture fetches

2018-10-15 Thread Toni Lönnberg

The function goes through the compiled shader and checks how many grouped
texture fetches there are. This is a simple heuristic which gets rid of most
of the regressions when enabling SIMD32 shaders but still retains some of
the benefits.
---
 src/intel/compiler/brw_fs.cpp | 26 ++
 src/intel/compiler/brw_fs.h   |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 23a25fe..02e151f 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7299,6 +7299,32 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
return g.get_assembly();
 }
 
+bool
+fs_visitor::run_heuristic(const struct brw_simd32_heuristics_control *ctrl) {
+   int grouped_sends = 0;
+   int max_grouped_sends = 0;
+   bool pass = true;
+
+   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+  if (inst->opcode >= SHADER_OPCODE_TEX && inst->opcode <= 
SHADER_OPCODE_SAMPLEINFO_LOGICAL) {
+ ++grouped_sends;
+  } else if (grouped_sends > 0) {
+ if (grouped_sends > max_grouped_sends) {
+max_grouped_sends = grouped_sends;
+ }
+ grouped_sends = 0;
+  }
+   }
+
+   if (ctrl->grouped_sends_check) {
+  if (max_grouped_sends > ctrl->max_grouped_sends) {
+ pass = false;
+  }
+   }
+
+   return pass;
+}
+
 fs_reg *
 fs_visitor::emit_cs_work_group_id_setup()
 {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index a344d7c..d7e4abf 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -286,6 +286,8 @@ public:
void dump_instruction(backend_instruction *inst);
void dump_instruction(backend_instruction *inst, FILE *file);
 
+   bool run_heuristic(const struct brw_simd32_heuristics_control *ctrl);
+
const void *const key;
const struct brw_sampler_prog_key_data *key_tex;
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC 3/7] i965: SIMD32 heuristics control data from drirc

2018-10-15 Thread Toni Lönnberg

To be able to test the heuristics with different parameters, they can be
controlled via environment variables through drirc.
---
 src/mesa/drivers/dri/i965/brw_context.c  | 13 +
 src/mesa/drivers/dri/i965/intel_screen.c | 27 +++
 2 files changed, 40 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 6ba64e4..8cc0529 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -893,6 +893,19 @@ brw_process_driconf_options(struct brw_context *brw)
ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
driComputeOptionsSha1(>screen->optionCache,
  ctx->Const.dri_config_options_sha1);
+
+   brw->screen->compiler->simd32_heuristics_control.grouped_sends_check =
+  driQueryOptionb(>optionCache, "simd32_heuristic_grouped_check");
+   brw->screen->compiler->simd32_heuristics_control.max_grouped_sends =
+  driQueryOptioni(>optionCache, "simd32_heuristic_grouped_sends");
+   brw->screen->compiler->simd32_heuristics_control.inst_count_check =
+  driQueryOptionb(>optionCache, "simd32_heuristic_inst_check");
+   brw->screen->compiler->simd32_heuristics_control.inst_count_ratio =
+  driQueryOptionf(>optionCache, "simd32_heuristic_inst_ratio");
+   brw->screen->compiler->simd32_heuristics_control.mrt_check =
+  driQueryOptionb(>optionCache, "simd32_heuristic_mrt_check");
+   brw->screen->compiler->simd32_heuristics_control.max_mrts =
+  driQueryOptioni(>optionCache, "simd32_heuristic_max_mrts");
 }
 
 GLboolean
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index c3bd30f..8601dcd 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -61,6 +61,33 @@ DRI_CONF_BEGIN
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
 DRI_CONF_DESC_END
   DRI_CONF_OPT_END
+
+  DRI_CONF_OPT_BEGIN_B(simd32_heuristic_grouped_check, "true")
+  DRI_CONF_DESC(en, "Enable/disable grouped texture fetch "
+"check in the SIMD32 selection heuristic.")
+  DRI_CONF_OPT_END
+  DRI_CONF_OPT_BEGIN_V(simd32_heuristic_grouped_sends, int, 6, "1:999")
+ DRI_CONF_DESC(en, "How many grouped texture fetches should "
+"the SIMD32 selection heuristic allow.")
+  DRI_CONF_OPT_END
+  DRI_CONF_OPT_BEGIN_B(simd32_heuristic_inst_check, "true")
+  DRI_CONF_DESC(en, "Enable/disable SIMD32/SIMD16 instruction "
+"count ratio check in the SIMD32 selection "
+"heuristic.")
+  DRI_CONF_OPT_END
+  DRI_CONF_OPT_BEGIN_V(simd32_heuristic_inst_ratio, float, 2.3, "1:999")
+  DRI_CONF_DESC(en, "SIMD32/SIMD16 instruction count ratio "
+"the SIMD32 selection heuristic should allow.")
+  DRI_CONF_OPT_END
+  DRI_CONF_OPT_BEGIN_B(simd32_heuristic_mrt_check, "true")
+  DRI_CONF_DESC(en, "Enable/disable MRT write check in the "
+"SIMD32 selection heuristic.")
+  DRI_CONF_OPT_END
+  DRI_CONF_OPT_BEGIN_V(simd32_heuristic_max_mrts, int, 1, "1:8")
+  DRI_CONF_DESC(en, "How many MRT writes should the SIMD32 "
+"selection heuristic allow.")
+  DRI_CONF_OPT_END
+
   DRI_CONF_MESA_NO_ERROR("false")
DRI_CONF_SECTION_END
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC 5/7] i965/fs: Save the instruction count of each dispatch width

2018-10-15 Thread Toni Lönnberg

The SIMD32 selection heuristics will use this information for deciding whether
SIMD32 shaders should be used.
---
 src/intel/compiler/brw_fs.h |  2 ++
 src/intel/compiler/brw_fs_generator.cpp | 12 
 2 files changed, 14 insertions(+)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index aba19d5..a344d7c 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -397,6 +397,7 @@ public:
 
void enable_debug(const char *shader_name);
int generate_code(const cfg_t *cfg, int dispatch_width);
+   int get_inst_count(int dispatch_width);
const unsigned *get_assembly();
 
 private:
@@ -489,6 +490,7 @@ private:
struct brw_stage_prog_data * const prog_data;
 
unsigned dispatch_width; /**< 8, 16 or 32 */
+   int inst_count[3]; /* for 8, 16 and 32 */
 
exec_list discard_halt_patches;
unsigned promoted_constants;
diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index cb402cd..797824e 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2486,6 +2486,8 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
   fill_count, promoted_constants, before_size,
   after_size);
 
+   inst_count[ffs(dispatch_width) - 4] = before_size / 16;
+
return start_offset;
 }
 
@@ -2494,3 +2496,13 @@ fs_generator::get_assembly()
 {
return brw_get_program(p, _data->program_size);
 }
+
+int
+fs_generator::get_inst_count(int dispatch_width)
+{
+   if (dispatch_width == 8 || dispatch_width == 16 || dispatch_width == 32) {
+  return inst_count[ffs(dispatch_width) - 4];
+   } else {
+  return 0;
+   }
+}
\ No newline at end of file
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC 4/7] mesa: Helper functions for counting set bits in a mask

2018-10-15 Thread Toni Lönnberg

---
 src/util/bitscan.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index dc89ac9..cdfecaf 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -112,6 +112,31 @@ u_bit_scan64(uint64_t *mask)
return i;
 }
 
+/* Count bits set in mask */
+static inline int
+u_count_bits(unsigned *mask)
+{
+   unsigned v = *mask;
+   int c;
+   v = v - ((v >> 1) & 0x);
+   v = (v & 0x) + ((v >> 2) & 0x);
+   v = (v + (v >> 4)) & 0xF0F0F0F;
+   c = (int)((v * 0x1010101) >> 24);
+   return c;
+}
+
+static inline int
+u_count_bits64(uint64_t *mask)
+{
+   uint64_t v = *mask;
+   int c;
+   v = v - ((v >> 1) & 0xull);
+   v = (v & 0xull) + ((v >> 2) & 0xull);
+   v = (v + (v >> 4)) & 0xF0F0F0F0F0F0F0Full;
+   c = (int)((v * 0x101010101010101ull) >> 56);
+   return c;
+}
+
 /* Determine if an unsigned value is a power of two.
  *
  * \note
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC 7/7] i965/fs: Enable all SIMD32 heuristics

2018-10-15 Thread Toni Lönnberg

There are three simple heuristics for SIMD32 shader enabling:

- How many MRTs does the shader write into?
- How many grouped texture fetches does the shader have?
- How many instructions does the SIMD32 shader have compared to the SIMD16
   shader?

For testing purposes, the heuristics can be controlled via these environment
variables:

simd32_heuristic_mrt_check
- Enables MRT write check
- Default: true

simd32_heuristic_max_mrts
- How many MRT writes the heuristic allows
- Default: 1

simd32_heuristic_grouped_check
- Enables grouped texture fetch check
- Default: true

simd32_heuristic_grouped_sends
- How many grouped texture fetches the heuristic allows
- Default: 6

simd32_heuristic_inst_check
- Enables SIMD32 vs. SIMD16 instruction count check
- Default: true

simd32_heuristic_inst_ratio
- SIMD32 vs. SIMD16 instruction count ratio the heuristic allows
- Default: 2.3

SIMD32 shaders will also not be compiled when SIMD16 compilation fails or
spills.
---
 src/intel/compiler/brw_fs.cpp | 37 +++--
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 02e151f..5cceb6c 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7120,6 +7120,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
char **error_str)
 {
const struct gen_device_info *devinfo = compiler->devinfo;
+   bool simd16_failed = false;
+   bool simd16_spilled = false;
 
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler, >tex, true);
@@ -7187,10 +7189,12 @@ brw_compile_fs(const struct brw_compiler *compiler, 
void *log_data,
  shader_time_index16);
   v16.import_uniforms();
   if (!v16.run_fs(allow_spilling, use_rep_send)) {
+ simd16_failed = true;
  compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",
v16.fail_msg);
   } else {
+ simd16_spilled = v16.spilled_any_registers;
  simd16_cfg = v16.cfg;
  prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
  prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
@@ -7198,9 +7202,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
}
 
/* Currently, the compiler only supports SIMD32 on SNB+ */
+   const brw_simd32_heuristics_control *ctrl = 
>simd32_heuristics_control;
+   uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0;
+
if (v8.max_dispatch_width >= 32 && !use_rep_send &&
compiler->devinfo->gen >= 6 &&
-   unlikely(INTEL_DEBUG & DEBUG_DO32)) {
+   (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+(unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+ !simd16_failed && !simd16_spilled &&
+ (!ctrl->mrt_check ||
+  (ctrl->mrt_check &&
+  u_count_bits64() <= ctrl->max_mrts) {
   /* Try a SIMD32 compile */
   fs_visitor v32(compiler, log_data, mem_ctx, key,
  _data->base, prog, shader, 32,
@@ -7211,9 +7223,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
"SIMD32 shader failed to compile: %s",
v32.fail_msg);
   } else {
- simd32_cfg = v32.cfg;
- prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
- prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+ if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) ||
+  v32.run_heuristic(ctrl)) {
+simd32_cfg = v32.cfg;
+prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
+prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+ }
   }
}
 
@@ -7292,8 +7307,18 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
}
 
if (simd32_cfg) {
-  prog_data->dispatch_32 = true;
-  prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32);
+  uint32_t offset = g.generate_code(simd32_cfg, 32);
+
+  if (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+  (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+   (!simd16_cfg ||
+(simd16_cfg &&
+ (!ctrl->inst_count_check ||
+ (ctrl->inst_count_check &&
+ (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= 
ctrl->inst_count_ratio)) {
+ prog_data->dispatch_32 = true;
+ prog_data->prog_offset_32 = offset;
+  }
}
 
return g.get_assembly();
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC 1/7] i965: SIMD32 heuristics debug flag

2018-10-15 Thread Toni Lönnberg

Added a new DEBUG_HEUR32 flag to INTEL_DEBUG flags for enabling SIMD32
selection heuristics.
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f..8990d20 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "heur32",  DEBUG_HEUR32 },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index 72d7ca2..c2ca2e2 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_HEUR32  (1ull << 42)
 
 /* These flags are not compatible with the disk shader cache */
 #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
@@ -90,7 +91,7 @@ extern uint64_t INTEL_DEBUG;
 /* These flags may affect program generation */
 #define DEBUG_DISK_CACHE_MASK \
(DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 |  DEBUG_SPILL_FS | \
-   DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32)
+   DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_HEUR32)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC 0/7] i965: SIMD32 selection heuristics

2018-10-15 Thread Toni Lönnberg

Since we have SIMD32 support available for fragment shaders, it would be nice
to actually enable them. The changes proposed here are not meant as the final
solution to SIMD32 selection by any means, they're meant to be a way to
enable SIMD32 in case a customer absolutely needs them to be enabled for
performance before we actually have a proper heuristic in place. The heuristic
is mainly trying to limit regressions.

These heuristics look at a couple of things to make a choice regarding SIMD32
shaders.

1) Number of enabled MRTs
2) Number of grouped texture fetches
3) Instruction count ratio between SIMD16 and SIMD32

The reasoning is that multiple writes tend to thrash the render cache, multiple
grouped texture fetches tend to thrash the sampler and L3 caches, and, with
these things being equal, SIMD32 usually still performs better or
equally well, as long as it can compensate for latency, even if it has a few
more instructions than its SIMD16 counterpart.

A proper heuristic would be looking at whether the shader *actually* can
compensate for latency in any way, which requires some integration to the
scheduler. But as of this moment, the scheduler reports kind of weird
numbers for the cycle counts. To alleviate problems regarding SIMD32, the
scheduler should also try to schedule texture fetches in smaller groups
in general.

The default values have been tuned so that enabling SIMD32 gives benefits
most of the time, without many regressions.

In my runs, mostly with BXT, the biggest boosts and regressions are as
follows:

+38.5% in GLBench5 ALU2
-7.1% in GLBenchmark fill test

The results differ by platform: SKL both regresses and gains less than BXT,
while BSW regresses more and gains less than BXT.

As this is an experimental patch, it is not on by default but has to be
enabled via INTEL_DEBUG, just like forcing SIMD32 on. Furthermore, the
different mechanisms of the heuristic can be controlled via environment
variables/drirc.

Toni Lönnberg (7):
  i965: SIMD32 heuristics debug flag
  i965: SIMD32 heuristics control data
  i965: SIMD32 heuristics control data from drirc
  mesa: Helper functions for counting set bits in a mask
  i965/fs: Save the instruction count of each dispatch width
  i965/fs: SIMD32 selection heuristic based on grouped texture fetches
  i965/fs: Enable all SIMD32 heuristics

 src/intel/common/gen_debug.c |  1 +
 src/intel/common/gen_debug.h |  3 +-
 src/intel/compiler/brw_compiler.h| 11 ++
 src/intel/compiler/brw_fs.cpp| 63 +---
 src/intel/compiler/brw_fs.h  |  4 ++
 src/intel/compiler/brw_fs_generator.cpp  | 12 ++
 src/mesa/drivers/dri/i965/brw_context.c  | 13 +++
 src/mesa/drivers/dri/i965/intel_screen.c | 27 ++
 src/util/bitscan.h   | 25 +
 9 files changed, 152 insertions(+), 7 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: implement buffer to image operations for R32G32B32

2018-10-15 Thread Bas Nieuwenhuizen

On Mon, Oct 15, 2018 at 12:35 PM Samuel Pitoiset
 wrote:
>
> This should fix rendering issues with Batman Arkham City.
> We will probably need to implement itob and itoi at some
> point, but currently nothing hits these paths.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107765
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_bufimage.c | 358 
>  src/amd/vulkan/radv_meta_copy.c |   8 +-
>  src/amd/vulkan/radv_private.h   |   5 +
>  3 files changed, 369 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_bufimage.c 
> b/src/amd/vulkan/radv_meta_bufimage.c
> index 9efb971638..57e0732cce 100644
> --- a/src/amd/vulkan/radv_meta_bufimage.c
> +++ b/src/amd/vulkan/radv_meta_bufimage.c
> @@ -483,6 +483,214 @@ radv_device_finish_meta_btoi_state(struct radv_device 
> *device)
>  state->btoi.pipeline_3d, >alloc);
>  }
>
> +/* Buffer to image - special path for R32G32B32 */
> +static nir_shader *
> +build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
> +{
> +   nir_builder b;
> +   const struct glsl_type *buf_type = 
> glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
> +false,
> +false,
> +GLSL_TYPE_FLOAT);
> +   const struct glsl_type *img_type = 
> glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
> +false,
> +false,
> +GLSL_TYPE_FLOAT);
> +   nir_builder_init_simple_shader(, NULL, MESA_SHADER_COMPUTE, NULL);
> +   b.shader->info.name = ralloc_strdup(b.shader, 
> "meta_btoi_r32g32b32_cs");
> +   b.shader->info.cs.local_size[0] = 16;
> +   b.shader->info.cs.local_size[1] = 16;
> +   b.shader->info.cs.local_size[2] = 1;
> +   nir_variable *input_img = nir_variable_create(b.shader, 
> nir_var_uniform,
> + buf_type, "s_tex");
> +   input_img->data.descriptor_set = 0;
> +   input_img->data.binding = 0;
> +
> +   nir_variable *output_img = nir_variable_create(b.shader, 
> nir_var_uniform,
> +  img_type, "out_img");
> +   output_img->data.descriptor_set = 0;
> +   output_img->data.binding = 1;
> +
> +   nir_ssa_def *invoc_id = nir_load_system_value(, 
> nir_intrinsic_load_local_invocation_id, 0);
> +   nir_ssa_def *wg_id = nir_load_system_value(, 
> nir_intrinsic_load_work_group_id, 0);
> +   nir_ssa_def *block_size = nir_imm_ivec4(,
> +   
> b.shader->info.cs.local_size[0],
> +   
> b.shader->info.cs.local_size[1],
> +   
> b.shader->info.cs.local_size[2], 0);
> +
> +   nir_ssa_def *global_id = nir_iadd(, nir_imul(, wg_id, 
> block_size), invoc_id);
> +
> +   nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, 
> nir_intrinsic_load_push_constant);
> +   nir_intrinsic_set_base(offset, 0);
> +   nir_intrinsic_set_range(offset, 16);
> +   offset->src[0] = nir_src_for_ssa(nir_imm_int(, 0));
> +   offset->num_components = 2;
> +   nir_ssa_dest_init(>instr, >dest, 2, 32, "offset");
> +   nir_builder_instr_insert(, >instr);
> +
> +   nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, 
> nir_intrinsic_load_push_constant);
> +   nir_intrinsic_set_base(pitch, 0);
> +   nir_intrinsic_set_range(pitch, 16);
> +   pitch->src[0] = nir_src_for_ssa(nir_imm_int(, 8));
> +   pitch->num_components = 1;
> +   nir_ssa_dest_init(>instr, >dest, 1, 32, "pitch");
> +   nir_builder_instr_insert(, >instr);
> +
> +   nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, 
> nir_intrinsic_load_push_constant);
> +   nir_intrinsic_set_base(stride, 0);
> +   nir_intrinsic_set_range(stride, 16);
> +   stride->src[0] = nir_src_for_ssa(nir_imm_int(, 12));
> +   stride->num_components = 1;
> +   nir_ssa_dest_init(>instr, >dest, 1, 32, "stride");
> +   nir_builder_instr_insert(, >instr);
> +
> +   nir_ssa_def *pos_x = nir_channel(, global_id, 0);
> +   nir_ssa_def *pos_y = nir_channel(, global_id, 1);
> +
> +   nir_ssa_def *tmp = nir_imul(, pos_y, >dest.ssa);
> +   tmp = nir_iadd(, tmp, pos_x);
> +
> +   nir_ssa_def *buf_coord = nir_vec4(, tmp, tmp, tmp, tmp);
> +
> +   nir_ssa_def *img_coord = nir_iadd(, global_id, >dest.ssa);
> +
> +   nir_ssa_def *global_pos =
> +   nir_iadd(,
> +nir_imul(, pos_y, >dest.ssa),
> +nir_imul(, pos_x, nir_imm_int(, 3)));
> +
> +   nir_ssa_def

[Mesa-dev] [PATCH] radv: bump discreteQueuePriorities to 2

2018-10-15 Thread Samuel Pitoiset

It's the minimum value required by the spec.

This fixes dEQP-VK.api.info.device.properties.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 88786c999de..73d684c2caa 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -979,7 +979,7 @@ void radv_GetPhysicalDeviceProperties(
.maxClipDistances = 8,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances  = 8,
-   .discreteQueuePriorities  = 1,
+   .discreteQueuePriorities  = 2,
.pointSizeRange   = { 0.125, 255.875 },
.lineWidthRange   = { 0.0, 7.9921875 },
.pointSizeGranularity = (1.0 / 8.0),
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: do not force the flat qualifier for clip/cull distances

2018-10-15 Thread Samuel Pitoiset

This fixes some new CTS tests that read clip/cull distances
from the fragment shader stage:

dEQP-VK.clipping.user_defined.clip_*

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_pipeline.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 426b417e172..e1d665d0ac7 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3107,14 +3107,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf 
*cs,
 
vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
-   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, true);
+   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, false);
++ps_offset;
}
 
vs_offset = 
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
ps->info.info.ps.num_input_clips_culls > 4) {
-   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, true);
+   ps_input_cntl[ps_offset] = 
offset_to_ps_input(vs_offset, false);
++ps_offset;
}
}
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 09/13] anv: Implement vkCmdDrawIndirectByteCountEXT

2018-10-15 Thread Lionel Landwerlin


On 13/10/2018 14:09, Jason Ekstrand wrote:

Annoyingly, this requires that we implement integer division on the
command streamer.  Fortunately, we're only ever dividing by constants so
we can use the mulh+add+shift trick and it's not as bad as it sounds.
---
  src/intel/vulkan/anv_device.c  |   2 +-
  src/intel/vulkan/genX_cmd_buffer.c | 150 +
  2 files changed, 151 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 6395656dfd0..acca97b53d1 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1226,7 +1226,7 @@ void anv_GetPhysicalDeviceProperties2(
   props->transformFeedbackQueries = VK_FALSE;
   props->transformFeedbackStreamsLinesTriangles = VK_FALSE;
   props->transformFeedbackRasterizationStreamSelect = VK_TRUE;
- props->transformFeedbackDraw = VK_FALSE;
+ props->transformFeedbackDraw = VK_TRUE;
   break;
}
  
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c

index 90469abbf21..1fbcfaf4264 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -27,6 +27,7 @@
  #include "anv_private.h"
  #include "vk_format_info.h"
  #include "vk_util.h"
+#include "util/fast_idiv_by_const.h"
  
  #include "common/gen_l3_config.h"

  #include "genxml/gen_macros.h"
@@ -2880,7 +2881,156 @@ emit_mul_gpr0(struct anv_batch *batch, uint32_t N)
 build_alu_multiply_gpr0(dw + 1, _dwords, N);
  }
  
+static void

+emit_alu_add(struct anv_batch *batch, unsigned dst_reg,
+ unsigned reg_a, unsigned reg_b)
+{
+   uint32_t *dw = anv_batch_emitn(batch, 1 + 4, GENX(MI_MATH));
+   dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, reg_a);
+   dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, reg_b);
+   dw[3] = mi_alu(MI_ALU_ADD, 0, 0);
+   dw[4] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU);
+}
+
+static void
+emit_add32_gpr0(struct anv_batch *batch, uint32_t N)
+{
+   emit_lri(batch, CS_GPR(1), N);
+   emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1);
+}
+
+static void
+emit_alu_shl(struct anv_batch *batch, unsigned dst_reg,
+ unsigned src_reg, unsigned shift)
+{
+   assert(shift > 0);
+
+   uint32_t *dw = anv_batch_emitn(batch, 1 + 4 * shift, GENX(MI_MATH));
+   for (unsigned i = 0; i < shift; i++) {
+  unsigned add_src = (i == 0) ? src_reg : dst_reg;
+  dw[1 + (i * 4) + 0] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, add_src);
+  dw[1 + (i * 4) + 1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, add_src);
+  dw[1 + (i * 4) + 2] = mi_alu(MI_ALU_ADD, 0, 0);
+  dw[1 + (i * 4) + 3] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU);
+   }
+}
+
+static void
+emit_div32_gpr0(struct anv_batch *batch, uint32_t D)
+{
+   /* Zero out the top of GPR0 */
+   emit_lri(batch, CS_GPR(0) + 4, 0);
+
+   if (D == 0) {
+  /* This invalid, but we should do something so we set GPR0 to 0. */
+  emit_lri(batch, CS_GPR(0), 0);
+   } else if (util_is_power_of_two_or_zero(D)) {
+  unsigned log2_D = util_logbase2(D);
+  assert(log2_D < 32);
+  /* We right-shift by log2(D) by left-shifting by 32 - log2(D) and taking
+   * the top 32 bits of the result.
+   */
+  emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - log2_D);
+  emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
+  emit_lri(batch, CS_GPR(0) + 4, 0);
+   } else {
+  struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
+  assert(m.multiplier <= UINT32_MAX);
+
+  if (m.pre_shift) {
+ /* We right-shift by L by left-shifting by 32 - l and taking the top
+  * 32 bits of the result.
+  */
+ if (m.pre_shift < 32)
+emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - m.pre_shift);
+ emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
+ emit_lri(batch, CS_GPR(0) + 4, 0);
+  }
+
+  /* Do the 32x32 multiply  into gpr0 */
+  emit_mul_gpr0(batch, m.multiplier);
+
+  if (m.increment) {
+ /* If we need to increment, save off a copy of GPR0 */
+ emit_lri(batch, CS_GPR(1) + 0, m.multiplier);
+ emit_lri(batch, CS_GPR(1) + 4, 0);
+ emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1);
+  }
+
+  /* Shift by 32 */
+  emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
+  emit_lri(batch, CS_GPR(0) + 4, 0);
+
+  if (m.post_shift) {
+ /* We right-shift by L by left-shifting by 32 - l and taking the top
+  * 32 bits of the result.
+  */
+ if (m.post_shift < 32)
+emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - m.post_shift);
+ emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
+ emit_lri(batch, CS_GPR(0) + 4, 0);
+  }
+   }
+}
+
+#endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */
+
+void genX(CmdDrawIndirectByteCountEXT)(
+VkCommandBuffer commandBuffer,
+uint32_t

Re: [Mesa-dev] [PATCH 08/13] anv: Implement the basic form of VK_EXT_transform_feedback

2018-10-15 Thread Lionel Landwerlin


On 13/10/2018 14:09, Jason Ekstrand wrote:

---
  src/intel/vulkan/anv_cmd_buffer.c  |  29 +++
  src/intel/vulkan/anv_device.c  |  24 ++
  src/intel/vulkan/anv_extensions.py |   2 +-
  src/intel/vulkan/anv_pipeline.c|  10 ++-
  src/intel/vulkan/anv_private.h |  13 +++
  src/intel/vulkan/genX_cmd_buffer.c | 125 +
  src/intel/vulkan/genX_pipeline.c   | 122 
  7 files changed, 323 insertions(+), 2 deletions(-)



...



+
+  int max_decls = 0;
+  for (unsigned s = 0; s < MAX_XFB_STREAMS; s++)
+ max_decls = MAX2(max_decls, decls[s]);
+
+  uint8_t sbs[MAX_XFB_STREAMS] = { };
+  for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) {
+ if (xfb_info->buffers_written & (1 << b))
+sbs[xfb_info->buffer_to_stream[b]] |= 1 << b;
+  }
+
+  uint32_t *dw = anv_batch_emitn(>batch, 3 + 2 * max_decls,
+ GENX(3DSTATE_SO_DECL_LIST),
+ .StreamtoBufferSelects0 = sbs[0],
+ .StreamtoBufferSelects1 = sbs[1],
+ .StreamtoBufferSelects2 = sbs[2],
+ .StreamtoBufferSelects3 = sbs[3],
+ .NumEntries0 = decls[0],
+ .NumEntries1 = decls[1],
+ .NumEntries2 = decls[2],
+ .NumEntries3 = decls[3]);
+
+  for (int i = 0; i < max_decls; i++) {
+ GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2,
+&(struct GENX(SO_DECL_ENTRY)) {
+   .Stream0Decl = so_decl[0][i],
+   .Stream1Decl = so_decl[1][i],
+   .Stream2Decl = so_decl[2][i],
+   .Stream3Decl = so_decl[3][i],
+});
+  }



Documentation of 3DSTATE_SO_DECL_LIST Gen10+ says :

"

This command must be followed by a PIPE_CONTROL with CS Stall bit set.
"



 }
+#endif /* GEN_GEN >= 8 */
  }
  
  static uint32_t



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: implement buffer to image operations for R32G32B32

2018-10-15 Thread Timothy Arceri


Tested-by: Timothy Arceri 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: implement buffer to image operations for R32G32B32

2018-10-15 Thread Samuel Pitoiset

This should fix rendering issues with Batman Arkham City.
We will probably need to implement itob and itoi at some
point, but currently nothing hits these paths.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107765
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_bufimage.c | 358 
 src/amd/vulkan/radv_meta_copy.c |   8 +-
 src/amd/vulkan/radv_private.h   |   5 +
 3 files changed, 369 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_bufimage.c 
b/src/amd/vulkan/radv_meta_bufimage.c
index 9efb971638..57e0732cce 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -483,6 +483,214 @@ radv_device_finish_meta_btoi_state(struct radv_device 
*device)
 state->btoi.pipeline_3d, >alloc);
 }
 
+/* Buffer to image - special path for R32G32B32 */
+static nir_shader *
+build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
+{
+   nir_builder b;
+   const struct glsl_type *buf_type = 
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
+false,
+false,
+GLSL_TYPE_FLOAT);
+   const struct glsl_type *img_type = 
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
+false,
+false,
+GLSL_TYPE_FLOAT);
+   nir_builder_init_simple_shader(, NULL, MESA_SHADER_COMPUTE, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
+   b.shader->info.cs.local_size[0] = 16;
+   b.shader->info.cs.local_size[1] = 16;
+   b.shader->info.cs.local_size[2] = 1;
+   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+ buf_type, "s_tex");
+   input_img->data.descriptor_set = 0;
+   input_img->data.binding = 0;
+
+   nir_variable *output_img = nir_variable_create(b.shader, 
nir_var_uniform,
+  img_type, "out_img");
+   output_img->data.descriptor_set = 0;
+   output_img->data.binding = 1;
+
+   nir_ssa_def *invoc_id = nir_load_system_value(, 
nir_intrinsic_load_local_invocation_id, 0);
+   nir_ssa_def *wg_id = nir_load_system_value(, 
nir_intrinsic_load_work_group_id, 0);
+   nir_ssa_def *block_size = nir_imm_ivec4(,
+   b.shader->info.cs.local_size[0],
+   b.shader->info.cs.local_size[1],
+   
b.shader->info.cs.local_size[2], 0);
+
+   nir_ssa_def *global_id = nir_iadd(, nir_imul(, wg_id, block_size), 
invoc_id);
+
+   nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_load_push_constant);
+   nir_intrinsic_set_base(offset, 0);
+   nir_intrinsic_set_range(offset, 16);
+   offset->src[0] = nir_src_for_ssa(nir_imm_int(, 0));
+   offset->num_components = 2;
+   nir_ssa_dest_init(>instr, >dest, 2, 32, "offset");
+   nir_builder_instr_insert(, >instr);
+
+   nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_load_push_constant);
+   nir_intrinsic_set_base(pitch, 0);
+   nir_intrinsic_set_range(pitch, 16);
+   pitch->src[0] = nir_src_for_ssa(nir_imm_int(, 8));
+   pitch->num_components = 1;
+   nir_ssa_dest_init(>instr, >dest, 1, 32, "pitch");
+   nir_builder_instr_insert(, >instr);
+
+   nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, 
nir_intrinsic_load_push_constant);
+   nir_intrinsic_set_base(stride, 0);
+   nir_intrinsic_set_range(stride, 16);
+   stride->src[0] = nir_src_for_ssa(nir_imm_int(, 12));
+   stride->num_components = 1;
+   nir_ssa_dest_init(>instr, >dest, 1, 32, "stride");
+   nir_builder_instr_insert(, >instr);
+
+   nir_ssa_def *pos_x = nir_channel(, global_id, 0);
+   nir_ssa_def *pos_y = nir_channel(, global_id, 1);
+
+   nir_ssa_def *tmp = nir_imul(, pos_y, >dest.ssa);
+   tmp = nir_iadd(, tmp, pos_x);
+
+   nir_ssa_def *buf_coord = nir_vec4(, tmp, tmp, tmp, tmp);
+
+   nir_ssa_def *img_coord = nir_iadd(, global_id, >dest.ssa);
+
+   nir_ssa_def *global_pos =
+   nir_iadd(,
+nir_imul(, pos_y, >dest.ssa),
+nir_imul(, pos_x, nir_imm_int(, 3)));
+
+   nir_ssa_def *input_img_deref = _build_deref_var(, 
input_img)->dest.ssa;
+
+   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+   tex->op = nir_texop_txf;
+   tex->src[0].src_type = nir_tex_src_coord;
+   tex->src[0].src =

[Mesa-dev] [Bug 108365] swr: segfault when running test case GLES2.functional.vertex_arrays.multiple_attributes.stride.3_float2_17_float2_17_float2_0

2018-10-15 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=108365

Bug ID: 108365
   Summary: swr: segfault when running test case
GLES2.functional.vertex_arrays.multiple_attributes.str
ide.3_float2_17_float2_17_float2_0
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Gallium/swr
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: gw.foss...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 142026
  --> https://bugs.freedesktop.org/attachment.cgi?id=142026=edit
glxinfo output

valgrind backtrace: 

Test case
'dEQP-GLES2.functional.vertex_arrays.multiple_attributes.stride.3_float2_17_float2_17_float2_0'..
==27162== Invalid read of size 4
==27162==at 0x916FD14: swr_update_derived(pipe_context*, pipe_draw_info
const*) (swr_state.cpp:1291)
==27162==by 0x916A692: swr_draw_vbo(pipe_context*, pipe_draw_info const*)
(swr_draw.cpp:61)
==27162==by 0x90C2BD4: u_vbuf_draw_vbo (u_vbuf.c:1449)
==27162==by 0x941683A: st_draw_vbo (st_draw.c:236)
==27162==by 0x957A9B3: vbo_draw_arrays (vbo_exec_array.c:406)
==27162==by 0x957B699: vbo_exec_DrawArrays (vbo_exec_array.c:565)
==27162==by 0x5E60D5: glu::CallLogWrapper::glDrawArrays(unsigned int, int,
int) (gluCallLogWrapper.inl:1222)
==27162==by 0x51A2E0:
deqp::gls::ContextArrayPack::render(deqp::gls::Array::Primitive, int, int,
bool, float, float) (glsVertexArrayTests.cpp:1189)
==27162==by 0x5172BB: deqp::gls::MultiVertexArrayTest::iterate()
(glsVertexArrayTests.cpp:2129)
==27162==by 0x1689C9: deqp::gles2::TestCaseWrapper::iterate(tcu::TestCase*)
(tes2TestPackage.cpp:91)
==27162==by 0x7413C2:
tcu::TestSessionExecutor::iterateTestCase(tcu::TestCase*)
(tcuTestSessionExecutor.cpp:299)
==27162==Address 0x4 is not stack'd, malloc'd or (recently) free'd  


Host: Intel Kabylake 
OS:   Ubuntu 18.04

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 02/15] nir: fix compacting varyings when XFB outputs are present

2018-10-15 Thread Samuel Pitoiset




On 10/14/18 11:30 PM, Timothy Arceri wrote:

What happened to v2?


This one isn't up-to-date.



On 13/10/18 11:57 pm, Samuel Pitoiset wrote:

We shouldn't try to compact any varyings known as always
active IO, especially XFB outputs. For example, if one
component of an xfb output is also used as input varying
in the next stage, it shouldn't be compacted.

Because we look at the input varyings from the consumer
stage, we don't know if one of them is an XFB output. One
solution is to mark all components as used when
always_active_io is true to avoid wrong remapping.

Signed-off-by: Samuel Pitoiset 
---
  src/compiler/nir/nir_linking_helpers.c | 9 +
  1 file changed, 9 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c

index 85712a7cb1..88014e9a1d 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -236,6 +236,15 @@ get_slot_component_masks_and_interp_types(struct 
exec_list *var_list,

 get_interp_type(var, default_to_smooth_interp);
  interp_loc[location + i] = get_interp_loc(var);
+    if (var->data.always_active_io) {
+   /* Mark all components as used to avoid repacting xfb 
varyings
+    * wrongly. For instance, if one component of an xfb 
output is

+    * also used as input varying in the next stage.
+    */
+   comps[location + i] |= 0xf;
+   continue;
+    }
+
  if (dual_slot) {
 if (i & 1) {
    comps[location + i] |= ((1 << comps_slot2) - 1);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/15] vulkan: Update the XML and headers to 1.1.88

2018-10-15 Thread Samuel Pitoiset




On 10/15/18 9:54 AM, Bas Nieuwenhuizen wrote:

On Mon, Oct 15, 2018 at 9:37 AM Jason Ekstrand  wrote:


Acked-by: Jason Ekstrand 

Let's land the header.

As far as I can tell, you did that yesterday already:
https://gitlab.freedesktop.org/mesa/mesa/commit/13fd4e601c80b85317feac894c52bd24e4f98c09
?


Jason pushed the patch I guess.





On Sat, Oct 13, 2018 at 7:55 AM Samuel Pitoiset  
wrote:


Signed-off-by: Samuel Pitoiset 
---
  include/vulkan/vulkan_core.h | 244 -
  src/vulkan/registry/vk.xml   | 253 ++-
  2 files changed, 462 insertions(+), 35 deletions(-)

diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
index 39f4dc6f05..a7780a0f76 100644
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
@@ -43,7 +43,7 @@ extern "C" {
  #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
  #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
  // Version of this file
-#define VK_HEADER_VERSION 86
+#define VK_HEADER_VERSION 88


  #define VK_NULL_HANDLE 0
@@ -147,6 +147,7 @@ typedef enum VkResult {
  VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -103001,
  VK_ERROR_VALIDATION_FAILED_EXT = -111001,
  VK_ERROR_INVALID_SHADER_NV = -112000,
+VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158000,
  VK_ERROR_FRAGMENTATION_EXT = -1000161000,
  VK_ERROR_NOT_PERMITTED_EXT = -1000174001,
  VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY,
@@ -297,6 +298,9 @@ typedef enum VkStructureType {
  VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV = 126000,
  VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_BUFFER_CREATE_INFO_NV = 126001,
  VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV = 
126002,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT = 
128000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT = 
128001,
+VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT = 
128002,
  VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 141000,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CORNER_SAMPLED_IMAGE_FEATURES_NV = 
15,
  VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 156000,
@@ -398,6 +402,12 @@ typedef enum VkStructureType {
  VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_ADVANCED_STATE_CREATE_INFO_EXT = 
1000148002,
  VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_TO_COLOR_STATE_CREATE_INFO_NV = 
1000149000,
  VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_MODULATION_STATE_CREATE_INFO_NV = 
1000152000,
+VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT = 1000158000,
+VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT = 1000158001,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT = 
1000158002,
+VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT = 
1000158003,
+VK_STRUCTURE_TYPE_IMAGE_EXCPLICIT_DRM_FORMAT_MODIFIER_CREATE_INFO_EXT = 
1000158004,
+VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT = 1000158005,
  VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT = 100016,
  VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT = 
1000160001,
  VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT = 
1000161000,
@@ -428,6 +438,7 @@ typedef enum VkStructureType {
  VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT = 1000178001,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT = 
1000178002,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR = 
100018,
+VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT = 1000184000,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD = 1000185000,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT 
= 100019,
  VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT = 
1000190001,
@@ -443,6 +454,8 @@ typedef enum VkStructureType {
  VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV = 1000206000,
  VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV = 1000206001,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = 
1000211000,
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT = 1000212000,
+VK_STRUCTURE_TYPE_IMAGEPIPE_SURFACE_CREATE_INFO_FUCHSIA = 1000214000,
  VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 
VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
  VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = 
VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
@@ -811,6 +824,7 @@ typedef enum VkImageType {
  typedef enum VkImageTiling {
  VK_IMAGE_TILING_OPTIMAL = 0,
  VK_IMAGE_TILING_LINEAR = 1,
+

Re: [Mesa-dev] [PATCH] radv: Implement VK_EXT_pci_bus_info.

2018-10-15 Thread Samuel Pitoiset


Reviewed-by: Samuel Pitoiset 

On 10/13/18 7:21 PM, Bas Nieuwenhuizen wrote:

---
  src/amd/vulkan/radv_device.c  | 10 ++
  src/amd/vulkan/radv_extensions.py |  1 +
  src/amd/vulkan/radv_private.h |  2 ++
  3 files changed, 13 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 88786c999de..c418176978e 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -389,6 +389,7 @@ radv_physical_device_init(struct radv_physical_device 
*device,
if ((device->instance->debug_flags & RADV_DEBUG_INFO))
ac_print_gpu_info(>rad_info);
  
+	device->bus_info = *drm_device->businfo.pci;

return VK_SUCCESS;
  
  fail:

@@ -1190,6 +1191,15 @@ void radv_GetPhysicalDeviceProperties2(
properties->conservativeRasterizationPostDepthCoverage 
= VK_FALSE;
break;
}
+   case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
+   VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
+   (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
+   properties->pciDomain = pdevice->bus_info.domain;
+   properties->pciBus = pdevice->bus_info.bus;
+   properties->pciDevice = pdevice->bus_info.dev;
+   properties->pciFunction = pdevice->bus_info.func;
+   break;
+   }
default:
break;
}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 584926df390..5dcedae1c63 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -103,6 +103,7 @@ EXTENSIONS = [
  Extension('VK_EXT_external_memory_dma_buf',   1, True),
  Extension('VK_EXT_external_memory_host',  1, 
'device->rad_info.has_userptr'),
  Extension('VK_EXT_global_priority',   1, 
'device->rad_info.has_ctx_priority'),
+Extension('VK_EXT_pci_bus_info',  1, True),
  Extension('VK_EXT_sampler_filter_minmax', 1, 
'device->rad_info.chip_class >= CIK'),
  Extension('VK_EXT_shader_viewport_index_layer',   1, True),
  Extension('VK_EXT_shader_stencil_export', 1, True),
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 33ba8a7354b..b35aa8d818f 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -312,6 +312,8 @@ struct radv_physical_device {
VkPhysicalDeviceMemoryProperties memory_properties;
enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
  
+	drmPciBusInfo bus_info;

+
struct radv_device_extension_table supported_extensions;
  };
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] i965/fs: Add 64-bit int immediate support to dump_instructions()

2018-10-15 Thread Iago Toral

Patches 1 and 2 are:

Reviewed-by: Iago Toral Quiroga 

On Sun, 2018-10-14 at 15:11 -0700, Matt Turner wrote:
> ---
>  src/intel/compiler/brw_fs.cpp   | 6 ++
>  src/intel/compiler/brw_shader.h | 2 ++
>  2 files changed, 8 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp
> b/src/intel/compiler/brw_fs.cpp
> index 23a25fedca5..69726ed70e8 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6023,6 +6023,12 @@
> fs_visitor::dump_instruction(backend_instruction *be_inst, FILE
> *file)
>   case BRW_REGISTER_TYPE_UD:
>  fprintf(file, "%uu", inst->src[i].ud);
>  break;
> + case BRW_REGISTER_TYPE_Q:
> +fprintf(file, "%" PRId64 "q", inst->src[i].d64);
> +break;
> + case BRW_REGISTER_TYPE_UQ:
> +fprintf(file, "%" PRIu64 "uq", inst->src[i].u64);
> +break;
>   case BRW_REGISTER_TYPE_VF:
>  fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
>  brw_vf_to_float((inst->src[i].ud >>  0) & 0xff),
> diff --git a/src/intel/compiler/brw_shader.h
> b/src/intel/compiler/brw_shader.h
> index 7d97ddbd868..e1d598b8781 100644
> --- a/src/intel/compiler/brw_shader.h
> +++ b/src/intel/compiler/brw_shader.h
> @@ -89,6 +89,8 @@ struct backend_reg : private brw_reg
> using brw_reg::f;
> using brw_reg::d;
> using brw_reg::ud;
> +   using brw_reg::d64;
> +   using brw_reg::u64;
>  };
>  #endif
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/15] vulkan: Update the XML and headers to 1.1.88

2018-10-15 Thread Bas Nieuwenhuizen

On Mon, Oct 15, 2018 at 9:37 AM Jason Ekstrand  wrote:
>
> Acked-by: Jason Ekstrand 
>
> Let's land the header.
As far as I can tell, you did that yesterday already:
https://gitlab.freedesktop.org/mesa/mesa/commit/13fd4e601c80b85317feac894c52bd24e4f98c09
?

>
> On Sat, Oct 13, 2018 at 7:55 AM Samuel Pitoiset  
> wrote:
>>
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  include/vulkan/vulkan_core.h | 244 -
>>  src/vulkan/registry/vk.xml   | 253 ++-
>>  2 files changed, 462 insertions(+), 35 deletions(-)
>>
>> diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
>> index 39f4dc6f05..a7780a0f76 100644
>> --- a/include/vulkan/vulkan_core.h
>> +++ b/include/vulkan/vulkan_core.h
>> @@ -43,7 +43,7 @@ extern "C" {
>>  #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
>>  #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
>>  // Version of this file
>> -#define VK_HEADER_VERSION 86
>> +#define VK_HEADER_VERSION 88
>>
>>
>>  #define VK_NULL_HANDLE 0
>> @@ -147,6 +147,7 @@ typedef enum VkResult {
>>  VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001,
>>  VK_ERROR_VALIDATION_FAILED_EXT = -1000011001,
>>  VK_ERROR_INVALID_SHADER_NV = -1000012000,
>> +VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158000,
>>  VK_ERROR_FRAGMENTATION_EXT = -1000161000,
>>  VK_ERROR_NOT_PERMITTED_EXT = -1000174001,
>>  VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY,
>> @@ -297,6 +298,9 @@ typedef enum VkStructureType {
>>  VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV = 
>> 1000026000,
>>  VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_BUFFER_CREATE_INFO_NV = 
>> 1000026001,
>>  VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV = 
>> 1000026002,
>> +VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT = 
>> 1000028000,
>> +VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT = 
>> 1000028001,
>> +VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT = 
>> 1000028002,
>>  VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 1000041000,
>>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CORNER_SAMPLED_IMAGE_FEATURES_NV = 
>> 1000050000,
>>  VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 1000056000,
>> @@ -398,6 +402,12 @@ typedef enum VkStructureType {
>>  VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_ADVANCED_STATE_CREATE_INFO_EXT = 
>> 1000148002,
>>  VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_TO_COLOR_STATE_CREATE_INFO_NV = 
>> 1000149000,
>>  VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_MODULATION_STATE_CREATE_INFO_NV = 
>> 1000152000,
>> +VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT = 1000158000,
>> +VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT = 1000158001,
>> +VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT = 
>> 1000158002,
>> +VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT = 
>> 1000158003,
>> +VK_STRUCTURE_TYPE_IMAGE_EXCPLICIT_DRM_FORMAT_MODIFIER_CREATE_INFO_EXT = 
>> 1000158004,
>> +VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT = 1000158005,
>>  VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT = 1000160000,
>>  VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT = 
>> 1000160001,
>>  VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT = 
>> 1000161000,
>> @@ -428,6 +438,7 @@ typedef enum VkStructureType {
>>  VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT = 1000178001,
>>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT = 
>> 1000178002,
>>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR = 
>> 1000180000,
>> +VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT = 1000184000,
>>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD = 
>> 1000185000,
>>  
>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT = 
>> 1000190000,
>>  VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT = 
>> 1000190001,
>> @@ -443,6 +454,8 @@ typedef enum VkStructureType {
>>  VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV = 1000206000,
>>  VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV = 1000206001,
>>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = 
>> 1000211000,
>> +VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT = 
>> 1000212000,
>> +VK_STRUCTURE_TYPE_IMAGEPIPE_SURFACE_CREATE_INFO_FUCHSIA = 1000214000,
>>  VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 
>> VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
>>  VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = 
>> VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
>>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = 
>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
>> @@ -811,6

Re: [Mesa-dev] [PATCH 04/15] radv: gather the number of streams used by geometry shaders

2018-10-15 Thread Bas Nieuwenhuizen

On Sat, Oct 13, 2018 at 2:56 PM Samuel Pitoiset
 wrote:
>
> This will be used for splitting the GS->VS ring buffer. The
> stream ID is always 0 for now.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_shader.h  |  1 +
>  src/amd/vulkan/radv_shader_info.c | 14 ++
>  2 files changed, 15 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
> index c490b69f52..f02ca1cf8d 100644
> --- a/src/amd/vulkan/radv_shader.h
> +++ b/src/amd/vulkan/radv_shader.h
> @@ -159,6 +159,7 @@ struct radv_shader_info {
> } vs;
> struct {
> uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
> +   uint8_t max_stream;

With 0 outputs, should we be able to support 0 streams? If so we might
need to change this to a count.

Otherwise,

Reviewed-by: Bas Nieuwenhuizen 
> } gs;
> struct {
> uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
> diff --git a/src/amd/vulkan/radv_shader_info.c 
> b/src/amd/vulkan/radv_shader_info.c
> index 6262acb1a6..00bc2ca5db 100644
> --- a/src/amd/vulkan/radv_shader_info.c
> +++ b/src/amd/vulkan/radv_shader_info.c
> @@ -434,6 +434,17 @@ gather_info_output_decl_ps(const nir_shader *nir, const 
> nir_variable *var,
> }
>  }
>
> +static void
> +gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var,
> +  struct radv_shader_info *info)
> +{
> +   unsigned stream = var->data.stream;
> +
> +   assert(stream < 4);
> +
> +   info->gs.max_stream = MAX2(info->gs.max_stream, stream);
> +}
> +
>  static void
>  gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
> struct radv_shader_info *info,
> @@ -447,6 +458,9 @@ gather_info_output_decl(const nir_shader *nir, const 
> nir_variable *var,
> if (options->key.vs.as_ls)
> gather_info_output_decl_ls(nir, var, info);
> break;
> +   case MESA_SHADER_GEOMETRY:
> +   gather_info_output_decl_gs(nir, var, info);
> +   break;
> default:
> break;
> }
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nir: fix clip cull lowering to not assert if GLSL already lowered.

2018-10-15 Thread Kenneth Graunke

From: Dave Airlie 

If GLSL has already done the lowering, we'd rather not crash in this pass.

Reviewed-by: Kenneth Graunke 
---
 src/compiler/nir/nir_lower_clip_cull_distance_arrays.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c 
b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
index 86ce5fb1f86..2afbf9285c0 100644
--- a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
+++ b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
@@ -144,6 +144,12 @@ combine_clip_cull(nir_shader *nir,
  cull = var;
}
 
+   /* if the GLSL lowering pass has already run, don't bother repeating */
+   if (!cull && clip) {
+  if (!glsl_type_is_array(clip->type))
+ return false;
+   }
+
const unsigned clip_array_size = get_unwrapped_array_length(nir, clip);
const unsigned cull_array_size = get_unwrapped_array_length(nir, cull);
 
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st/mesa: Pass index to pipe->create_query() for statistics queries.

2018-10-15 Thread Kenneth Graunke

GL exposes separate queries for each pipeline statistics counter.
For some reason, Gallium chose to map them all to a single target,
PIPE_QUERY_PIPELINE_STATISTICS.  Radeon hardware appears to query
them all as a group.  pipe->get_query_result_resource() takes an
index, indicating which to write to the buffer.  The CPU-side hook,
pipe->get_query_result(), simply writes them all, and st/mesa returns
the one that was actually desired.

On Intel hardware, each individual pipeline statistics value is handled
as a separate counter and query.  We can query each individually, and
that is more efficient than querying all 11 counters each time.  But,
we need pipe->get_query_result() to know which one to return.

To handle this, we pass the index into pipe->create_query(), which
was previously always 0 for these queries.  Drivers which return all
of the counters as a group can simply ignore it; drivers querying one
at a time can use it to distinguish between the counters.

This is the least invasive fix, but it is kind of ugly, and I wonder
whether we'd be better off just adding PIPE_QUERY_IA_VERTICES (etc.)
targets...
---
 src/mesa/state_tracker/st_cb_queryobj.c | 76 -
 1 file changed, 36 insertions(+), 40 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_queryobj.c 
b/src/mesa/state_tracker/st_cb_queryobj.c
index 69e6004c3f1..0dc06ceb574 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -88,6 +88,40 @@ st_DeleteQuery(struct gl_context *ctx, struct 
gl_query_object *q)
free(stq);
 }
 
+static int
+target_to_index(const struct gl_query_object *q)
+{
+   switch (q->Target) {
+   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  return q->Stream;
+   case GL_VERTICES_SUBMITTED_ARB:
+  return 0;
+   case GL_PRIMITIVES_SUBMITTED_ARB:
+  return 1;
+   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+  return 2;
+   case GL_GEOMETRY_SHADER_INVOCATIONS:
+  return 3;
+   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+  return 4;
+   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+  return 5;
+   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+  return 6;
+   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+  return 7;
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+  return 8;
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+  return 9;
+   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+  return 10;
+   default:
+  return 0;
+   }
+}
 
 static void
 st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)
@@ -164,7 +198,7 @@ st_BeginQuery(struct gl_context *ctx, struct 
gl_query_object *q)
  ret = pipe->end_query(pipe, stq->pq_begin);
} else {
   if (!stq->pq) {
- stq->pq = pipe->create_query(pipe, type, q->Stream);
+ stq->pq = pipe->create_query(pipe, type, target_to_index(q));
  stq->type = type;
   }
   if (stq->pq)
@@ -383,46 +417,8 @@ st_StoreQueryResult(struct gl_context *ctx, struct 
gl_query_object *q,
 
if (pname == GL_QUERY_RESULT_AVAILABLE) {
   index = -1;
-   } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
-  switch (q->Target) {
-  case GL_VERTICES_SUBMITTED_ARB:
- index = 0;
- break;
-  case GL_PRIMITIVES_SUBMITTED_ARB:
- index = 1;
- break;
-  case GL_VERTEX_SHADER_INVOCATIONS_ARB:
- index = 2;
- break;
-  case GL_GEOMETRY_SHADER_INVOCATIONS:
- index = 3;
- break;
-  case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
- index = 4;
- break;
-  case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
- index = 5;
- break;
-  case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
- index = 6;
- break;
-  case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
- index = 7;
- break;
-  case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- index = 8;
- break;
-  case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- index = 9;
- break;
-  case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
- index = 10;
- break;
-  default:
- unreachable("Unexpected target");
-  }
} else {
-  index = 0;
+  index = target_to_index(q);
}
 
pipe->get_query_result_resource(pipe, stq->pq, wait, result_type, index,
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

88 matches

Mail list logo