[Mesa-dev] [PATCH 1/3] vl: Add cropping flags for H264

2019-04-11 Thread Sahu, Satyajit
From: suresh guttula 

This patch adds cropping flags for H264 in pipe_h264_enc_pic_control.

Signed-off-by: Satyajit Sahu 
---
 src/gallium/include/pipe/p_video_state.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index 05855a36e23..1369f1a8ca6 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -395,6 +395,11 @@ struct pipe_h264_enc_pic_control
 {
unsigned enc_cabac_enable;
unsigned enc_constraint_set_flags;
+   unsigned enc_frame_cropping_flag;
+   unsigned enc_frame_crop_left_offset;
+   unsigned enc_frame_crop_right_offset;
+   unsigned enc_frame_crop_top_offset;
+   unsigned enc_frame_crop_bottom_offset;
 };
 
 struct pipe_h264_enc_picture_desc
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/3] radeon/vce:Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264

2019-04-11 Thread Sahu, Satyajit
From: suresh guttula 

This patch will add support for frame_cropping when the input size is not
matched with aligned size. Currently vaapi driver ignores frame cropping
values provided by client. This change will update SPS nalu with proper
cropping values.

v2: Moving default crop setting to else when enc_frame_cropping_flag is not set.

Signed-off-by: Satyajit Sahu 
---
 src/gallium/drivers/radeon/radeon_vce_52.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c 
b/src/gallium/drivers/radeon/radeon_vce_52.c
index fc7ddc62a90..364da4dbe24 100644
--- a/src/gallium/drivers/radeon/radeon_vce_52.c
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -81,8 +81,15 @@ static void get_pic_control_param(struct rvce_encoder *enc, 
struct pipe_h264_enc
unsigned encNumMBsPerSlice;
encNumMBsPerSlice = align(enc->base.width, 16) / 16;
encNumMBsPerSlice *= align(enc->base.height, 16) / 16;
-   enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 16) - 
enc->base.width) >> 1;
-   enc->enc_pic.pc.enc_crop_bottom_offset = (align(enc->base.height, 16) - 
enc->base.height) >> 1;
+   if (pic->pic_ctrl.enc_frame_cropping_flag) {
+   enc->enc_pic.pc.enc_crop_left_offset = 
pic->pic_ctrl.enc_frame_crop_left_offset;
+   enc->enc_pic.pc.enc_crop_right_offset = 
pic->pic_ctrl.enc_frame_crop_right_offset;
+   enc->enc_pic.pc.enc_crop_top_offset = 
pic->pic_ctrl.enc_frame_crop_top_offset;
+   enc->enc_pic.pc.enc_crop_bottom_offset = 
pic->pic_ctrl.enc_frame_crop_bottom_offset;
+   } else {
+   enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 
16) - enc->base.width) >> 1;
+   enc->enc_pic.pc.enc_crop_bottom_offset = 
(align(enc->base.height, 16) - enc->base.height) >> 1;
+   }
enc->enc_pic.pc.enc_num_mbs_per_slice = encNumMBsPerSlice;
enc->enc_pic.pc.enc_b_pic_pattern = MAX2(enc->base.max_references, 1) - 
1;
enc->enc_pic.pc.enc_number_of_reference_frames = 
MIN2(enc->base.max_references, 2);
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] st/va/enc: Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264

2019-04-11 Thread Sahu, Satyajit
From: suresh guttula 

This patch will add support for frame_cropping when the input size is not
matched with aligned size. Currently vaapi driver ignores frame cropping
values provided by client. This change will update SPS nalu with proper
cropping values.

Signed-off-by: Satyajit Sahu 
---
 src/gallium/state_trackers/va/picture_h264_enc.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/state_trackers/va/picture_h264_enc.c 
b/src/gallium/state_trackers/va/picture_h264_enc.c
index abfd39633de..f46b3425566 100644
--- a/src/gallium/state_trackers/va/picture_h264_enc.c
+++ b/src/gallium/state_trackers/va/picture_h264_enc.c
@@ -127,6 +127,14 @@ vlVaHandleVAEncSequenceParameterBufferTypeH264(vlVaDriver 
*drv, vlVaContext *con
context->desc.h264enc.rate_ctrl.frame_rate_num = h264->time_scale / 2;
context->desc.h264enc.rate_ctrl.frame_rate_den = h264->num_units_in_tick;
context->desc.h264enc.pic_order_cnt_type = 
h264->seq_fields.bits.pic_order_cnt_type;
+
+   if (h264->frame_cropping_flag) {
+  context->desc.h264enc.pic_ctrl.enc_frame_cropping_flag = 
h264->frame_cropping_flag;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_left_offset = 
h264->frame_crop_left_offset;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_right_offset = 
h264->frame_crop_right_offset;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_top_offset = 
h264->frame_crop_top_offset;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_bottom_offset = 
h264->frame_crop_bottom_offset;
+   }
return VA_STATUS_SUCCESS;
 }
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] 2019 X.Org Foundation Elections Results... and a Redo

2019-04-11 Thread Wentland, Harry
Correction on the link for the mock election. That should be 
https://members.x.org/ballot/3/vote, not /admin.

It will also be linked from the members homepage and shown as the current 
ballot.

Harry

On 2019-04-11 8:03 p.m., Harry Wentland wrote:
> To all X.Org Foundation Members:
> 
> The 2019 X.Org ballot closed yesterday. There is some good and some bad news.
> 
> The Good News:
> The vote on the bylaw changes passed with 53 for, 1 against, and 2 abstaining.
> 
> The Bad News:
> Due to some issues with our new members website all votes for new board 
> members were recorded incorrectly. Thankfully this was fairly obvious. We 
> believe we've found the bug and have a fix for it: 
> https://gitlab.freedesktop.org/xorgfoundation/xorg_membership/commit/15f27d45f1d9b1767377814835f2359f7f76c7e5
> 
> The Redo:
> To assure you and us that we've completely fixed the issues with our 
> elections site we have decided to run a brief mock election where you can 
> vote for your favorite pastry. This will run until Monday Apr 15 noon UTC 
> after which we'll tally and publish the results and confirm whether we fixed 
> the issue. Please leave your vote at https://members.x.org/ballot/3/admin
> 
> If this all looks good we will start the new election for board members a 
> week from today, on Apr 18, until May 2.
> 
> You can expect another email from me early-to-mid next week to confirm the 
> start of the new election for board members.
> 
> We received quite a few membership signups after the membership deadline. 
> We've decided to approve all currently pending signups.
> 
> Harry, on behalf of the X.Org elections committee
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] 2019 X.Org Foundation Elections Results... and a Redo

2019-04-11 Thread Wentland, Harry
To all X.Org Foundation Members:

The 2019 X.Org ballot closed yesterday. There is some good and some bad news.

The Good News:
The vote on the bylaw changes passed with 53 for, 1 against, and 2 abstaining.

The Bad News:
Due to some issues with our new members website all votes for new board members 
were recorded incorrectly. Thankfully this was fairly obvious. We believe we've 
found the bug and have a fix for it: 
https://gitlab.freedesktop.org/xorgfoundation/xorg_membership/commit/15f27d45f1d9b1767377814835f2359f7f76c7e5

The Redo:
To assure you and us that we've completely fixed the issues with our elections 
site we have decided to run a brief mock election where you can vote for your 
favorite pastry. This will run until Monday Apr 15 noon UTC after which we'll 
tally and publish the results and confirm whether we fixed the issue. Please 
leave your vote at https://members.x.org/ballot/3/admin

If this all looks good we will start the new election for board members a week 
from today, on Apr 18, until May 2.

You can expect another email from me early-to-mid next week to confirm the 
start of the new election for board members.

We received quite a few membership signups after the membership deadline. We've 
decided to approve all currently pending signups.

Harry, on behalf of the X.Org elections committee
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-04-11 Thread Andres Gomez
On Tue, 2019-04-09 at 08:40 -0700, Dylan Baker wrote:
> Hi Andres,
> 
> This doesn't apply cleanly to the 19.0 branch, and I'm not even sure where to
> start resolving the conflicts. If you still want this in 19.0 can you backport
> this and either create an MR against the staging/19.0 branch and mention me, 
> or
> send a patch to the stable list and CC me? (I prefer the MR :))

https://gitlab.freedesktop.org/mesa/mesa/merge_requests/641

The backport is not the cleanest since I had to pull also some other
change but I think it should be safe.

However, the fix is not that important so it could also be dropped.
Take a look at the (additional) changes and feel free to pick or not.

> 
> Thanks,
> Dylan
> 
> Quoting Andres Gomez (2019-02-01 10:05:52)
> > From: Iago Toral Quiroga 
> > 
> > Regarding location aliasing requirements, the OpenGL spec says:
> > 
> >   "Further, when location aliasing, the aliases sharing the location
> >must have the same underlying numerical type  (floating-point or
> >integer)."
> > 
> > Khronos has further clarified that this also requires the underlying
> > types to have the same width, so we can't put a float and a double
> > in the same location slot for example. Future versions of the spec will
> > be corrected to make this clear.
> > 
> > This patch amends our implementation to account for this restriction.
> > 
> > In the process of doing this, I also noticed that we would attempt
> > to check aliasing requirements for record variables (including the test
> > for the numerical type) which is not allowed, instead, we should be
> > producing a linker error as soon as we see any attempt to do location
> > aliasing on non-numerical variables. For the particular case of structs,
> > we were producing a linker error in this case, but only because we
> > assumed that struct fields use all components in each location, so
> > any attempt to alias locations consumed by struct fields would produce
> > a link error due to component aliasing, which is not accurate of the
> > actual problem. This patch would make it produce an error for attempting
> > to alias a non-numerical variable instead, which is always accurate.
> > 
> > v2:
> >   - Do not assert if we see invalid numerical types. These come
> > straight from shader code, so we should produce linker errors if
> > shaders attempt to do location aliasing on variables that are not
> > numerical such as records.
> >   - While we are at it, improve error reporting for the case of
> > numerical type mismatch to include the shader stage.
> > 
> > v3:
> >   - Allow location aliasing of images and samplers. If we get these
> > it means bindless support is active and they should be handled
> > as 64-bit integers (Ilia)
> >   - Make sure we produce link errors for any non-numerical type
> > for which we attempt location aliasing, not just structs.
> > 
> > v4:
> >   - Rebased with minor fixes (Andres).
> >   - Added fixing tag to the commit log (Andres).
> > 
> > Fixes: 13652e7516a ("glsl/linker: Fix type checks for location aliasing")
> > Cc: Ilia Mirkin 
> > Signed-off-by: Andres Gomez 
> > ---
> >  src/compiler/glsl/link_varyings.cpp | 64 +
> >  1 file changed, 46 insertions(+), 18 deletions(-)
> > 
> > diff --git a/src/compiler/glsl/link_varyings.cpp 
> > b/src/compiler/glsl/link_varyings.cpp
> > index 3969c0120b3..3f41832ac93 100644
> > --- a/src/compiler/glsl/link_varyings.cpp
> > +++ b/src/compiler/glsl/link_varyings.cpp
> > @@ -424,15 +424,15 @@ compute_variable_location_slot(ir_variable *var, 
> > gl_shader_stage stage)
> >  
> >  struct explicit_location_info {
> > ir_variable *var;
> > -   unsigned numerical_type;
> > +   int numerical_type;
> > unsigned interpolation;
> > bool centroid;
> > bool sample;
> > bool patch;
> >  };
> >  
> > -static inline unsigned
> > -get_numerical_type(const glsl_type *type)
> > +static inline int
> > +get_numerical_sized_type(const glsl_type *type)
> >  {
> > /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, 
> > Page 68,
> >  * (Location aliasing):
> > @@ -440,10 +440,25 @@ get_numerical_type(const glsl_type *type)
> >  *"Further, when location aliasing, the aliases sharing the location
> >  * must have the same underlying numerical type  (floating-point or
> >  * integer)
> > +*
> > +* Khronos has further clarified that this also requires the underlying
> > +* types to have the same width, so we can't put a float and a double
> > +* in the same location slot for example. Future versions of the spec 
> > will
> > +* be corrected to make this clear.
> > +*
> > +* Notice that we allow location aliasing for bindless image/samplers 
> > too
> > +* since these are defined as 64-bit integers.
> >  */
> > -   if (type->is_float() || type->is_double())
> > +   if (type->is_float())
> >return GLSL_TYPE_FLOAT;
> > -   return 

Re: [Mesa-dev] [PATCH 2/3] radeon/vce:Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264

2019-04-11 Thread Liu, Leo

On 2019-04-11 6:36 a.m., Sahu, Satyajit wrote:
> From: suresh guttula 
>
> This patch will add support for frame_cropping when the input size is not
> matched with aligned size. Currently vaapi driver ignores frame cropping
> values provided by client. This change will update SPS nalu with proper
> cropping values.
>
> Signed-off-by: Satyajit Sahu 
> ---
>   src/gallium/drivers/radeon/radeon_vce_52.c | 6 ++
>   1 file changed, 6 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c 
> b/src/gallium/drivers/radeon/radeon_vce_52.c
> index fc7ddc62a90..53f7b2f5fb5 100644
> --- a/src/gallium/drivers/radeon/radeon_vce_52.c
> +++ b/src/gallium/drivers/radeon/radeon_vce_52.c
> @@ -83,6 +83,12 @@ static void get_pic_control_param(struct rvce_encoder 
> *enc, struct pipe_h264_enc
>   encNumMBsPerSlice *= align(enc->base.height, 16) / 16;
>   enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 16) - 
> enc->base.width) >> 1;
>   enc->enc_pic.pc.enc_crop_bottom_offset = (align(enc->base.height, 16) - 
> enc->base.height) >> 1;

Please put above 2 lines to the else case of frame_cropping_flag.

With that fixed, the series is

Reviewed-by: Leo Liu 


> + if (pic->pic_ctrl.enc_frame_cropping_flag) {
> + enc->enc_pic.pc.enc_crop_left_offset = 
> pic->pic_ctrl.enc_frame_crop_left_offset;
> + enc->enc_pic.pc.enc_crop_right_offset = 
> pic->pic_ctrl.enc_frame_crop_right_offset;
> + enc->enc_pic.pc.enc_crop_top_offset = 
> pic->pic_ctrl.enc_frame_crop_top_offset;
> + enc->enc_pic.pc.enc_crop_bottom_offset = 
> pic->pic_ctrl.enc_frame_crop_bottom_offset;
> + }
>   enc->enc_pic.pc.enc_num_mbs_per_slice = encNumMBsPerSlice;
>   enc->enc_pic.pc.enc_b_pic_pattern = MAX2(enc->base.max_references, 1) - 
> 1;
>   enc->enc_pic.pc.enc_number_of_reference_frames = 
> MIN2(enc->base.max_references, 2);
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110345] Unrecoverable GPU crash with DiRT 4

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110345

--- Comment #10 from Thomas Rohloff  ---
(In reply to Thomas Rohloff from comment #9)
> So I'll re-run with RADV_DEBUG=nodcc,nohiz,nofastclears,zerovram to confirm
> it was just luck before.

And it froze, too.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Low interpolation precision for 8 bit textures using llvmpipe

2019-04-11 Thread Roland Scheidegger
What version of mesa are you using?
The debug flags were changed a while ago (so that those perf tweaks can
be disabled on release builds too), it needs to be either:
GALLIVM_PERF=no_rho_approx,no_brilinear,no_quad_lod
or easier
GALLIVM_PERF=no_filter_hacks (which disables these 3 things above together)

Although all of that only really affects filtering with mipmaps (not
sure if you do?).


(more below)


Am 11.04.19 um 18:00 schrieb Dominik Drees:
> Running with the suggested flags in the environment does not change the
> result for the test case I described below. The results with and without
> the environment variables set are pixel-wise equal.
> 
> By the way, and if this of interest: For GL_NEAREST sampling the results
> from hardware and llvmpipe are equal as well.
> 
> Best,
> Dominik
> 
> On 4/11/19 4:36 PM, Ilia Mirkin wrote:
>> llvmpipe takes a number of shortcuts in the interest of speed which
>> cause inaccurate texturing. Try running with
>>
>> GALLIVM_DEBUG=no_rho_approx,no_brilinear,no_quad_lod
>>
>> and see if the issue still occurs.
>>
>> Cheers,
>>
>>    -ilia
>>
>>
>>
>> On Thu, Apr 11, 2019 at 8:30 AM Dominik Drees 
>> wrote:
>>>
>>> Hello, everyone!
>>>
>>> I have a question regarding the interpolation precision of llvmpipe.
>>> Feel free to redirect me to somewhere else if this is not the right
>>> place to ask. Consider the following scenario: In a fragment shader we
>>> are sampling from a 16x16, 8 bit texture with values between 0 and 3
>>> using linear interpolation. Then we write white to the screen if the
>>> sampled value is > 1/255 and black otherwise. The output looks very
>>> different when rendered with llvmpipe compared to the result produced by
>>> rendering hardware (for both intel (mesa i965) and nvidia (proprietary
>>> driver)).
>>>
>>> I've uploaded exemplary output images here
>>> (https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fimgur.com%2Fa%2FD1udpez&data=02%7C01%7Csroland%40vmware.com%7Cbdef52eb504c4078f9f808d6be96da17%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636905952501149697&sdata=vymggYHZTDLwKNh7RpcM1eSyhVA2L%2BfHNchvYS8yQPQ%3D&reserved=0)
>>>
>>> and the corresponding fragment shader here
>>> (https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpastebin.com%2Fpa808Req&data=02%7C01%7Csroland%40vmware.com%7Cbdef52eb504c4078f9f808d6be96da17%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636905952501149697&sdata=%2FqKVJCXFS4UswynKeSoqCKivTHAb2o%2FZwVE1nwNms3M%3D&reserved=0).
The shader looks iffy to me, how do you use that vec4 in the if clause?


>>>
>>> My hypothesis is that llvmpipe (in contrast to hardware) only uses 8 bit
>>> for the interpolation computation when reading from 8 bit textures and
>>> thus loses precision in the lower bits. Is that correct? If so, does
>>> anyone know of a workaround?

So, in theory it is indeed possible the results are less accurate with
llvmpipe (I believe all recent hw does rgba8 filtering with more than 8
bit precision).
For formats fitting into rgba8, we have a fast path in llvmpipe
(gallivm) for the lerp, which unpacks the 8bit values into 16bit values,
does the lerp with that and packs back to 8 bit. The result is
accurately rounded there (to 8 bit) but only for 1 lerp step - for a 2d
texture there are 3 of those (one per direction, and a final one
combining the result). And yes this means the filtered result only has 8
bits.

I do believe you should not rely on implementations having more accuracy
- as far as I know the filtering we do is conformant there (it is tricky
to do better using the fast path).

There would be code to actually do filtering with full float precision,
although there's no way to reach it with rgba8 formats unless you change
the code. If you want to try out the theory, look at
lp_bld_sample_soa.c: lp_build_sample_soa_code() determines whether to
use the fast (aos) filtering path (use_aos, determined mostly by
util_format_fits_8unorm()). If you set this to false it will use the
full float filtering path. (FWIW I was actually thinking a while ago we
should force this path when there's only 1 channel, albeit I never got
around to test (benchmark) it - this is because the AoS filtering path
is really optimized for rgba8 formats, and if you only have 1 channel
it's quite possible float filtering is actually faster, since this
handles the channels individually.)
I guess though if the full float precision filtering is useful in
general, we could add that to GALLIVM_PERF.

Roland




>>>
>>> A little bit of background about the use case: We are trying to move the
>>> CI of Voreen
>>> (https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.uni-muenster.de%2FVoreen%2F&data=02%7C01%7Csroland%40vmware.com%7Cbdef52eb504c4078f9f808d6be96da17%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636905952501149697&sdata=tZf1sxXpC0rDhAAzqXNp9UQnRmrnZceKCerfJKcMdmk%3D&reserved=0)
>>> to the Gitlab-CI
>>> running in docker without any hardware dependencies. Using llvmpipe for
>>> 

Re: [Mesa-dev] [PATCH 1/2] draw: fix undefined shift of (1 << 31)

2019-04-11 Thread Roland Scheidegger
For the series, and the other one (undefined shifts in swrast/draw),
Reviewed-by: Roland Scheidegger 


Am 11.04.19 um 12:32 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> Pointed out by a coverity scan.
> ---
>  src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c 
> b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
> index 2b96b8ad446..dc22039b127 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
> +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
> @@ -175,7 +175,7 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
>  
> /* find two free temp regs */
> for (i = 0; i < 32; i++) {
> -  if ((aactx->tempsUsed & (1 << i)) == 0) {
> +  if ((aactx->tempsUsed & (1u << i)) == 0) {
>   /* found a free temp */
>   if (aactx->tmp0 < 0)
>  aactx->tmp0 = i;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] llvmpipe: add lp_fence_timedwait() helper

2019-04-11 Thread Gustaw Smolarczyk
czw., 11 kwi 2019 o 18:06 Emil Velikov  napisał(a):
>
> The function is analogous to lp_fence_wait() while taking a timeout
> (ns) parameter, as needed for EGL fence/sync.
>
> Cc: Roland Scheidegger 
> Signed-off-by: Emil Velikov 
> ---
>  src/gallium/drivers/llvmpipe/lp_fence.c | 22 ++
>  src/gallium/drivers/llvmpipe/lp_fence.h |  3 +++
>  2 files changed, 25 insertions(+)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c 
> b/src/gallium/drivers/llvmpipe/lp_fence.c
> index 20cd91cd63d..f8b31a9d6a5 100644
> --- a/src/gallium/drivers/llvmpipe/lp_fence.c
> +++ b/src/gallium/drivers/llvmpipe/lp_fence.c
> @@ -125,3 +125,25 @@ lp_fence_wait(struct lp_fence *f)
>  }
>
>
> +boolean
> +lp_fence_timedwait(struct lp_fence *f, uint64_t timeout)
> +{
> +   struct timespec ts = {
> +      .tv_nsec = timeout % 1000000000L,
> +      .tv_sec = timeout / 1000000000L,
> +   };

According to the documentation [1] and looking at the implementation
in mesa [2], cnd_timedwait accepts an absolute time in UTC, not
duration. It seems that the fence_finish callback accepts duration.

[1] https://en.cppreference.com/w/c/thread/cnd_timedwait
[2] 
https://gitlab.freedesktop.org/mesa/mesa/blob/master/include/c11/threads_posix.h#L135

> +   int ret;
> +
> +   if (LP_DEBUG & DEBUG_FENCE)
> +  debug_printf("%s %d\n", __FUNCTION__, f->id);
> +
> +   mtx_lock(&f->mutex);
> +   assert(f->issued);
> +   while (f->count < f->rank) {
> +      ret = cnd_timedwait(&f->signalled, &f->mutex, &ts);

Shouldn't ret be checked for thrd_busy here as well? Otherwise, the
function will busy-wait after the timeout is reached instead of
returning.

Regards,
Gustaw Smolarczyk


> +   }
> +   mtx_unlock(&f->mutex);
> +   return ret == thrd_success;
> +}
> +
> +
> diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h 
> b/src/gallium/drivers/llvmpipe/lp_fence.h
> index b72026492c6..5ba746d22d1 100644
> --- a/src/gallium/drivers/llvmpipe/lp_fence.h
> +++ b/src/gallium/drivers/llvmpipe/lp_fence.h
> @@ -65,6 +65,9 @@ lp_fence_signalled(struct lp_fence *fence);
>  void
>  lp_fence_wait(struct lp_fence *fence);
>
> +boolean
> +lp_fence_timedwait(struct lp_fence *fence, uint64_t timeout);
> +
>  void
>  llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
>
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] llvmpipe: add lp_fence_timedwait() helper

2019-04-11 Thread Emil Velikov
The function is analogous to lp_fence_wait() while taking a timeout
(ns) parameter, as needed for EGL fence/sync.

Cc: Roland Scheidegger 
Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/llvmpipe/lp_fence.c | 22 ++
 src/gallium/drivers/llvmpipe/lp_fence.h |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c 
b/src/gallium/drivers/llvmpipe/lp_fence.c
index 20cd91cd63d..f8b31a9d6a5 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -125,3 +125,25 @@ lp_fence_wait(struct lp_fence *f)
 }
 
 
+boolean
+lp_fence_timedwait(struct lp_fence *f, uint64_t timeout)
+{
+   struct timespec ts = {
+      .tv_nsec = timeout % 1000000000L,
+      .tv_sec = timeout / 1000000000L,
+   };
+   int ret;
+
+   if (LP_DEBUG & DEBUG_FENCE)
+  debug_printf("%s %d\n", __FUNCTION__, f->id);
+
+   mtx_lock(&f->mutex);
+   assert(f->issued);
+   while (f->count < f->rank) {
+      ret = cnd_timedwait(&f->signalled, &f->mutex, &ts);
+   }
+   mtx_unlock(&f->mutex);
+   return ret == thrd_success;
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h 
b/src/gallium/drivers/llvmpipe/lp_fence.h
index b72026492c6..5ba746d22d1 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -65,6 +65,9 @@ lp_fence_signalled(struct lp_fence *fence);
 void
 lp_fence_wait(struct lp_fence *fence);
 
+boolean
+lp_fence_timedwait(struct lp_fence *fence, uint64_t timeout);
+
 void
 llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] llvmpipe: Always return some fence in flush (v2)

2019-04-11 Thread Emil Velikov
From: Tomasz Figa 

If there is no last fence, due to no rendering happening yet, just
create a new signaled fence and return it, to match the expectations of
the EGL sync fence API.

Fixes random "Could not create sync fence 0x3003" assertion failures from
Skia on Android, coming from the following code:

https://android.googlesource.com/platform/frameworks/base/+/master/libs/hwui/pipeline/skia/SkiaOpenGLPipeline.cpp#427

Reproducible especially with thread count >= 4.

One could make the driver always keep the reference to the last fence,
but:

 - the driver seems to explicitly destroy the fence whenever a rendering
   pass completes and changing that would require a significant functional
   change to the code. (Specifically, in lp_scene_end_rasterization().)

 - it still wouldn't solve the problem of an EGL sync fence being created
   and waited on without any rendering happening at all, which is
   also likely to happen with Android code pointed to in the commit.

Therefore, the simple approach of always creating a fence is taken,
similarly to other drivers, such as radeonsi.

Tested with piglit llvmpipe suite with no regressions and following
tests fixed:

egl_khr_fence_sync
 conformance
  eglclientwaitsynckhr_flag_sync_flush
  eglclientwaitsynckhr_nonzero_timeout
  eglclientwaitsynckhr_zero_timeout
  eglcreatesynckhr_default_attributes
  eglgetsyncattribkhr_invalid_attrib
  eglgetsyncattribkhr_sync_status

v2:
 - remove the useless lp_fence_reference() dance (Nicolai),
 - explain why creating the dummy fence is the right approach.

Cc: Roland Scheidegger 
Signed-off-by: Tomasz Figa 
---
 src/gallium/drivers/llvmpipe/lp_setup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
b/src/gallium/drivers/llvmpipe/lp_setup.c
index b0873694732..e72e119c8a1 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -361,6 +361,8 @@ lp_setup_flush( struct lp_setup_context *setup,
 
if (fence) {
   lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
+  if (!*fence)
+ *fence = (struct pipe_fence_handle *)lp_fence_create(0);
}
 }
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] llvmpipe: correctly handle waiting in llvmpipe_fence_finish

2019-04-11 Thread Emil Velikov
Currently if the timeout differs from 0, we'll end up with infinite
wait... even if the user is perfectly clear they don't want that.

Use the new lp_fence_timedwait() helper guarding both waits in an
!lp_fence_signalled block like the rest of llvmpipe.

Cc: Roland Scheidegger 
Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/llvmpipe/lp_screen.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index b55b4a3c4fe..3aa8b9fbcc9 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -636,7 +636,12 @@ llvmpipe_fence_finish(struct pipe_screen *screen,
if (!timeout)
   return lp_fence_signalled(f);
 
-   lp_fence_wait(f);
+   if (!lp_fence_signalled(f)) {
+  if (timeout != PIPE_TIMEOUT_INFINITE)
+ return lp_fence_timedwait(f, timeout);
+
+  lp_fence_wait(f);
+   }
return TRUE;
 }
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] anv/ehl: 36bits ppgtt support

2019-04-11 Thread Lionel Landwerlin
I started this MR : 
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/637


On 11/04/2019 13:06, Lionel Landwerlin wrote:
Sorry, upon rereading the code of the various drivers, it seems 
i965/iris handle this properly already.


I have some comments below.

On 11/04/2019 11:36, Lionel Landwerlin wrote:

Hi James,

Thanks a lot for reporting this.

I think this is something we should store in the gen_device_info and 
update with kernel ioctl when supported.

This affects other drivers, not just anv.

-Lionel

On 10/04/2019 23:55, James Xiong wrote:

From: "Xiong, James" 

The vma high heap's capacity and maximum address were pre-defined based
on 48bits ppgtt support, and the buffers allocated from the vma high 
heap

had invalid vma addresses to the ehl platform that only supports 36bits
ppgtt. As a result, KMD rejected all batchbuffers submitted by vulkan.

This patch:
1) initializes the vma high heap by retrieving the gtt capacity from 
KMD

and calculating the size and max address on the fly.
2) enables softpin when full ppgtt is enabled

V2: change commit messages and comments to reflect the changes [Bob, 
Jason]

 remove define HIGH_HEAP_SIZE [Bob]
 make sure there's enough space to enable softpin [Jason]

Signed-off-by: Xiong, James 
---
  src/intel/vulkan/anv_device.c  | 30 +++---
  src/intel/vulkan/anv_private.h |  7 ---
  2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c 
b/src/intel/vulkan/anv_device.c

index 88b34c4..c3eff1c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -434,7 +434,12 @@ anv_physical_device_init(struct 
anv_physical_device *device,

anv_gem_supports_syncobj_wait(fd);
 device->has_context_priority = anv_gem_has_context_priority(fd);
  +   /*
+    * make sure there are enough VA space(i.e. 32+bit support) and 
full ggtt

+    * is enabled.
+    */
 device->use_softpin = anv_gem_get_param(fd, 
I915_PARAM_HAS_EXEC_SOFTPIN)

+  && (anv_gem_get_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1)
    && device->supports_48bit_addresses;
   device->has_context_isolation =
@@ -1981,14 +1986,25 @@ VkResult anv_CreateDevice(
    device->vma_lo_available =
physical_device->memory.heaps[physical_device->memory.heap_count - 
1].size;
  -  /* Leave the last 4GiB out of the high vma range, so that 
no state base
-   * address + size can overflow 48 bits. For more information 
see the

-   * comment about Wa32bitGeneralStateOffset in anv_allocator.c
-   */
-  util_vma_heap_init(>vma_hi, HIGH_HEAP_MIN_ADDRESS,
- HIGH_HEAP_SIZE);
    device->vma_hi_available = 
physical_device->memory.heap_count == 1 ? 0 :

   physical_device->memory.heaps[0].size;
+
+  /* Retrieve the GTT's capacity and leave the last 4GiB out of 
the high vma
+   * range, so that no state base address + size can overflow 
the vma range. For
+   * more information see the comment about 
Wa32bitGeneralStateOffset in

+   * anv_allocator.c
+   */
+  uint64_t size = 0;
+  anv_gem_get_context_param(device->fd, 0, 
I915_CONTEXT_PARAM_GTT_SIZE,

+    );



I don't think you need to requery the gtt size, this is already done 
when initializing the physical device.


I think we can do something better by storing the bounds in the 
physical device and just reusing that at logical device creation.




+  if(size > HIGH_HEAP_MIN_ADDRESS + (1ull<<32)) {
+ size -= HIGH_HEAP_MIN_ADDRESS + (1ull<<32);
+ device->vma_hi_max_addr = HIGH_HEAP_MIN_ADDRESS + size - 1;
+  } else {
+ size = device->vma_hi_max_addr = 0;
+  }
+
+  util_vma_heap_init(>vma_hi, HIGH_HEAP_MIN_ADDRESS, 
size);

 }
   /* As per spec, the driver implementation may deny requests 
to acquire
@@ -2456,7 +2472,7 @@ anv_vma_free(struct anv_device *device, struct 
anv_bo *bo)

    device->vma_lo_available += bo->size;
 } else {
    assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS &&
- addr_48b <= HIGH_HEAP_MAX_ADDRESS);
+ addr_48b <= device->vma_hi_max_addr);
    util_vma_heap_free(>vma_hi, addr_48b, bo->size);
    device->vma_hi_available += bo->size;
 }
diff --git a/src/intel/vulkan/anv_private.h 
b/src/intel/vulkan/anv_private.h

index 1664918..ef9b012 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -109,6 +109,9 @@ struct gen_l3_config;
   * heap. Various hardware units will read past the end of an 
object for
   * various reasons. This healthy margin prevents reads from 
wrapping around

   * 48-bit addresses.
+ *
+ * (4) the high vma heap size and max address are calculated based 
on the

+ * gtt capacity retrieved from KMD.
   */
  #define LOW_HEAP_MIN_ADDRESS   0x1000ULL /* 4 
KiB */

  #define LOW_HEAP_MAX_ADDRESS   0xbfffULL
@@ -121,12 +124,9 @@ struct 

Re: [Mesa-dev] Low interpolation precision for 8 bit textures using llvmpipe

2019-04-11 Thread Dominik Drees
Running with the suggested flags in the environment does not change the 
result for the test case I described below. The results with and without 
the environment variables set are pixel-wise equal.


By the way, and if this of interest: For GL_NEAREST sampling the results 
from hardware and llvmpipe are equal as well.


Best,
Dominik

On 4/11/19 4:36 PM, Ilia Mirkin wrote:

llvmpipe takes a number of shortcuts in the interest of speed which
cause inaccurate texturing. Try running with

GALLIVM_DEBUG=no_rho_approx,no_brilinear,no_quad_lod

and see if the issue still occurs.

Cheers,

   -ilia



On Thu, Apr 11, 2019 at 8:30 AM Dominik Drees  wrote:


Hello, everyone!

I have a question regarding the interpolation precision of llvmpipe.
Feel free to redirect me to somewhere else if this is not the right
place to ask. Consider the following scenario: In a fragment shader we
are sampling from a 16x16, 8 bit texture with values between 0 and 3
using linear interpolation. Then we write white to the screen if the
sampled value is > 1/255 and black otherwise. The output looks very
different when rendered with llvmpipe compared to the result produced by
rendering hardware (for both intel (mesa i965) and nvidia (proprietary
driver)).

I've uploaded exemplary output images here (https://imgur.com/a/D1udpez)
and the corresponding fragment shader here (https://pastebin.com/pa808Req).

My hypothesis is that llvmpipe (in contrast to hardware) only uses 8 bit
for the interpolation computation when reading from 8 bit textures and
thus loses precision in the lower bits. Is that correct? If so, does
anyone know of a workaround?

A little bit of background about the use case: We are trying to move the
CI of Voreen (https://www.uni-muenster.de/Voreen/) to the Gitlab-CI
running in docker without any hardware dependencies. Using llvmpipe for
our regression tests works in principle, but shows significant
differences in the raycasting rendering of an 8-bit-per-voxel dataset.
(The effect is of course less visible than the constructed example case
linked above, but still quite noticeable for a human.)

Any help or pointers would be appreciated!

Best,
Dominik

--
Dominik Drees

Department of Computer Science
Westfaelische Wilhelms-Universitaet Muenster

email: dominik.dr...@wwu.de
web: https://www.wwu.de/PRIA/personen/drees.shtml
phone: +49 251 83 - 38448

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


--
Dominik Drees

Department of Computer Science
Westfaelische Wilhelms-Universitaet Muenster

email: dominik.dr...@wwu.de
web: https://www.wwu.de/PRIA/personen/drees.shtml
phone: +49 251 83 - 38448



smime.p7s
Description: S/MIME Cryptographic Signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] panfrost: split asserts in pandecode

2019-04-11 Thread Alyssa Rosenzweig
Both patches are R-b: Alyssa Rosenzweig 

Thanks!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110345] Unrecoverable GPU crash with DiRT 4

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110345

--- Comment #9 from Thomas Rohloff  ---
Sorry to say but all of these combinations froze:

RADV_DEBUG=nodcc,nohiz
RADV_DEBUG=nofastclears
RADV_DEBUG=zerovram

So I'll re-run with RADV_DEBUG=nodcc,nohiz,nofastclears,zerovram to confirm it
was just luck before. Not sure what to do after that to help pinpoint this,
as the distribution I'm using has some problems with vktrace atm, and even if I
were able to run it, it would most likely produce a trace way too large to
upload.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Low interpolation precision for 8 bit textures using llvmpipe

2019-04-11 Thread Ilia Mirkin
llvmpipe takes a number of shortcuts in the interest of speed which
cause inaccurate texturing. Try running with

GALLIVM_DEBUG=no_rho_approx,no_brilinear,no_quad_lod

and see if the issue still occurs.

Cheers,

  -ilia



On Thu, Apr 11, 2019 at 8:30 AM Dominik Drees  wrote:
>
> Hello, everyone!
>
> I have a question regarding the interpolation precision of llvmpipe.
> Feel free to redirect me to somewhere else if this is not the right
> place to ask. Consider the following scenario: In a fragment shader we
> are sampling from a 16x16, 8 bit texture with values between 0 and 3
> using linear interpolation. Then we write white to the screen if the
> sampled value is > 1/255 and black otherwise. The output looks very
> different when rendered with llvmpipe compared to the result produced by
> rendering hardware (for both intel (mesa i965) and nvidia (proprietary
> driver)).
>
> I've uploaded exemplary output images here (https://imgur.com/a/D1udpez)
> and the corresponding fragment shader here (https://pastebin.com/pa808Req).
>
> My hypothesis is that llvmpipe (in contrast to hardware) only uses 8 bit
> for the interpolation computation when reading from 8 bit textures and
> thus loses precision in the lower bits. Is that correct? If so, does
> anyone know of a workaround?
>
> A little bit of background about the use case: We are trying to move the
> CI of Voreen (https://www.uni-muenster.de/Voreen/) to the Gitlab-CI
> running in docker without any hardware dependencies. Using llvmpipe for
> our regression tests works in principle, but shows significant
> differences in the raycasting rendering of an 8-bit-per-voxel dataset.
> (The effect is of course less visible than the constructed example case
> linked above, but still quite noticeable for a human.)
>
> Any help or pointers would be appreciated!
>
> Best,
> Dominik
>
> --
> Dominik Drees
>
> Department of Computer Science
> Westfaelische Wilhelms-Universitaet Muenster
>
> email: dominik.dr...@wwu.de
> web: https://www.wwu.de/PRIA/personen/drees.shtml
> phone: +49 251 83 - 38448
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] panfrost: split asserts in pandecode

2019-04-11 Thread Eric Engestrom
On Thursday, 2019-04-11 09:11:12 +0200, Tomeu Vizoso wrote:
> Signed-off-by: Tomeu Vizoso 

Reviewed-by: Eric Engestrom 

> ---
>  src/gallium/drivers/panfrost/pandecode/mmap.h | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/drivers/panfrost/pandecode/mmap.h 
> b/src/gallium/drivers/panfrost/pandecode/mmap.h
> index 1a208336e814..e9acae877f7f 100644
> --- a/src/gallium/drivers/panfrost/pandecode/mmap.h
> +++ b/src/gallium/drivers/panfrost/pandecode/mmap.h
> @@ -54,9 +54,8 @@ __pandecode_fetch_gpu_mem(const struct 
> pandecode_mapped_memory *mem,
>  if (!mem)
>  mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
>  
> -if (!mem ||
> -size + (gpu_va - mem->gpu_va) > mem->length)
> -assert(0);
> +assert(mem);
> +assert(size + (gpu_va - mem->gpu_va) <= mem->length);
>  
>  return mem->addr + gpu_va - mem->gpu_va;
>  }
> -- 
> 2.20.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Low interpolation precision for 8 bit textures using llvmpipe

2019-04-11 Thread Dominik Drees

Hello, everyone!

I have a question regarding the interpolation precision of llvmpipe. 
Feel free to redirect me to somewhere else if this is not the right 
place to ask. Consider the following scenario: In a fragment shader we 
are sampling from a 16x16, 8 bit texture with values between 0 and 3 
using linear interpolation. Then we write white to the screen if the 
sampled value is > 1/255 and black otherwise. The output looks very 
different when rendered with llvmpipe compared to the result produced by 
rendering hardware (for both intel (mesa i965) and nvidia (proprietary 
driver)).


I've uploaded exemplary output images here (https://imgur.com/a/D1udpez) 
and the corresponding fragment shader here (https://pastebin.com/pa808Req).


My hypothesis is that llvmpipe (in contrast to hardware) only uses 8 bit 
for the interpolation computation when reading from 8 bit textures and 
thus loses precision in the lower bits. Is that correct? If so, does 
anyone know of a workaround?


A little bit of background about the use case: We are trying to move the 
CI of Voreen (https://www.uni-muenster.de/Voreen/) to the Gitlab-CI 
running in docker without any hardware dependencies. Using llvmpipe for 
our regression tests works in principle, but shows significant 
differences in the raycasting rendering of an 8-bit-per-voxel dataset. 
(The effect is of course less visible than the constructed example case 
linked above, but still quite noticeable for a human.)


Any help or pointers would be appreciated!

Best,
Dominik

--
Dominik Drees

Department of Computer Science
Westfaelische Wilhelms-Universitaet Muenster

email: dominik.dr...@wwu.de
web: https://www.wwu.de/PRIA/personen/drees.shtml
phone: +49 251 83 - 38448



smime.p7s
Description: S/MIME Cryptographic Signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] intel/compiler: fix uninit non-static variable.

2019-04-11 Thread Lionel Landwerlin

To be honest we're not initializing nir_locals either :/

Reviewed-by: Lionel Landwerlin 

On 11/04/2019 11:32, Dave Airlie wrote:

From: Dave Airlie 

Pointed out by coverity.
---
  src/intel/compiler/brw_vec4_visitor.cpp | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/intel/compiler/brw_vec4_visitor.cpp 
b/src/intel/compiler/brw_vec4_visitor.cpp
index 16ee31d730a..fa3d7fc13b7 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -1887,6 +1887,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler 
*compiler,
 this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
  
 this->uniforms = 0;

+
+   this->nir_ssa_values = NULL;
  }
  
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v6 32/35] intel/compiler: validate region restrictions for mixed float mode

2019-04-11 Thread Juan A. Suarez Romero
On Wed, 2019-04-10 at 17:13 -0700, Francisco Jerez wrote:
> "Juan A. Suarez Romero"  writes:
> 
> > From: Iago Toral Quiroga 
> > 
> > v2:
> >  - Adapted unit tests to make them consistent with the changes done
> >to the validation of half-float conversions.
> > 
> > v3 (Curro):
> > - Check all the accumulators
> > - Constify declarations
> > - Do not check src1 type in single-source instructions.
> > - Check for all instructions that read accumulator (either implicitly or
> >   explicitly)
> > - Check restrictions in src1 too.
> > - Merge conditional block
> > - Add invalid test case.
> > ---
> >  src/intel/compiler/brw_eu_validate.c| 290 +++
> >  src/intel/compiler/test_eu_validate.cpp | 631 
> >  2 files changed, 921 insertions(+)
> > 
> > diff --git a/src/intel/compiler/brw_eu_validate.c 
> > b/src/intel/compiler/brw_eu_validate.c
> > index cfaf126e2f5..4a735641c86 100644
> > --- a/src/intel/compiler/brw_eu_validate.c
> > +++ b/src/intel/compiler/brw_eu_validate.c
> > @@ -170,6 +170,20 @@ src1_is_null(const struct gen_device_info *devinfo, 
> > const brw_inst *inst)
> >brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
> >  }
> >  
> > +static bool
> > +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> > +{
> > +   return brw_inst_src0_reg_file(devinfo, inst) == 
> > BRW_ARCHITECTURE_REGISTER_FILE &&
> > +  (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == 
> > BRW_ARF_ACCUMULATOR;
> > +}
> > +
> > +static bool
> > +src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> > +{
> > +   return brw_inst_src1_reg_file(devinfo, inst) == 
> > BRW_ARCHITECTURE_REGISTER_FILE &&
> > +  (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == 
> > BRW_ARF_ACCUMULATOR;
> > +}
> > +
> >  static bool
> >  src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
> >  {
> > @@ -275,6 +289,27 @@ sources_not_null(const struct gen_device_info *devinfo,
> > return error_msg;
> >  }
> >  
> > +static bool
> > +inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst 
> > *inst)
> > +{
> > +   /* Check instructions that use implicit accumulator sources */
> > +   switch (brw_inst_opcode(devinfo, inst)) {
> > +   case BRW_OPCODE_MAC:
> > +   case BRW_OPCODE_MACH:
> > +   case BRW_OPCODE_SADA2:
> > +  return true;
> > +   }
> > +
> > +   /* Instructions with three source operands cannot use explicit 
> > accumulator
> > +* operands.
> > +*/
> 
> They can on Gen10+.  Yeah, I know, it's quite a pain to have to
> special-case 3src instructions everywhere in the validator code...


Checking other parts of the code, I'll assume that srcN_is_acc() should return
false for align16 mode; at least there are assertions in them that in this mode
srcs can only be GRF.

OTOH, is it worth handling here the case of 3src instructions allowing an
explicit accumulator? If other parts of the driver assume this is not possible, I
understand it would be better to fix this across all the code in a separate
patchset (not related to float16).


> 
> > +   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
> > +   if (num_sources > 2)
> > +  return false;
> > +
> > +   return src0_is_acc(devinfo, inst) || (num_sources > 1 && 
> > src1_is_acc(devinfo, inst));
> > +}
> > +
> >  static struct string
> >  send_restrictions(const struct gen_device_info *devinfo,
> >const brw_inst *inst)
> > @@ -938,6 +973,260 @@ general_restrictions_on_region_parameters(const 
> > struct gen_device_info *devinfo,
> > return error_msg;
> >  }
> >  
> > +static struct string
> > +special_restrictions_for_mixed_float_mode(const struct gen_device_info 
> > *devinfo,
> > +  const brw_inst *inst)
> > +{
> > +   struct string error_msg = { .str = NULL, .len = 0 };
> > +
> > +   const unsigned opcode = brw_inst_opcode(devinfo, inst);
> > +   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
> > +   if (num_sources >= 3)
> > +  return error_msg;
> > +
> > +   if (!is_mixed_float(devinfo, inst))
> > +  return error_msg;
> > +
> > +   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
> > +   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
> > +
> > +   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> > +   enum brw_reg_type src1_type = num_sources > 1 ?
> > + brw_inst_src1_type(devinfo, inst) : 0;
> > +   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> > +
> > +   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
> > +   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, 
> > dst_stride);
> > +
> > +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> > +* Float Operations:
> > +*
> > +*"Indirect addressing on source is not supported when source and
> > +* 

Re: [Mesa-dev] [PATCH 1/1] anv/ehl: 36bits ppgtt support

2019-04-11 Thread Lionel Landwerlin
Sorry, upon rereading the code of the various drivers, it seems 
i965/iris handle this properly already.


I have some comments below.

On 11/04/2019 11:36, Lionel Landwerlin wrote:

Hi James,

Thanks a lot for reporting this.

I think this is something we should store in the gen_device_info and 
update with kernel ioctl when supported.

This affects other drivers, not just anv.

-Lionel

On 10/04/2019 23:55, James Xiong wrote:

From: "Xiong, James" 

The vma high heap's capacity and maximum address were pre-defined based
on 48bits ppgtt support, and the buffers allocated from the vma high 
heap

had invalid vma addresses to the ehl platform that only supports 36bits
ppgtt. As a result, KMD rejected all batchbuffers submitted by vulkan.

This patch:
1) initializes the vma high heap by retrieving the gtt capacity from KMD
and calculating the size and max address on the fly.
2) enables softpin when full ppgtt is enabled

V2: change commit messages and comments to reflect the changes [Bob, 
Jason]

 remove define HIGH_HEAP_SIZE [Bob]
 make sure there's enough space to enable softpin [Jason]

Signed-off-by: Xiong, James 
---
  src/intel/vulkan/anv_device.c  | 30 +++---
  src/intel/vulkan/anv_private.h |  7 ---
  2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c 
b/src/intel/vulkan/anv_device.c

index 88b34c4..c3eff1c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -434,7 +434,12 @@ anv_physical_device_init(struct 
anv_physical_device *device,

anv_gem_supports_syncobj_wait(fd);
 device->has_context_priority = anv_gem_has_context_priority(fd);
  +   /*
+    * make sure there are enough VA space(i.e. 32+bit support) and 
full ggtt

+    * is enabled.
+    */
 device->use_softpin = anv_gem_get_param(fd, 
I915_PARAM_HAS_EXEC_SOFTPIN)

+  && (anv_gem_get_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1)
    && device->supports_48bit_addresses;
   device->has_context_isolation =
@@ -1981,14 +1986,25 @@ VkResult anv_CreateDevice(
    device->vma_lo_available =
physical_device->memory.heaps[physical_device->memory.heap_count - 
1].size;
  -  /* Leave the last 4GiB out of the high vma range, so that no 
state base
-   * address + size can overflow 48 bits. For more information 
see the

-   * comment about Wa32bitGeneralStateOffset in anv_allocator.c
-   */
-  util_vma_heap_init(>vma_hi, HIGH_HEAP_MIN_ADDRESS,
- HIGH_HEAP_SIZE);
    device->vma_hi_available = physical_device->memory.heap_count 
== 1 ? 0 :

   physical_device->memory.heaps[0].size;
+
+  /* Retrieve the GTT's capacity and leave the last 4GiB out of 
the high vma
+   * range, so that no state base address + size can overflow 
the vma range. For
+   * more information see the comment about 
Wa32bitGeneralStateOffset in

+   * anv_allocator.c
+   */
+  uint64_t size = 0;
+  anv_gem_get_context_param(device->fd, 0, 
I915_CONTEXT_PARAM_GTT_SIZE,

+    );



I don't think you need to requery the gtt size, this is already done 
when initializing the physical device.


I think we can do something better by storing the bounds in the physical 
device and just reusing that at logical device creation.




+  if(size > HIGH_HEAP_MIN_ADDRESS + (1ull<<32)) {
+ size -= HIGH_HEAP_MIN_ADDRESS + (1ull<<32);
+ device->vma_hi_max_addr = HIGH_HEAP_MIN_ADDRESS + size - 1;
+  } else {
+ size = device->vma_hi_max_addr = 0;
+  }
+
+  util_vma_heap_init(>vma_hi, HIGH_HEAP_MIN_ADDRESS, size);
 }
   /* As per spec, the driver implementation may deny requests to 
acquire
@@ -2456,7 +2472,7 @@ anv_vma_free(struct anv_device *device, struct 
anv_bo *bo)

    device->vma_lo_available += bo->size;
 } else {
    assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS &&
- addr_48b <= HIGH_HEAP_MAX_ADDRESS);
+ addr_48b <= device->vma_hi_max_addr);
    util_vma_heap_free(>vma_hi, addr_48b, bo->size);
    device->vma_hi_available += bo->size;
 }
diff --git a/src/intel/vulkan/anv_private.h 
b/src/intel/vulkan/anv_private.h

index 1664918..ef9b012 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -109,6 +109,9 @@ struct gen_l3_config;
   * heap. Various hardware units will read past the end of an object 
for
   * various reasons. This healthy margin prevents reads from 
wrapping around

   * 48-bit addresses.
+ *
+ * (4) the high vma heap size and max address are calculated based 
on the

+ * gtt capacity retrieved from KMD.
   */
  #define LOW_HEAP_MIN_ADDRESS   0x1000ULL /* 4 
KiB */

  #define LOW_HEAP_MAX_ADDRESS   0xbfffULL
@@ -121,12 +124,9 @@ struct gen_l3_config;
  #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x00018000ULL /* 6 
GiB */

  #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 

Re: [Mesa-dev] [PATCH 1/2] draw: fix undefined shift of (1 << 31)

2019-04-11 Thread Eric Engestrom
On Thursday, 2019-04-11 20:32:18 +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> Pointed out by a coverity scan.

This patch is:
Reviewed-by: Eric Engestrom 

> ---
>  src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c 
> b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
> index 2b96b8ad446..dc22039b127 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
> +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
> @@ -175,7 +175,7 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
>  
> /* find two free temp regs */
> for (i = 0; i < 32; i++) {
> -  if ((aactx->tempsUsed & (1 << i)) == 0) {
> +  if ((aactx->tempsUsed & (1u << i)) == 0) {
>   /* found a free temp */
>   if (aactx->tmp0 < 0)
>  aactx->tmp0 = i;
> -- 
> 2.20.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] swrast: fix undefined shift of 1 << 31

2019-04-11 Thread Eric Engestrom
On Thursday, 2019-04-11 20:40:13 +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> Pointed out by coverity

Series is:
Reviewed-by: Eric Engestrom 

> ---
>  src/mesa/swrast/s_span.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
> index f50b549a97f..184a37c99b8 100644
> --- a/src/mesa/swrast/s_span.c
> +++ b/src/mesa/swrast/s_span.c
> @@ -769,7 +769,7 @@ clip_span( struct gl_context *ctx, SWspan *span )
>* For arrays of values, shift them left.
>*/
>   for (i = 0; i < VARYING_SLOT_MAX; i++) {
> -if (span->interpMask & (1 << i)) {
> +if (span->interpMask & (1u << i)) {
> GLuint j;
> for (j = 0; j < 4; j++) {
>span->attrStart[i][j] += leftClip * span->attrStepX[i][j];
> -- 
> 2.20.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110337] Mesa 19.0.0(1) freeze system on Oland with amdgpu driver

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110337

Andre Klapper  changed:

   What|Removed |Added

   Severity|critical|normal
   Priority|high|medium

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v6 32/35] intel/compiler: validate region restrictions for mixed float mode

2019-04-11 Thread Juan A. Suarez Romero
On Wed, 2019-04-10 at 17:13 -0700, Francisco Jerez wrote:
> "Juan A. Suarez Romero"  writes:
> 
> > From: Iago Toral Quiroga 
> > 
> > v2:
> >  - Adapted unit tests to make them consistent with the changes done
> >to the validation of half-float conversions.
> > 
> > v3 (Curro):
> > - Check all the accumulators
> > - Constify declarations
> > - Do not check src1 type in single-source instructions.
> > - Check for all instructions that read accumulator (either implicitly or
> >   explicitly)
> > - Check restrictions in src1 too.
> > - Merge conditional block
> > - Add invalid test case.
> > ---
> >  src/intel/compiler/brw_eu_validate.c| 290 +++
> >  src/intel/compiler/test_eu_validate.cpp | 631 
> >  2 files changed, 921 insertions(+)
> > 
> > diff --git a/src/intel/compiler/brw_eu_validate.c 
> > b/src/intel/compiler/brw_eu_validate.c
> > index cfaf126e2f5..4a735641c86 100644
> > --- a/src/intel/compiler/brw_eu_validate.c
> > +++ b/src/intel/compiler/brw_eu_validate.c
> > @@ -170,6 +170,20 @@ src1_is_null(const struct gen_device_info *devinfo, 
> > const brw_inst *inst)
> >brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
> >  }
> >  
> > +static bool
> > +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> > +{
> > +   return brw_inst_src0_reg_file(devinfo, inst) == 
> > BRW_ARCHITECTURE_REGISTER_FILE &&
> > +  (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == 
> > BRW_ARF_ACCUMULATOR;
> > +}
> > +
> > +static bool
> > +src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
> > +{
> > +   return brw_inst_src1_reg_file(devinfo, inst) == 
> > BRW_ARCHITECTURE_REGISTER_FILE &&
> > +  (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == 
> > BRW_ARF_ACCUMULATOR;
> > +}
> > +
> >  static bool
> >  src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
> >  {
> > @@ -275,6 +289,27 @@ sources_not_null(const struct gen_device_info *devinfo,
> > return error_msg;
> >  }
> >  
> > +static bool
> > +inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst 
> > *inst)
> > +{
> > +   /* Check instructions that use implicit accumulator sources */
> > +   switch (brw_inst_opcode(devinfo, inst)) {
> > +   case BRW_OPCODE_MAC:
> > +   case BRW_OPCODE_MACH:
> > +   case BRW_OPCODE_SADA2:
> > +  return true;
> > +   }
> > +
> > +   /* Instructions with three source operands cannot use explicit 
> > accumulator
> > +* operands.
> > +*/
> 
> They can on Gen10+.  Yeah, I know, it's quite a pain to have to
> special-case 3src instructions everywhere in the validator code...

Is this strictly for Gen>10, or does it include Gen10? The Gen10 PRM still says
that 3-src operand instructions cannot use an explicit accumulator.


> 
> > +   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
> > +   if (num_sources > 2)
> > +  return false;
> > +
> > +   return src0_is_acc(devinfo, inst) || (num_sources > 1 && 
> > src1_is_acc(devinfo, inst));
> > +}
> > +
> >  static struct string
> >  send_restrictions(const struct gen_device_info *devinfo,
> >const brw_inst *inst)
> > @@ -938,6 +973,260 @@ general_restrictions_on_region_parameters(const 
> > struct gen_device_info *devinfo,
> > return error_msg;
> >  }
> >  
> > +static struct string
> > +special_restrictions_for_mixed_float_mode(const struct gen_device_info 
> > *devinfo,
> > +  const brw_inst *inst)
> > +{
> > +   struct string error_msg = { .str = NULL, .len = 0 };
> > +
> > +   const unsigned opcode = brw_inst_opcode(devinfo, inst);
> > +   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
> > +   if (num_sources >= 3)
> > +  return error_msg;
> > +
> > +   if (!is_mixed_float(devinfo, inst))
> > +  return error_msg;
> > +
> > +   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
> > +   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
> > +
> > +   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
> > +   enum brw_reg_type src1_type = num_sources > 1 ?
> > + brw_inst_src1_type(devinfo, inst) : 0;
> > +   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
> > +
> > +   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
> > +   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, 
> > dst_stride);
> > +
> > +   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
> > +* Float Operations:
> > +*
> > +*"Indirect addressing on source is not supported when source and
> > +* destination data types are mixed float."
> > +*/
> > +   ERROR_IF((types_are_mixed_float(dst_type, src0_type) &&
> > + brw_inst_src0_address_mode(devinfo, inst) != 
> > BRW_ADDRESS_DIRECT) ||
> > +(num_sources > 1 &&
> > + types_are_mixed_float(dst_type, 

[Mesa-dev] [PATCH 1/2] swrast: fix undefined shift of 1 << 31

2019-04-11 Thread Dave Airlie
From: Dave Airlie 

Pointed out by coverity
---
 src/mesa/swrast/s_span.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index f50b549a97f..184a37c99b8 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -769,7 +769,7 @@ clip_span( struct gl_context *ctx, SWspan *span )
   * For arrays of values, shift them left.
   */
  for (i = 0; i < VARYING_SLOT_MAX; i++) {
-if (span->interpMask & (1 << i)) {
+if (span->interpMask & (1u << i)) {
GLuint j;
for (j = 0; j < 4; j++) {
   span->attrStart[i][j] += leftClip * span->attrStepX[i][j];
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] llvmpipe: fix undefined shift 1 << 31.

2019-04-11 Thread Dave Airlie
From: Dave Airlie 

Pointed out by coverity.
---
 src/gallium/drivers/llvmpipe/lp_setup_point.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c 
b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 2192789bd4c..fc1d5ef6e36 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -251,7 +251,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
  * PIPE_MAX_SHADER_OUTPUTS bits.
  */
 if (semantic_index < PIPE_MAX_SHADER_OUTPUTS &&
-(setup->sprite_coord_enable & (1 << semantic_index))) {
+(setup->sprite_coord_enable & (1u << semantic_index))) {
for (i = 0; i < NUM_CHANNELS; i++) {
   if (usage_mask & (1 << i)) {
  texcoord_coef(setup, info, slot + 1, i,
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] vl: Add cropping flags for H264

2019-04-11 Thread Sahu, Satyajit
From: suresh guttula 

This patch adds cropping flags for H264 in pipe_h264_enc_pic_control.

Signed-off-by: Satyajit Sahu 
---
 src/gallium/include/pipe/p_video_state.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index 05855a36e23..1369f1a8ca6 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -395,6 +395,11 @@ struct pipe_h264_enc_pic_control
 {
unsigned enc_cabac_enable;
unsigned enc_constraint_set_flags;
+   unsigned enc_frame_cropping_flag;
+   unsigned enc_frame_crop_left_offset;
+   unsigned enc_frame_crop_right_offset;
+   unsigned enc_frame_crop_top_offset;
+   unsigned enc_frame_crop_bottom_offset;
 };
 
 struct pipe_h264_enc_picture_desc
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] radeon/vce:Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264

2019-04-11 Thread Sahu, Satyajit
From: suresh guttula 

This patch adds support for frame cropping when the input size does not
match the aligned size. Currently the vaapi driver ignores the frame cropping
values provided by the client. This change updates the SPS NALU with the
proper cropping values.

Signed-off-by: Satyajit Sahu 
---
 src/gallium/drivers/radeon/radeon_vce_52.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c 
b/src/gallium/drivers/radeon/radeon_vce_52.c
index fc7ddc62a90..53f7b2f5fb5 100644
--- a/src/gallium/drivers/radeon/radeon_vce_52.c
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -83,6 +83,12 @@ static void get_pic_control_param(struct rvce_encoder *enc, 
struct pipe_h264_enc
encNumMBsPerSlice *= align(enc->base.height, 16) / 16;
enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 16) - 
enc->base.width) >> 1;
enc->enc_pic.pc.enc_crop_bottom_offset = (align(enc->base.height, 16) - 
enc->base.height) >> 1;
+   if (pic->pic_ctrl.enc_frame_cropping_flag) {
+   enc->enc_pic.pc.enc_crop_left_offset = 
pic->pic_ctrl.enc_frame_crop_left_offset;
+   enc->enc_pic.pc.enc_crop_right_offset = 
pic->pic_ctrl.enc_frame_crop_right_offset;
+   enc->enc_pic.pc.enc_crop_top_offset = 
pic->pic_ctrl.enc_frame_crop_top_offset;
+   enc->enc_pic.pc.enc_crop_bottom_offset = 
pic->pic_ctrl.enc_frame_crop_bottom_offset;
+   }
enc->enc_pic.pc.enc_num_mbs_per_slice = encNumMBsPerSlice;
enc->enc_pic.pc.enc_b_pic_pattern = MAX2(enc->base.max_references, 1) - 
1;
enc->enc_pic.pc.enc_number_of_reference_frames = 
MIN2(enc->base.max_references, 2);
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] anv/ehl: 36bits ppgtt support

2019-04-11 Thread Lionel Landwerlin

Hi James,

Thanks a lot for reporting this.

I think this is something we should store in the gen_device_info and 
update with kernel ioctl when supported.

This affects other drivers, not just anv.

-Lionel

On 10/04/2019 23:55, James Xiong wrote:

From: "Xiong, James" 

The vma high heap's capacity and maximum address were pre-defined based
on 48-bit ppgtt support, so the buffers allocated from the vma high heap
had vma addresses that are invalid on the EHL platform, which only supports
36-bit ppgtt. As a result, KMD rejected all batchbuffers submitted by vulkan.

This patch:
1) initializes the vma high heap by retrieving the gtt capacity from KMD
and calculating the size and max address on the fly.
2) enables softpin when full ppgtt is enabled

V2: change commit messages and comments to reflect the changes [Bob, Jason]
 remove define HIGH_HEAP_SIZE [Bob]
 make sure there's enough space to enable softpin [Jason]

Signed-off-by: Xiong, James 
---
  src/intel/vulkan/anv_device.c  | 30 +++---
  src/intel/vulkan/anv_private.h |  7 ---
  2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 88b34c4..c3eff1c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -434,7 +434,12 @@ anv_physical_device_init(struct anv_physical_device 
*device,
anv_gem_supports_syncobj_wait(fd);
 device->has_context_priority = anv_gem_has_context_priority(fd);
  
+   /*

+* make sure there are enough VA space(i.e. 32+bit support) and full ggtt
+* is enabled.
+*/
 device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)
+  && (anv_gem_get_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1)
&& device->supports_48bit_addresses;
  
 device->has_context_isolation =

@@ -1981,14 +1986,25 @@ VkResult anv_CreateDevice(
device->vma_lo_available =
   physical_device->memory.heaps[physical_device->memory.heap_count - 
1].size;
  
-  /* Leave the last 4GiB out of the high vma range, so that no state base

-   * address + size can overflow 48 bits. For more information see the
-   * comment about Wa32bitGeneralStateOffset in anv_allocator.c
-   */
-  util_vma_heap_init(>vma_hi, HIGH_HEAP_MIN_ADDRESS,
- HIGH_HEAP_SIZE);
device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 :
   physical_device->memory.heaps[0].size;
+
+  /* Retrieve the GTT's capacity and leave the last 4GiB out of the high 
vma
+   * range, so that no state base address + size can overflow the vma 
range. For
+   * more information see the comment about Wa32bitGeneralStateOffset in
+   * anv_allocator.c
+   */
+  uint64_t size = 0;
+  anv_gem_get_context_param(device->fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
+);
+  if(size > HIGH_HEAP_MIN_ADDRESS + (1ull<<32)) {
+ size -= HIGH_HEAP_MIN_ADDRESS + (1ull<<32);
+ device->vma_hi_max_addr = HIGH_HEAP_MIN_ADDRESS + size - 1;
+  } else {
+ size = device->vma_hi_max_addr = 0;
+  }
+
+  util_vma_heap_init(>vma_hi, HIGH_HEAP_MIN_ADDRESS, size);
 }
  
 /* As per spec, the driver implementation may deny requests to acquire

@@ -2456,7 +2472,7 @@ anv_vma_free(struct anv_device *device, struct anv_bo *bo)
device->vma_lo_available += bo->size;
 } else {
assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS &&
- addr_48b <= HIGH_HEAP_MAX_ADDRESS);
+ addr_48b <= device->vma_hi_max_addr);
util_vma_heap_free(>vma_hi, addr_48b, bo->size);
device->vma_hi_available += bo->size;
 }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 1664918..ef9b012 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -109,6 +109,9 @@ struct gen_l3_config;
   * heap. Various hardware units will read past the end of an object for
   * various reasons. This healthy margin prevents reads from wrapping around
   * 48-bit addresses.
+ *
+ * (4) the high vma heap size and max address are calculated based on the
+ * gtt capacity retrieved from KMD.
   */
  #define LOW_HEAP_MIN_ADDRESS   0x1000ULL /* 4 KiB */
  #define LOW_HEAP_MAX_ADDRESS   0xbfffULL
@@ -121,12 +124,9 @@ struct gen_l3_config;
  #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x00018000ULL /* 6 GiB */
  #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffULL
  #define HIGH_HEAP_MIN_ADDRESS  0x0001c000ULL /* 7 GiB */
-#define HIGH_HEAP_MAX_ADDRESS  0xfffeULL
  
  #define LOW_HEAP_SIZE   \

 (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
-#define HIGH_HEAP_SIZE  \
-   (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1)
  #define DYNAMIC_STATE_POOL_SIZE \
 

[Mesa-dev] [PATCH 3/3] st/va/enc: Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264

2019-04-11 Thread Sahu, Satyajit
From: suresh guttula 

This patch adds support for frame cropping when the input size does not
match the aligned size. Currently the vaapi driver ignores the frame cropping
values provided by the client. This change updates the SPS NALU with the
proper cropping values.

Signed-off-by: Satyajit Sahu 
---
 src/gallium/state_trackers/va/picture_h264_enc.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/state_trackers/va/picture_h264_enc.c 
b/src/gallium/state_trackers/va/picture_h264_enc.c
index abfd39633de..f46b3425566 100644
--- a/src/gallium/state_trackers/va/picture_h264_enc.c
+++ b/src/gallium/state_trackers/va/picture_h264_enc.c
@@ -127,6 +127,14 @@ vlVaHandleVAEncSequenceParameterBufferTypeH264(vlVaDriver 
*drv, vlVaContext *con
context->desc.h264enc.rate_ctrl.frame_rate_num = h264->time_scale / 2;
context->desc.h264enc.rate_ctrl.frame_rate_den = h264->num_units_in_tick;
context->desc.h264enc.pic_order_cnt_type = 
h264->seq_fields.bits.pic_order_cnt_type;
+
+   if (h264->frame_cropping_flag) {
+  context->desc.h264enc.pic_ctrl.enc_frame_cropping_flag = 
h264->frame_cropping_flag;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_left_offset = 
h264->frame_crop_left_offset;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_right_offset = 
h264->frame_crop_right_offset;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_top_offset = 
h264->frame_crop_top_offset;
+  context->desc.h264enc.pic_ctrl.enc_frame_crop_bottom_offset = 
h264->frame_crop_bottom_offset;
+   }
return VA_STATUS_SUCCESS;
 }
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] intel/compiler: fix uninit non-static variable.

2019-04-11 Thread Dave Airlie
From: Dave Airlie 

Pointed out by coverity.
---
 src/intel/compiler/brw_vec4_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/intel/compiler/brw_vec4_visitor.cpp 
b/src/intel/compiler/brw_vec4_visitor.cpp
index 16ee31d730a..fa3d7fc13b7 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -1887,6 +1887,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler 
*compiler,
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
this->uniforms = 0;
+
+   this->nir_ssa_values = NULL;
 }
 
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] draw: fix undefined shift of (1 << 31)

2019-04-11 Thread Dave Airlie
From: Dave Airlie 

Pointed out by a coverity scan.
---
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c 
b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 2b96b8ad446..dc22039b127 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -175,7 +175,7 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
 
/* find two free temp regs */
for (i = 0; i < 32; i++) {
-  if ((aactx->tempsUsed & (1 << i)) == 0) {
+  if ((aactx->tempsUsed & (1u << i)) == 0) {
  /* found a free temp */
  if (aactx->tmp0 < 0)
 aactx->tmp0 = i;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110337] Mesa 19.0.0(1) freeze system on Oland with amdgpu driver

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110337

Berg  changed:

   What|Removed |Added

   Priority|medium  |high
   Severity|normal  |critical

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110337] Mesa 19.0.0(1) freeze system on Oland with amdgpu driver

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110337

Berg  changed:

   What|Removed |Added

Summary|Mesa 19.0.0(1) freeze   |Mesa 19.0.0(1) freeze
   |system on Oland with amdgpu |system on Oland with amdgpu
   ||driver

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110337] Mesa 19.0.0(1) freeze system on Oland with amdgpu

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110337

Berg  changed:

   What|Removed |Added

Summary|Mesa 19.0.0(1)  |Mesa 19.0.0(1) freeze
   ||system on Oland with amdgpu

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110345] Unrecoverable GPU crash with DiRT 4

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110345

--- Comment #8 from Samuel Pitoiset  ---
Very nice, thanks for your time!

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] radeonsi: set AC_FUNC_ATTR_READNONE for image opcodes where it was missing

2019-04-11 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 4/11/19 3:30 AM, Marek Olšák wrote:

From: Marek Olšák 

---
  src/amd/common/ac_llvm_build.c| 1 +
  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 4 
  2 files changed, 5 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index a612dcefa1d..cfbca294c97 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -3293,20 +3293,21 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context 
*ctx, LLVMValueRef param,
   *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
   */
  void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
  LLVMValueRef *addr, bool is_array_tex)
  {
struct ac_image_args fmask_load = {};
fmask_load.opcode = ac_image_load;
fmask_load.resource = fmask;
fmask_load.dmask = 0xf;
fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
+   fmask_load.attributes = AC_FUNC_ATTR_READNONE;
  
  	fmask_load.coords[0] = addr[0];

fmask_load.coords[1] = addr[1];
if (is_array_tex)
fmask_load.coords[2] = addr[2];
  
  	LLVMValueRef fmask_value = ac_build_image_opcode(ac, _load);

fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
  ac->i32_0, "");
  
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c

index 727def56f65..ed67976b421 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1035,20 +1035,21 @@ static void resq_emit(
if (inst->Memory.Texture == TGSI_TEXTURE_3D)
target = TGSI_TEXTURE_2D_ARRAY;
else
target = inst->Memory.Texture;
}
  
  	struct ac_image_args args = {};

args.opcode = ac_image_get_resinfo;
args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
args.dmask = 0xf;
+   args.attributes = AC_FUNC_ATTR_READNONE;
  
  	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {

tex_fetch_ptrs(bld_base, emit_data, , NULL, NULL);
args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
} else {
image_fetch_rsrc(bld_base, reg, false, target, );
args.lod = ctx->i32_0;
}
  
  	emit_data->output[emit_data->chan] =

@@ -1265,20 +1266,21 @@ si_lower_gather4_integer(struct si_shader_context *ctx,
lp_build_if(_ctx, >gallivm, LLVMBuildNot(builder, 
wa_, ""));
}
  
  		/* Query the texture size. */

resinfo.opcode = ac_image_get_resinfo;
resinfo.dim = ac_texture_dim_from_tgsi_target(ctx->screen, 
target);
resinfo.resource = args->resource;
resinfo.sampler = args->sampler;
resinfo.lod = ctx->ac.i32_0;
resinfo.dmask = 0xf;
+   resinfo.attributes = AC_FUNC_ATTR_READNONE;
  
  		LLVMValueRef texsize =

fix_resinfo(ctx, target,
ac_build_image_opcode(>ac, ));
  
  		/* Compute -0.5 / size. */

for (unsigned c = 0; c < 2; c++) {
half_texel[c] =
LLVMBuildExtractElement(builder, texsize,
LLVMConstInt(ctx->i32, c, 0), 
"");
@@ -1769,20 +1771,22 @@ static void si_llvm_emit_fbfetch(const struct 
lp_build_tgsi_action *action,
fmask = ac_build_load_to_sgpr(>ac, ptr,
LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 
0));
  
  		ac_apply_fmask_to_sample(>ac, fmask, args.coords,

 
ctx->shader->key.mono.u.ps.fbfetch_layered);
}
  
  	args.opcode = ac_image_load;

args.resource = image;
args.dmask = 0xf;
+   args.attributes = AC_FUNC_ATTR_READNONE;
+
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
ac_image_2darraymsaa : ac_image_2dmsaa;
else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
ac_image_1darray : ac_image_1d;
else
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
ac_image_2darray : ac_image_2d;
  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] ac: use the common helper ac_apply_fmask_to_sample

2019-04-11 Thread Samuel Pitoiset


On 4/11/19 3:30 AM, Marek Olšák wrote:

From: Marek Olšák 

---
  src/amd/common/ac_nir_to_llvm.c | 70 +++--
  1 file changed, 5 insertions(+), 65 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3d2f738edec..3abde6e0969 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2323,92 +2323,32 @@ static int image_type_to_components_count(enum 
glsl_sampler_dim dim, bool array)
case GLSL_SAMPLER_DIM_SUBPASS:
return 2;
case GLSL_SAMPLER_DIM_SUBPASS_MS:
return 3;
default:
break;
}
return 0;
  }
  
-

-/* Adjust the sample index according to FMASK.
- *
- * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
- * which is the identity mapping. Each nibble says which physical sample
- * should be fetched to get that sample.
- *
- * For example, 0x1100 means there are only 2 samples stored and
- * the second sample covers 3/4 of the pixel. When reading samples 0
- * and 1, return physical sample 0 (determined by the first two 0s
- * in FMASK), otherwise return physical sample 1.
- *
- * The sample index should be adjusted as follows:
- *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
- */
  static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context 
*ctx,
LLVMValueRef coord_x, 
LLVMValueRef coord_y,
LLVMValueRef coord_z,
LLVMValueRef sample_index,
LLVMValueRef fmask_desc_ptr)
  {
-   struct ac_image_args args = {0};
-   LLVMValueRef res;
+   unsigned sample_chan = coord_z ? 3 : 2;
+   LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
+   addr[sample_chan] = sample_index;
  
-	args.coords[0] = coord_x;

-   args.coords[1] = coord_y;
-   if (coord_z)
-   args.coords[2] = coord_z;
-
-   args.opcode = ac_image_load;
-   args.dim = coord_z ? ac_image_2darray : ac_image_2d;
-   args.resource = fmask_desc_ptr;
-   args.dmask = 0xf;
-   args.attributes = AC_FUNC_ATTR_READNONE;
-
-   res = ac_build_image_opcode(ctx, );
-
-   res = ac_to_integer(ctx, res);
-   LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
-   LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
-
-   LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
-res,
-ctx->i32_0, "");
-
-   LLVMValueRef sample_index4 =
-   LLVMBuildMul(ctx->builder, sample_index, four, "");
-   LLVMValueRef shifted_fmask =
-   LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
-   LLVMValueRef final_sample =
-   LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");


The only difference is the mask: ac_apply_fmask_to_sample uses 0x7,
while this code uses 0xF.


According to the comment in that function, I assume 0x7 is the correct 
value?



-
-   /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
-* resource descriptor is 0 (invalid),
-*/
-   LLVMValueRef fmask_desc =
-   LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
-ctx->v8i32, "");
-
-   LLVMValueRef fmask_word1 =
-   LLVMBuildExtractElement(ctx->builder, fmask_desc,
-   ctx->i32_1, "");
-
-   LLVMValueRef word1_is_nonzero =
-   LLVMBuildICmp(ctx->builder, LLVMIntNE,
- fmask_word1, ctx->i32_0, "");
-
-   /* Replace the MSAA sample index. */
-   sample_index =
-   LLVMBuildSelect(ctx->builder, word1_is_nonzero,
-   final_sample, sample_index, "");
-   return sample_index;
+   ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
+   return addr[sample_chan];
  }
  
  static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)

  {
assert(instr->src[0].is_ssa);
return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
  }
  
  static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,

   const nir_intrinsic_instr *instr,

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: initialise some variables in opt_if_loop_last_continue()

2019-04-11 Thread Tapani Pälli

Reviewed-by: Tapani Pälli 

On 4/11/19 2:38 AM, Timothy Arceri wrote:

Fixes a couple of Coverity warnings CID 1444626.

Fixes: e30804c6024f ("nir/radv: remove restrictions on 
opt_if_loop_last_continue()")
---
  src/compiler/nir/nir_opt_if.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 713bdf0c38a..d0aaf9f7133 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -839,8 +839,8 @@ static bool
  opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
  {
 nir_if *nif;
-   bool then_ends_in_continue;
-   bool else_ends_in_continue;
+   bool then_ends_in_continue = false;
+   bool else_ends_in_continue = false;
  
 /* Scan the control flow of the loop from the last to the first node

  * looking for an if-statement we can optimise.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] panfrost: split asserts in pandecode

2019-04-11 Thread Tomeu Vizoso
Signed-off-by: Tomeu Vizoso 
---
 src/gallium/drivers/panfrost/pandecode/mmap.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pandecode/mmap.h 
b/src/gallium/drivers/panfrost/pandecode/mmap.h
index 1a208336e814..e9acae877f7f 100644
--- a/src/gallium/drivers/panfrost/pandecode/mmap.h
+++ b/src/gallium/drivers/panfrost/pandecode/mmap.h
@@ -54,9 +54,8 @@ __pandecode_fetch_gpu_mem(const struct 
pandecode_mapped_memory *mem,
 if (!mem)
 mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
 
-if (!mem ||
-size + (gpu_va - mem->gpu_va) > mem->length)
-assert(0);
+assert(mem);
+assert(size + (gpu_va - mem->gpu_va) <= mem->length);
 
 return mem->addr + gpu_va - mem->gpu_va;
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] panfrost: Guard against reading past end of buffer

2019-04-11 Thread Tomeu Vizoso
Signed-off-by: Tomeu Vizoso 
---
 src/gallium/drivers/panfrost/pan_swizzle.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/panfrost/pan_swizzle.c 
b/src/gallium/drivers/panfrost/pan_swizzle.c
index 291bd1f88aea..52a907ddd55d 100644
--- a/src/gallium/drivers/panfrost/pan_swizzle.c
+++ b/src/gallium/drivers/panfrost/pan_swizzle.c
@@ -97,6 +97,9 @@ swizzle_bpp1_align16(int width, int height, int 
source_stride, int block_pitch,
 
 ++y;
 
+if (y >= height)
+break;
+
 {
 int block_y = y & ~(0x0f);
 int rem_y = y & 0x0f;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: don't overwrite existing shader files with MESA_SHADER_CAPTURE_PATH

2019-04-11 Thread Tapani Pälli


On 4/11/19 3:32 AM, Marek Olšák wrote:

From: Marek Olšák 

---
  src/mesa/main/shaderapi.c | 20 +---
  1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 01342c04e8f..6b73e6c7e7a 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1233,24 +1233,38 @@ link_program(struct gl_context *ctx, struct 
gl_shader_program *shProg,
   if (shProg->_LinkedShaders[stage])
  prog = shProg->_LinkedShaders[stage]->Program;
  
   _mesa_use_program(ctx, stage, shProg, prog, ctx->_Shader);

}
 }
  
 /* Capture .shader_test files. */

 const char *capture_path = _mesa_get_shader_capture_path();
 if (shProg->Name != 0 && shProg->Name != ~0 && capture_path != NULL) {
-  FILE *file;
-  char *filename = ralloc_asprintf(NULL, "%s/%u.shader_test",
+  /* Find an unused filename. */
+  char *filename = NULL;
+  for (unsigned i = 0;; i++) {
+ if (i) {
+filename = ralloc_asprintf(NULL, "%s/%u-%u.shader_test",
+   capture_path, shProg->Name, i);
+ } else {
+filename = ralloc_asprintf(NULL, "%s/%u.shader_test",
 capture_path, shProg->Name);


How about just having the counter always there, to simplify a bit and 
have consistent filename scheme? Just a suggestion.



-  file = fopen(filename, "w");
+ }
+ FILE *file = fopen(filename, "r");
+ if (!file)
+break;


I'm surprised we don't have a helper like 'util_path_exists', but this
works. I guess we should then have 'util_path_isdir|isfile' and others
as well.


With or without the suggestion;
Reviewed-by: Tapani Pälli 


+ fclose(file);
+ ralloc_free(filename);
+  }
+
+  FILE *file = fopen(filename, "w");
if (file) {
   fprintf(file, "[require]\nGLSL%s >= %u.%02u\n",
   shProg->IsES ? " ES" : "",
   shProg->data->Version / 100, shProg->data->Version % 100);
   if (shProg->SeparateShader)
  fprintf(file, "GL_ARB_separate_shader_objects\nSSO ENABLED\n");
   fprintf(file, "\n");
  
   for (unsigned i = 0; i < shProg->NumShaders; i++) {

  fprintf(file, "[%s shader]\n%s\n",


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3] st/nine: skip position checks in SetCursorPosition()

2019-04-11 Thread Andre Heider
For HW cursors, "cursor.pos" doesn't hold the current position of the
pointer, just the position of the last call to SetCursorPosition().

Skip the check against stale values and bump the d3dadapter9 drm version
to expose this change of behaviour.

Signed-off-by: Andre Heider 
---

V3: improve version description in d3dadapter/drm.h

 include/d3dadapter/drm.h  | 7 +--
 src/gallium/state_trackers/nine/device9.c | 8 +---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/d3dadapter/drm.h b/include/d3dadapter/drm.h
index 647f017fc7f..6939dd4f239 100644
--- a/include/d3dadapter/drm.h
+++ b/include/d3dadapter/drm.h
@@ -29,11 +29,14 @@
 #define D3DADAPTER9DRM_NAME "drm"
 /* current version */
 #define D3DADAPTER9DRM_MAJOR 0
-#define D3DADAPTER9DRM_MINOR 1
+#define D3DADAPTER9DRM_MINOR 2
 
 /* version 0.0: Initial release
  * 0.1: All IDirect3D objects can be assumed to have a pointer to the
- *  internal vtable in second position of the structure */
+ *  internal vtable in second position of the structure
+ * 0.2: IDirect3DDevice9_SetCursorPosition always calls
+ *  ID3DPresent_SetCursorPos for hardware cursors
+ */
 
 struct D3DAdapter9DRM
 {
diff --git a/src/gallium/state_trackers/nine/device9.c 
b/src/gallium/state_trackers/nine/device9.c
index c777f843b67..f165f24ee46 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -793,9 +793,11 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This,
 
 DBG("This=%p X=%d Y=%d Flags=%d\n", This, X, Y, Flags);
 
-if (This->cursor.pos.x == X &&
-This->cursor.pos.y == Y)
-return;
+/* present >= v1.4 handles this itself */
+if (This->minor_version_num < 4) {
+if (This->cursor.pos.x == X && This->cursor.pos.y == Y)
+return;
+}
 
 This->cursor.pos.x = X;
 This->cursor.pos.y = Y;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110402] checking validations

2019-04-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110402

Bug ID: 110402
   Summary: checking validations
   Product: Mesa
   Version: 5.0.2
  Hardware: PowerPC
   URL: http://219.91.142.54:1515/GreyOrder
OS: Windows (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: rjrohan...@rediffmail.com
  Reporter: rjrohan...@rediffmail.com
QA Contact: mesa-dev@lists.freedesktop.org
CC: rjrohan...@rediffmail.com

Created attachment 143934
  --> https://bugs.freedesktop.org/attachment.cgi?id=143934&action=edit
validation not proper

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev