[Mesa-dev] [Bug 31598] configure: Doesn't check for python libxml2
https://bugs.freedesktop.org/show_bug.cgi?id=31598 LoneVVolf lonew...@xs4all.nl changed: What|Removed |Added Status|REOPENED|RESOLVED Resolution|--- |FIXED --- Comment #5 from LoneVVolf lonew...@xs4all.nl --- fixed with this commit : build: Fix build on systems where /usr/bin/python isn't python 2. http://cgit.freedesktop.org/mesa/mesa/commit/?id=121d19de920212225586c9269f2d34ab7e6e1aec -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/8] i965: reuse _mesa_sizeof_type for index buffer types.
On 01/23/2013 11:17 AM, Eric Anholt wrote: The core Mesa code has just one more case than this (GL_BITMAP), so I don't see any cause to special-case it. It also doesn't have the packed format support, but I guess that isn't relevant for index buffers (only vertex data). Might be worth a note, though. --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 26 ++ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 0775148..1c8ade5 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -32,6 +32,7 @@ #include main/context.h #include main/enums.h #include main/macros.h +#include main/glformats.h #include brw_draw.h #include brw_defines.h @@ -332,29 +333,6 @@ get_surface_type(struct intel_context *intel, GLenum type, GLuint size, } } - -static GLuint get_size( GLenum type ) -{ - switch (type) { - case GL_DOUBLE: return sizeof(GLdouble); - case GL_FLOAT: return sizeof(GLfloat); - case GL_HALF_FLOAT: return sizeof(GLhalfARB); - case GL_INT: return sizeof(GLint); - case GL_SHORT: return sizeof(GLshort); - case GL_BYTE: return sizeof(GLbyte); - case GL_UNSIGNED_INT: return sizeof(GLuint); - case GL_UNSIGNED_SHORT: return sizeof(GLushort); - case GL_UNSIGNED_BYTE: return sizeof(GLubyte); - case GL_FIXED: return sizeof(GLuint); - /* packed formats: always have 4 components, and element size is -* 4 bytes, so pretend each component is 1 byte. 
-*/ - case GL_INT_2_10_10_10_REV: return sizeof(GLbyte); - case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLubyte); - default: assert(0); return 0; - } -} - static GLuint get_index_type(GLenum type) { switch (type) { @@ -821,7 +799,7 @@ static void brw_upload_indices(struct brw_context *brw) if (index_buffer == NULL) return; - ib_type_size = get_size(index_buffer-type); + ib_type_size = _mesa_sizeof_type(index_buffer-type); ib_size = ib_type_size * index_buffer-count; bufferobj = index_buffer-obj; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/8] i965: Pass in the glarray to get_surface_type.
On 01/23/2013 11:17 AM, Eric Anholt wrote: Dereffing all the values in the two callers was just pointless, and the function isn't inlined so there was actual code impact. It makes sense not to inline it, since it's pretty big and there are already two callers (and my Gen8 branch adds two more). And wow, this patch makes the code so much easier to follow. Nice cleanups and a modest performance improvement to boot! For the series: Reviewed-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/drivers/dri/i965/brw_draw_upload.c | 51 --- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 78ff61f..ed3b378 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -223,16 +223,19 @@ static GLuint byte_types_scale[5] = { * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. */ static unsigned -get_surface_type(struct intel_context *intel, GLenum type, GLuint size, - GLenum format, bool normalized, bool integer) +get_surface_type(struct intel_context *intel, + const struct gl_client_array *glarray) { + int size = glarray-Size; + if (unlikely(INTEL_DEBUG DEBUG_VERTS)) printf(type %s size %d normalized %d\n, - _mesa_lookup_enum_by_nr(type), size, normalized); + _mesa_lookup_enum_by_nr(glarray-Type), + glarray-Size, glarray-Normalized); - if (integer) { - assert(format == GL_RGBA); /* sanity check */ - switch (type) { + if (glarray-Integer) { + assert(glarray-Format == GL_RGBA); /* sanity check */ + switch (glarray-Type) { case GL_INT: return int_types_direct[size]; case GL_SHORT: return short_types_direct[size]; case GL_BYTE: return byte_types_direct[size]; @@ -241,8 +244,8 @@ get_surface_type(struct intel_context *intel, GLenum type, GLuint size, case GL_UNSIGNED_BYTE: return ubyte_types_direct[size]; default: assert(0); return 0; } - } else if (normalized) { - switch (type) { + } else if 
(glarray-Normalized) { + switch (glarray-Type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; case GL_HALF_FLOAT: return half_float_types[size]; @@ -252,7 +255,7 @@ get_surface_type(struct intel_context *intel, GLenum type, GLuint size, case GL_UNSIGNED_INT: return uint_types_norm[size]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; case GL_UNSIGNED_BYTE: - if (format == GL_BGRA) { + if (glarray-Format == GL_BGRA) { /* See GL_EXT_vertex_array_bgra */ assert(size == 4); return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -268,7 +271,7 @@ get_surface_type(struct intel_context *intel, GLenum type, GLuint size, case GL_INT_2_10_10_10_REV: assert(size == 4); if (intel-gen = 8 || intel-is_haswell) { -return format == GL_BGRA +return glarray-Format == GL_BGRA ? BRW_SURFACEFORMAT_B10G10R10A2_SNORM : BRW_SURFACEFORMAT_R10G10B10A2_SNORM; } @@ -276,7 +279,7 @@ get_surface_type(struct intel_context *intel, GLenum type, GLuint size, case GL_UNSIGNED_INT_2_10_10_10_REV: assert(size == 4); if (intel-gen = 8 || intel-is_haswell) { -return format == GL_BGRA +return glarray-Format == GL_BGRA ? BRW_SURFACEFORMAT_B10G10R10A2_UNORM : BRW_SURFACEFORMAT_R10G10B10A2_UNORM; } @@ -290,25 +293,25 @@ get_surface_type(struct intel_context *intel, GLenum type, GLuint size, * like to use here, so upload everything as UINT and fix * it in the shader */ - if (type == GL_INT_2_10_10_10_REV) { + if (glarray-Type == GL_INT_2_10_10_10_REV) { assert(size == 4); if (intel-gen = 8 || intel-is_haswell) { -return format == GL_BGRA +return glarray-Format == GL_BGRA ? BRW_SURFACEFORMAT_B10G10R10A2_SSCALED : BRW_SURFACEFORMAT_R10G10B10A2_SSCALED; } return BRW_SURFACEFORMAT_R10G10B10A2_UINT; - } else if (type == GL_UNSIGNED_INT_2_10_10_10_REV) { + } else if (glarray-Type == GL_UNSIGNED_INT_2_10_10_10_REV) { assert(size == 4); if (intel-gen = 8 || intel-is_haswell) { -return format == GL_BGRA +return glarray-Format == GL_BGRA ? 
BRW_SURFACEFORMAT_B10G10R10A2_USCALED : BRW_SURFACEFORMAT_R10G10B10A2_USCALED; } return BRW_SURFACEFORMAT_R10G10B10A2_UINT; } - assert(format == GL_RGBA); /* sanity check */ - switch (type) { + assert(glarray-Format == GL_RGBA); /*
[Mesa-dev] [PATCH 2/3] R600: optimize structurizer a bit
From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com Tested-by: Michel Dänzer michel.daen...@amd.com --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 71 +++--- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 5be40de..70622e7 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -204,51 +204,33 @@ void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx, Value *True = Invert ? BoolFalse : BoolTrue; Value *False = Invert ? BoolTrue : BoolFalse; - RegionInfo *RI = ParentRegion-getRegionInfo(); - BasicBlock *BB = Term-getParent(); - - // Handle the case where multiple regions start at the same block - Region *R = BB != ParentRegion-getEntry() ? - RI-getRegionFor(BB) : ParentRegion; + BasicBlock *Parent = Term-getParent(); - if (R == ParentRegion) { -// It's a top level block in our region -Value *Cond = True; -if (Term-isConditional()) { - BasicBlock *Other = Term-getSuccessor(!Idx); + Value *Cond = True; + if (Term-isConditional()) { +BasicBlock *Other = Term-getSuccessor(!Idx); - if (Visited.count(Other)) { -if (!Pred.count(Other)) - Pred[Other] = False; - -if (!Pred.count(BB)) - Pred[BB] = True; -return; - } - Cond = Term-getCondition(); +if (Visited.count(Other)) { + if (!Pred.count(Other)) +Pred[Other] = False; - if (Idx != Invert) -Cond = BinaryOperator::CreateNot(Cond, , Term); + if (!Pred.count(Parent)) +Pred[Parent] = True; + return; } +Cond = Term-getCondition(); -Pred[BB] = Cond; - - } else if (ParentRegion-contains(R)) { -// It's a block in a sub region -while(R-getParent() != ParentRegion) - R = R-getParent(); - -Pred[R-getEntry()] = True; - - } else { -// It's a branch from outside into our parent region -Pred[BB] = True; +if (Idx != Invert) + Cond = BinaryOperator::CreateNot(Cond, , Term); } + + Pred[Parent] = Cond; } /// \brief Analyze 
the successors of each block and build up predicates void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + RegionInfo *RI = ParentRegion-getRegionInfo(); BBPredicates Pred = Predicates[BB]; for (; PI != PE; ++PI) { @@ -263,7 +245,26 @@ void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { BasicBlock *Succ = Term-getSuccessor(i); if (Succ != BB) continue; - buildPredicate(Term, i, Pred, false); + + // Handle the case where multiple regions start at the same block + Region *R = *PI != ParentRegion-getEntry() ? + RI-getRegionFor(*PI) : ParentRegion; + + if (R == ParentRegion) { +// It's a top level block in our region +buildPredicate(Term, i, Pred, false); + + } else if (ParentRegion-contains(R)) { +// It's a block in a sub region +while(R-getParent() != ParentRegion) + R = R-getParent(); + +Pred[R-getEntry()] = BoolTrue; + + } else { +// It's a branch from outside into our parent region +Pred[*PI] = BoolTrue; + } } } } -- 1.7.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] R600: handle loops to self in the structurizer v2
v2: don't mess up other loops Signed-off-by: Christian König deathsim...@vodafone.de Tested-by: Michel Dänzer michel.daen...@amd.com --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 22338b5..5be40de 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -252,6 +252,11 @@ void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { BBPredicates Pred = Predicates[BB]; for (; PI != PE; ++PI) { + +// Ignore self loops +if (*PI == BB) + continue; + BranchInst *Term = castBranchInst((*PI)-getTerminator()); for (unsigned i = 0, e = Term-getNumSuccessors(); i != e; ++i) { @@ -296,7 +301,9 @@ void AMDGPUStructurizeCFG::collectInfos() { LoopPred.clear(); RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); - for (Visited.clear(); OI != OE; Visited[(*OI++)-getEntry()] = ++Number) { + for (Visited.clear(); OI != OE; ++OI) { + +Visited[(*OI)-getEntry()] = ++Number; // Analyze all the conditions leading to a node analyzeBlock((*OI)-getEntry()); @@ -568,6 +575,8 @@ void AMDGPUStructurizeCFG::createFlow() { Predicates[Split] = Predicates[Prev]; Order.push_back(ParentRegion-getBBNode(Split)); LoopPred[Prev] = BoolTrue; +if (LoopEnd == Prev) + LoopEnd = Split; } else if (LoopStart == Order.back()-getEntry()) { // Loop starts behind entry, split entry so that we can jump to it -- 1.7.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] R600: Structurizer fixes/optimizations
Hi Tom, the following patches should fix the known issues with the structurizer and also optimize it a bit more. The first two are already tested by Michel. I tested the last one with radeonsi and it doesn't seem to regress anything and actually fixes four more piglit tests. Please apply to your llvm master branch. Cheers, Christian. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] R600: fix assumption in the CFG structurizers loop handling
From: Christian König christian.koe...@amd.com The loop handling in the CFG structurizer incorrectly assumed that only BasicBlock nodes can have a back edge, but that is also possible for the exit edges of subregions. Fixing 4 more piglit tests on radeonsi. Signed-off-by: Christian König christian.koe...@amd.com --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 135 ++ 1 file changed, 81 insertions(+), 54 deletions(-) diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 70622e7..9528fc2 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -120,9 +120,9 @@ class AMDGPUStructurizeCFG : public RegionPass { void buildPredicate(BranchInst *Term, unsigned Idx, BBPredicates Pred, bool Invert); - void analyzeBlock(BasicBlock *BB); + void analyzeNode(RegionNode *N); - void analyzeLoop(BasicBlock *BB, unsigned LoopIdx); + void analyzeLoop(RegionNode *N); void collectInfos(); @@ -227,72 +227,92 @@ void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx, Pred[Parent] = Cond; } -/// \brief Analyze the successors of each block and build up predicates -void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); +/// \brief Analyze the predecessors of each block and build up predicates +void AMDGPUStructurizeCFG::analyzeNode(RegionNode *N) { RegionInfo *RI = ParentRegion-getRegionInfo(); + BasicBlock *BB = N-getEntry(); BBPredicates Pred = Predicates[BB]; - for (; PI != PE; ++PI) { + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + +// Handle the case where multiple regions start at the same block +Region *R = *PI != ParentRegion-getEntry() ? 
+RI-getRegionFor(*PI) : ParentRegion; -// Ignore self loops -if (*PI == BB) +// Edge from inside a subregion to its entry, ignore it +if (R == N) continue; -BranchInst *Term = castBranchInst((*PI)-getTerminator()); +if (R == ParentRegion) { -for (unsigned i = 0, e = Term-getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term-getSuccessor(i); - if (Succ != BB) -continue; + // It's a top level block in our region + BranchInst *Term = castBranchInst((*PI)-getTerminator()); + for (unsigned i = 0, e = Term-getNumSuccessors(); i != e; ++i) { +BasicBlock *Succ = Term-getSuccessor(i); +if (Succ != BB) + continue; + +// Ignore self loops +if (*PI != BB) + buildPredicate(Term, i, Pred, false); + +if (!Visited.count(*PI)) { + if (!LoopStart) +LoopStart = BB; - // Handle the case where multiple regions start at the same block - Region *R = *PI != ParentRegion-getEntry() ? - RI-getRegionFor(*PI) : ParentRegion; + buildPredicate(Term, i, LoopPred, true); +} + } - if (R == ParentRegion) { -// It's a top level block in our region -buildPredicate(Term, i, Pred, false); +} else if (ParentRegion-contains(R)) { - } else if (ParentRegion-contains(R)) { -// It's a block in a sub region -while(R-getParent() != ParentRegion) - R = R-getParent(); + // It's an exit from a sub region + while(R-getParent() != ParentRegion) +R = R-getParent(); -Pred[R-getEntry()] = BoolTrue; + BasicBlock *Entry = R-getEntry(); + Pred[Entry] = BoolTrue; + if (!Visited.count(Entry)) { +if (!LoopStart) + LoopStart = BB; - } else { -// It's a branch from outside into our parent region -Pred[*PI] = BoolTrue; +LoopPred[Entry] = BoolFalse; } + +} else { + // It's a branch from outside into our entry region + Pred[*PI] = BoolTrue; } } } -/// \brief Analyze the conditions leading to loop to a previous block -void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned LoopIdx) { - BranchInst *Term = castBranchInst(BB-getTerminator()); +/// \brief Determine the end of the loop +void 
AMDGPUStructurizeCFG::analyzeLoop(RegionNode *N) { - for (unsigned i = 0, e = Term-getNumSuccessors(); i != e; ++i) { -BasicBlock *Succ = Term-getSuccessor(i); + if (N-isSubRegion()) { +// Test for exit as back edge +BasicBlock *Exit = N-getNodeAsRegion()-getExit(); +if (Visited.count(Exit)) + LoopEnd = N-getEntry(); -// Ignore it if it's not a back edge -if (!Visited.count(Succ)) - continue; + } else { +// Test for sucessors as back edge +BasicBlock *BB = N-getNodeAsBasicBlock(); +BranchInst *Term = castBranchInst(BB-getTerminator()); -buildPredicate(Term, i, LoopPred, true); +for (unsigned i = 0, e = Term-getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term-getSuccessor(i);
[Mesa-dev] mesa/gles3: dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3'
Hi Ian, due to the changes in... e90c08e dri: Define enum __DRI_API_GLES3 ...I get this breakage with mesa-gles3-git6f3caaf: make[7]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mesa/drivers/dri/common' CC utils.lo CC dri_util.lo CC libdri_test_stubs_la-dri_test.lo CC xmlconfig.lo CCLD libdri_test_stubs.la dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3' case __DRI_API_GLES3: ^ 1 error generated. make[7]: *** [dri_util.lo] Error 1 Do I need a different DRI/DRI2 proto? From which fdo GIT tree? If YES, can you bump the required version in configure.ac? Thanks! Regards, - Sedat - P.S.: Check installed DRI/DRI2 proto version $ dpkg -l | grep dri | grep proto | grep -e \-dev ii x11proto-dri2-dev 2.8-1~precise1 X11 DRI2 extension wire protocol - EOT - ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] mesa/gles3: dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3'
Hmm, switching to Ubuntu's distro-kernel, removing /opt/xorg and building the XORG stuff from scratch... 1. libdrm-git 2. mesa-git 3. xf86-video-intel-git ...makes the errors go away. OK, I had in my 3.8-rc4 kernel drm-intel-nightly integrated, but no linux-headers package of this kernel installed. Looks good. $ sudo grep -A4 'LoadModule: intel' /var/log/Xorg.0.log [15.913] (II) LoadModule: intel [15.913] (II) Loading /usr/lib/x86_64-linux-gnu/xorg/extra-modules/intel_drv.so [15.957] (II) Module intel: vendor=X.Org Foundation [15.957]compiled for 1.11.3, module version = 2.20.19 [15.957]Module class: X.Org Video Driver $ LIBGL_DEBUG=verbose glxinfo 2/dev/null | grep -i opengl OpenGL vendor string: Intel Open Source Technology Center OpenGL renderer string: Mesa DRI Intel(R) Sandybridge Mobile OpenGL version string: 3.0 Mesa 9.1-devel (git-6f3caaf) --- VERSION 3.0! OpenGL shading language version string: 1.30 OpenGL extensions: - Sedat - On Thu, Jan 24, 2013 at 11:14 AM, Sedat Dilek sedat.di...@gmail.com wrote: Hi Ian, due to the changes in... e90c08e dri: Define enum __DRI_API_GLES3 ...I get this breakage with mesa-gles3-git6f3caaf: make[7]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mesa/drivers/dri/common' CC utils.lo CC dri_util.lo CC libdri_test_stubs_la-dri_test.lo CC xmlconfig.lo CCLD libdri_test_stubs.la dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3' case __DRI_API_GLES3: ^ 1 error generated. make[7]: *** [dri_util.lo] Error 1 Do I need a different DRI/DRI2 proto? From which fdo GIT tree? If YES, can you bump the required version in configure.ac? Thanks! Regards, - Sedat - P.S.: Check installed DRI/DRI2 proto version $ dpkg -l | grep dri | grep proto | grep -e \-dev ii x11proto-dri2-dev 2.8-1~precise1 X11 DRI2 extension wire protocol - EOT - ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] R600: Do not fold vector inst
On Thu, Jan 24, 2013 at 01:15:19AM +0100, Vincent Lejeune wrote: Reviewed-by: Tom Stellard thomas.stell...@amd.com --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 84223f6..b08d39f 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -272,7 +272,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) { const R600InstrInfo *TII = static_castconst R600InstrInfo*(TM.getInstrInfo()); -if (Result Result-isMachineOpcode() +if (Result Result-isMachineOpcode() +!(TII-get(Result-getMachineOpcode()).TSFlags R600_InstFlag::VECTOR) TII-isALUInstr(Result-getMachineOpcode())) { // Fold FNEG/FABS/CONST_ADDRESS // TODO: Isel can generate multiple MachineInst, we need to recursively -- 1.8.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] R600: Fold remaining CONST_COPY after expand pseudo inst
On Thu, Jan 24, 2013 at 01:16:35AM +0100, Vincent Lejeune wrote: --- lib/Target/R600/AMDGPUTargetMachine.cpp | 2 +- lib/Target/R600/R600LowerConstCopy.cpp | 167 +--- 2 files changed, 157 insertions(+), 12 deletions(-) diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 7b069e7..2185be3 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -136,8 +136,8 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); -addPass(createR600LowerConstCopy(*TM)); addPass(FinalizeMachineBundlesID); +addPass(createR600LowerConstCopy(*TM)); } else { addPass(createSILowerLiteralConstantsPass(*TM)); addPass(createSILowerControlFlowPass(*TM)); diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp index d14ae20..9a60438 100644 --- a/lib/Target/R600/R600LowerConstCopy.cpp +++ b/lib/Target/R600/R600LowerConstCopy.cpp @@ -13,7 +13,6 @@ /// fold them inside vector instruction, like DOT4 or Cube ; ISel emits /// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try /// to fold them if possible or replace them by MOV otherwise. -/// TODO : Implement the folding part, using Copy Propagation algorithm. 
// //===--===// @@ -30,6 +29,13 @@ class R600LowerConstCopy : public MachineFunctionPass { private: static char ID; const R600InstrInfo *TII; + + struct ConstPairs { +unsigned XYPair; +unsigned ZWPair; + }; + + bool canFoldInBundle(ConstPairs UsedConst, unsigned ReadConst) const; public: R600LowerConstCopy(TargetMachine tm); virtual bool runOnMachineFunction(MachineFunction MF); @@ -39,27 +45,166 @@ public: char R600LowerConstCopy::ID = 0; - R600LowerConstCopy::R600LowerConstCopy(TargetMachine tm) : MachineFunctionPass(ID), TII (static_castconst R600InstrInfo *(tm.getInstrInfo())) { } +bool R600LowerConstCopy::canFoldInBundle(ConstPairs UsedConst, +unsigned ReadConst) const { + unsigned ReadConstChan = ReadConst 3; + unsigned ReadConstIndex = ReadConst (~3); + if (ReadConstChan 2) { +if (!UsedConst.XYPair) { + UsedConst.XYPair = ReadConstIndex; +} +return UsedConst.XYPair == ReadConstIndex; + } else { +if (!UsedConst.ZWPair) { + UsedConst.ZWPair = ReadConstIndex; +} +return UsedConst.ZWPair == ReadConstIndex; + } +} + +static bool isControlFlow(const MachineInstr MI) { + return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) || + (MI.getOpcode() == AMDGPU::ENDIF) || + (MI.getOpcode() == AMDGPU::ELSE) || + (MI.getOpcode() == AMDGPU::WHILELOOP) || + (MI.getOpcode() == AMDGPU::BREAK); +} + bool R600LowerConstCopy::runOnMachineFunction(MachineFunction MF) { + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { MachineBasicBlock MBB = *BB; -for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E;) { - MachineInstr MI = *I; - I = llvm::next(I); - if (MI.getOpcode() != AMDGPU::CONST_COPY) +DenseMapunsigned, MachineInstr * RegToConstIndex; +for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), +E = MBB.instr_end(); I != E;) { + + if (I-getOpcode() == AMDGPU::CONST_COPY) { +MachineInstr MI = *I; +I = llvm::next(I); +unsigned DstReg = MI.getOperand(0).getReg(); +DenseMapunsigned, MachineInstr *::iterator SrcMI = 
+RegToConstIndex.find(DstReg); +if (SrcMI != RegToConstIndex.end()) { + SrcMI-second-eraseFromParent(); + RegToConstIndex.erase(SrcMI); +} +MachineInstr *NewMI = +TII-buildDefaultInstruction(MBB, MI, AMDGPU::MOV, +MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); +TII-setImmOperand(NewMI, R600Operands::SRC0_SEL, +MI.getOperand(1).getImm()); +RegToConstIndex[DstReg] = NewMI; +MI.eraseFromParent(); continue; - MachineInstr *NewMI = TII-buildDefaultInstruction(MBB, I, AMDGPU::MOV, - MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); - NewMI-getOperand(9).setImm(MI.getOperand(1).getImm()); - MI.eraseFromParent(); + } + + std::vectorunsigned Defs; + // We consider all Instructions as bundled because algorithm that handle + // const read port limitations inside an IG is still valid with single + // instructions. + std::vectorMachineInstr * Bundle; + + if (I-isBundle()) { +
Re: [Mesa-dev] [PATCH] r600g/llvm: Fix for bug 59588 (llvm rv780 etqw gpu lock
Hi Vincent, For bug fixes, the Mesa convention is to use a normal commit message describing what the change does and then put a link to the bug at the end of the message. See for example: http://cgit.freedesktop.org/mesa/mesa/commit/?id=728bf86a23f6de137c0871ea87b09e75e55468a9 On Thu, Jan 24, 2013 at 04:07:19PM +0100, Vincent Lejeune wrote: --- src/gallium/drivers/r600/r600_llvm.c | 38 ++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 32b8e56..d28182b 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -329,6 +329,30 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) break; } } + if (!next_param) { + //Dummy export Can you use C style comments here /* */ and other places in this patch. + LLVMValueRef args[3]; + args[0] = LLVMGetUndef(LLVMVectorType(bld_base-base.elem_type, 4)); + args[1] = lp_build_const_int32(base-gallivm, 0); + args[2] = lp_build_const_int32(base-gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); + build_intrinsic( + base-gallivm-builder, + llvm.R600.store.swizzle, + LLVMVoidTypeInContext(base-gallivm-context), + args, 3, 0); + } + if (!next_pos) { + //Dummy export + LLVMValueRef args[3]; + args[0] = LLVMGetUndef(LLVMVectorType(bld_base-base.elem_type, 4)); + args[1] = lp_build_const_int32(base-gallivm, 0); + args[2] = lp_build_const_int32(base-gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); + build_intrinsic( + base-gallivm-builder, + llvm.R600.store.swizzle, + LLVMVoidTypeInContext(base-gallivm-context), + args, 3, 0); + } } else if (ctx-type == TGSI_PROCESSOR_FRAGMENT) { switch (ctx-r600_outputs[i].name) { case TGSI_SEMANTIC_COLOR: @@ -373,10 +397,20 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) break; } } + if (!has_color) { + //Dummy export + LLVMValueRef args[3]; + args[0] = LLVMGetUndef(LLVMVectorType(bld_base-base.elem_type, 4)); + args[1] 
= lp_build_const_int32(base-gallivm, 0); + args[2] = lp_build_const_int32(base-gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); + build_intrinsic( + base-gallivm-builder, + llvm.R600.store.swizzle, + LLVMVoidTypeInContext(base-gallivm-context), + args, 3, 0); + } } - if (!has_color ctx-type == TGSI_PROCESSOR_FRAGMENT) - lp_build_intrinsic(base-gallivm-builder, llvm.R600.store.pixel.dummy, LLVMVoidTypeInContext(base-gallivm-context), 0, 0); } static void llvm_emit_tex( -- 1.8.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59534] Automake Regression: Erroneous errors messages printed when using clover
https://bugs.freedesktop.org/show_bug.cgi?id=59534 Tom Stellard tstel...@gmail.com changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Tom Stellard tstel...@gmail.com --- Fixed by commit cf69a591e1ad16b590c9ae2eba0da6fa6c4fc741 -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59334] Automake Regression - Clover: Assertion failures when loading pipe_r600.so driver
https://bugs.freedesktop.org/show_bug.cgi?id=59334 Tom Stellard tstel...@gmail.com changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #5 from Tom Stellard tstel...@gmail.com --- Fixed by commit cf69a591e1ad16b590c9ae2eba0da6fa6c4fc741 -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59304] Meta Bug for regressions caused by automake conversion
https://bugs.freedesktop.org/show_bug.cgi?id=59304 Bug 59304 depends on bug 59334, which changed state. Bug 59334 Summary: Automake Regression - Clover: Assertion failures when loading pipe_r600.so driver https://bugs.freedesktop.org/show_bug.cgi?id=59334 What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59238] many new symbols in libxatracker after recent automake work
https://bugs.freedesktop.org/show_bug.cgi?id=59238 Fabio Pedretti fabio@libero.it changed: What|Removed |Added Summary|many new symbols after |many new symbols in |recent automake work|libxatracker after recent ||automake work --- Comment #5 from Fabio Pedretti fabio@libero.it --- This problem looks related to libxatracker only. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: callocing a 32 byte temp is silly, so don't
On Wed, Jan 23, 2013 at 8:36 PM, Ian Romanick i...@freedesktop.org wrote: From: Ian Romanick ian.d.roman...@intel.com I believe that the size used to vary, so the dynamic allocation was necessary. Signed-off-by: Ian Romanick ian.d.roman...@intel.com --- src/mesa/drivers/dri/intel/intel_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index ab97d66..3aa35e6 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -984,8 +984,8 @@ intel_query_dri2_buffers(struct intel_context *intel, __DRIscreen *screen = intel-intelScreen-driScrnPriv; struct gl_framebuffer *fb = drawable-driverPrivate; int i = 0; - const int max_attachments = 4; - unsigned *attachments = calloc(2 * max_attachments, sizeof(unsigned)); + unsigned attachments[8]; + const int max_attachments = ARRAY_SIZE(attachments) / 2; struct intel_renderbuffer *front_rb; struct intel_renderbuffer *back_rb; @@ -993,6 +993,7 @@ intel_query_dri2_buffers(struct intel_context *intel, front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); + memset(attachments, 0, sizeof(attachments)); if ((intel-is_front_buffer_rendering || intel-is_front_buffer_reading || !back_rb) front_rb) { @@ -1013,7 +1014,6 @@ intel_query_dri2_buffers(struct intel_context *intel, attachments, i / 2, buffer_count, drawable-loaderPrivate); - free(attachments); } /** -- 1.7.11.7 Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] mesa/gles3: dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3'
FYI, The enum is defined in mesa itself at #MESA_SOURCE/include/GL/internal/dri_interface.h. Your problem was independent of dri2proto, kernel, drm, etc. I expect your build environment was accidentally #including the header located in /usr/include/GL/internal rather than the one in the mesa source tree. On 01/24/2013 04:33 AM, Sedat Dilek wrote: Hmm, switching to Ubuntu's distro-kernel, removing /opt/xorg and building the XORG stuff from scratch... 1. libdrm-git 2. mesa-git 3. xf86-video-intel-git ...makes the errors go away. OK, I had in my 3.8-rc4 kernel drm-intel-nightly integrated, but no linux-headers package of this kernel installed. Looks good. $ sudo grep -A4 'LoadModule: intel' /var/log/Xorg.0.log [15.913] (II) LoadModule: intel [15.913] (II) Loading /usr/lib/x86_64-linux-gnu/xorg/extra-modules/intel_drv.so [15.957] (II) Module intel: vendor=X.Org Foundation [15.957]compiled for 1.11.3, module version = 2.20.19 [15.957]Module class: X.Org Video Driver $ LIBGL_DEBUG=verbose glxinfo 2>/dev/null | grep -i opengl OpenGL vendor string: Intel Open Source Technology Center OpenGL renderer string: Mesa DRI Intel(R) Sandybridge Mobile OpenGL version string: 3.0 Mesa 9.1-devel (git-6f3caaf) --- VERSION 3.0! OpenGL shading language version string: 1.30 OpenGL extensions: - Sedat - On Thu, Jan 24, 2013 at 11:14 AM, Sedat Dilek sedat.di...@gmail.com wrote: Hi Ian, due to the changes in... e90c08e dri: Define enum __DRI_API_GLES3 ...I get this breakage with mesa-gles3-git6f3caaf: make[7]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mesa/drivers/dri/common' CC utils.lo CC dri_util.lo CC libdri_test_stubs_la-dri_test.lo CC xmlconfig.lo CCLD libdri_test_stubs.la dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3' case __DRI_API_GLES3: ^ 1 error generated. make[7]: *** [dri_util.lo] Error 1 Do I need a different DRI/DRI2 proto? From which fdo GIT tree? If YES, can you bump the required version in configure.ac? Thanks! 
Regards, - Sedat - P.S.: Check installed DRI/DRI2 proto version $ dpkg -l | grep dri | grep proto | grep -e \-dev ii x11proto-dri2-dev 2.8-1~precise1 X11 DRI2 extension wire protocol - EOT - ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Use GL_RED for DEPTH_TEXTURE_MODE in ES 3.0 for unsized formats.
Kenneth Graunke kenn...@whitecape.org writes: On 01/23/2013 10:28 PM, Ian Romanick wrote: On 01/23/2013 06:27 PM, Kenneth Graunke wrote: Khronos has apparently decided that depth textures with sized formats (allowed with ARB_internalformat_query or ES 3.0) should be treated as GL_RED, while unsized formats (an existing feature) should be treated as GL_INTENSITY for compatibility with ES 2.0. Ian is proposing changes to ARB_internalformat_query which will make this actually legal and consistent. A similar problem exists with GL 4.2, but we're going to ignore that for the time being. Tested on Ivybridge: no Piglit regressions; fixes 4 es3conform tests: - depth_texture_fbo - depth_texture_fbo_clear - depth_texture_teximage - depth_texture_texsubimage Cc: Ian Romanick i...@freedesktop.org We probably could have done this in core Mesa, but I think this is okay for now. I pushed a version of this patch with my R-b to the gles3 branch. Yeah, I was going to make a core Mesa helper function until I realized it was so little code. Except it's yet more code in our drawing path, when we could initialize it once at teximage time, right? pgpzL20s1Ci5o.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Fix ReadPixels on buffers whose width >= 32kbytes
On Thu, Jan 24, 2013 at 2:10 PM, Paul Berry stereotype...@gmail.com wrote: When possible, glReadPixels calls are performed using the hardware blitter. However, according to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data Size Limitations): The BLT engine is capable of transferring very large quantities of graphics data. Any graphics data read from and written to the destination is permitted to represent a number of pixels that occupies up to 65,536 scan lines and up to 32,768 bytes per scan line at the destination. The maximum number of pixels that may be represented per scan line’s worth of graphics data depends on the color depth. With an RGBA32F color buffer (which has 16 bytes per pixel) this imposes a maximum width of 2048 pixels. To make matters worse, if the pitch of the buffer is 32k or greater, intel_miptree_map_blit's call to intelEmitCopyBlit will overflow intelEmitCopyBlit's src_pitch and dst_pitch parameters (which are 16-bit signed integers). We can conveniently avoid both problems by avoiding the readpixels blit path when the miptree's pitch is = 32k. Fixes gles3conform half_float tests when the buffer width is greater than 2048. --- src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index ce03afa..f2571bd 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -1568,7 +1568,8 @@ intel_miptree_map_singlesample(struct intel_context *intel, } else if (intel-has_llc !(mode GL_MAP_WRITE_BIT) !mt-compressed - mt-region-tiling == I915_TILING_X) { + mt-region-tiling == I915_TILING_X + mt-region-pitch 32768) { You may want to put a comment here about why you have this pitch check. 
Alex intel_miptree_map_blit(intel, mt, map, level, slice); } else { intel_miptree_map_gtt(intel, mt, map, level, slice); -- 1.8.1.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Fix ReadPixels on buffers whose width >= 32kbytes
Yeah, you're right. I was being lazy. How's this: /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics * Data Size Limitations): * *The BLT engine is capable of transferring very large quantities of *graphics data. Any graphics data read from and written to the *destination is permitted to represent a number of pixels that *occupies up to 65,536 scan lines and up to 32,768 bytes per scan line *at the destination. The maximum number of pixels that may be *represented per scan line’s worth of graphics data depends on the *color depth. * * Furthermore, intelEmitCopyBlit (which is called by * intel_miptree_map_blit) uses a signed 16-bit integer to represent buffer * pitch, so it can only handle buffer pitches 32k. * * As a result of these two limitations, we can only use * intel_miptree_map_blit() when the region's pitsh is less than 32k. */ On 24 January 2013 11:22, Alex Deucher alexdeuc...@gmail.com wrote: On Thu, Jan 24, 2013 at 2:10 PM, Paul Berry stereotype...@gmail.com wrote: When possible, glReadPixels calls are performed using the hardware blitter. However, according to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data Size Limitations): The BLT engine is capable of transferring very large quantities of graphics data. Any graphics data read from and written to the destination is permitted to represent a number of pixels that occupies up to 65,536 scan lines and up to 32,768 bytes per scan line at the destination. The maximum number of pixels that may be represented per scan line’s worth of graphics data depends on the color depth. With an RGBA32F color buffer (which has 16 bytes per pixel) this imposes a maximum width of 2048 pixels. To make matters worse, if the pitch of the buffer is 32k or greater, intel_miptree_map_blit's call to intelEmitCopyBlit will overflow intelEmitCopyBlit's src_pitch and dst_pitch parameters (which are 16-bit signed integers). 
We can conveniently avoid both problems by avoiding the readpixels blit path when the miptree's pitch is = 32k. Fixes gles3conform half_float tests when the buffer width is greater than 2048. --- src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index ce03afa..f2571bd 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -1568,7 +1568,8 @@ intel_miptree_map_singlesample(struct intel_context *intel, } else if (intel-has_llc !(mode GL_MAP_WRITE_BIT) !mt-compressed - mt-region-tiling == I915_TILING_X) { + mt-region-tiling == I915_TILING_X + mt-region-pitch 32768) { You may want to put a comment here about why you have this pitch check. Alex intel_miptree_map_blit(intel, mt, map, level, slice); } else { intel_miptree_map_gtt(intel, mt, map, level, slice); -- 1.8.1.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Fix ReadPixels on buffers whose width >= 32kbytes
Paul Berry stereotype...@gmail.com writes: When possible, glReadPixels calls are performed using the hardware blitter. However, according to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data Size Limitations): The BLT engine is capable of transferring very large quantities of graphics data. Any graphics data read from and written to the destination is permitted to represent a number of pixels that occupies up to 65,536 scan lines and up to 32,768 bytes per scan line at the destination. The maximum number of pixels that may be represented per scan line’s worth of graphics data depends on the color depth. With an RGBA32F color buffer (which has 16 bytes per pixel) this imposes a maximum width of 2048 pixels. To make matters worse, if the pitch of the buffer is 32k or greater, intel_miptree_map_blit's call to intelEmitCopyBlit will overflow intelEmitCopyBlit's src_pitch and dst_pitch parameters (which are 16-bit signed integers). We can conveniently avoid both problems by avoiding the readpixels blit path when the miptree's pitch is >= 32k. Fixes gles3conform half_float tests when the buffer width is greater than 2048. Seconding that this comment in some way should make it into the code. One possibility that would let the spec cite live in an appropriate place would be to move this test into intel_miptree_map_blit() and fall back to calling intel_miptree_map_gtt() and returning. It would also mean we could replace the "Failed to blit" there with that same fallback path, which would be kind of neat even though I haven't seen it get hit. I'd actually most like to get the copy blit function's args promoted to ints from shorts, and have checks in that function for the limits. But I don't want to block this fix on that. 
But even if the spec cite is just added right into this patch as-is, Reviewed-by: Eric Anholt e...@anholt.net pgph7aM3clxFa.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] glsl: add new glsl_strtof() function
On 01/23/2013 12:16 PM, Brian Paul wrote: Note, we could alternately implement this in terms of glsl_strtod() with a (float) cast. Series is Reviewed-by: Ian Romanick ian.d.roman...@intel.com --- src/glsl/strtod.c | 22 ++ src/glsl/strtod.h |3 +++ 2 files changed, 25 insertions(+), 0 deletions(-) diff --git a/src/glsl/strtod.c b/src/glsl/strtod.c index 47c1f0e..46f4dc5 100644 --- a/src/glsl/strtod.c +++ b/src/glsl/strtod.c @@ -55,3 +55,25 @@ glsl_strtod(const char *s, char **end) return strtod(s, end); #endif } + + +/** + * Wrapper around strtod which uses the C locale so the decimal + * point is always '.' + */ +float +glsl_strtof(const char *s, char **end) +{ +#if defined(_GNU_SOURCE) !defined(__CYGWIN__) !defined(__FreeBSD__) \ + !defined(__HAIKU__) !defined(__UCLIBC__) + static locale_t loc = NULL; + if (!loc) { + loc = newlocale(LC_CTYPE_MASK, C, NULL); + } + return strtof_l(s, end, loc); +#elif _XOPEN_SOURCE = 600 || _ISOC99_SOURCE + return strtof(s, end); +#else + return (float) strtod(s, end); +#endif +} diff --git a/src/glsl/strtod.h b/src/glsl/strtod.h index 0cf6409..ad847db 100644 --- a/src/glsl/strtod.h +++ b/src/glsl/strtod.h @@ -34,6 +34,9 @@ extern C { extern double glsl_strtod(const char *s, char **end); +extern float +glsl_strtof(const char *s, char **end); + #ifdef __cplusplus } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59187] [Steam] Black screen but audio song On TF2 (Intel HM 55/ Ironlake Mobile)
https://bugs.freedesktop.org/show_bug.cgi?id=59187 pira...@gmail.com changed: What|Removed |Added CC||pira...@gmail.com -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] mesa/gles3: dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3'
On Thu, Jan 24, 2013 at 7:31 PM, Chad Versace chad.vers...@linux.intel.com wrote: FYI, The enum is defined in mesa iteself at #MESA_SOURCE/include/GL/internal/dri_interface.h. Your problem was independent of dri2proto, kernel, drm, etc. I expect your build environment was accidentally #including the header located in /usr/include/GL/internal rather than the one in the mesa source tree. Hmm, there is no such header file provided by any Ubuntu package. $ ll /usr/include/GL/internal insgesamt 16 drwxr-xr-x 2 root root 4096 Jan 23 03:48 ./ drwxr-xr-x 3 root root 4096 Jan 23 03:48 ../ -rw-r--r-- 1 root root 6318 Dez 3 18:33 glcore.h $ dpkg -S /usr/include/GL/internal/glcore.h x11proto-gl-dev: /usr/include/GL/internal/glcore.h $ find /opt/xorg/ -name dri_interface.h /opt/xorg/include/GL/internal/dri_interface.h $ find /usr/include/ -name dri_interface.h [ NO OUTPUT ] $ find mesa-git/ -name dri_interface.h mesa-git/include/GL/internal/dri_interface.h - Sedat - On 01/24/2013 04:33 AM, Sedat Dilek wrote: Hmm, switching to Ubuntu's distro-kernel, removing /opt/xorg and building the XORG stuff from scratch... 1. libdrm-git 2. mesa-git 3. xf86-video-intel-git ...makes the errors go away. OK, I had in my 3.8-rc4 kernel drm-intel-nightly integrated, but no linux-headers package of this kernel installed. Looks good. $ sudo grep -A4 'LoadModule: intel' /var/log/Xorg.0.log [15.913] (II) LoadModule: intel [15.913] (II) Loading /usr/lib/x86_64-linux-gnu/xorg/extra-modules/intel_drv.so [15.957] (II) Module intel: vendor=X.Org Foundation [15.957]compiled for 1.11.3, module version = 2.20.19 [15.957]Module class: X.Org Video Driver $ LIBGL_DEBUG=verbose glxinfo 2/dev/null | grep -i opengl OpenGL vendor string: Intel Open Source Technology Center OpenGL renderer string: Mesa DRI Intel(R) Sandybridge Mobile OpenGL version string: 3.0 Mesa 9.1-devel (git-6f3caaf) --- VERSION 3.0! 
OpenGL shading language version string: 1.30 OpenGL extensions: - Sedat - On Thu, Jan 24, 2013 at 11:14 AM, Sedat Dilek sedat.di...@gmail.com wrote: Hi Ian, due to the changes in... e90c08e dri: Define enum __DRI_API_GLES3 ...I get this breakage with mesa-gles3-git6f3caaf: make[7]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mesa/drivers/dri/common' CC utils.lo CC dri_util.lo CC libdri_test_stubs_la-dri_test.lo CC xmlconfig.lo CCLD libdri_test_stubs.la dri_util.c:192:10: error: use of undeclared identifier '__DRI_API_GLES3' case __DRI_API_GLES3: ^ 1 error generated. make[7]: *** [dri_util.lo] Error 1 Do I need a different DRI/DRI2 proto? From which fdo GIT tree? If YES, can you bump the required version in configure.ac? Thanks! Regards, - Sedat - P.S.: Check installed DRI/DRI2 proto version $ dpkg -l | grep dri | grep proto | grep -e \-dev ii x11proto-dri2-dev 2.8-1~precise1 X11 DRI2 extension wire protocol - EOT - ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Fix ReadPixels on buffers whose width >= 32kbytes
On 01/24/2013 02:10 PM, Paul Berry wrote: When possible, glReadPixels calls are performed using the hardware blitter. However, according to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data Size Limitations): The BLT engine is capable of transferring very large quantities of graphics data. Any graphics data read from and written to the destination is permitted to represent a number of pixels that occupies up to 65,536 scan lines and up to 32,768 bytes per scan line at the destination. The maximum number of pixels that may be represented per scan line’s worth of graphics data depends on the color depth. With an RGBA32F color buffer (which has 16 bytes per pixel) this imposes a maximum width of 2048 pixels. To make matters worse, if the pitch of the buffer is 32k or greater, intel_miptree_map_blit's call to intelEmitCopyBlit will overflow intelEmitCopyBlit's src_pitch and dst_pitch parameters (which are 16-bit signed integers). We can conveniently avoid both problems by avoiding the readpixels blit path when the miptree's pitch is >= 32k. Fixes gles3conform half_float tests when the buffer width is greater than 2048. It also fixes some other tests at 4k pixel width, but this commit message is fine. 
Tested-by: Ian Romanick ian.d.roman...@intel.com --- src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index ce03afa..f2571bd 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -1568,7 +1568,8 @@ intel_miptree_map_singlesample(struct intel_context *intel, } else if (intel-has_llc !(mode GL_MAP_WRITE_BIT) !mt-compressed - mt-region-tiling == I915_TILING_X) { + mt-region-tiling == I915_TILING_X + mt-region-pitch 32768) { intel_miptree_map_blit(intel, mt, map, level, slice); } else { intel_miptree_map_gtt(intel, mt, map, level, slice); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/vs/gen7: Emit code for GLSL ES 3.00 pack/unpack operations (v2)
On 01/23/2013 07:18 PM, Eric Anholt wrote: Chad Versace chad.vers...@linux.intel.com writes: +void +vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0) +{ + if (intel-gen 7) + assert(!ir_unop_unpack_half_2x16 should be lowered); + + assert(dst.type == BRW_REGISTER_TYPE_F); + assert(src0.type == BRW_REGISTER_TYPE_UD); + + /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f32to16: +* +* Because this instruction does not have a 16-bit floating-point type, +* the source data type must be Word (W). The destination type must be +* F (Float). +* +* To use W as the source data type, we must adjust horizontal strides, +* which is only possible in align1 mode. All my [chadv] attempts at +* emitting align1 instructions for unpackHalf2x16 failed to pass the +* Piglit tests, so I gave up. +* +* I've verified that, on gen7, it is safe to emit f16to32 in align16 mode +* with UD as source data type. +*/ Have you tested this on something like: in uvec4 v; vec2 result = unpackHalf2x16(v.w); Those kinds of "the type must be X and the stride must be Y" have sometimes meant that it's just hardcoded and they don't look at what you program, so I'm concerned that some of your regioning (swizzle/abs/neg/uniformness) will just get thrown out by the hardware. But if it's passing on your tests with uniforms, it's probably OK. In the brw code generated by my vs-packHalf2x16 test on IVB, the source to f32to16 is swizzled as yz. If I recall correctly, for my vs-unpackHalf2x16 test, the source to f16to32 was also swizzled to the non-x channel. So I think it's safe to say that this does the right thing. 
+ dst_reg tmp_dst(this, glsl_type::uvec2_type); + src_reg tmp_src(tmp_dst); + + /* tmp.x = src0 0xu; */ + tmp_dst.writemask = WRITEMASK_X; + emit(new(mem_ctx) vec4_instruction(this, BRW_OPCODE_AND, + tmp_dst, src0, src_reg(0xu))); These ought to use the helper functions for simplicity: emit(AND(tmp_dst, src0, src_reg(0xu))); Check out the ALU1 macro for how to set up one of those to have a similar helper for F16TO32 if you want to match up the style. Will do. FWIW, I'll also append the I've experimentally the hardware does what I want to it do comments by stating that the simulator does it too without complaint. + + /* tmp.y = src0 16u; */ + tmp_dst.writemask = WRITEMASK_Y; + emit(new(mem_ctx) vec4_instruction(this, BRW_OPCODE_SHR, + tmp_dst, src0, src_reg(16u))); + + /* dst.xy = f16to32(tmp); */ + dst.writemask = WRITEMASK_XY; + emit(new(mem_ctx) vec4_instruction(this, BRW_OPCODE_F16TO32, + dst, tmp_src)); +} ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Fix ReadPixels on buffers whose width >= 32kbytes
Paul Berry stereotype...@gmail.com writes: Yeah, you're right. I was being lazy. How's this: * Furthermore, intelEmitCopyBlit (which is called by * intel_miptree_map_blit) uses a signed 16-bit integer to represent buffer * pitch, so it can only handle buffer pitches < 32k. * * As a result of these two limitations, we can only use * intel_miptree_map_blit() when the region's pitsh is less than 32k. pitch Other than that, Reviewed-by: Eric Anholt e...@anholt.net pgpsn9u6w1Abd.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965/vs/gen7: Emit code for GLSL ES 3.00 pack/unpack operations (v3)
FIXME: This patch emits VS code that violates documented hardware restrictions and then relies on undocumented behavior that results from that violation. This patch passes all tests, but should be fixed ASAP to conform to the hardware documentation. v2: Explain undocumented hardware behavior. Improve comments. v3: Use ALU1 helper methods F32TO16() and F16TO32(). [for anholt] CC: Eric Anholt e...@anholt.net CC: Paul Berry stereotype...@gmail.com Reviewed-by: Ian Romanick ian.d.roman...@intel.com (v1) Signed-off-by: Chad Versace chad.vers...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_vec4.h | 5 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp| 8 ++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 133 + 3 files changed, 146 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index e65b92c..86921a0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -358,6 +358,8 @@ public: vec4_instruction *RNDE(dst_reg dst, src_reg src0); vec4_instruction *RNDZ(dst_reg dst, src_reg src0); vec4_instruction *FRC(dst_reg dst, src_reg src0); + vec4_instruction *F32TO16(dst_reg dst, src_reg src0); + vec4_instruction *F16TO32(dst_reg dst, src_reg src0); vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1); vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1); @@ -431,6 +433,9 @@ public: void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); src_reg fix_math_operand(src_reg src); + void emit_pack_half_2x16(dst_reg dst, src_reg src0); + void emit_unpack_half_2x16(dst_reg dst, src_reg src0); + void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler); void emit_ndc_computation(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 747edc2..e395ada 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -808,6 +808,14 @@ vec4_generator::generate_code(exec_list *instructions) brw_DP2(p, dst, src[0], src[1]); break; + case BRW_OPCODE_F32TO16: + brw_F32TO16(p, dst, src[0]); + break; + + case BRW_OPCODE_F16TO32: + brw_F16TO32(p, dst, src[0]); + break; + case BRW_OPCODE_IF: if (inst-src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ebf8990..4ec77a7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -113,6 +113,8 @@ ALU1(FRC) ALU1(RNDD) ALU1(RNDE) ALU1(RNDZ) +ALU1(F32TO16) +ALU1(F16TO32) ALU2(ADD) ALU2(MUL) ALU2(MACH) @@ -348,6 +350,119 @@ vec4_visitor::emit_math(enum opcode opcode, } void +vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0) +{ + if (intel-gen 7) + assert(!ir_unop_pack_half_2x16 should be lowered); + + assert(dst.type == BRW_REGISTER_TYPE_UD); + assert(src0.type == BRW_REGISTER_TYPE_F); + + /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16: +* +* Because this instruction does not have a 16-bit floating-point type, +* the destination data type must be Word (W). +* +* The destination must be DWord-aligned and specify a horizontal stride +* (HorzStride) of 2. The 16-bit result is stored in the lower word of +* each destination channel and the upper word is not modified. +* +* The above restriction implies that the f32to16 instruction must use +* align1 mode, because only in align1 mode is it possible to specify +* horizontal stride. We choose here to defy the hardware docs and emit +* align16 instructions. +* +* (I [chadv] did attempt to emit align1 instructions for VS f32to16 +* instructions. I was partially successful in that the code passed all +* tests. 
However, the code was dubiously correct and fragile, and the +* tests were not harsh enough to probe that frailty. Not trusting the +* code, I chose instead to remain in align16 mode in defiance of the hw +* docs). +* +* I've [chadv] experimentally confirmed that, on gen7 hardware and the +* simulator, emitting a f32to16 in align16 mode with UD as destination +* data type is safe. The behavior differs from that specified in the PRM +* in that the upper word of each destination channel is cleared to 0. +*/ + + dst_reg tmp_dst(this, glsl_type::uvec2_type); + src_reg tmp_src(tmp_dst); + +#if 0 + /* Verify the undocumented behavior on which the following instructions +* rely. If f32to16 fails to clear the upper word of the X and Y channels, +* then the result of the bit-or
[Mesa-dev] [PATCH 20/20] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations (4)
v2: Remove lewd comment. [for idr] v3: - Optimize away tmp register for packHalf2x16. [for anholt, paul] - Improve comments. [for anholt, paul] - Reduce near-duplicate code by removing vec4_visitor emit_pack/unpack methods. [for chadv] v4: Factor our UD/W register conversion into helper function. [for anholt] CC: Eric Anholt e...@anholt.net CC: Paul Berry Paul Berry stereotype...@gmail.com Reviewed-by: Ian Romanick ian.d.roman...@intel.com (v2) Signed-off-by: Chad Versace chad.vers...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_defines.h| 3 + src/mesa/drivers/dri/i965/brw_fs.h | 8 ++ .../dri/i965/brw_fs_channel_expressions.cpp| 12 +++ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 105 - src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 +++- 5 files changed, 144 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index e2f1e65..79cc12f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -726,6 +726,9 @@ enum opcode { FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_GLOBAL_OFFSET, + FS_OPCODE_PACK_HALF_2x16_SPLIT, + FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, + FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, VS_OPCODE_URB_WRITE, VS_OPCODE_SCRATCH_READ, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b47b0d0..d332502 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -542,6 +542,14 @@ private: struct brw_reg offset); void generate_discard_jump(fs_inst *inst); + void generate_pack_half_2x16_split(fs_inst *inst, + struct brw_reg dst, + struct brw_reg x, + struct brw_reg y); + void generate_unpack_half_2x16_split(fs_inst *inst, +struct brw_reg dst, +struct brw_reg src); + void patch_discard_jumps_to_fb_writes(); struct brw_context *brw; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
index 58521ee..e19da51 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -342,9 +342,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) assert(!not yet supported); break; + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_half_2x16: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: case ir_quadop_vector: assert(!should have been lowered); break; + + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + case ir_binop_pack_half_2x16_split: + assert(!not reached: expression operates on scalars only); + break; } ir-remove(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 324e665..86f382e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -922,6 +922,95 @@ fs_generator::generate_set_global_offset(fs_inst *inst, brw_pop_insn_state(p); } +/** + * Change the register's data type from UD to W, doubling the strides in order + * to compensate for halving the data type width. + */ +static struct brw_reg +ud_reg_to_w(struct brw_reg r) +{ + assert(r.type == BRW_REGISTER_TYPE_UD); + r.type = BRW_REGISTER_TYPE_W; + + /* The BRW_*_STRIDE enums are defined so that incrementing the field +* doubles the real stride. 
+*/ + if (r.hstride != 0) + ++r.hstride; + if (r.vstride != 0) + ++r.vstride; + + return r; +} + +void +fs_generator::generate_pack_half_2x16_split(fs_inst *inst, +struct brw_reg dst, +struct brw_reg x, +struct brw_reg y) +{ + assert(intel-gen = 7); + assert(dst.type == BRW_REGISTER_TYPE_UD); + assert(x.type = BRW_REGISTER_TYPE_F); + assert(y.type = BRW_REGISTER_TYPE_F); + + /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16: +* +* Because this instruction does not have a 16-bit floating-point type, +* the destination data type must be Word (W). +* +* The destination must be DWord-aligned and specify a horizontal stride +* (HorzStride) of 2. The 16-bit result is stored in the lower word of +* each destination channel and the upper word is not modified. +*/ + struct brw_reg dst_w = ud_reg_to_w(dst); + + /* Give each 32-bit channel of dst the form below ,
[Mesa-dev] [PATCH 1/2] util: add some defensive coding in u_upload_alloc()
Some callers of this function were checking the 'ptr' result to see if the function failed. But the correct way is to check the regular return value for PIPE_ERROR_x. Now we initialize all the returned values at the top of the function in case we do hit an error (like OOM). Callers are more likely to detect OOM conditions now. But there are some callers which don't do any error checking... --- src/gallium/auxiliary/util/u_upload_mgr.c |8 +++- 1 files changed, 7 insertions(+), 1 deletions(-) diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index ee1c688..47d39af 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -163,6 +163,13 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, unsigned alloc_offset = align(min_out_offset, upload-alignment); unsigned offset; + /* Init these return values here in case we fail below to make +* sure the caller doesn't get garbage values. +*/ + *out_offset = ~0; + *outbuf = NULL; + *ptr = NULL; + /* Make sure we have enough space in the upload buffer * for the sub-allocation. */ if (MAX2(upload-offset, alloc_offset) + alloc_size upload-size) { @@ -183,7 +190,6 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, upload-transfer); if (!upload-map) { pipe_resource_reference(outbuf, NULL); - *ptr = NULL; upload-transfer = NULL; return PIPE_ERROR_OUT_OF_MEMORY; } -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH RFC v1] i965: Implement CopyTexSubImage2D via BLORP (and use it by default).
Am Sonntag, 20. Januar 2013 schrieb Paul Berry: --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 106 +++ src/mesa/drivers/dri/i965/brw_context.h | 8 ++ src/mesa/drivers/dri/intel/intel_tex_copy.c | 32 ++-- 3 files changed, 138 insertions(+), 8 deletions(-) Paul, I'd appreciate your feedback on this patch. It's my first time really working with BLORP, so I'm bound to have screwed up something. :) I'm not sure about the HiZ and downsample resolves (hence the //'d out lines). Your interaction with blorp looks good. But I think there are some bugs in the resolves. My rules of thumb for depth/HiZ resolves are: - Resolves take care of the mismatch in access patterns between HiZ-aware components (i.e. the depth buffer bound to the rendering pipeline) and non-HiZ-aware components (texture units, blorp, and swrast). After writing data using a HiZ-aware component and reading it using a non-HiZ-aware component, you need to do a depth resolve. After writing data using a non-HiZ-aware component and reading it using a HiZ-aware component, you need to do a HiZ resolve. For this determination, writing to a portion of the buffer (but not all of it) counts as both a write and a read. - We do resolves at the last possible minute, so the way this is actually accomplished is to call intel_{miptree_slice,renderbuffer}_resolve_{depth,hiz} before reading/writing a buffer and intel_{miptree_slice,renderbuffer}_set_needs_{depth,hiz}_resolve after writing to a buffer. The latter calls simply flag a future resolve as being necessary; the former calls do the resolve only if it was previously flagged. I'll comment below on the specific changes I think are necessary. I am currently testing this patch and I am the reporter of the spell effects slowness in Planeshift [1]. Can this lead to refresh issues like floating around triagnles on ground or so? Funnily I think I didn´t have these while SNA was enabled, but with SNA enabled I had some other wierd issues like high CPU usage for X.org. 
Well, I also put the driver I compiled with mesa git master from fdo into a 9.0.1 debian package build. Maybe I have to use all of git master of mesa? Anyway for spell effect speed this patch is a *huge* improvement. [1] https://bugs.freedesktop.org/show_bug.cgi?id=59086 Thanks, -- Martin 'Helios' Steigerwald - http://www.Lichtvoll.de GPG: 03B0 0D6C 0040 0710 4AFA B82F 991B EAAC A599 84C7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 19/32] glsl: Handle instance array declarations
On 01/23/2013 07:53 PM, Paul Berry wrote: On 22 January 2013 00:52, Ian Romanick i...@freedesktop.org mailto:i...@freedesktop.org wrote: From: Ian Romanick ian.d.roman...@intel.com mailto:ian.d.roman...@intel.com Signed-off-by: Ian Romanick ian.d.roman...@intel.com mailto:ian.d.roman...@intel.com --- src/glsl/ast_to_hir.cpp | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index d485bc8..c922a84 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -4259,9 +4259,20 @@ ast_uniform_block::hir(exec_list *instructions, * field selector ( . ) operator (analogously to structures). */ if (this-instance_name) { - ir_variable *var = new(state) ir_variable(block_type, -this-instance_name, -ir_var_uniform); + ir_variable *var; + + if (this-array_size != NULL) { + const glsl_type *block_array_type = +process_array_type(loc, block_type, this-array_size, state); + + var = new(state) ir_variable(block_array_type, + this-instance_name, + ir_var_uniform); + } else { + var = new(state) ir_variable(block_type, + this-instance_name, + ir_var_uniform); + } var-interface_type = block_type; state-symbols-add_variable(var); -- 1.7.11.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org mailto:mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev Immediately below this hunk there's an else branch, to deal with the case where the uniform block doesn't have an instance name. Why don't we need to add similar array-handling logic to the else branch? I'm guessing that the grammar prevents instance blocks without names from being arrays, but since I'm not too familiar with UBO's I'm not very certain about it. Yes, that's the case. It makes sense, too, because blocks without instance names just have variables at global scope...so there's nothing sensible to put an array subscript on. 
If my guess is right, it would be nice to put an explanatory comment in the else branch, and maybe an assert(this->array_size == NULL); just to drive the point home. Certainly never hurts. But I won't be a stickler about it. With or without my suggested change, this patch is: Reviewed-by: Paul Berry stereotype...@gmail.com mailto:stereotype...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/32] UBOs for OpenGL ES 3.0
Patches 1-9, 11-22, and 32 are: Reviewed-by: Kenneth Graunke kenn...@whitecape.org I haven't read the others yet. No objections, just...haven't gotten to them. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] util: add new error checking code in vbuf helper
Check the return value of calls to u_upload_alloc() and u_upload_data() and return early if needed. Since we don't have a way to propagate errors all the way up to Mesa through pipe_context::draw_vbo(), call debug_warn_once() so the user might have some clue about OOM errors. --- src/gallium/auxiliary/util/u_vbuf.c | 76 ++ 1 files changed, 49 insertions(+), 27 deletions(-) diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index b712b52..244b04d 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -323,7 +323,7 @@ void u_vbuf_destroy(struct u_vbuf *mgr) FREE(mgr); } -static void +static enum pipe_error u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, unsigned vb_mask, unsigned out_vb, int start_vertex, unsigned num_vertices, @@ -335,6 +335,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, struct pipe_resource *out_buffer = NULL; uint8_t *out_map; unsigned out_offset, mask; + enum pipe_error err; /* Get a translate object. */ tr = translate_cache_find(mgr-translate_cache, key); @@ -381,6 +382,14 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, assert((ib-buffer || ib-user_buffer) ib-index_size); + /* Create and map the output buffer. */ + err = u_upload_alloc(mgr-uploader, 0, + key-output_stride * num_indices, + out_offset, out_buffer, + (void**)out_map); + if (err != PIPE_OK) + return err; + if (ib-user_buffer) { map = (uint8_t*)ib-user_buffer + offset; } else { @@ -389,12 +398,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, PIPE_TRANSFER_READ, transfer); } - /* Create and map the output buffer. 
*/ - u_upload_alloc(mgr-uploader, 0, - key-output_stride * num_indices, - out_offset, out_buffer, - (void**)out_map); - switch (ib-index_size) { case 4: tr-run_elts(tr, (unsigned*)map, num_indices, 0, out_map); @@ -412,11 +415,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, } } else { /* Create and map the output buffer. */ - u_upload_alloc(mgr-uploader, - key-output_stride * start_vertex, - key-output_stride * num_vertices, - out_offset, out_buffer, - (void**)out_map); + err = u_upload_alloc(mgr-uploader, + key-output_stride * start_vertex, + key-output_stride * num_vertices, + out_offset, out_buffer, + (void**)out_map); + if (err != PIPE_OK) + return err; out_offset -= key-output_stride * start_vertex; @@ -441,6 +446,8 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, pipe_resource_reference( mgr-real_vertex_buffer[out_vb].buffer, NULL); mgr-real_vertex_buffer[out_vb].buffer = out_buffer; + + return PIPE_OK; } static boolean @@ -588,11 +595,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, /* Translate buffers. */ for (type = 0; type VB_NUM; type++) { if (key[type].nr_elements) { - u_vbuf_translate_buffers(mgr, key[type], mask[type], - mgr-fallback_vbs[type], - start[type], num[type], - start_index, num_indices, min_index, - unroll_indices type == VB_VERTEX); + enum pipe_error err; + err = u_vbuf_translate_buffers(mgr, key[type], mask[type], +mgr-fallback_vbs[type], +start[type], num[type], +start_index, num_indices, min_index, +unroll_indices type == VB_VERTEX); + if (err != PIPE_OK) +return FALSE; /* Fixup the stride for constant attribs. 
*/ if (type == VB_CONST) { @@ -884,7 +894,7 @@ void u_vbuf_set_index_buffer(struct u_vbuf *mgr, pipe-set_index_buffer(pipe, ib); } -static void +static enum pipe_error u_vbuf_upload_buffers(struct u_vbuf *mgr, int start_vertex, unsigned num_vertices, int start_instance, unsigned num_instances) @@ -953,6 +963,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, unsigned start, end; struct pipe_vertex_buffer *real_vb; const uint8_t *ptr; + enum pipe_error err; i = u_bit_scan(buffer_mask); @@ -963,11 +974,15 @@ u_vbuf_upload_buffers(struct
[Mesa-dev] [PATCH] vbo: add a null pointer check to handle OOM instead of crashing
Note: This is a candidate for the 9.0 branch. --- src/mesa/vbo/vbo_exec_api.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 985f220..353f8cf 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -124,6 +124,11 @@ void vbo_exec_vtx_wrap( struct vbo_exec_context *exec ) */ vbo_exec_wrap_buffers( exec ); + if (!exec->vtx.buffer_ptr) { + /* probably ran out of memory earlier when allocating the VBO */ + return; + } + /* Copy stored stored vertices to start of new list. */ assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr); -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/6] glsl: Add support for varying structs.
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com On Mon, Jan 21, 2013 at 2:16 PM, Paul Berry stereotype...@gmail.com wrote: This patch series adds support for varying structs, which are a required part of GLSL ES 3.00 and GLSL 1.50. I can see two principal ways to implement this feature: a flattening approach, and a packing approach. In the flattening approach, the linker replaces each varying struct with a set of varyings, one for each field in the struct, and adjusts the shader code to refer to the new set of varyings (this is what a programmer would likely try to do when manually porting a shader that uses structs to a version of GLSL that didn't support them). In the packing approach, we keep the varying structs as single variables, and lay out their contents contiguously in GPU memory (in much the same way that a CPU compiler would handle an ordinary C struct). In principle, the flattening approach carries some potential performance benefits, since it frees the linker to assign each struct element a location that is properly aligned to its type. However, it is much more difficult to implement, since it requires a lowering pass to traverse the entire shader and adjust all references to the varying structs, including function in/out parameters and whole structure assignment. To make matters worse, there would be subtle interactions between the linker and the lowering pass, since error checking would have to be performed based on the un-lowered shaders, but the location assignment would have to be applied to the lowered varyings. Because of these difficulties, I've used the packing approach in this initial implementation--this allowed me to take advantage of the existing lower_packed_varyings() function to pack and unpack varying structs, so the code delta is fairly small. We can always switch to the flattening approach if, in the future, we discover some programs whose performance would be dramatically improved by it. 
Patch 1/6 eliminates the ambiguity in the ir_variable_mode enum between in/out variables that are inputs/outputs of functions and inputs/outputs of shaders. This makes possible patch 2/6, which modifies opt_structure_splitting to leave varying structs alone (we can't have it splitting varying structs, otherwise the linker won't be able to find them). Patches 3/6 and 4/6 update the linker and lower_packed_varyings, respectively, to handle varying structs, and patch 5/6 disables transform feedback for varying structs (we haven't yet heard from Khronos how this is supposed to work). Finally, patch 6/6 enables the feature when the GLSL version is 1.50 or 3.00 ES. [PATCH 1/6] glsl: Eliminate ambiguity between function ins/outs and shader ins/outs [PATCH 2/6] glsl: Disable structure splitting for shader ins/outs. [PATCH 3/6] glsl: Generalize compute_packing_order for varying structs. [PATCH 4/6] glsl: Update lower_packed_varyings to handle varying structs. [PATCH 5/6] glsl: Disable transform feedback of varying structs. [PATCH 6/6] glsl: Allow varying structs in GLSL ES 3.00 and GLSL 1.50. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965/vs/gen7: Emit code for GLSL ES 3.00 pack/unpack operations (v3)
Chad Versace chad.vers...@linux.intel.com writes: FIXME: This patch emits VS code that violates documented hardware restrictions and then relies on undocumented behavior that results from that violation. This patch passes all tests, but should be fixed ASAP to conform to the hardware documentation. + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + assert(!"not reached: should be handled by lower_packing_builtins"); + break; + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + case ir_binop_pack_half_2x16_split: + assert(!"not reached: should not occur in vertex shader"); + break; wacky whitespace. Other than that, Reviewed-by: Eric Anholt e...@anholt.net pgpYwbuikbc2r.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH RFC v1] i965: Implement CopyTexSubImage2D via BLORP (and use it by default).
According to KMail this went out as reply to all. Strange. Am Donnerstag, 24. Januar 2013 schrieb Martin Steigerwald: Am Sonntag, 20. Januar 2013 schrieb Paul Berry: --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 106 +++ src/mesa/drivers/dri/i965/brw_context.h | 8 ++ src/mesa/drivers/dri/intel/intel_tex_copy.c | 32 ++-- 3 files changed, 138 insertions(+), 8 deletions(-) Paul, I'd appreciate your feedback on this patch. It's my first time really working with BLORP, so I'm bound to have screwed up something. :) I'm not sure about the HiZ and downsample resolves (hence the //'d out lines). Your interaction with blorp looks good. But I think there are some bugs in the resolves. My rules of thumb for depth/HiZ resolves are: - Resolves take care of the mismatch in access patterns between HiZ-aware components (i.e. the depth buffer bound to the rendering pipeline) and non-HiZ-aware components (texture units, blorp, and swrast). After writing data using a HiZ-aware component and reading it using a non-HiZ-aware component, you need to do a depth resolve. After writing data using a non-HiZ-aware component and reading it using a HiZ-aware component, you need to do a HiZ resolve. For this determination, writing to a portion of the buffer (but not all of it) counts as both a write and a read. - We do resolves at the last possible minute, so the way this is actually accomplished is to call intel_{miptree_slice,renderbuffer}_resolve_{depth,hiz} before reading/writing a buffer and intel_{miptree_slice,renderbuffer}_set_needs_{depth,hiz}_resolve after writing to a buffer. The latter calls simply flag a future resolve as being necessary; the former calls do the resolve only if it was previously flagged. I'll comment below on the specific changes I think are necessary. I am currently testing this patch and I am the reporter of the spell effects slowness in Planeshift [1]. Can this lead to refresh issues like floating around triangles on ground or so? 
Funnily I think I didn't have these while SNA was enabled, but with SNA enabled I had some other weird issues like high CPU usage for X.org. Well, I also put the driver I compiled with mesa git master from fdo into a 9.0.1 debian package build. Maybe I have to use all of git master of mesa? Anyway for spell effect speed this patch is a *huge* improvement. [1] https://bugs.freedesktop.org/show_bug.cgi?id=59086 Thanks, -- Martin 'Helios' Steigerwald - http://www.Lichtvoll.de GPG: 03B0 0D6C 0040 0710 4AFA B82F 991B EAAC A599 84C7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 04/32] glsl: Refactor uniform block parser rules.
Patches 1-4 are Reviewed-by: Chad Versace chad.vers...@linux.intel.com I'm still working through the others. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 05/32] glsl: Parse non-array uniform block instance names in GLSL ES 3.00.
On 01/23/2013 01:43 PM, Paul Berry wrote: On 22 January 2013 00:51, Ian Romanick i...@freedesktop.org wrote: From: Kenneth Graunke kenn...@whitecape.org In GLSL ES 3.00 (and GLSL 1.50), uniform blocks can have an associated instance name, which essentially namespaces the variables inside. This patch adds basic parsing for this new feature, but doesn't yet hook it up to actually do anything yet. It does not support for arrays of interface blocks; a later commit will take care of that. This change temporarily regresses the piglit test interface-name-access-without-interface-name.vert. This shader failed to compile before (the expected result), but it failed to compile for the wrong reason. This is not a real regression. --- src/glsl/ast.h | 6 -- src/glsl/glsl_parser.yy | 20 ++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 5074782..bcec6bb 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -805,8 +805,9 @@ class ast_uniform_block : public ast_node { public: ast_uniform_block(ast_type_qualifier layout, const char *block_name, -ast_declarator_list *member_list) - : layout(layout), block_name(block_name) +ast_declarator_list *member_list, + const char *instance_name) + : layout(layout), block_name(block_name), instance_name(instance_name) { declarations.push_degenerate_list_at_head(member_list-link); } @@ -816,6 +817,7 @@ public: ast_type_qualifier layout; const char *block_name; + const char *instance_name; It would be nice to have a comment above this field explaining that instance_name is NULL if there is no instance name. I second the need for a comment here. With that, Reviewed-by: Chad Versace chad.vers...@linux.intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/32] glsl: Parse interface array size
On 01/23/2013 01:54 PM, Paul Berry wrote: On 22 January 2013 00:51, Ian Romanick i...@freedesktop.org wrote: From: Ian Romanick ian.d.roman...@intel.com For now, just drop the value on the floor. Signed-off-by: Ian Romanick ian.d.roman...@intel.com --- src/glsl/ast.h | 12 ++- src/glsl/glsl_parser.yy | 55 ++--- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/glsl/ast.h b/src/glsl/ast.h index bcec6bb..e525cb2 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -804,12 +804,12 @@ public: class ast_uniform_block : public ast_node { public: ast_uniform_block(ast_type_qualifier layout, -const char *block_name, -ast_declarator_list *member_list, - const char *instance_name) - : layout(layout), block_name(block_name), instance_name(instance_name) + const char *instance_name, +ast_expression *array_size) + : layout(layout), block_name(NULL), instance_name(instance_name), + array_size(array_size) { - declarations.push_degenerate_list_at_head(member_list-link); + /* empty */ } virtual ir_rvalue *hir(exec_list *instructions, @@ -820,6 +820,8 @@ public: const char *instance_name; /** List of ast_declarator_list * */ exec_list declarations; + + ast_expression *array_size; It would be nice to have a comment here explaining that this member is NULL if the ast_uniform_block lacks array nature. With Paul's comment about NULL, Reviewed-by: Chad Versace chad.vers...@linux.intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel: Fix glCopyTexSubImage on buffers whose width >= 32kbytes
When possible, glCopyTexSubImage calls are performed using the hardware blitter. However, according to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data Size Limitations): The BLT engine is capable of transferring very large quantities of graphics data. Any graphics data read from and written to the destination is permitted to represent a number of pixels that occupies up to 65,536 scan lines and up to 32,768 bytes per scan line at the destination. The maximum number of pixels that may be represented per scan line’s worth of graphics data depends on the color depth. With an RGBA32F color buffer (which has 16 bytes per pixel) this imposes a maximum width of 2048 pixels. Other pixel formats have accordingly larger limits. To make matters worse, if the pitch of the buffer is 32k or greater, intel_copy_texsubimage's call to intelEmitCopyBlit will overflow intelEmitCopyBlit's src_pitch and dst_pitch parameters (which are 16-bit signed integers). We can conveniently avoid both problems by avoiding use of the blitter when the miptree's pitch is >= 32k. Fixes gles3conform framebuffer_blit_functionality_magnifying_blit tests when the buffer width is equal to 8192. Note: this is very similar to the recent patch "intel: Fix ReadPixels on buffers whose width >= 32kbytes" except that it applies to glCopyTexSubImage instead of glReadPixels. In a future patch it would be nice to refactor the code so that (a) overflow is avoided, and (b) intelEmitCopyBlit is responsible for checking whether the blitter can handle the width, so that all callers of intelEmitCopyBlit work properly, rather than just these two. 
--- src/mesa/drivers/dri/intel/intel_tex_copy.c | 21 + 1 file changed, 21 insertions(+) diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1af7b1c..c9cbcf4 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -70,6 +70,27 @@ intel_copy_texsubimage(struct intel_context *intel, assert(region); } + /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics +* Data Size Limitations): +* +*The BLT engine is capable of transferring very large quantities of +*graphics data. Any graphics data read from and written to the +*destination is permitted to represent a number of pixels that +*occupies up to 65,536 scan lines and up to 32,768 bytes per scan line +*at the destination. The maximum number of pixels that may be +*represented per scan line’s worth of graphics data depends on the +*color depth. +* +* Furthermore, intelEmitCopyBlit (which is called below) uses a signed +* 16-bit integer to represent buffer pitch, so it can only handle buffer +* pitches < 32k. +* +* As a result of these two limitations, we can only use the blitter to do +* this copy when the region's pitch is less than 32k. +*/ + if (region->pitch >= 32768) + return false; + if (intelImage->base.Base.TexObject->Target == GL_TEXTURE_1D_ARRAY || intelImage->base.Base.TexObject->Target == GL_TEXTURE_2D_ARRAY) { perf_debug("no support for array textures\n"); -- 1.8.1.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/32] glsl: Refactor out processing of structure fields
On 01/23/2013 02:01 PM, Paul Berry wrote: On 22 January 2013 00:51, Ian Romanick i...@freedesktop.org wrote: From: Ian Romanick ian.d.roman...@intel.com This will soon also be used for processing interface block fields. Signed-off-by: Ian Romanick ian.d.roman...@intel.com --- src/glsl/ast_to_hir.cpp | 42 +- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index c432369..bce3488 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -4014,35 +4014,36 @@ ast_type_specifier::hir(exec_list *instructions, } -ir_rvalue * -ast_struct_specifier::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) +unsigned +ast_process_structure_or_interface_block(exec_list *instructions, +struct _mesa_glsl_parse_state *state, +exec_list *declarations, +YYLTYPE loc, +glsl_struct_field **fields_ret) The contract with the caller isn't obvious to me from this function declaration. Can we have a short comment above the function saying that the return value is the number of fields and that *fields_ret receives a pointer to a newly allocated array with that size? With that change, this patch is: Reviewed-by: Paul Berry stereotype...@gmail.com I find it confusing when a function with output parameters also has a return value, and that return value is not an error code. I like to see all the outputs in the parameter list or all packed into the return value, not a hybrid. This code is correct, so you feel free to ignore my complaint (though I hope you don't). With Paul's contract comment, this is Reviewed-by: Chad Versace chad.vers...@linux.intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Fix glCopyTexSubImage on buffers whose width >= 32kbytes
On 01/24/2013 04:54 PM, Paul Berry wrote: When possible, glCopyTexSubImage calls are performed using the hardware blitter. However, according to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data Size Limitations): The BLT engine is capable of transferring very large quantities of graphics data. Any graphics data read from and written to the destination is permitted to represent a number of pixels that occupies up to 65,536 scan lines and up to 32,768 bytes per scan line at the destination. The maximum number of pixels that may be represented per scan line’s worth of graphics data depends on the color depth. With an RGBA32F color buffer (which has 16 bytes per pixel) this imposes a maximum width of 2048 pixels. Other pixel formats have accordingly larger limits. To make matters worse, if the pitch of the buffer is 32k or greater, intel_copy_texsubimage's call to intelEmitCopyBlit will overflow intelEmitCopyBlit's src_pitch and dst_pitch parameters (which are 16-bit signed integers). We can conveniently avoid both problems by avoiding use of the blitter when the miptree's pitch is = 32k. Fixes gles3conform framebuffer_blit_functionality_magnifying_blit tests when the buffer width is equal to 8192. Note: this is very similar to the recent patch intel: Fix ReadPixels on buffers whose width = 32kbytes except that it applies to glCopyTexSubImage instead of glReadPixels. In a future patch it would be nice to refactor the code so that (a) overflow is avoided, and (b) intelEmitCopyBlit is responsible for checking whether the blitter can handle the width, so that all callers of intelEmitCopyBlit work properly, rather than just these two. 
--- src/mesa/drivers/dri/intel/intel_tex_copy.c | 21 + 1 file changed, 21 insertions(+) diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1af7b1c..c9cbcf4 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -70,6 +70,27 @@ intel_copy_texsubimage(struct intel_context *intel, assert(region); } + /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics +* Data Size Limitations): +* +*The BLT engine is capable of transferring very large quantities of +*graphics data. Any graphics data read from and written to the +*destination is permitted to represent a number of pixels that +*occupies up to 65,536 scan lines and up to 32,768 bytes per scan line +*at the destination. The maximum number of pixels that may be +*represented per scan line’s worth of graphics data depends on the +*color depth. +* +* Furthermore, intelEmitCopyBlit (which is called below) uses a signed +* 16-bit integer to represent buffer pitch, so it can only handle buffer +* pitches 32k. +* +* As a result of these two limitations, we can only use the blitter to do +* this copy when the region's pitch is less than 32k. +*/ + if (region-pitch = 32768) + return false; + if (intelImage-base.Base.TexObject-Target == GL_TEXTURE_1D_ARRAY || intelImage-base.Base.TexObject-Target == GL_TEXTURE_2D_ARRAY) { perf_debug(no support for array textures\n); Reviewed-by: Kenneth Graunke kenn...@whitecape.org ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 24/32] glsl: Make the align function available elsewhere in the linker
On 01/22/2013 12:52 AM, Ian Romanick wrote: From: Ian Romanick ian.d.roman...@intel.com Signed-off-by: Ian Romanick ian.d.roman...@intel.com --- src/glsl/glsl_types.cpp | 12 +++- src/glsl/glsl_types.h| 6 ++ src/glsl/link_uniforms.cpp | 14 -- src/glsl/lower_ubo_reference.cpp | 19 +++ 4 files changed, 20 insertions(+), 31 deletions(-) diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 0075550..ddd0148 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -863,12 +863,6 @@ glsl_type::std140_base_alignment(bool row_major) const return -1; } -static unsigned -align(unsigned val, unsigned align) -{ - return (val + align - 1) / align * align; -} - Why not just eliminate this function altogether and use ALIGN() from macros.h? (The implementation is slightly different, but I think it should work.) unsigned glsl_type::std140_size(bool row_major) const { @@ -970,11 +964,11 @@ glsl_type::std140_size(bool row_major) const for (unsigned i = 0; i this-length; i++) { const struct glsl_type *field_type = this-fields.structure[i].type; unsigned align = field_type-std140_base_alignment(row_major); -size = (size + align - 1) / align * align; +size = glsl_align(size, align); size += field_type-std140_size(row_major); } - size = align(size, - this-fields.structure[0].type-std140_base_alignment(row_major)); + size = glsl_align(size, + this-fields.structure[0].type-std140_base_alignment(row_major)); return size; } diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 8588685..b0db2bf 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -601,6 +601,12 @@ struct glsl_struct_field { bool row_major; }; +static inline unsigned int +glsl_align(unsigned int a, unsigned int align) +{ + return (a + align - 1) / align * align; +} + #endif /* __cplusplus */ #endif /* GLSL_TYPES_H */ diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 2a1af6b..439b711 100644 --- a/src/glsl/link_uniforms.cpp +++ 
b/src/glsl/link_uniforms.cpp @@ -29,12 +29,6 @@ #include program/hash_table.h #include program.h -static inline unsigned int -align(unsigned int a, unsigned int align) -{ - return (a + align - 1) / align * align; -} - /** * \file link_uniforms.cpp * Assign locations for GLSL uniforms. @@ -421,13 +415,13 @@ private: this-uniforms[id].block_index = this-ubo_block_index; unsigned alignment = type-std140_base_alignment(ubo_row_major); -this-ubo_byte_offset = align(this-ubo_byte_offset, alignment); +this-ubo_byte_offset = glsl_align(this-ubo_byte_offset, alignment); this-uniforms[id].offset = this-ubo_byte_offset; this-ubo_byte_offset += type-std140_size(ubo_row_major); if (type-is_array()) { this-uniforms[id].array_stride = - align(type-fields.array-std140_size(ubo_row_major), 16); + glsl_align(type-fields.array-std140_size(ubo_row_major), 16); } else { this-uniforms[id].array_stride = 0; } @@ -564,7 +558,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader) unsigned alignment = type-std140_base_alignment(ubo_var-RowMajor); unsigned size = type-std140_size(ubo_var-RowMajor); -offset = align(offset, alignment); +offset = glsl_align(offset, alignment); ubo_var-Offset = offset; offset += size; } @@ -580,7 +574,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader) * and rounding up to the next multiple of the base * alignment required for a vec4. 
*/ - block-UniformBufferSize = align(offset, 16); + block-UniformBufferSize = glsl_align(offset, 16); } } diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index 1d08009..8d13ec1 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -61,12 +61,6 @@ public: bool progress; }; -static inline unsigned int -align(unsigned int a, unsigned int align) -{ - return (a + align - 1) / align * align; -} - void lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) { @@ -113,7 +107,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) array_stride = 4; } else { array_stride = deref_array-type-std140_size(row_major); - array_stride = align(array_stride, 16); + array_stride = glsl_align(array_stride, 16); } ir_constant *const_index = deref_array-array_index-as_constant(); @@ -138,7 +132,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) const glsl_type *type = struct_type-fields.structure[i].type; unsigned field_align
[Mesa-dev] [PATCH] glx: only advertise GLX_INTEL_swap_event if it's supported
Only drivers supporting DRI2 version >= 4 support GLX_INTEL_swap_event. So let's mark it as such otherwise applications which use this extension (i.e. everything based on Clutter, e.g. gnome-shell) break horribly on drivers supporting DRI2 versions only up to 3. Note: This is a candidate for the 9.0 branch. Signed-off-by: Zack Rusin za...@vmware.com --- src/glx/dri2_glx.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c index 1b3cf2b..a51716f 100644 --- a/src/glx/dri2_glx.c +++ b/src/glx/dri2_glx.c @@ -1062,8 +1062,9 @@ dri2BindExtensions(struct dri2_screen *psc, const __DRIextension **extensions) __glXEnableDirectExtension(&psc->base, "GLX_MESA_swap_control"); __glXEnableDirectExtension(&psc->base, "GLX_SGI_make_current_read"); - /* FIXME: if DRI2 version supports it... */ - __glXEnableDirectExtension(&psc->base, "GLX_INTEL_swap_event"); + if (psc->dri2->base.version >= 4) { + __glXEnableDirectExtension(&psc->base, "GLX_INTEL_swap_event"); + } if (psc->dri2->base.version >= 3) { const unsigned mask = psc->dri2->getAPIMask(psc->driScreen); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59831] New: undefined symbol _ZN4llvm19createGlobalDCEPassEv in r600g
https://bugs.freedesktop.org/show_bug.cgi?id=59831 Priority: medium Bug ID: 59831 Assignee: mesa-dev@lists.freedesktop.org Summary: undefined symbol _ZN4llvm19createGlobalDCEPassEv in r600g Severity: normal Classification: Unclassified OS: All Reporter: ag...@yahoo.com Hardware: Other Status: NEW Version: git Component: Other Product: Mesa libGL error: dlopen /usr/lib64/dri/r600_dri.so failed (/usr/lib64/dri/r600_dri.so: undefined symbol: _ZN4llvm19createGlobalDCEPassEv) with mesa head at: c6a50ddfcb736e8a33b65fe8a72f2b6b9f70073b Not sure if it's a mesa or llvm regression. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Compile the driver with -march=core2.
While most of our development and testing is on x86-64, some of our major consumers of the driver are on i386 still. This meant they aren't taking advantage of SSE for floating point math or cmov instructions, unless the user went out of their way to choose a -march flag (unlikely). Given that the driver can only get probed on i965 and newer chipsets, which only support core2 and above CPUs, this is safe. Improves (32-bit) GLbenchmark 2.1 offscreen performance by .76 +/- 0.35% (n=19) --- configure.ac | 17 + src/mesa/drivers/dri/i965/Makefile.am |3 ++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e769eda..0af3176 100644 --- a/configure.ac +++ b/configure.ac @@ -492,6 +492,23 @@ if test x$enable_asm = xyes; then fi AC_SUBST([MESA_ASM_FILES]) +# If the user hasn't set an explicit -march flag, then autodetect a few for +# use by the i965 driver. +if echo $CFLAGS | grep -v march /dev/null; then +case $host_cpu in +i?86 | x86_64) +save_CFLAGS=$CFLAGS +AC_MSG_CHECKING([whether $CC supports -march=core2]) +CFLAGS=$save_CFLAGS -march=core2 +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[]])], + [AC_MSG_RESULT([yes]); MARCH_CORE2=-march=core2], + [AC_MSG_RESULT([no]); MARCH_CORE2=]) +CFLAGS=$save_CFLAGS +;; +esac +fi +AC_SUBST([MARCH_CORE2]) + dnl Check to see if dlopen is in default libraries (like Solaris, which dnl has it in libc), or if libdl is needed to get it. AC_CHECK_FUNC([dlopen], [DEFINES=$DEFINES -DHAVE_DLOPEN], diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index dc140df..d5d0631 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -38,7 +38,8 @@ AM_CFLAGS = \ $(DEFINES) \ $(API_DEFINES) \ $(VISIBILITY_CFLAGS) \ - $(INTEL_CFLAGS) + $(INTEL_CFLAGS) \ + $(MARCH_CORE2) AM_CXXFLAGS = $(AM_CFLAGS) -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/8] ARB_shading_language_packing
Following this email are eight patches that add the 4x8 pack/unpack operations that are the difference between what GLSL ES 3.0 and ARB_shading_language_packing require. They require Chad's gles3-glsl-packing series and are available at http://cgit.freedesktop.org/~mattst88/mesa/log/?h=ARB_shading_language_packing I've also added testing support on top of Chad's piglit patch. The {vs,fs}-unpackUnorm4x8 tests currently fail, and I've been unable to spot why. Please give it a look. It'd be nice to get this into 9.1. Thanks, Matt ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/8] glsl: Add infrastructure for ARB_shading_language_packing
--- src/glsl/builtins/tools/generate_builtins.py |1 + src/glsl/glcpp/glcpp-parse.y |3 +++ src/glsl/glsl_parser_extras.cpp |1 + src/glsl/glsl_parser_extras.h|2 ++ src/glsl/standalone_scaffolding.cpp |1 + src/mesa/main/extensions.c |1 + src/mesa/main/mtypes.h |1 + 7 files changed, 10 insertions(+), 0 deletions(-) diff --git a/src/glsl/builtins/tools/generate_builtins.py b/src/glsl/builtins/tools/generate_builtins.py index 2cfb1a3..3db862e 100755 --- a/src/glsl/builtins/tools/generate_builtins.py +++ b/src/glsl/builtins/tools/generate_builtins.py @@ -189,6 +189,7 @@ read_builtins(GLenum target, const char *protos, const char **functions, unsigne st-OES_EGL_image_external_enable = true; st-ARB_shader_bit_encoding_enable = true; st-ARB_texture_cube_map_array_enable = true; + st-ARB_shading_language_packing_enable = true; _mesa_glsl_initialize_types(st); sh-ir = new(sh) exec_list; diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 8fba923..e927c7c 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -1227,6 +1227,9 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api) if (extensions-ARB_texture_cube_map_array) add_builtin_define(parser, GL_ARB_texture_cube_map_array, 1); + + if (extensions-ARB_shading_language_packing) +add_builtin_define(parser, GL_ARB_shading_language_packing, 1); } } diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index b460c86..c8dbc89 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -462,6 +462,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_uniform_buffer_object, true, false, true, true, false, ARB_uniform_buffer_object), EXT(OES_standard_derivatives, false, false, true, false, true, OES_standard_derivatives), EXT(ARB_texture_cube_map_array, true, false, true, true, false, ARB_texture_cube_map_array), + EXT(ARB_shading_language_packing, true, false, true, true, false, 
ARB_shading_language_packing), }; #undef EXT diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 2e6bb0b..53df149 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -272,6 +272,8 @@ struct _mesa_glsl_parse_state { bool OES_standard_derivatives_warn; bool ARB_texture_cube_map_array_enable; bool ARB_texture_cube_map_array_warn; + bool ARB_shading_language_packing_enable; + bool ARB_shading_language_packing_warn; /*@}*/ /** Extensions supported by the OpenGL implementation. */ diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index ccf5b4f..8b12f81 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -101,6 +101,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) ctx-Extensions.ARB_shader_bit_encoding = true; ctx-Extensions.OES_standard_derivatives = true; ctx-Extensions.ARB_texture_cube_map_array = true; + ctx-Extensions.ARB_shading_language_packing = true; ctx-Const.GLSLVersion = 120; diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index fd25d31..fb41760 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -125,6 +125,7 @@ static const struct extension extension_table[] = { { GL_ARB_shader_stencil_export, o(ARB_shader_stencil_export), GL, 2009 }, { GL_ARB_shader_texture_lod, o(ARB_shader_texture_lod), GL, 2009 }, { GL_ARB_shading_language_100, o(ARB_shading_language_100),GLL,2003 }, + { GL_ARB_shading_language_packing, o(ARB_shading_language_packing),GL, 2011 }, { GL_ARB_shadow, o(ARB_shadow), GLL,2001 }, { GL_ARB_sync,o(ARB_sync), GL, 2003 }, { GL_ARB_texture_border_clamp, o(ARB_texture_border_clamp),GLL,2000 }, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index cba1e16..254679f 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3042,6 +3042,7 @@ struct gl_extensions GLboolean ARB_shader_stencil_export; GLboolean 
ARB_shader_texture_lod; GLboolean ARB_shading_language_100; + GLboolean ARB_shading_language_packing; GLboolean ARB_shadow; GLboolean ARB_sync; GLboolean ARB_texture_border_clamp; -- 1.7.8.6 ___ mesa-dev
[Mesa-dev] [PATCH 2/8] glsl: Add IR lisp for ARB_shading_language_packing
--- src/glsl/builtins/ir/packSnorm4x8.ir |6 ++ src/glsl/builtins/ir/packUnorm4x8.ir |6 ++ src/glsl/builtins/ir/unpackSnorm4x8.ir |6 ++ src/glsl/builtins/ir/unpackUnorm4x8.ir |6 ++ .../profiles/ARB_shading_language_packing.glsl | 14 ++ 5 files changed, 38 insertions(+), 0 deletions(-) create mode 100644 src/glsl/builtins/ir/packSnorm4x8.ir create mode 100644 src/glsl/builtins/ir/packUnorm4x8.ir create mode 100644 src/glsl/builtins/ir/unpackSnorm4x8.ir create mode 100644 src/glsl/builtins/ir/unpackUnorm4x8.ir create mode 100644 src/glsl/builtins/profiles/ARB_shading_language_packing.glsl diff --git a/src/glsl/builtins/ir/packSnorm4x8.ir b/src/glsl/builtins/ir/packSnorm4x8.ir new file mode 100644 index 000..a153aa9 --- /dev/null +++ b/src/glsl/builtins/ir/packSnorm4x8.ir @@ -0,0 +1,6 @@ +((function packSnorm4x8 + (signature uint + (parameters + (declare (in) vec4 arg0)) + ((return (expression uint packSnorm4x8 (var_ref arg0) +)) diff --git a/src/glsl/builtins/ir/packUnorm4x8.ir b/src/glsl/builtins/ir/packUnorm4x8.ir new file mode 100644 index 000..3a8e46c --- /dev/null +++ b/src/glsl/builtins/ir/packUnorm4x8.ir @@ -0,0 +1,6 @@ +((function packUnorm4x8 + (signature uint + (parameters + (declare (in) vec4 arg0)) + ((return (expression uint packUnorm4x8 (var_ref arg0) +)) diff --git a/src/glsl/builtins/ir/unpackSnorm4x8.ir b/src/glsl/builtins/ir/unpackSnorm4x8.ir new file mode 100644 index 000..3c6ed4a --- /dev/null +++ b/src/glsl/builtins/ir/unpackSnorm4x8.ir @@ -0,0 +1,6 @@ +((function unpackSnorm4x8 + (signature vec4 + (parameters + (declare (in) uint arg0)) + ((return (expression vec4 unpackSnorm4x8 (var_ref arg0) +)) diff --git a/src/glsl/builtins/ir/unpackUnorm4x8.ir b/src/glsl/builtins/ir/unpackUnorm4x8.ir new file mode 100644 index 000..e1dfd74 --- /dev/null +++ b/src/glsl/builtins/ir/unpackUnorm4x8.ir @@ -0,0 +1,6 @@ +((function unpackUnorm4x8 + (signature vec4 + (parameters + (declare (in) uint arg0)) + ((return (expression vec4 unpackUnorm4x8 (var_ref arg0) 
+)) diff --git a/src/glsl/builtins/profiles/ARB_shading_language_packing.glsl b/src/glsl/builtins/profiles/ARB_shading_language_packing.glsl new file mode 100644 index 000..210af51 --- /dev/null +++ b/src/glsl/builtins/profiles/ARB_shading_language_packing.glsl @@ -0,0 +1,14 @@ +#version 130 +#extension GL_ARB_shading_language_packing : enable + +highp uint packSnorm2x16(vec2 v); +highp uint packUnorm2x16(vec2 v); +highp uint packSnorm4x8 (vec4 v); +highp uint packUnorm4x8 (vec4 v); +highp uint packHalf2x16 (mediump vec2 v); + +highp vec2 unpackSnorm2x16(highp uint p); +highp vec2 unpackUnorm2x16(highp uint p); +highp vec4 unpackSnorm4x8 (highp uint p); +highp vec4 unpackUnorm4x8 (highp uint p); +mediump vec2 unpackHalf2x16 (highp uint p); -- 1.7.8.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/8] glsl: Extend ir_expression_operation for ARB_shading_language_packing
For each function {pack,unpack}{Snorm,Unorm}4x8, add a corresponding opcode to enum ir_expression_operation. Validate the new opcodes in ir_validate.cpp. --- src/glsl/ir.cpp | 11 +++ src/glsl/ir.h |4 src/glsl/ir_validate.cpp| 12 src/mesa/program/ir_to_mesa.cpp |4 4 files changed, 31 insertions(+), 0 deletions(-) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index d80ee3a..a105008 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -316,7 +316,9 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) break; case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: case ir_unop_pack_half_2x16: this-type = glsl_type::uint_type; break; @@ -327,6 +329,11 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) this-type = glsl_type::vec2_type; break; + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + this-type = glsl_type::vec4_type; + break; + default: assert(!not reached: missing automatic type setup for ir_expression); this-type = op0-type; @@ -478,10 +485,14 @@ static const char *const operator_strs[] = { dFdx, dFdy, packSnorm2x16, + packSnorm4x8, packUnorm2x16, + packUnorm4x8, packHalf2x16, unpackSnorm2x16, + unpackSnorm4x8, unpackUnorm2x16, + unpackUnorm4x8, unpackHalf2x16, unpackHalf2x16_split_x, unpackHalf2x16_split_y, diff --git a/src/glsl/ir.h b/src/glsl/ir.h index c1fbe7c..8d8f1c8 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -974,10 +974,14 @@ enum ir_expression_operation { */ /*@{*/ ir_unop_pack_snorm_2x16, + ir_unop_pack_snorm_4x8, ir_unop_pack_unorm_2x16, + ir_unop_pack_unorm_4x8, ir_unop_pack_half_2x16, ir_unop_unpack_snorm_2x16, + ir_unop_unpack_snorm_4x8, ir_unop_unpack_unorm_2x16, + ir_unop_unpack_unorm_4x8, ir_unop_unpack_half_2x16, /*@}*/ diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 9019637..bc53ea2 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -336,6 +336,12 @@ ir_validate::visit_leave(ir_expression *ir) 
assert(ir-operands[0]-type == glsl_type::vec2_type); break; + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_4x8: + assert(ir-type == glsl_type::uint_type); + assert(ir-operands[0]-type == glsl_type::vec4_type); + break; + case ir_unop_unpack_snorm_2x16: case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_half_2x16: @@ -343,6 +349,12 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir-operands[0]-type == glsl_type::uint_type); break; + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + assert(ir-type == glsl_type::vec4_type); + assert(ir-operands[0]-type == glsl_type::uint_type); + break; + case ir_unop_unpack_half_2x16_split_x: case ir_unop_unpack_half_2x16_split_y: assert(ir-type == glsl_type::float_type); diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1ed357a..ec26804 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1428,10 +1428,14 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_FRC, result_dst, op[0]); break; case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: case ir_unop_pack_half_2x16: case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: case ir_unop_unpack_half_2x16_split_x: case ir_unop_unpack_half_2x16_split_y: -- 1.7.8.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/8] glsl: Evaluate constant pack/unpack 4x8 expressions
That is, evaluate constant expressions for the following functions: packSnorm4x8, unpackSnorm4x8 packUnorm4x8, unpackUnorm4x8 --- src/glsl/ir_constant_expression.cpp | 162 +++ 1 files changed, 162 insertions(+), 0 deletions(-) diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index b34c6e8..4796f6f 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -76,12 +76,24 @@ bitcast_f2u(float f) } /** + * Evaluate one component of a floating-point 4x8 unpacking function. + */ +typedef uint8_t +(*pack_1x8_func_t)(float); + +/** * Evaluate one component of a floating-point 2x16 unpacking function. */ typedef uint16_t (*pack_1x16_func_t)(float); /** + * Evaluate one component of a floating-point 4x8 unpacking function. + */ +typedef float +(*unpack_1x8_func_t)(uint8_t); + +/** * Evaluate one component of a floating-point 2x16 unpacking function. */ typedef float @@ -112,6 +124,32 @@ pack_2x16(pack_1x16_func_t pack_1x16, } /** + * Evaluate a 4x8 floating-point packing function. + */ +static uint32_t +pack_4x8(pack_1x8_func_t pack_1x8, + float x, float y, float z, float w) +{ + /* From section 8.4 of the GLSL 4.30 spec: +* +*packSnorm4x8 +* +*The first component of the vector will be written to the least +*significant bits of the output; the last component will be written to +*the most significant bits. +* +* The specifications for the other packing functions contain similar +* language. +*/ + uint32_t u = 0; + u |= ((uint32_t) pack_1x8(x) 0); + u |= ((uint32_t) pack_1x8(y) 8); + u |= ((uint32_t) pack_1x8(z) 16); + u |= ((uint32_t) pack_1x8(w) 24); + return u; +} + +/** * Evaluate a 2x16 floating-point unpacking function. */ static void @@ -135,6 +173,48 @@ unpack_2x16(unpack_1x16_func_t unpack_1x16, } /** + * Evaluate a 4x8 floating-point unpacking function. 
+ */ +static void +unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u, + float *x, float *y, float *z, float *w) +{ +/* From section 8.4 of the GLSL 4.30 spec: + * + *unpackSnorm4x8 + *-- + *The first component of the returned vector will be extracted from + *the least significant bits of the input; the last component will be + *extracted from the most significant bits. + * + * The specifications for the other unpacking functions contain similar + * language. + */ + *x = unpack_1x8((uint8_t) (u 0xff)); + *y = unpack_1x8((uint8_t) (u 8)); + *z = unpack_1x8((uint8_t) (u 16)); + *w = unpack_1x8((uint8_t) (u 24)); +} + +/** + * Evaluate one component of packSnorm4x8. + */ +static uint8_t +pack_snorm_1x8(float x) +{ +/* From section 8.4 of the GLSL 4.30 spec: + * + *packSnorm4x8 + * + *The conversion for component c of v to fixed point is done as + *follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + */ + return (uint8_t) _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f); +} + +/** * Evaluate one component of packSnorm2x16. */ static uint16_t @@ -153,6 +233,24 @@ pack_snorm_1x16(float x) } /** + * Evaluate one component of unpackSnorm4x8. + */ +static float +unpack_snorm_1x8(uint8_t u) +{ +/* From section 8.4 of the GLSL 4.30 spec: + * + *unpackSnorm4x8 + *-- + *The conversion for unpacked fixed-point value f to floating point is + *done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + */ + return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); +} + +/** * Evaluate one component of unpackSnorm2x16. */ static float @@ -171,6 +269,24 @@ unpack_snorm_1x16(uint16_t u) } /** + * Evaluate one component packUnorm4x8. 
+ */ +static uint8_t +pack_unorm_1x8(float x) +{ +/* From section 8.4 of the GLSL 4.30 spec: + * + *packUnorm4x8 + * + *The conversion for component c of v to fixed point is done as + *follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + */ + return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f); +} + +/** * Evaluate one component packUnorm2x16. */ static uint16_t @@ -188,6 +304,24 @@ pack_unorm_1x16(float x) return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f); } +/** + * Evaluate one component of unpackUnorm4x8. + */ +static float +unpack_unorm_1x8(uint8_t u) +{ +/* From section 8.4 of the GLSL 4.30 spec: + * + *unpackUnorm4x8 + *-- + *The conversion for unpacked fixed-point value f to floating point is + *done as follows: + * + * unpackUnorm4x8: f / 255.0 + */ + return (float) u / 255.0f; +} + /** * Evaluate one component of
[Mesa-dev] [PATCH 5/8] glsl: Add support for lowering 4x8 pack/unpack operations
Lower them to arithmetic and bit manipulation expressions. --- src/glsl/ir_optimization.h |6 + src/glsl/lower_packing_builtins.cpp | 279 +++ 2 files changed, 285 insertions(+), 0 deletions(-) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index ac90b87..8f33018 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -54,6 +54,12 @@ enum lower_packing_builtins_op { LOWER_PACK_HALF_2x16_TO_SPLIT= 0x0040, LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, + + LOWER_PACK_SNORM_4x8 = 0x0100, + LOWER_UNPACK_SNORM_4x8 = 0x0200, + + LOWER_PACK_UNORM_4x8 = 0x0400, + LOWER_UNPACK_UNORM_4x8 = 0x0800, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index 49176cc..aa6765f 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -85,9 +85,15 @@ public: case LOWER_PACK_SNORM_2x16: *rvalue = lower_pack_snorm_2x16(op0); break; + case LOWER_PACK_SNORM_4x8: + *rvalue = lower_pack_snorm_4x8(op0); + break; case LOWER_PACK_UNORM_2x16: *rvalue = lower_pack_unorm_2x16(op0); break; + case LOWER_PACK_UNORM_4x8: + *rvalue = lower_pack_unorm_4x8(op0); + break; case LOWER_PACK_HALF_2x16: *rvalue = lower_pack_half_2x16(op0); break; @@ -97,9 +103,15 @@ public: case LOWER_UNPACK_SNORM_2x16: *rvalue = lower_unpack_snorm_2x16(op0); break; + case LOWER_UNPACK_SNORM_4x8: + *rvalue = lower_unpack_snorm_4x8(op0); + break; case LOWER_UNPACK_UNORM_2x16: *rvalue = lower_unpack_unorm_2x16(op0); break; + case LOWER_UNPACK_UNORM_4x8: + *rvalue = lower_unpack_unorm_4x8(op0); + break; case LOWER_UNPACK_HALF_2x16: *rvalue = lower_unpack_half_2x16(op0); break; @@ -137,18 +149,30 @@ private: case ir_unop_pack_snorm_2x16: result = op_mask LOWER_PACK_SNORM_2x16; break; + case ir_unop_pack_snorm_4x8: + result = op_mask LOWER_PACK_SNORM_4x8; + break; case ir_unop_pack_unorm_2x16: result = op_mask LOWER_PACK_UNORM_2x16; break; + case 
ir_unop_pack_unorm_4x8: + result = op_mask LOWER_PACK_UNORM_4x8; + break; case ir_unop_pack_half_2x16: result = op_mask (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); break; case ir_unop_unpack_snorm_2x16: result = op_mask LOWER_UNPACK_SNORM_2x16; break; + case ir_unop_unpack_snorm_4x8: + result = op_mask LOWER_UNPACK_SNORM_4x8; + break; case ir_unop_unpack_unorm_2x16: result = op_mask LOWER_UNPACK_UNORM_2x16; break; + case ir_unop_unpack_unorm_4x8: + result = op_mask LOWER_UNPACK_UNORM_4x8; + break; case ir_unop_unpack_half_2x16: result = op_mask (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); break; @@ -214,6 +238,30 @@ private: } /** +* \brief Pack four uint8's into a single uint32. +* +* Interpret the given uvec4 as a uint32 quad. Pack the quad into a uint32 +* where the least significant bits specify the first element of the quad. +* Return the uint32. +*/ + ir_rvalue* + pack_uvec4_to_uint(ir_rvalue *uvec4_rval) + { + assert(uvec4_rval-type == glsl_type::uvec4_type); + + /* uvec4 u = UVEC4_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uvec4_type, + tmp_pack_uvec4_to_uint); + factory.emit(assign(u, uvec4_rval)); + + /* return ((u.w 0xff) 24) | ((u.z 0xff) 16) | ((u.y 0xff) 8) | (u.x 0xff); */ + return bit_or(bit_or(lshift(bit_and(swizzle_w(u), constant(0xffu)), constant(24u)), + lshift(bit_and(swizzle_z(u), constant(0xffu)), constant(16u))), +bit_or(lshift(bit_and(swizzle_y(u), constant(0xffu)), constant(8u)), + bit_and(swizzle_x(u), constant(0xffu; + } + + /** * \brief Unpack a uint32 into two uint16's. * * Interpret the given uint32 as a uint16 pair where the uint32's least @@ -244,6 +292,44 @@ private: } /** +* \brief Unpack a uint32 into four uint8's. +* +* Interpret the given uint32 as a uint8 quad where the uint32's least +* significant bits specify the quad's first element. Return the uint8 +* quad as a uvec4. 
+*/ + ir_rvalue* + unpack_uint_to_uvec4(ir_rvalue *uint_rval) + { + assert(uint_rval-type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, +
[Mesa-dev] [PATCH 6/8] i965: Lower the 4x8 pack/unpack operations
--- src/mesa/drivers/dri/i965/brw_shader.cpp |6 +- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 1c02c87..da101a1 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -83,7 +83,11 @@ brw_lower_packing_builtins(struct brw_context *brw, int ops = LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16 | LOWER_PACK_UNORM_2x16 - | LOWER_UNPACK_UNORM_2x16; + | LOWER_UNPACK_UNORM_2x16 + | LOWER_PACK_SNORM_4x8 + | LOWER_UNPACK_SNORM_4x8 + | LOWER_PACK_UNORM_4x8 + | LOWER_UNPACK_UNORM_4x8; if (brw-intel.gen = 7) { /* Gen7 introduced the f32to16 and f16to32 instructions, which can be -- 1.7.8.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/8] i965: Assert that the 4x8 pack/unpack operations have been lowered
--- .../dri/i965/brw_fs_channel_expressions.cpp|4 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |4 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |4 3 files changed, 12 insertions(+), 0 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index e19da51..ea06225 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -343,10 +343,14 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) break; case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: case ir_unop_pack_half_2x16: case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: case ir_quadop_vector: assert(!should have been lowered); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 042ccca..7f4e424 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -539,9 +539,13 @@ fs_visitor::visit(ir_expression *ir) this-result, op[0], op[1]); break; case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: case ir_unop_pack_half_2x16: assert(!not reached: should be handled by lower_packing_builtins); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b5f1aae..7ccf621 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1614,9 +1614,13 @@ vec4_visitor::visit(ir_expression *ir) emit_unpack_half_2x16(result_dst, 
op[0]); break; case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_snorm_4x8: case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_unorm_4x8: assert(!not reached: should be handled by lower_packing_builtins); break; case ir_unop_unpack_half_2x16_split_x: -- 1.7.8.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: add bounds checking for uniform array access
On Sat, Dec 15, 2012 at 7:02 AM, Stéphane Marchesin stephane.marche...@gmail.com wrote: On Fri, Dec 14, 2012 at 12:52 PM, Frank Henigman fjhenig...@google.com wrote: No piglit regressions and now passes glsl-uniform-out-of-bounds-2. Should this have gone into the stable 9.0 branch? Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59833] New: [swrast] piglit glx-swap-pixmap regression
https://bugs.freedesktop.org/show_bug.cgi?id=59833 Priority: medium Bug ID: 59833 Keywords: regression CC: cwo...@cworth.org Assignee: mesa-dev@lists.freedesktop.org Summary: [swrast] piglit glx-swap-pixmap regression Severity: normal Classification: Unclassified OS: Linux (All) Reporter: v...@freedesktop.org Hardware: x86-64 (AMD64) Status: NEW Version: git Component: Mesa core Product: Mesa mesa: d1f2e9699f3e5e7649fc12c4f2409af91d2494fe (master) $ ./bin/glx-swap-pixmap -auto Probe at (0,0) Expected: 0.00 1.00 0.00 0.00 Observed: 0.00 1.00 0.00 1.00 PIGLIT: {'result': 'fail' } 570ed2be7d776211e1ca2a7a4c44ee6a1d141714 is the first bad commit commit 570ed2be7d776211e1ca2a7a4c44ee6a1d141714 Author: Carl Worth cwo...@cworth.org Date: Mon Jan 21 12:16:27 2013 -0800 ReadPixels: Force ALPHA to 1 while rebasing RGBA values for GL_RGB format When performing a ReadPixels operation, we may be reading from a buffer that stores alpha values, but that is actually representing a buffer with no alpha channel. In this case, while rebasing the values, touch up all alpha values read to 1.0. This commit fixes the following piglit (sub) tests: ARB_texture_float/fbo-colormask-formats GL_RBG16F_ARB EXT_texture_snorm/fbo-colormask-formats GL_RGB16_SNORM GL_RGB8_SNORM GL_RGB_SNORM It likely improves the results of other tests as well, but a PASS remains elusive due to additional bugs. Reviewed-by: Brian Paul bri...@vmware.com Reviewed-by: Anuj Phogat anuj.pho...@gmail.com :04 04 144369a7d3779929bad84beca8f3a5b2ccf90640 c25eb37e73f6f6e5435230fe8a799b1b62ed347b Msrc bisect run success -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59835] New: ir_constant_expression.cpp:156: undefined reference to `_mesa_round_to_even'
https://bugs.freedesktop.org/show_bug.cgi?id=59835 Priority: medium Bug ID: 59835 Keywords: regression CC: chad.vers...@linux.intel.com Assignee: mesa-dev@lists.freedesktop.org Summary: ir_constant_expression.cpp:156: undefined reference to `_mesa_round_to_even' Severity: blocker Classification: Unclassified OS: All Reporter: v...@freedesktop.org Hardware: All Status: NEW Version: git Component: Mesa core Product: Mesa mesa: 0974031f88c31458792864b2f3a46ff885607ab1 (master) $ scons [...] Linking build/linux-x86_64-debug/glsl/builtin_compiler/builtin_compiler ... build/linux-x86_64-debug/glsl/ir_constant_expression.o: In function `pack_snorm_1x16': mesa.bisect/src/glsl/ir_constant_expression.cpp:156: undefined reference to `_mesa_round_to_even' build/linux-x86_64-debug/glsl/ir_constant_expression.o: In function `pack_unorm_1x16': mesa.bisect/src/glsl/ir_constant_expression.cpp:192: undefined reference to `_mesa_round_to_even' build/linux-x86_64-debug/glsl/ir_constant_expression.o: In function `pack_half_1x16': mesa.bisect/src/glsl/ir_constant_expression.cpp:219: undefined reference to `_mesa_float_to_half' build/linux-x86_64-debug/glsl/ir_constant_expression.o: In function `unpack_half_1x16': mesa.bisect/src/glsl/ir_constant_expression.cpp:228: undefined reference to `_mesa_half_to_float' eac030e38e3cdd4ed4534516e3d3a50c8a372719 is the first bad commit commit eac030e38e3cdd4ed4534516e3d3a50c8a372719 Author: Chad Versace chad.vers...@linux.intel.com Date: Wed Jan 16 19:49:40 2013 -0800 mesa,glsl: Move round_to_even() from glsl to mesa/main (v2) Move round_to_even's definition to mesa/main so that _mesa_float_to_half() can use it in order to eliminate rounding bias. In addition to moving the function definition, prefix its name with _mesa, just as all other functions in mesa/main are prefixed. v2: Fix Android build. 
Reviewed-by: Ian Romanick ian.d.roman...@intel.com Signed-off-by: Chad Versace chad.vers...@linux.intel.com :040000 040000 60f8318121190570b92106da62352dd407e0b19e 61e7bc463c6b611cf6613968c99c8593115a5642 M src bisect run success -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 59835] ir_constant_expression.cpp:156: undefined reference to `_mesa_round_to_even'
https://bugs.freedesktop.org/show_bug.cgi?id=59835 Vinson Lee v...@freedesktop.org changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Vinson Lee v...@freedesktop.org --- Fixed by this commit. commit 07e215f4ec80619b0eac26b8a5ec08572527c62a Author: Vinson Lee v...@freedesktop.org Date: Thu Jan 24 22:36:27 2013 -0800 scons: Add imports.c to builtin_compiler build. Fixes build regression introduced by commit eac030e38e3cdd4ed4534516e3d3a50c8a372719. Signed-off-by: Vinson Lee v...@freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59835 -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Use a CPU map of the batch on LLC-sharing architectures.
On 01/20/2013 02:59 PM, Eric Anholt wrote: Before, we were keeping a CPU-only buffer to accumulate the batchbuffer in, which was an improvement over mapping the batch through the GTT directly (since any readback or other failure to stream through write combining correctly would hurt). However, on LLC-sharing architectures we can do better by mapping the batch directly, which reduces the cache footprint of the application since we no longer have this extra copy of a batchbuffer around. Improves performance of GLBenchmark 2.1 offscreen on IVB by 3.5% +/- 0.4% (n=21). Improves Lightsmark performance by 1.1 +/- 0.1% (n=76). Improves cairo-gl performance by 1.9% +/- 1.4% (n=57). No statistically significant difference in GLB2.1 on SNB (n=37). Improves cairo-gl performance by 2.1% +/- 0.1% (n=278). Looks good to me. Have you tested this on a non-LLC machine? Reviewed-by: Kenneth Graunke kenn...@whitecape.org ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev