Re: [Mesa-dev] [PATCH v3 3/6] vulkan: enum generator: make registry more flexible
On Thu 21 Sep 2017, Jason Ekstrand wrote: > From: Lionel Landwerlin> > It will be used to store extension numbers as well. > > Signed-off-by: Lionel Landwerlin > Reviewed-by: Jason Ekstrand > --- > src/vulkan/util/gen_enum_to_str.py | 20 ++-- > 1 file changed, 10 insertions(+), 10 deletions(-) > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > index efe5d4f..4a2d605 100644 > --- a/src/vulkan/util/gen_enum_to_str.py > +++ b/src/vulkan/util/gen_enum_to_str.py > @@ -115,18 +115,18 @@ FOREIGN_ENUM_VALUES = [ > ] > > > -class EnumFactory(object): > +class NamedFactory(object): > """Factory for creating enums.""" > > def __init__(self, type_): > self.registry = {} > self.type = type_ > > -def __call__(self, name): > +def __call__(self, name, **kwargs): > try: > return self.registry[name] > except KeyError: > -n = self.registry[name] = self.type(name) > +n = self.registry[name] = self.type(name, **kwargs) > return n > > > @@ -138,7 +138,7 @@ class VkEnum(object): > self.values = values or [] > > > -def parse_xml(efactory, filename): > +def parse_xml(enum_factory, filename): > """Parse the XML file. Accumulate results into the efactory. > > This parser is a memory efficient iterative XML parser that returns a > list > @@ -157,15 +157,15 @@ def parse_xml(efactory, filename): > if event == 'end' and elem.tag == 'enums': > type_ = elem.attrib.get('type') > if type_ == 'enum': > -enum = efactory(elem.attrib['name']) > +enum = enum_factory(elem.attrib['name']) > enum.values.extend([e.attrib['name'] for e in elem > if e.tag == 'enum']) > elif event == 'end' and elem.tag == 'extension': > if elem.attrib['supported'] != 'vulkan': > continue > for e in elem.findall('.//enum[@extends][@offset]'): > -enum = efactory(e.attrib['extends']) > -enum.values.append(e.attrib['name']) > +enum = enum_factory(e.attrib['extends']) > +enum.values.append(e.attrib['name'],) --^ Stray comma in the arg list. 
> > root.clear() > > @@ -182,10 +182,10 @@ def main(): > > args = parser.parse_args() > > -efactory = EnumFactory(VkEnum) > +enum_factory = NamedFactory(VkEnum) > for filename in args.xml_files: > -parse_xml(efactory, filename) > -enums=sorted(efactory.registry.values(), key=lambda e: e.name) > +parse_xml(enum_factory, filename) > +enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) ^ No spaces around the = > > for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.c')), > (H_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.h'))]: > -- With those minor things fixed, Reviewed-by: Chad Versace ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 4/6] vulkan: enum generator: generate extension number defines
From: Lionel Landwerlin
New extensions can introduce additional enums. Most of the new enums will have disjoint numbers from the initial enums. For example, new formats introduced by VK_IMG_format_pvrtc: VK_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, VK_FORMAT_ASTC_10x8_SRGB_BLOCK = 178, VK_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, VK_FORMAT_ASTC_10x10_SRGB_BLOCK = 180, VK_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG = 154000, VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG = 154001, VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG = 154002, VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG = 154003, VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG = 154004, VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG = 154005, VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG = 154006, VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG = 154007, It's obvious we can't have a single table for handling those anymore. Fortunately the enum values actually contain the number of the extension that introduced the new enums. So we can build an indirection table off the extension number and then index by subtracting the first enum of the format enum value. This change makes the extension number available in the generated enum code. 
Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 25 + 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index 4a2d605..4d2d331 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -101,6 +101,10 @@ H_TEMPLATE = Template(textwrap.dedent(u"""\ #include #include +% for ext in extensions: +#define _${ext.name}_number (${ext.number}) +% endfor + % for enum in enums: const char * vk_${enum.name[2:]}_to_str(${enum.name} input); % endfor @@ -130,6 +134,14 @@ class NamedFactory(object): return n +class VkExtension(object): +"""Simple struct-like class representing extensions""" + +def __init__(self, name, number=None): +self.name = name +self.number = number + + class VkEnum(object): """Simple struct-like class representing a single Vulkan Enum.""" @@ -138,8 +150,8 @@ class VkEnum(object): self.values = values or [] -def parse_xml(enum_factory, filename): -"""Parse the XML file. Accumulate results into the efactory. +def parse_xml(enum_factory, ext_factory, filename): +"""Parse the XML file. Accumulate results into the factories. This parser is a memory efficient iterative XML parser that returns a list of VkEnum objects. 
@@ -160,6 +172,9 @@ def parse_xml(enum_factory, filename): enum = enum_factory(elem.attrib['name']) enum.values.extend([e.attrib['name'] for e in elem if e.tag == 'enum']) +elif event == 'start' and elem.tag == 'extension': +ext_factory(elem.attrib['name'], +number=int(elem.attrib['number'])) elif event == 'end' and elem.tag == 'extension': if elem.attrib['supported'] != 'vulkan': continue @@ -169,7 +184,6 @@ def parse_xml(enum_factory, filename): root.clear() - def main(): parser = argparse.ArgumentParser() parser.add_argument('--xml', required=True, @@ -183,9 +197,11 @@ def main(): args = parser.parse_args() enum_factory = NamedFactory(VkEnum) +ext_factory = NamedFactory(VkExtension) for filename in args.xml_files: -parse_xml(enum_factory, filename) +parse_xml(enum_factory, ext_factory, filename) enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) +extensions=sorted(ext_factory.registry.values(), key=lambda e: e.name) for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.c')), (H_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.h'))]: @@ -193,6 +209,7 @@ def main(): f.write(template.render( file=os.path.basename(__file__), enums=enums, +extensions=extensions, copyright=COPYRIGHT, FOREIGN_ENUM_VALUES=FOREIGN_ENUM_VALUES)) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 3/6] vulkan: enum generator: make registry more flexible
From: Lionel LandwerlinIt will be used to store extension numbers as well. Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index efe5d4f..4a2d605 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -115,18 +115,18 @@ FOREIGN_ENUM_VALUES = [ ] -class EnumFactory(object): +class NamedFactory(object): """Factory for creating enums.""" def __init__(self, type_): self.registry = {} self.type = type_ -def __call__(self, name): +def __call__(self, name, **kwargs): try: return self.registry[name] except KeyError: -n = self.registry[name] = self.type(name) +n = self.registry[name] = self.type(name, **kwargs) return n @@ -138,7 +138,7 @@ class VkEnum(object): self.values = values or [] -def parse_xml(efactory, filename): +def parse_xml(enum_factory, filename): """Parse the XML file. Accumulate results into the efactory. 
This parser is a memory efficient iterative XML parser that returns a list @@ -157,15 +157,15 @@ def parse_xml(efactory, filename): if event == 'end' and elem.tag == 'enums': type_ = elem.attrib.get('type') if type_ == 'enum': -enum = efactory(elem.attrib['name']) +enum = enum_factory(elem.attrib['name']) enum.values.extend([e.attrib['name'] for e in elem if e.tag == 'enum']) elif event == 'end' and elem.tag == 'extension': if elem.attrib['supported'] != 'vulkan': continue for e in elem.findall('.//enum[@extends][@offset]'): -enum = efactory(e.attrib['extends']) -enum.values.append(e.attrib['name']) +enum = enum_factory(e.attrib['extends']) +enum.values.append(e.attrib['name'],) root.clear() @@ -182,10 +182,10 @@ def main(): args = parser.parse_args() -efactory = EnumFactory(VkEnum) +enum_factory = NamedFactory(VkEnum) for filename in args.xml_files: -parse_xml(efactory, filename) -enums=sorted(efactory.registry.values(), key=lambda e: e.name) +parse_xml(enum_factory, filename) +enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.c')), (H_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.h'))]: -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 5/6] vulkan: enum generator: Stop using iterparse
While using iterparse is potentially a little more efficient, the Vulkan registry XML is not large and using regular element tree simplifies the parsing logic substantially. --- src/vulkan/util/gen_enum_to_str.py | 36 ++-- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index 4d2d331..07d7685 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -157,32 +157,16 @@ def parse_xml(enum_factory, ext_factory, filename): of VkEnum objects. """ -with open(filename, 'rb') as f: -context = iter(et.iterparse(f, events=('start', 'end'))) - -# This gives the root element, since goal is to iterate over the -# elements without building a tree, this allows the root to be cleared -# (erase the elements) after the children have been processed. -_, root = next(context) - -for event, elem in context: -if event == 'end' and elem.tag == 'enums': -type_ = elem.attrib.get('type') -if type_ == 'enum': -enum = enum_factory(elem.attrib['name']) -enum.values.extend([e.attrib['name'] for e in elem -if e.tag == 'enum']) -elif event == 'start' and elem.tag == 'extension': -ext_factory(elem.attrib['name'], -number=int(elem.attrib['number'])) -elif event == 'end' and elem.tag == 'extension': -if elem.attrib['supported'] != 'vulkan': -continue -for e in elem.findall('.//enum[@extends][@offset]'): -enum = enum_factory(e.attrib['extends']) -enum.values.append(e.attrib['name'],) - -root.clear() +xml = et.parse(filename) + +for enum_type in xml.findall('./enums[@type="enum"]'): +enum = enum_factory(enum_type.attrib['name']) +for value in enum_type.findall('./enum'): +enum.values.append(value.attrib['name']) + +for ext_elem in xml.findall('./extensions/extension[@supported="vulkan"]'): +ext_factory(ext_elem.attrib['name'], +number=int(ext_elem.attrib['number'])) def main(): parser = argparse.ArgumentParser() -- 2.5.0.400.gff86faf ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 6/6] vulkan: enum generator: Generate entries for extended enums
--- src/vulkan/util/gen_enum_to_str.py | 48 +++--- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index 07d7685..fc93136 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -68,15 +68,15 @@ C_TEMPLATE = Template(textwrap.dedent(u"""\ vk_${enum.name[2:]}_to_str(${enum.name} input) { switch(input) { -% for v in enum.values: -% if v in FOREIGN_ENUM_VALUES: +% for v in sorted(enum.values.keys()): +% if enum.values[v] in FOREIGN_ENUM_VALUES: #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch" % endif case ${v}: -return "${v}"; -% if v in FOREIGN_ENUM_VALUES: +return "${enum.values[v]}"; +% if enum.values[v] in FOREIGN_ENUM_VALUES: #pragma GCC diagnostic pop % endif @@ -133,6 +133,9 @@ class NamedFactory(object): n = self.registry[name] = self.type(name, **kwargs) return n +def get(self, name): +return self.registry.get(name) + class VkExtension(object): """Simple struct-like class representing extensions""" @@ -147,7 +150,20 @@ class VkEnum(object): def __init__(self, name, values=None): self.name = name -self.values = values or [] +# Maps numbers to names +self.values = values or dict() + +def add_value(self, name, value=None, + extension=None, offset=None, + error=False): +assert value is not None or extension is not None +if value is None: +value = 10 + (extension.number - 1) * 1000 + offset +if error: +value = -value + +if value not in self.values: +self.values[value] = name def parse_xml(enum_factory, ext_factory, filename): @@ -162,11 +178,27 @@ def parse_xml(enum_factory, ext_factory, filename): for enum_type in xml.findall('./enums[@type="enum"]'): enum = enum_factory(enum_type.attrib['name']) for value in enum_type.findall('./enum'): -enum.values.append(value.attrib['name']) +enum.add_value(value.attrib['name'], + value=int(value.attrib['value'])) for ext_elem in 
xml.findall('./extensions/extension[@supported="vulkan"]'): -ext_factory(ext_elem.attrib['name'], -number=int(ext_elem.attrib['number'])) +extension = ext_factory(ext_elem.attrib['name'], +number=int(ext_elem.attrib['number'])) + +for value in ext_elem.findall('./require/enum[@extends]'): +enum = enum_factory.get(value.attrib['extends']) +if enum is None: +continue +if 'value' in value.attrib: +enum.add_value(value.attrib['name'], + value=int(value.attrib['value'])) +else: +error = 'dir' in value.attrib and value.attrib['dir'] == '-' +enum.add_value(value.attrib['name'], + extension=extension, + offset=int(value.attrib['offset']), + error=error) + def main(): parser = argparse.ArgumentParser() -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 1/6] vulkan: enum generator: align function declarations/prototypes
From: Lionel LandwerlinSigned-off-by: Lionel Landwerlin Acked-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 42 +++--- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index df326d0..aa7001e 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -64,27 +64,27 @@ C_TEMPLATE = Template(textwrap.dedent(u"""\ % for enum in enums: -const char * -vk_${enum.name[2:]}_to_str(${enum.name} input) -{ -switch(input) { -% for v in enum.values: -% if v in FOREIGN_ENUM_VALUES: - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wswitch" -% endif -case ${v}: -return "${v}"; -% if v in FOREIGN_ENUM_VALUES: -#pragma GCC diagnostic pop - -% endif -% endfor -default: -unreachable("Undefined enum value."); -} +const char * +vk_${enum.name[2:]}_to_str(${enum.name} input) +{ +switch(input) { +% for v in enum.values: +% if v in FOREIGN_ENUM_VALUES: + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +% endif +case ${v}: +return "${v}"; +% if v in FOREIGN_ENUM_VALUES: +#pragma GCC diagnostic pop + +% endif +% endfor +default: +unreachable("Undefined enum value."); } +} %endfor"""), output_encoding='utf-8') @@ -102,7 +102,7 @@ H_TEMPLATE = Template(textwrap.dedent(u"""\ #include % for enum in enums: -const char * vk_${enum.name[2:]}_to_str(${enum.name} input); +const char * vk_${enum.name[2:]}_to_str(${enum.name} input); % endfor #endif"""), -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 9/9] swr/rast: Handle instanceID offset / Instance Stride enable
Supported in JitGatherVertices(); FetchJit::JitLoadVertices() may require similar changes, will need to address this if it is determined that this path is still in use. Handle Force Sequential Access in FetchJit::Create. --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 46 ++ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 9061298..1e3db90 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -222,6 +222,18 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) default: SWR_INVALID("Unsupported index type"); vIndices = nullptr; break; } +if(fetchState.bForceSequentialAccessEnable) +{ +Value* pOffsets = C({ 0, 1, 2, 3, 4, 5, 6, 7 }); + +// VertexData buffers are accessed sequentially, the index is equal to the vertex number +vIndices = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex })); +vIndices = ADD(vIndices, pOffsets); +#if USE_SIMD16_SHADERS +vIndices2 = ADD(vIndices, VIMMED1(8)); +#endif +} + Value* vVertexId = vIndices; #if USE_SIMD16_SHADERS Value* vVertexId2 = vIndices2; @@ -275,12 +287,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) : JitGatherVertices(fetchState, streams, vIndices, pVtxOut); #endif -if (fetchState.bInstanceIDOffsetEnable) -{ -// TODO: -SWR_ASSERT((0), "Add support for handling InstanceID Offset Enable."); -} - RET_VOID(); JitManager::DumpToFile(fetch, "src"); @@ -362,6 +368,11 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE , Value* str vectors.clear(); +if (fetchState.bInstanceIDOffsetEnable) +{ +SWR_ASSERT((0), "TODO: Fill out more once driver sends this down"); +} + Value *vCurIndices; Value *startOffset; if(ied.InstanceEnable) @@ -831,8 +842,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , minVertex = LOAD(minVertex); } +if 
(fetchState.bInstanceIDOffsetEnable) +{ +// the InstanceID (curInstance) value is offset by StartInstanceLocation +curInstance = ADD(curInstance, startInstance); +} + Value *vCurIndices; Value *startOffset; +Value *vInstanceStride = VIMMED1(0); + if(ied.InstanceEnable) { Value* stepRate = C(ied.InstanceAdvancementState); @@ -853,11 +872,19 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , } else if (ied.InstanceStrideEnable) { +// grab the instance advancement state, determines stride in bytes from one instance to the next +Value* stepRate = C(ied.InstanceAdvancementState); +vInstanceStride = VBROADCAST(MUL(curInstance, stepRate)); + +// offset indices by baseVertex +vCurIndices = ADD(vIndices, vBaseVertex); + +startOffset = startVertex; SWR_ASSERT((0), "TODO: Fill out more once driver sends this down."); } else { -// offset indices by baseVertex +// offset indices by baseVertex vCurIndices = ADD(vIndices, vBaseVertex); startOffset = startVertex; @@ -925,6 +952,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE , Value* vOffsets = MUL(vCurIndices, vStride); vOffsets = ADD(vOffsets, vAlignmentOffsets); +// if instance stride enable is: +// true - add product of the instanceID and advancement state to the offst into the VB +// false - value of vInstanceStride has been initialialized to zero +vOffsets = ADD(vOffsets, vInstanceStride); + // Packing and component control ComponentEnable compMask = (ComponentEnable)ied.ComponentPacking; const ComponentControl compCtrl[4] { (ComponentControl)ied.ComponentControl0, (ComponentControl)ied.ComponentControl1, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/9] swr/rast: New GS state/context API
One piglit regression, which was a false pass: spec@glsl-1.50@execution@geometry@dynamic_input_array_index --- .../drivers/swr/rasterizer/core/frontend.cpp | 227 - src/gallium/drivers/swr/rasterizer/core/state.h| 55 +++-- src/gallium/drivers/swr/swr_shader.cpp | 183 - 3 files changed, 253 insertions(+), 212 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index f882869..26e76a9 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -710,45 +710,67 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num THREAD SWR_GS_CONTEXT tlsGsContext; -template -struct GsBufferInfo +// Buffers that are allocated if GS is enabled +struct GsBuffers { -GsBufferInfo(const SWR_GS_STATE ) -{ -const uint32_t vertexCount = gsState.maxNumVerts; -const uint32_t vertexStride = sizeof(SIMDVERTEX); -const uint32_t numSimdBatches = (vertexCount + SIMD_WIDTH - 1) / SIMD_WIDTH; +uint8_t* pGsIn; +uint8_t* pGsOut[KNOB_SIMD_WIDTH]; +uint8_t* pGsTransposed; +void* pStreamCutBuffer; +}; -vertexPrimitiveStride = vertexStride * numSimdBatches; -vertexInstanceStride = vertexPrimitiveStride * SIMD_WIDTH; +// +/// @brief Transposes GS output from SOA to AOS to feed the primitive assembler +/// @param pDst - Destination buffer in AOS form for the current SIMD width, fed into the primitive assembler +/// @param pSrc - Buffer of vertices in SOA form written by the geometry shader +/// @param numVerts - Number of vertices outputted by the GS +/// @param numAttribs - Number of attributes per vertex +template +void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t numAttribs) +{ +uint32_t srcVertexStride = numAttribs * sizeof(float) * 4; +uint32_t dstVertexStride = numAttribs * sizeof(typename SIMD_T::Float) * 4; -if (gsState.isSingleStream) -{ -cutPrimitiveStride = (vertexCount + 7) / 8; 
-cutInstanceStride = cutPrimitiveStride * SIMD_WIDTH; +OSALIGNSIMD16(uint32_t) gatherOffsets[SimdWidth]; -streamCutPrimitiveStride = 0; -streamCutInstanceStride = 0; -} -else -{ -cutPrimitiveStride = AlignUp(vertexCount * 2 / 8, 4); -cutInstanceStride = cutPrimitiveStride * SIMD_WIDTH; - -streamCutPrimitiveStride = (vertexCount + 7) / 8; -streamCutInstanceStride = streamCutPrimitiveStride * SIMD_WIDTH; -} +for (uint32_t i = 0; i < SimdWidth; ++i) +{ +gatherOffsets[i] = srcVertexStride * i; } +auto vGatherOffsets = SIMD_T::load_si((typename SIMD_T::Integer*)[0]); -uint32_t vertexPrimitiveStride; -uint32_t vertexInstanceStride; +uint32_t numSimd = AlignUp(numVerts, SimdWidth) / SimdWidth; +uint32_t remainingVerts = numVerts; -uint32_t cutPrimitiveStride; -uint32_t cutInstanceStride; +for (uint32_t s = 0; s < numSimd; ++s) +{ +uint8_t* pSrcBase = pSrc + s * srcVertexStride * SimdWidth; +uint8_t* pDstBase = pDst + s * dstVertexStride; -uint32_t streamCutPrimitiveStride; -uint32_t streamCutInstanceStride; -}; +// Compute mask to prevent src overflow +uint32_t mask = std::min(remainingVerts, SimdWidth); +mask = GenMask(mask); +auto vMask = SIMD_T::vmask_ps(mask); +auto viMask = SIMD_T::castps_si(vMask); + +for (uint32_t a = 0; a < numAttribs; ++a) +{ +auto attribGatherX = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask); +auto attribGatherY = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float)), vGatherOffsets, vMask); +auto attribGatherZ = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) * 2), vGatherOffsets, vMask); +auto attribGatherW = SIMD_T::template mask_i32gather_ps(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) * 3), vGatherOffsets, vMask); + +SIMD_T::maskstore_ps((float*)pDstBase, viMask, attribGatherX); +SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(typename SIMD_T::Float)), viMask, attribGatherY); 
+SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(typename SIMD_T::Float) * 2), viMask, attribGatherZ); +SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(typename SIMD_T::Float) * 3), viMask, attribGatherW); + +pSrcBase += sizeof(float) * 4; +pDstBase += sizeof(typename SIMD_T::Float) * 4; +} +remainingVerts -= SimdWidth; +} +}
[Mesa-dev] [PATCH 6/9] swr/rast: Slightly more efficient blend jit
--- .../drivers/swr/rasterizer/jitter/blend_jit.cpp| 30 -- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp index f2e6e53..3258639 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -581,13 +581,13 @@ struct BlendJit : public Builder // load src1 src1[i] = LOAD(pSrc1, { i }); } -Value* currentMask = VIMMED1(-1); +Value* currentSampleMask = VIMMED1(-1); if (state.desc.alphaToCoverageEnable) { Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f); uint32_t bits = (1 << state.desc.numSamples) - 1; -currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits))); -currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty); +currentSampleMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits))); +currentSampleMask = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty); } // alpha test @@ -766,34 +766,24 @@ struct BlendJit : public Builder assert(!(state.desc.alphaToCoverageEnable)); // load current mask Value* oMask = LOAD(ppoMask); -Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum)); -oMask = AND(oMask, sampleMasked); -currentMask = AND(oMask, currentMask); +currentSampleMask = AND(oMask, currentSampleMask); } if(state.desc.sampleMaskEnable) { Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask}); -Value* sampleMasked = SHL(C(1), sampleNum); -sampleMask = AND(sampleMask, sampleMasked); -sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0))); -sampleMask = S_EXT(sampleMask, mSimdInt32Ty); -currentMask = AND(sampleMask, currentMask); -} - -if (state.desc.alphaToCoverageEnable) -{ -Value* sampleMasked = SHL(C(1), sampleNum); -currentMask = AND(currentMask, VBROADCAST(sampleMasked)); +currentSampleMask = AND(VBROADCAST(sampleMask), currentSampleMask); } if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable || 
state.desc.oMaskEnable) { -// load coverage mask +// load coverage mask and mask off any lanes with no samples Value* pMask = LOAD(ppMask); -currentMask = S_EXT(ICMP_UGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty); -Value* outputMask = AND(pMask, currentMask); +Value* sampleMasked = SHL(C(1), sampleNum); +currentSampleMask = AND(currentSampleMask, VBROADCAST(sampleMasked)); +currentSampleMask = S_EXT(ICMP_UGT(currentSampleMask, VBROADCAST(C(0))), mSimdInt32Ty); +Value* outputMask = AND(pMask, currentSampleMask); // store new mask STORE(outputMask, GEP(ppMask, C(0))); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/9] swr/rast: Properly sized null GS buffer
--- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 15bc93d..22a5705 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -798,7 +798,7 @@ static void GeometryShaderStage( const SWR_GS_STATE* pState = SWR_GS_CONTEXT gsContext; -static uint8_t sNullBuffer[1024] = { 0 }; +static uint8_t sNullBuffer[128] = { 0 }; for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/9] swr/rast: Fetch compile state changes
Add ForceSequentialAccessEnable and InstanceIDOffsetEnable bools to FETCH_COMPILE_STATE. --- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 6 ++ src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h | 7 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index f3a4b27..9061298 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -275,6 +275,12 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) : JitGatherVertices(fetchState, streams, vIndices, pVtxOut); #endif +if (fetchState.bInstanceIDOffsetEnable) +{ +// TODO: +SWR_ASSERT((0), "Add support for handling InstanceID Offset Enable."); +} + RET_VOID(); JitManager::DumpToFile(fetch, "src"); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h index 0dd6de7..18fa963 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h @@ -107,6 +107,9 @@ struct FETCH_COMPILE_STATE bool bVertexIDOffsetEnable{ false };// Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws bool bPartialVertexBuffer{ false }; // for indexed draws, map illegal indices to a known resident vertex +bool bForceSequentialAccessEnable{ false }; +bool bInstanceIDOffsetEnable{ false }; + FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck = false): bDisableVGATHER(disableVGATHER), bDisableIndexOOBCheck(diableIndexOOBCheck){ }; @@ -120,11 +123,13 @@ struct FETCH_COMPILE_STATE if (cutIndex != other.cutIndex) return false; if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false; if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false; +if (bForceSequentialAccessEnable != other.bForceSequentialAccessEnable) 
return false; +if (bInstanceIDOffsetEnable != other.bInstanceIDOffsetEnable) return false; for(uint32_t i = 0; i < numAttribs; ++i) { if((layout[i].bits != other.layout[i].bits) || - ((layout[i].InstanceEnable == 1) && + (((layout[i].InstanceEnable == 1) || (layout[i].InstanceStrideEnable == 1)) && (layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState))){ return false; } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 8/9] swr/rast: Remove code supporting legacy llvm (<3.9)
--- .../drivers/swr/rasterizer/jitter/JitManager.cpp | 11 ++- .../drivers/swr/rasterizer/jitter/JitManager.h | 7 -- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 102 ++--- 3 files changed, 15 insertions(+), 105 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index e4281f8..3f0772c 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -48,8 +48,9 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Path.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Config/llvm-config.h" -#if HAVE_LLVM < 0x400 +#if LLVM_VERSION_MAJOR < 4 #include "llvm/Bitcode/ReaderWriter.h" #else #include "llvm/Bitcode/BitcodeWriter.h" @@ -231,8 +232,8 @@ void JitManager::DumpAsm(Function* pFunction, const char* fileName) #if defined(_WIN32) DWORD pid = GetCurrentProcessId(); -TCHAR procname[MAX_PATH]; -GetModuleFileName(NULL, procname, MAX_PATH); +char procname[MAX_PATH]; +GetModuleFileNameA(NULL, procname, MAX_PATH); const char* pBaseName = strrchr(procname, '\\'); std::stringstream outDir; outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends; @@ -269,8 +270,8 @@ void JitManager::DumpToFile(Function *f, const char *fileName) { #if defined(_WIN32) DWORD pid = GetCurrentProcessId(); -TCHAR procname[MAX_PATH]; -GetModuleFileName(NULL, procname, MAX_PATH); +char procname[MAX_PATH]; +GetModuleFileNameA(NULL, procname, MAX_PATH); const char* pBaseName = strrchr(procname, '\\'); std::stringstream outDir; outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h index 4bc543b..46ffe27 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h @@ -47,13 +47,6 @@ #include 
"llvm/ExecutionEngine/ObjectCache.h" #include "llvm/Config/llvm-config.h" -#ifndef LLVM_VERSION_MAJOR -#include "llvm/Config/config.h" -#endif - -#ifndef HAVE_LLVM -#define HAVE_LLVM ((LLVM_VERSION_MAJOR << 8) | LLVM_VERSION_MINOR) -#endif #include "llvm/IR/Verifier.h" #include "llvm/ExecutionEngine/MCJIT.h" diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index b943909..9ca36b2 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -763,22 +763,10 @@ namespace SwrJit /// lower 8 values are used. Value *Builder::PMOVSXBD(Value* a) { -// llvm-3.9 removed the pmovsxbd intrinsic -#if HAVE_LLVM < 0x309 -// use avx2 byte sign extend instruction if available -if(JM()->mArch.AVX2()) -{ -Function *pmovsxbd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxbd); -return CALL(pmovsxbd, std::initializer_list{a}); -} -else -#endif -{ -// VPMOVSXBD output type -Type* v8x32Ty = VectorType::get(mInt32Ty, 8); -// Extract 8 values from 128bit lane and sign extend -return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); -} +// VPMOVSXBD output type +Type* v8x32Ty = VectorType::get(mInt32Ty, 8); +// Extract 8 values from 128bit lane and sign extend +return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); } // @@ -787,22 +775,10 @@ namespace SwrJit /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values. 
Value *Builder::PMOVSXWD(Value* a) { -// llvm-3.9 removed the pmovsxwd intrinsic -#if HAVE_LLVM < 0x309 -// use avx2 word sign extend if available -if(JM()->mArch.AVX2()) -{ -Function *pmovsxwd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxwd); -return CALL(pmovsxwd, std::initializer_list {a}); -} -else -#endif -{ -// VPMOVSXWD output type -Type* v8x32Ty = VectorType::get(mInt32Ty, 8); -// Extract 8 values from 128bit lane and sign extend -return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); -} +// VPMOVSXWD output type +Type* v8x32Ty = VectorType::get(mInt32Ty, 8); +// Extract 8 values from 128bit lane and sign extend +return S_EXT(VSHUFFLE(a, a, C({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); }
[Mesa-dev] [PATCH 1/9] swr/rast: Add support for R10G10B10_FLOAT_A2_UNORM pixel format
--- .../drivers/swr/rasterizer/common/formats.cpp | 27 +++--- .../drivers/swr/rasterizer/core/format_traits.h| 2 +- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 16 ++--- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.cpp b/src/gallium/drivers/swr/rasterizer/common/formats.cpp index 263dec6..1c086ff 100644 --- a/src/gallium/drivers/swr/rasterizer/common/formats.cpp +++ b/src/gallium/drivers/swr/rasterizer/common/formats.cpp @@ -2729,16 +2729,27 @@ const SWR_FORMAT_INFO gFormatInfo[] = { { 0.0f, 0.0f, 0.0f, 0.0f }, 1, 1 }, -// padding (0xD5) + +// R10G10B10_FLOAT_A2_UNORM (0xD5) { -nullptr, -{ SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, -{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, -0, 0, 0, false, false, false, false, -{ false, false, false, false }, -{ 0.0f, 0.0f, 0.0f, 0.0f }, -1, 1 +"R10G10B10_FLOAT_A2_UNORM", +{ SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNORM }, +{ 0, 0, 0, 0x3f80 }, // Defaults for missing components +{ 0, 1, 2, 3 }, // Swizzle +{ 10, 10, 10, 2 }, // Bits per component +32, // Bits per element +4, // Bytes per element +4, // Num components +false, // isSRGB +false, // isBC +false, // isSubsampled +false, // isLuminance +{ false, false, false, false }, // Is normalized? 
+{ 1.0f, 1.0f, 1.0f, 1.0f / 3.0f }, // To float scale factor +1, // bcWidth +1, // bcHeight }, + // R32_SINT (0xD6) { "R32_SINT", diff --git a/src/gallium/drivers/swr/rasterizer/core/format_traits.h b/src/gallium/drivers/swr/rasterizer/core/format_traits.h index c04ea5f..bc585dd 100644 --- a/src/gallium/drivers/swr/rasterizer/core/format_traits.h +++ b/src/gallium/drivers/swr/rasterizer/core/format_traits.h @@ -1237,7 +1237,7 @@ template<> struct FormatTraits : /// FormatTraits - Format traits specialization for R10G10B10_FLOAT_A2_UNORM // template<> struct FormatTraits : -ComponentTraits, +ComponentTraits , FormatSwizzle<0, 1, 2, 3>, Defaults<0, 0, 0, 0x3f80> { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 402fd26..b943909 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -42,7 +42,7 @@ namespace SwrJit ///number of mantissa bits. /// @param val - 32-bit float /// @todo Maybe move this outside of this file into a header? -static uint16_t Convert32To16Float(float val) +static uint16_t ConvertFloat32ToFloat16(float val) { uint32_t sign, exp, mant; uint32_t roundBits; @@ -112,7 +112,7 @@ namespace SwrJit ///float /// @param val - 16-bit float /// @todo Maybe move this outside of this file into a header? 
-static float ConvertSmallFloatTo32(uint32_t val) +static float ConvertFloat16ToFloat32(uint32_t val) { uint32_t result; if ((val & 0x7fff) == 0) @@ -888,11 +888,11 @@ namespace SwrJit else { FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty); -Function* pCvtPh2Ps = cast(JM()->mpCurrentModule->getOrInsertFunction("ConvertSmallFloatTo32", pFuncTy)); +Function* pCvtPh2Ps = cast(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy)); -if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertSmallFloatTo32") == nullptr) +if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr) { -sys::DynamicLibrary::AddSymbol("ConvertSmallFloatTo32", (void *)); +sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32", (void *)); } Value* pResult = UndefValue::get(mSimdFP32Ty); @@ -921,11 +921,11 @@ namespace SwrJit { // call scalar C function for now FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty); -Function* pCvtPs2Ph = cast(JM()->mpCurrentModule->getOrInsertFunction("Convert32To16Float", pFuncTy)); +Function* pCvtPs2Ph = cast(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy)); -if (sys::DynamicLibrary::SearchForAddressOfSymbol("Convert32To16Float") == nullptr) +if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") ==
[Mesa-dev] [PATCH 4/9] swr/rast: Move SWR_GS_CONTEXT from thread local storage to stack
Move structure, as the size is significantly reduced due to dynamic allocation of the GS buffers. --- .../drivers/swr/rasterizer/core/frontend.cpp | 23 +++--- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 26e76a9..15bc93d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -708,8 +708,6 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num } } -THREAD SWR_GS_CONTEXT tlsGsContext; - // Buffers that are allocated if GS is enabled struct GsBuffers { @@ -798,21 +796,22 @@ static void GeometryShaderStage( const API_STATE& state = GetApiState(pDC); const SWR_GS_STATE* pState = +SWR_GS_CONTEXT gsContext; static uint8_t sNullBuffer[1024] = { 0 }; for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { -tlsGsContext.pStreams[i] = pGsBuffers->pGsOut[i]; +gsContext.pStreams[i] = pGsBuffers->pGsOut[i]; } -tlsGsContext.pVerts = (simdvector*)pGsBuffers->pGsIn; -tlsGsContext.PrimitiveID = primID; +gsContext.pVerts = (simdvector*)pGsBuffers->pGsIn; +gsContext.PrimitiveID = primID; uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true); simdvector attrib[MAX_NUM_VERTS_PER_PRIM]; // assemble all attributes for the input primitive -tlsGsContext.inputVertStride = pState->inputVertStride; +gsContext.inputVertStride = pState->inputVertStride; for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot) { uint32_t srcAttribSlot = pState->srcVertexAttribOffset + slot; @@ -821,7 +820,7 @@ static void GeometryShaderStage( for (uint32_t i = 0; i < numVertsPerPrim; ++i) { -tlsGsContext.pVerts[attribSlot + pState->inputVertStride * i] = attrib[i]; +gsContext.pVerts[attribSlot + pState->inputVertStride * i] = attrib[i]; } } @@ -829,7 +828,7 @@ static void GeometryShaderStage( pa.Assemble(VERTEX_POSITION_SLOT, attrib); for (uint32_t i = 0; i < 
numVertsPerPrim; ++i) { -tlsGsContext.pVerts[VERTEX_POSITION_SLOT + pState->inputVertStride * i] = attrib[i]; +gsContext.pVerts[VERTEX_POSITION_SLOT + pState->inputVertStride * i] = attrib[i]; } // record valid prims from the frontend to avoid over binning the newly generated @@ -842,15 +841,15 @@ static void GeometryShaderStage( for (uint32_t instance = 0; instance < pState->instanceCount; ++instance) { -tlsGsContext.InstanceID = instance; -tlsGsContext.mask = GenerateMask(numInputPrims); +gsContext.InstanceID = instance; +gsContext.mask = GenerateMask(numInputPrims); // execute the geometry shader -state.pfnGsFunc(GetPrivateState(pDC), ); +state.pfnGsFunc(GetPrivateState(pDC), ); for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { -tlsGsContext.pStreams[i] += pState->allocationSize; +gsContext.pStreams[i] += pState->allocationSize; } } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/9] swr/rast: Fix allocation of DS output data for USE_SIMD16_FRONTEND
--- src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 16 ++-- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 22a5705..aea8e88 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -1062,7 +1062,7 @@ struct TessellationThreadLocalData size_t tsCtxSize; simdscalar* pDSOutput; -size_t numDSOutputVectors; +size_t dsOutputAllocSize; }; THREAD TessellationThreadLocalData* gt_pTessellationThreadData = nullptr; @@ -1210,24 +1210,20 @@ static void TessellationStages( // Allocate DS Output memory uint32_t requiredDSVectorInvocations = AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH; -size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs; #if USE_SIMD16_FRONTEND size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) * tsState.numDsOutputAttribs; // simd8 -> simd16, padding #else +size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs; size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors; #endif -if (requiredDSOutputVectors > gt_pTessellationThreadData->numDSOutputVectors) +if (requiredAllocSize > gt_pTessellationThreadData->dsOutputAllocSize) { AlignedFree(gt_pTessellationThreadData->pDSOutput); gt_pTessellationThreadData->pDSOutput = (simdscalar*)AlignedMalloc(requiredAllocSize, 64); -#if USE_SIMD16_FRONTEND -gt_pTessellationThreadData->numDSOutputVectors = RoundUpEven(requiredDSVectorInvocations) * tsState.numDsOutputAttribs; // simd8 -> simd16, padding -#else -gt_pTessellationThreadData->numDSOutputVectors = requiredDSOutputVectors; -#endif +gt_pTessellationThreadData->dsOutputAllocSize = requiredAllocSize; } SWR_ASSERT(gt_pTessellationThreadData->pDSOutput); -SWR_ASSERT(gt_pTessellationThreadData->numDSOutputVectors >= 
requiredDSOutputVectors); +SWR_ASSERT(gt_pTessellationThreadData->dsOutputAllocSize >= requiredAllocSize); #if defined(_DEBUG) memset(gt_pTessellationThreadData->pDSOutput, 0x90, requiredAllocSize); @@ -1356,7 +1352,7 @@ static void TessellationStages( AlignedFree(gt_pTessellationThreadData->pDSOutput); gt_pTessellationThreadData->pDSOutput = nullptr; } -gt_pTessellationThreadData->numDSOutputVectors = 0; +gt_pTessellationThreadData->dsOutputAllocSize = 0; #endif TSDestroyCtx(tsCtx); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/9] swr: update rasterizer
Highlights: large change in the geometry shader api, cleanups. Tim Rowley (9): swr/rast: Add support for R10G10B10_FLOAT_A2_UNORM pixel format swr/rast: New GS state/context API swr/rast: Fetch compile state changes swr/rast: Move SWR_GS_CONTEXT from thread local storage to stack swr/rast: Properly sized null GS buffer swr/rast: Slightly more efficient blend jit swr/rast: Fix allocation of DS output data for USE_SIMD16_FRONTEND swr/rast: Remove code supporting legacy llvm (<3.9) swr/rast: Handle instanceID offset / Instance Stride enable .../drivers/swr/rasterizer/common/formats.cpp | 27 ++- .../drivers/swr/rasterizer/core/format_traits.h| 2 +- .../drivers/swr/rasterizer/core/frontend.cpp | 252 +++-- src/gallium/drivers/swr/rasterizer/core/state.h| 55 +++-- .../drivers/swr/rasterizer/jitter/JitManager.cpp | 11 +- .../drivers/swr/rasterizer/jitter/JitManager.h | 7 - .../drivers/swr/rasterizer/jitter/blend_jit.cpp| 30 +-- .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 118 ++ .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 40 +++- .../drivers/swr/rasterizer/jitter/fetch_jit.h | 7 +- src/gallium/drivers/swr/swr_shader.cpp | 183 +++ 11 files changed, 361 insertions(+), 371 deletions(-) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 2/6] vulkan: enum generator: sort enums by names
On Thu 21 Sep 2017, Jason Ekstrand wrote: > From: Lionel Landwerlin> > Signed-off-by: Lionel Landwerlin > Reviewed-by: Jason Ekstrand > --- > src/vulkan/util/gen_enum_to_str.py | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > index aa7001e..efe5d4f 100644 > --- a/src/vulkan/util/gen_enum_to_str.py > +++ b/src/vulkan/util/gen_enum_to_str.py > @@ -185,13 +185,14 @@ def main(): > efactory = EnumFactory(VkEnum) > for filename in args.xml_files: > parse_xml(efactory, filename) > +enums=sorted(efactory.registry.values(), key=lambda e: e.name) Minor nit. The above = should have spaces around it. Reviewed-by: Chad Versace > > for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.c')), > (H_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.h'))]: > with open(file_, 'wb') as f: > f.write(template.render( > file=os.path.basename(__file__), > -enums=efactory.registry.values(), > +enums=enums, > copyright=COPYRIGHT, > FOREIGN_ENUM_VALUES=FOREIGN_ENUM_VALUES)) > > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/6] vulkan: enum generator: make registry more flexible
On Thu, Sep 21, 2017 at 10:25 AM, Dylan Bakerwrote: > Quoting Jason Ekstrand (2017-09-21 08:32:20) > > From: Lionel Landwerlin > > > > It will be used to store extension numbers as well. > > > > Signed-off-by: Lionel Landwerlin > > Reviewed-by: Jason Ekstrand > > --- > > src/vulkan/util/gen_enum_to_str.py | 22 +++--- > > 1 file changed, 11 insertions(+), 11 deletions(-) > > > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > > index efe5d4f..5281e89 100644 > > --- a/src/vulkan/util/gen_enum_to_str.py > > +++ b/src/vulkan/util/gen_enum_to_str.py > > @@ -115,18 +115,18 @@ FOREIGN_ENUM_VALUES = [ > > ] > > > > > > -class EnumFactory(object): > > +class NamedFactory(object): > > """Factory for creating enums.""" > > > > def __init__(self, type_): > > self.registry = {} > > self.type = type_ > > > > -def __call__(self, name): > > +def __call__(self, *args): > > This is pretty ugly and clunky. What about > __call__(self, name, **kwargs), and pass kwargs directly to the type? > Fine with me. > > try: > > -return self.registry[name] > > +return self.registry[args[0]] > > except KeyError: > > -n = self.registry[name] = self.type(name) > > +n = self.registry[args[0]] = self.type(*args) > > return n > > > > > > @@ -138,7 +138,7 @@ class VkEnum(object): > > self.values = values or [] > > > > > > -def parse_xml(efactory, filename): > > +def parse_xml(enum_factory, filename): > > """Parse the XML file. Accumulate results into the efactory. 
> > > > This parser is a memory efficient iterative XML parser that returns > a list > > @@ -157,15 +157,15 @@ def parse_xml(efactory, filename): > > if event == 'end' and elem.tag == 'enums': > > type_ = elem.attrib.get('type') > > if type_ == 'enum': > > -enum = efactory(elem.attrib['name']) > > +enum = enum_factory(elem.attrib['name']) > > enum.values.extend([e.attrib['name'] for e in elem > > if e.tag == 'enum']) > > elif event == 'end' and elem.tag == 'extension': > > if elem.attrib['supported'] != 'vulkan': > > continue > > for e in elem.findall('.//enum[@extends][@offset]'): > > -enum = efactory(e.attrib['extends']) > > -enum.values.append(e.attrib['name']) > > +enum = enum_factory(e.attrib['extends']) > > +enum.values.append(e.attrib['name'],) > > > > root.clear() > > > > @@ -182,10 +182,10 @@ def main(): > > > > args = parser.parse_args() > > > > -efactory = EnumFactory(VkEnum) > > +enum_factory = NamedFactory(VkEnum) > > for filename in args.xml_files: > > -parse_xml(efactory, filename) > > -enums=sorted(efactory.registry.values(), key=lambda e: e.name) > > +parse_xml(enum_factory, filename) > > +enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) > > > > for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.c')), > > (H_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.h'))]: > > -- > > 2.5.0.400.gff86faf > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/6] vulkan: enum generator: align function declarations/prototypes
Thanks for updating. For the series: Reviewed-by: Dylan BakerQuoting Jason Ekstrand (2017-09-21 14:19:43) > From: Lionel Landwerlin > > Signed-off-by: Lionel Landwerlin > Acked-by: Jason Ekstrand > --- > src/vulkan/util/gen_enum_to_str.py | 42 > +++--- > 1 file changed, 21 insertions(+), 21 deletions(-) > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > index df326d0..aa7001e 100644 > --- a/src/vulkan/util/gen_enum_to_str.py > +++ b/src/vulkan/util/gen_enum_to_str.py > @@ -64,27 +64,27 @@ C_TEMPLATE = Template(textwrap.dedent(u"""\ > > % for enum in enums: > > -const char * > -vk_${enum.name[2:]}_to_str(${enum.name} input) > -{ > -switch(input) { > -% for v in enum.values: > -% if v in FOREIGN_ENUM_VALUES: > - > -#pragma GCC diagnostic push > -#pragma GCC diagnostic ignored "-Wswitch" > -% endif > -case ${v}: > -return "${v}"; > -% if v in FOREIGN_ENUM_VALUES: > -#pragma GCC diagnostic pop > - > -% endif > -% endfor > -default: > -unreachable("Undefined enum value."); > -} > +const char * > +vk_${enum.name[2:]}_to_str(${enum.name} input) > +{ > +switch(input) { > +% for v in enum.values: > +% if v in FOREIGN_ENUM_VALUES: > + > +#pragma GCC diagnostic push > +#pragma GCC diagnostic ignored "-Wswitch" > +% endif > +case ${v}: > +return "${v}"; > +% if v in FOREIGN_ENUM_VALUES: > +#pragma GCC diagnostic pop > + > +% endif > +% endfor > +default: > +unreachable("Undefined enum value."); > } > +} > %endfor"""), > output_encoding='utf-8') > > @@ -102,7 +102,7 @@ H_TEMPLATE = Template(textwrap.dedent(u"""\ > #include > > % for enum in enums: > -const char * vk_${enum.name[2:]}_to_str(${enum.name} input); > +const char * vk_${enum.name[2:]}_to_str(${enum.name} input); > % endfor > > #endif"""), > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: signature ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/3] glsl: tidy up IR after loop unrolling
c7affbf6875622a enabled GLSLOptimizeConservatively on some drivers. The idea was to speed up compile times by running the GLSL IR passes only once each time do_common_optimization() is called. However, loop unrolling can create a big mess and, with large loops, can actually cause compile times to increase significantly due to a bunch of redundant if statements being propagated to other IRs. Here we make sure to clean things up before moving on. There was no measurable difference in shader-db compile times, but it makes compile times of some piglit tests go from a couple of seconds to basically instant. The shader-db results seemed positive also: Totals: SGPRS: 2829456 -> 2828376 (-0.04 %) VGPRS: 1720793 -> 1721457 (0.04 %) Spilled SGPRs: 7707 -> 7707 (0.00 %) Spilled VGPRs: 33 -> 33 (0.00 %) Private memory VGPRs: 3140 -> 2060 (-34.39 %) Scratch size: 3308 -> 2180 (-34.10 %) dwords per thread Code Size: 79441464 -> 79214616 (-0.29 %) bytes LDS: 436 -> 436 (0.00 %) blocks Max Waves: 558670 -> 558571 (-0.02 %) Wait states: 0 -> 0 (0.00 %) --- src/compiler/glsl/glsl_parser_extras.cpp | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 9cbc2355f9..764c05ad80 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -2209,21 +2209,27 @@ do_common_optimization(exec_list *ir, bool linked, OPT(lower_vector_insert, ir, false); OPT(do_swizzle_swizzle, ir); OPT(do_noop_swizzle, ir); OPT(optimize_split_arrays, ir, linked); OPT(optimize_redundant_jumps, ir); if (options->MaxUnrollIterations) { loop_state *ls = analyze_loop_variables(ir); if (ls->loop_found) { - OPT(unroll_loops, ir, ls, options); + bool loop_progress = unroll_loops(ir, ls, options); + while (loop_progress) { +loop_progress = false; +loop_progress |= do_constant_propagation(ir); +loop_progress |= do_if_simplification(ir); + } + progress |= loop_progress; } delete 
ls; } #undef OPT return progress; } extern "C" { -- 2.13.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/5] util: Add tests for the string buffer
If you (all) think, an AK from me (Dieter) is enough, I'm running this from the beginning, then go ahead. Greetings, Dieter Am 21.09.2017 21:13, schrieb Thomas Helland: Fixed the missing newline at the end of this cpp file locally. This is the only patch left in the series without an RB. If there's no objections I plan on pushing this once I get an RB on this. Someone mind having a look at it? 2017-09-11 22:21 GMT+02:00 Thomas Helland: More tests could probably be added, but this should cover concatenation, resizing, clearing, formatted printing, and checking the length, so it should be quite complete. V2: Address review feedback from Timothy, plus fixes - Use a large enough char array - Actually test the formatted appending - Test that clear function resets string length V3: Port to gtest V4: Fix test makefile Fix copyright header Fix missing extern C Use more appropriate name for C-file Add tests for append_char --- configure.ac | 1 + src/util/Makefile.am | 5 +- src/util/tests/string_buffer/Makefile.am | 40 +++ .../tests/string_buffer/string_buffer_test.cpp | 119 + 4 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 src/util/tests/string_buffer/Makefile.am create mode 100644 src/util/tests/string_buffer/string_buffer_test.cpp diff --git a/configure.ac b/configure.ac index d0d4c0dfd1..20727c7bb4 100644 --- a/configure.ac +++ b/configure.ac @@ -2924,6 +2924,7 @@ AC_CONFIG_FILES([Makefile src/mesa/state_tracker/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile + src/util/tests/string_buffer/Makefile src/util/xmlpool/Makefile src/vulkan/Makefile]) diff --git a/src/util/Makefile.am b/src/util/Makefile.am index 4512dc99d5..2b47143ad7 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -19,7 +19,10 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -SUBDIRS = xmlpool . tests/hash_table +SUBDIRS = . 
\ + xmlpool \ + tests/hash_table \ + tests/string_buffer include Makefile.sources diff --git a/src/util/tests/string_buffer/Makefile.am b/src/util/tests/string_buffer/Makefile.am new file mode 100644 index 00..bd04d86349 --- /dev/null +++ b/src/util/tests/string_buffer/Makefile.am @@ -0,0 +1,40 @@ +# Copyright © 2017 Thomas Helland +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gtest/include \ + $(PTHREAD_CFLAGS) \ + $(DEFINES) + +TESTS = string_buffer_test + +check_PROGRAMS = $(TESTS) + +string_buffer_test_SOURCES = \ + string_buffer_test.cpp + +string_buffer_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) \ + $(DLOPEN_LIBS) diff --git a/src/util/tests/string_buffer/string_buffer_test.cpp b/src/util/tests/string_buffer/string_buffer_test.cpp new file mode 100644 index 00..e80ee8b135 --- /dev/null +++ b/src/util/tests/string_buffer/string_buffer_test.cpp @@ -0,0 +1,119 @@ +/* + * Copyright © 2017 Thomas Helland + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above
[Mesa-dev] [PATCH v3 2/6] vulkan: enum generator: sort enums by names
From: Lionel Landwerlin Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index aa7001e..efe5d4f 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -185,13 +185,14 @@ def main(): efactory = EnumFactory(VkEnum) for filename in args.xml_files: parse_xml(efactory, filename) +enums=sorted(efactory.registry.values(), key=lambda e: e.name) for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.c')), (H_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.h'))]: with open(file_, 'wb') as f: f.write(template.render( file=os.path.basename(__file__), -enums=efactory.registry.values(), +enums=enums, copyright=COPYRIGHT, FOREIGN_ENUM_VALUES=FOREIGN_ENUM_VALUES)) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] radeonsi: allow out-of-order rasterization in commutative blending cases
I'm reviewing the patch and there's something that confuses me about radeonsi_assume_no_z_fights (which got implemented in a later patch). It appears that part of the logic is flawed. Please correct me wherever you feel I misunderstood what is going on. Assuming colour writes are enabled, si_out_of_order_rasterization will return true only if the following conditions are met (simplified): * There is a zsbuf set (If I interpret it correctly, this means if there is a zs buffer attached) * dsa_order_invariant.pass_set is true * dsa_order_invariant.pass_last is true _or_ * There is no zsbuf set * dsa_order_invariant.pass_last is true However, the logic is apparently contradictory, because: * pass_set will only be true when depth writes are disabled or depth func is set to always or depth func is set to never. * pass_last will only be true when depth writes are enabled or depth func is not set to always nor not_equal. !!This is impossible to satisfy unless depth function is set to never!! Not only is this extremely rare, it appears this is not the intention behind the option "radeonsi_assume_no_z_fights" which I believe is an optimization for gamers to get a performance boost in most games where forcing this option doesn't matter (either because the artifacts are extremely rare or not present). Additionally, there seems to be a bug because si_out_of_order_rasterization can return true if there is no zs buffer and the user enabled radeonsi_assume_no_z_fights, which AFAIK is blatantly wrong (correct me if I'm wrong, but if there is no zs buffer, out of order rasterization can cause really wrong results). Maybe I misunderstood what's going on, or I missed something key. But if I'm right then the logic needs to be revised. 
It would appear to me that the idea of radeonsi_assume_no_z_fights is that it should always enable OoO rasterization as long as depth writes are on and a valid zs is present (and other conditions are met such as shaders not requesting early depth stencil, blending operations, etc). But as written right now, it will almost never be enabled even if the option is forced on. Cheers Matias De: Marek Olšák Para: Nicolai Hähnle CC: "mesa-dev@lists.freedesktop.org" ; Nicolai Hähnle Enviado: Miércoles, 13 de septiembre, 2017 21:19:26 Asunto: Re: [Mesa-dev] [PATCH 5/5] radeonsi: allow out-of-order rasterization in commutative blending cases For the series: Reviewed-by: Marek Olšák Marek On Sat, Sep 9, 2017 at 12:43 PM, Nicolai Hähnle wrote: > From: Nicolai Hähnle > > We do not enable this by default for additive blending, since it slightly > breaks OpenGL invariance guarantees due to non-determinism. > > Still, there may be some applications that can benefit from white-listing > via the radeonsi_commutative_blend_add drirc setting without any real > visible artifacts. 
> --- > src/gallium/drivers/radeonsi/driinfo_radeonsi.h | 1 + > src/gallium/drivers/radeonsi/si_pipe.c | 2 + > src/gallium/drivers/radeonsi/si_pipe.h | 1 + > src/gallium/drivers/radeonsi/si_state.c | 67 > +++-- > src/gallium/drivers/radeonsi/si_state.h | 1 + > src/util/xmlpool/t_options.h| 5 ++ > 6 files changed, 73 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > index 8be85289a0c..989e5175cc0 100644 > --- a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > +++ b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h > @@ -1,5 +1,6 @@ > // DriConf options specific to radeonsi > DRI_CONF_SECTION_PERFORMANCE > DRI_CONF_RADEONSI_ENABLE_SISCHED("false") > DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") > +DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false") > DRI_CONF_SECTION_END > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index b4972be739c..c44ea3be740 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -1043,20 +1043,22 @@ struct pipe_screen *radeonsi_screen_create(struct > radeon_winsys *ws, > (sscreen->b.chip_class == SI && > sscreen->b.info.pfp_fw_version >= 79 && > sscreen->b.info.me_fw_version >= 142); > > sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI; > sscreen->has_out_of_order_rast = sscreen->b.chip_class >= VI && > sscreen->b.info.max_se >= 2 && > !(sscreen->b.debug_flags & > DBG_NO_OUT_OF_ORDER); > sscreen->assume_no_z_fights = > driQueryOptionb(config->options, > "radeonsi_assume_no_z_fights"); > +
Re: [Mesa-dev] [PATCH] swr/rast: remove llvm fence/atomics from generated files
> On Sep 20, 2017, at 8:13 AM, Laurent Carlierwrote: > > Le mardi 19 septembre 2017, 21:25:42 CEST Tim Rowley a écrit : >> We currently don't use these instructions, and since their API >> changed in llvm-5.0 having them in the autogen files broke the mesa >> release tarballs which ship with generated autogen files. >> >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102847 >> CC: mesa-sta...@lists.freedesktop.org >> --- > > Tested-by: Laurent Carlier Reviewed-by: Bruce Cherniak >> src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 8 >> 1 file changed, 8 insertions(+) >> >> diff --git >> a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py >> b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index >> 025d38a..ce892a9 100644 >> --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py >> +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py >> @@ -140,6 +140,14 @@ def parse_ir_builder(input_file): >> >> ignore = False >> >> +# The following functions need to be ignored in >> openswr. +# API change in llvm-5.0 breaks baked autogen >> files +if ( >> +(func_name == 'CreateFence' or >> + func_name == 'CreateAtomicCmpXchg' or >> + func_name == 'CreateAtomicRMW')): >> +ignore = True >> + >> # The following functions need to be ignored. >> if (func_name == 'CreateInsertNUWNSWBinOp' or >> func_name == 'CreateMaskedIntrinsic' or > > > -- > Laurent Carlier > http://www.archlinux.org___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 2/3] glsl: check if induction var incremented before use in terminator
On 22/09/17 01:51, Michael Schellenberger Costa wrote: Out of curiosity what about decrement and other shenanigans? My commit message is a little misleading. The method in which the induction var changes doesn't matter, we just check if something was assigned to it (we trust the rest of the analysis code to do its job and make sure it's a valid induction var). If there was an assignment before the terminator is checked we reduce the iteration count of the loop, again the way in which the induction variable is changed is irrelevant we simply reduce the number of passes over the loop by 1. Hope this makes sense. Tim --Michael Am 21.09.2017 um 12:55 schrieb Timothy Arceri: do-while loops can increment the starting value before the condition is checked. e.g. do { ndx++; } while (ndx < 3); This commit changes the code to detect this and reduces the iteration count by 1 if found. V2: fix terminator spelling Reviewed-by: Nicolai Hähnle Reviewed-by: Elie Tournier --- src/compiler/glsl/loop_analysis.cpp | 38 + 1 file changed, 38 insertions(+) diff --git a/src/compiler/glsl/loop_analysis.cpp b/src/compiler/glsl/loop_analysis.cpp index 81a07f78f8..78279844dc 100644 --- a/src/compiler/glsl/loop_analysis.cpp +++ b/src/compiler/glsl/loop_analysis.cpp @@ -164,20 +164,54 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, iter_value += bias[i]; valid_loop = true; break; } } ralloc_free(mem_ctx); return (valid_loop) ? 
iter_value : -1; } +static bool +incremented_before_terminator(ir_loop *loop, ir_variable *var, + ir_if *terminator) +{ + for (exec_node *node = loop->body_instructions.get_head(); + !node->is_tail_sentinel(); + node = node->get_next()) { + ir_instruction *ir = (ir_instruction *) node; + + switch (ir->ir_type) { + case ir_type_if: + if (ir->as_if() == terminator) + return false; + break; + + case ir_type_assignment: { + ir_assignment *assign = ir->as_assignment(); + ir_variable *assignee = assign->lhs->whole_variable_referenced(); + + if (assignee == var) { + assert(assign->condition == NULL); + return true; + } + + break; + } + + default: + break; + } + } + + unreachable("Unable to find induction variable"); +} /** * Record the fact that the given loop variable was referenced inside the loop. * * \arg in_assignee is true if the reference was on the LHS of an assignment. * * \arg in_conditional_code_or_nested_loop is true if the reference occurred * inside an if statement or a nested loop. * * \arg current_assignment is the ir_assignment node that the loop variable is @@ -575,20 +609,24 @@ loop_analysis::visit_leave(ir_loop *ir) ir_variable *var = counter->variable_referenced(); ir_rvalue *init = find_initial_value(ir, var); loop_variable *lv = ls->get(var); if (lv != NULL && lv->is_induction_var()) { t->iterations = calculate_iterations(init, limit, lv->increment, cmp); + if (incremented_before_terminator(ir, var, t->ir)) { + t->iterations--; + } + if (t->iterations >= 0 && (ls->limiting_terminator == NULL || t->iterations < ls->limiting_terminator->iterations)) { ls->limiting_terminator = t; } } break; } default: ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/6] vulkan: enum generator: align function declarations/prototypes
On Thu 21 Sep 2017, Jason Ekstrand wrote: > From: Lionel Landwerlin> > Signed-off-by: Lionel Landwerlin > Acked-by: Jason Ekstrand > --- > src/vulkan/util/gen_enum_to_str.py | 42 > +++--- > 1 file changed, 21 insertions(+), 21 deletions(-) Patch 1 is Reviewed-by: Chad Versace ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: Implement VK_ANDROID_native_buffer (v5)
On Thu 21 Sep 2017, Tapani Pälli wrote: > Hi Chad; > > The build works ok now on Android-IA. There is still something wrong with > 'exec async' though. It behaves differently with small/big apps but > eventually I think it just starts to block .. somewhere. I still need the > big hammer to set device->has_exec_async false to fix that. Please don't > consider that to be a blocker though, we can easily carry such patch in > Android-IA and debug it further. > > For this patch: > Reviewed-by: Tapani Pälli Thanks for testing and review. How long until you observe the lockup? And does the lockup happen earlier or later for small apps vs big apps? And what apps do you use to reproduce the lock? I'll try to reproduce the lock in my ARC++ setup after returning to the office on Monday (I'm at XDC now). ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i965 : optimized bucket index calculation
+ all v1 reviewers. Does this look ok? -Yogesh. >-Original Message- >From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On Behalf Of >aravindan.muthuku...@intel.com >Sent: Thursday, September 14, 2017 12:13 PM >To: mesa-dev@lists.freedesktop.org >Cc: Muthukumar, Aravindan; J Karanje, >Kedar >Subject: [Mesa-dev] [PATCH v2] i965 : optimized bucket index calculation > >From: Aravindan Muthukumar > >Avoiding the loop which was running with O(n) complexity. >Now the complexity has been reduced to O(1) > >Algorithm calculates the index using matrix method. >Matrix arrangement is as below: >Assuming PAGE_SIZE is 4096. > > 1*4096 2*4096 3*4096 4*4096 > 5*4096 6*4096 7*4096 8*4096 > ... ... ... ... > ... ... ... ... > ... ... ... max_cache_size > >From this matrix it's clearly seen that every row follows the below way: > ... ... ... n > n+(1/4)n n+(1/2)n n+(3/4)n 2n > >Row is calculated as log2(size/PAGE_SIZE) Column is calculated as converting the >difference between the elements to fit into power size of two and indexing it. > >Final Index is (row*4)+(col-1) > >Tested with Intel Mesa CI. > >Improves performance of 3d Mark on Broxton. 
>Analyzed using Compare Perf Analyser: >Average : 201.2 +/- 65.4836 (n=20) >Percentage : 0.705966% +/- 0.229767% (n=20) > >v2: Review comments regarding cosmetics and asserts implemented > >Signed-off-by: Aravindan Muthukumar >Signed-off-by: Kedar Karanje >Reviewed-by: Yogesh Marathe >--- > src/mesa/drivers/dri/i965/brw_bufmgr.c | 46 >-- > 1 file changed, 39 insertions(+), 7 deletions(-) > >diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c >b/src/mesa/drivers/dri/i965/brw_bufmgr.c >index 8017219..8013ccb 100644 >--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c >+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c >@@ -87,6 +87,8 @@ > > #define memclear(s) memset(, 0, sizeof(s)) > >+#define PAGE_SIZE 4096 >+ > #define FILE_DEBUG_FLAG DEBUG_BUFMGR > > static inline int >@@ -181,19 +183,45 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t >pitch, uint32_t tiling) >return ALIGN(pitch, tile_width); > } > >+static inline int >+ilog2_round_up(int value) >+{ >+ assert(value != 0); >+ return 32 - __builtin_clz(value - 1); } >+ >+/* >+ * This function finds the correct bucket fit for the input size. >+ * The function works with O(1) complexity when the requested size >+ * was queried instead of iterating the size through all the buckets. 
>+ */ > static struct bo_cache_bucket * > bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { >- int i; >+ int index = -1; >+ int row, col = 0; >+ int pages, pages_log2; > >- for (i = 0; i < bufmgr->num_buckets; i++) { >- struct bo_cache_bucket *bucket = >cache_bucket[i]; >- if (bucket->size >= size) { >- return bucket; >- } >+ /* condition for size less than 4*4096 (16KB) page size */ >+ if(size <= 4 * PAGE_SIZE) { >+ index = DIV_ROUND_UP(size, PAGE_SIZE) - 1;; >+ } else { >+ /* Number of pages of page size */ >+ pages = DIV_ROUND_UP(size, PAGE_SIZE); >+ pages_log2 = ilog2_round_up(pages) - 1; >+ >+ /* Finding the row and column of the matrix */ >+ row = pages_log2 - 1; >+ col = DIV_ROUND_UP((pages - (1 << pages_log2)), >+(1 << (pages_log2 - 2))); >+ >+ /* Using the calculated row and column to index into the matrix */ >+ index = (row << 2) + (col - 1); >} > >- return NULL; >+ /* Checking the error condition */ >+ return (index >= 0 && index < bufmgr->num_buckets) ? >+ (>cache_bucket[index]) : NULL; > } > > int >@@ -1239,6 +1267,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) >list_inithead(>cache_bucket[i].head); >bufmgr->cache_bucket[i].size = size; >bufmgr->num_buckets++; >+ >+ assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]); >+ assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]); >+ assert(bucket_for_size(bufmgr, size + 1) != >+ >cache_bucket[i]); > } > > static void >-- >2.7.4 > >___ >mesa-dev mailing list >mesa-dev@lists.freedesktop.org >https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] initial meson port
On Thu, Sep 21, 2017 at 2:20 AM, Eric Anholt wrote: > Dylan Baker writes: >> Results >> autotools : sh -c 535.34s user 30.33s system 310% cpu 3:02.05 total >> meson : sh -c 136.58s user 11.98s system 372% cpu 39.895 total > > I just want to point at these numbers again. meson is so transformative > for your normal build/test cycles that it's worth it even if we have to > duplicate source lists. I know these aren't quite representative > because of all of automake's checks that haven't been done for meson, > but here's what we had for the X server conversion: > > autotools: meson: > no-op build 0.83 0.49 > touch Makefile.am 1.28 > touch configure.ac 16.68 > touch meson.build 2.92 > clean ccache build 16.74 1.44 > clean build 52.24 27.84 > > Hopefully we can replace two of our build systems (hopefully android and > scons?) with this one, and then I think it will definitely be less > developer build system maintenance, even with duplicated source lists. > I'd be curious to hear what the vmware folks would need from meson in > order to drop scons, and I'd be willing to put in a good bit of work to > make it happen. > > Additionally, meson doesn't need the .hs listed in its source lists, so > these meson.builds are actually more verbose than we need and would drop > a huge source of our "fix up the build system" patches for automake's > stupid distcheck. Wasn't lacking distcheck support one of the arguments against moving to only a scons build when this was brought up all those years ago? Does Meson provide something similar, or do people just now get all of the source from git nowadays? Cheers, Jakob. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] initial meson port
On Thu, Sep 21, 2017 at 5:36 PM, Jakob Bornecrantz wrote: > Wasn't lacking distcheck support one of the arguments against moving > to only a scons build when this was brought up all those years ago? > Does Meson provide something similar, or do people just now get all > of the source from git nowadays? > Meson supports a `dist` feature which is the same as `distcheck`. However, it is less error-prone compared to Autotools distcheck because it will tarball everything that has been checked into git (once tests pass), so you don't have to maintain a whitelist of files. This also means that you can be sure that your release contains the same code that your git repository contains, and hence that your release is buildable from both git and the tarball. Cheers, Nirbheek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/14] Patches for VA-API State Tracker Postproc
On 09/21/2017 03:03 AM, Mark Thompson wrote: On 21/09/17 03:17, Leo Liu wrote: On 09/20/2017 06:11 PM, Mark Thompson wrote: On 19/09/17 20:04, Leo Liu wrote: This series are for VA-API State Tracker Postproc, including: Deinterlacing I video for transcode; Scaling support in postproc for transcode; Frame grabber in postproc Thanks Andy Furniss for lots of testing on these. Leo Liu (14): st/va/postproc: use video original size for postprocessing vl/compositor: separate YUV part from shader video buffer function vl/compositor: extend YUV deint function to do field deint vl/compositor: add a new function for YUV deint st/omx: use new vl_compositor_yuv_deint_full() to deint st/va: use new vl_compositor_yuv_deint_full() to deint vl/compositor: remove vl_compositor_yuv_deint() function vl/compositor: add Bob top and bottom to YUV deint function st/va/postproc: add a full NV12 deint support from buffer I to P st/va: make internal func vlVaHandleSurfaceAllocate() call simpler st/va/postproc: use progressive target buffer for scaling vl/compositor: create RGB to YUV fragment shader vl/compositor: convert RGB buffer to YUV with color conversion st/va/postproc: implement the DRM prime grabber src/gallium/auxiliary/vl/vl_compositor.c | 263 +- src/gallium/auxiliary/vl/vl_compositor.h | 50 +++- src/gallium/state_trackers/omx_bellagio/vid_dec.c | 11 +- src/gallium/state_trackers/va/picture.c | 16 +- src/gallium/state_trackers/va/postproc.c | 69 +- src/gallium/state_trackers/va/surface.c | 7 +- src/gallium/state_trackers/va/va_private.h | 2 +- 7 files changed, 331 insertions(+), 87 deletions(-) Looks good for import from a bit of testing so far (with the update today). Something funny going on with RGB upload cases? With ffmpeg: ./ffmpeg_g -y -i in.mp4 -an -vaapi_device /dev/dri/renderD129 -vf format=bgr0,hwupload,scale_vaapi=w=1920:h=1080:format=nv12 -c:v h264_vaapi -profile:v 578 -bf 0 out.mp4 it crashes a few lines into copying to the image. 
The mapping in vlVaMapBuffer() looks like: (gdb) p *buf->derived_surface.resource $9 = {reference = {count = 5}, screen = 0x57829010, width0 = 1920, height0 = 1088, depth0 = 1, array_size = 1, format = PIPE_FORMAT_B8G8R8X8_UNORM, target = PIPE_TEXTURE_2D, last_level = 0, nr_samples = 0, usage = 0, bind = 2097152, flags = 0, next = 0x0} (gdb) p *buf->derived_surface.transfer $8 = {resource = 0x57d8e2c0, level = 0, usage = PIPE_TRANSFER_WRITE, box = {x = 0, y = 0, z = 0, width = 1920, height = 1, depth = 1}, stride = 7680, layer_stride = 7680} height = 1 looks suspicious, like it's only mapping the first line? Looks like the command line crashed at some point before where you wanted to go, i.e. RGB->YUV in postproc. I'm not quite understanding what you mean. Do you crash at a different point rather than in the copy after mapping the image to upload to? Backtrace? I haven't tried your command yet, but I know it won't work. If we would like to have raw RGB to scale in postproc, the raw data has to be in an RGB surface. Currently the only case we support for RGB format in vaCreateSurface is the passing of a dma-buf handle; allocating an RGB surface is not supported. Even if we got lucky enough that the command line passed through, it would put RGB data into an NV12 surface (the driver should explicitly return invalid surface for this case). A general question for the whole driver: why are surfaces interlaced by default? I think it's firmware preferred, and they are also good for deinterlacing. Can you be more specific? Take a look at "rvid_get_video_param()" in radeon_video.c, that will tell what interlaced formats the HW supports and prefers. I agree that it is required for deinterlacing, but that isn't a particularly common case and will only become less so with time. E.g. is it somehow better to decode even progressive video to interlaced frames? That seems like it would have significantly worse locality of reference to me, but maybe the hardware does something special. 
I may be getting some things wrong here, but the relevant components which deal with surfaces that I see are: H * Decoder: can write either format, the stream type doesn't seem to matter (?). Normally, HW decoder write to NV12, P016, and for Mjpeg it can do YUYV as well. Stream type depends on codecs HW supports All in interlaced and progressive forms? I didn't consider it earlier, but the H.265 decoder seems to always produce progressive for me. Again it depends on HW, State Tracker query driver what it supports and prefers, then make decision how to create surface. Regards, Leo * Encoder: can only accept progressive surfaces. * Deinterlacer: only works on interlaced surfaces (?). Yes, if you would like to have a pretty picture for 'deinterlace_vappi=mode=3'
[Mesa-dev] [PATCH 05/22] glsl: Convert lower_vec_index_to_cond_assign to using ir_builder
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- .../glsl/lower_vec_index_to_cond_assign.cpp | 56 ++- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp index f60ff7b..926a493 100644 --- a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp @@ -40,6 +40,9 @@ #include "ir_visitor.h" #include "ir_optimization.h" #include "compiler/glsl_types.h" +#include "ir_builder.h" + +using namespace ir_builder; namespace { @@ -80,37 +83,26 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ ir_rvalue *orig_index, const glsl_type *type) { - ir_assignment *assign, *value_assign; - ir_variable *index, *var, *value; - ir_dereference *deref, *deref_value; - unsigned i; - - exec_list list; + ir_factory body(, base_ir); /* Store the index to a temporary to avoid reusing its tree. 
*/ assert(orig_index->type == glsl_type::int_type || orig_index->type == glsl_type::uint_type); - index = new(base_ir) ir_variable(orig_index->type, -"vec_index_tmp_i", -ir_var_temporary); - list.push_tail(index); - deref = new(base_ir) ir_dereference_variable(index); - assign = new(base_ir) ir_assignment(deref, orig_index, NULL); - list.push_tail(assign); + ir_variable *const index = + body.make_temp(orig_index->type, "vec_index_tmp_i"); + + body.emit(assign(index, orig_index)); /* Store the value inside a temp, thus avoiding matrixes duplication */ - value = new(base_ir) ir_variable(orig_vector->type, "vec_value_tmp", -ir_var_temporary); - list.push_tail(value); - deref_value = new(base_ir) ir_dereference_variable(value); - value_assign = new(base_ir) ir_assignment(deref_value, orig_vector); - list.push_tail(value_assign); + ir_variable *const value = + body.make_temp(orig_vector->type, "vec_value_tmp"); + + body.emit(assign(value, orig_vector)); + /* Temporary where we store whichever value we swizzle out. */ - var = new(base_ir) ir_variable(type, "vec_index_tmp_v", - ir_var_temporary); - list.push_tail(var); + ir_variable *const var = body.make_temp(type, "vec_index_tmp_v"); /* Generate a single comparison condition "mask" for all of the components * in the vector. @@ -121,22 +113,8 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ mem_ctx); /* Generate a conditional move of each vector element to the temp. */ - for (i = 0; i < orig_vector->type->vector_elements; i++) { - ir_rvalue *condition_swizzle = - new(base_ir) ir_swizzle(new(mem_ctx) ir_dereference_variable(cond), - i, 0, 0, 0, 1); - - /* Just clone the rest of the deref chain when trying to get at the - * underlying variable. 
- */ - ir_rvalue *swizzle = - new(base_ir) ir_swizzle(deref_value->clone(mem_ctx, NULL), - i, 0, 0, 0, 1); - - deref = new(base_ir) ir_dereference_variable(var); - assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle); - list.push_tail(assign); - } + for (unsigned i = 0; i < orig_vector->type->vector_elements; i++) + body.emit(assign(var, swizzle(value, i, 1), swizzle(cond, i, 1))); /* Put all of the new instructions in the IR stream before the old * instruction. @@ -144,7 +122,7 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ base_ir->insert_before(); this->progress = true; - return new(base_ir) ir_dereference_variable(var); + return deref(var).val; } ir_rvalue * -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/22] glsl: Return ir_variable from compare_index_block
From: "\"Ian Romanick\""From: Ian Romanick This is basically a wash now, but it simplifies later patches that convert to using ir_builder. Signed-off-by: Ian Romanick --- src/compiler/glsl/ir_optimization.h | 6 ++--- .../lower_variable_index_to_cond_assign.cpp | 26 +++ .../glsl/lower_vec_index_to_cond_assign.cpp | 4 +-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 573ddb4..0fbbf34 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -170,9 +170,9 @@ bool lower_blend_equation_advanced(gl_linked_shader *shader); bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state); void propagate_invariance(exec_list *instructions); -ir_rvalue * -compare_index_block(exec_list *instructions, ir_variable *index, - unsigned base, unsigned components, void *mem_ctx); +ir_variable *compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, + void *mem_ctx); bool lower_64bit_integer_instructions(exec_list *instructions, unsigned what_to_lower); diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp index fcb12d1..dd49272 100644 --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -66,10 +66,10 @@ * \param mem_ctx ralloc memory context to be used for all allocations. * * \returns - * An \c ir_rvalue that \b must be cloned for each use in conditional - * assignments, etc. + * An \c ir_variable containing the per-component comparison results. This + * must be dereferenced per use. 
*/ -ir_rvalue * +ir_variable * compare_index_block(exec_list *instructions, ir_variable *index, unsigned base, unsigned components, void *mem_ctx) { @@ -113,7 +113,7 @@ compare_index_block(exec_list *instructions, ir_variable *index, new(mem_ctx) ir_dereference_variable(condition); instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); - return cond_deref; + return condition; } static inline bool @@ -275,17 +275,21 @@ struct switch_generator for (unsigned i = first; i < end; i += 4) { const unsigned comps = MIN2(condition_components, end - i); -ir_rvalue *const cond_deref = - compare_index_block(list, index, i, comps, this->mem_ctx); + ir_variable *const cond = +compare_index_block(list, index, i, comps, this->mem_ctx); if (comps == 1) { -this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), -list); +ir_rvalue *const cond_deref = + new(mem_ctx) ir_dereference_variable(cond); + +this->generator.generate(i, cond_deref, list); } else { for (unsigned j = 0; j < comps; j++) { - ir_rvalue *const cond_swiz = - new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), - j, 0, 0, 0, 1); + ir_rvalue *const cond_deref = + new(mem_ctx) ir_dereference_variable(cond); + ir_rvalue *const cond_swiz = + new(this->mem_ctx) ir_swizzle(cond_deref, +j, 0, 0, 0, 1); this->generator.generate(i + j, cond_swiz, list); } diff --git a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp index 597d852..f60ff7b 100644 --- a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp @@ -115,7 +115,7 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ /* Generate a single comparison condition "mask" for all of the components * in the vector. 
*/ - ir_rvalue *const cond_deref = + ir_variable *const cond = compare_index_block(, index, 0, orig_vector->type->vector_elements, mem_ctx); @@ -123,7 +123,7 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ /* Generate a conditional move of each vector element to the temp. */ for (i = 0; i < orig_vector->type->vector_elements; i++) { ir_rvalue *condition_swizzle = - new(base_ir) ir_swizzle(cond_deref->clone(mem_ctx, NULL), + new(base_ir) ir_swizzle(new(mem_ctx) ir_dereference_variable(cond), i, 0, 0, 0, 1); /* Just
[Mesa-dev] [PATCH 01/22] glsl: Fix coding standards issues in lower_if_to_cond_assign
From: "\"Ian Romanick\""From: Ian Romanick Mostly tabs-before-spaces issues. Signed-off-by: Ian Romanick --- src/compiler/glsl/lower_if_to_cond_assign.cpp | 95 +-- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp index 54bcae7..0d6aa00 100644 --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp @@ -170,43 +170,43 @@ check_ir_node(ir_instruction *ir, void *data) static void move_block_to_cond_assign(void *mem_ctx, - ir_if *if_ir, ir_rvalue *cond_expr, - exec_list *instructions, - struct set *set) + ir_if *if_ir, ir_rvalue *cond_expr, + exec_list *instructions, + struct set *set) { foreach_in_list_safe(ir_instruction, ir, instructions) { if (ir->ir_type == ir_type_assignment) { -ir_assignment *assign = (ir_assignment *)ir; - -if (_mesa_set_search(set, assign) == NULL) { - _mesa_set_add(set, assign); - - /* If the LHS of the assignment is a condition variable that was -* previously added, insert an additional assignment of false to -* the variable. -*/ - const bool assign_to_cv = - _mesa_set_search( - set, assign->lhs->variable_referenced()) != NULL; - - if (!assign->condition) { - if (assign_to_cv) { - assign->rhs = - new(mem_ctx) ir_expression(ir_binop_logic_and, - glsl_type::bool_type, - cond_expr->clone(mem_ctx, NULL), - assign->rhs); - } else { - assign->condition = cond_expr->clone(mem_ctx, NULL); - } - } else { - assign->condition = - new(mem_ctx) ir_expression(ir_binop_logic_and, -glsl_type::bool_type, -cond_expr->clone(mem_ctx, NULL), -assign->condition); - } -} + ir_assignment *assign = (ir_assignment *)ir; + + if (_mesa_set_search(set, assign) == NULL) { +_mesa_set_add(set, assign); + +/* If the LHS of the assignment is a condition variable that was + * previously added, insert an additional assignment of false to + * the variable. 
+ */ +const bool assign_to_cv = + _mesa_set_search( + set, assign->lhs->variable_referenced()) != NULL; + +if (!assign->condition) { + if (assign_to_cv) { + assign->rhs = + new(mem_ctx) ir_expression(ir_binop_logic_and, +glsl_type::bool_type, +cond_expr->clone(mem_ctx, NULL), +assign->rhs); + } else { + assign->condition = cond_expr->clone(mem_ctx, NULL); + } +} else { + assign->condition = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->condition); +} + } } /* Now, move from the if block to the block surrounding it. */ @@ -216,9 +216,8 @@ move_block_to_cond_assign(void *mem_ctx, } ir_visitor_status -ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir) +ir_if_to_cond_assign_visitor::visit_enter(ir_if *) { - (void) ir; this->depth++; return visit_continue; @@ -277,8 +276,8 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) */ ir_variable *const then_var = new(mem_ctx) ir_variable(glsl_type::bool_type, - "if_to_cond_assign_then", - ir_var_temporary); + "if_to_cond_assign_then", + ir_var_temporary); ir->insert_before(then_var); ir_dereference_variable *then_cond = @@ -288,8 +287,8 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) ir->insert_before(assign); move_block_to_cond_assign(mem_ctx, ir, then_cond, ->then_instructions, -this->condition_variables); + >then_instructions, + this->condition_variables); /* Add the new
[Mesa-dev] [PATCH 07/22] glsl: Fix coding standards issues in lower_variable_index_to_cond_assign
From: "\"Ian Romanick\""From: Ian Romanick Mostly tabs-before-spaces, but there was some other trivium too. Signed-off-by: Ian Romanick --- .../lower_variable_index_to_cond_assign.cpp | 154 +- 1 file changed, 76 insertions(+), 78 deletions(-) diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp index dd49272..9e2dd831 100644 --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -71,12 +71,13 @@ */ ir_variable * compare_index_block(exec_list *instructions, ir_variable *index, - unsigned base, unsigned components, void *mem_ctx) +unsigned base, unsigned components, void *mem_ctx) { ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); assert(index->type->is_scalar()); - assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT); + assert(index->type->base_type == GLSL_TYPE_INT || + index->type->base_type == GLSL_TYPE_UINT); assert(components >= 1 && components <= 4); if (components > 1) { @@ -94,19 +95,18 @@ compare_index_block(exec_list *instructions, ir_variable *index, test_indices_data.i[3] = base + 3; ir_constant *const test_indices = - new(mem_ctx) ir_constant(broadcast_index->type, - _indices_data); + new(mem_ctx) ir_constant(broadcast_index->type, _indices_data); ir_rvalue *const condition_val = new(mem_ctx) ir_expression(ir_binop_equal, -glsl_type::bvec(components), -broadcast_index, -test_indices); + glsl_type::bvec(components), + broadcast_index, + test_indices); ir_variable *const condition = new(mem_ctx) ir_variable(condition_val->type, - "dereference_condition", - ir_var_temporary); + "dereference_condition", + ir_var_temporary); instructions->push_tail(condition); ir_rvalue *const cond_deref = @@ -133,7 +133,7 @@ class deref_replacer : public ir_rvalue_visitor { public: deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) : 
variable_to_replace(variable_to_replace), value(value), - progress(false) +progress(false) { assert(this->variable_to_replace != NULL); assert(this->value != NULL); @@ -143,9 +143,9 @@ public: { ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); - if ((dv != NULL) && (dv->var == this->variable_to_replace)) { -this->progress = true; -*rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); + if (dv != NULL && dv->var == this->variable_to_replace) { + this->progress = true; + *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); } } @@ -167,10 +167,10 @@ public: virtual ir_visitor_status visit_enter(ir_dereference_array *ir) { - if (is_array_or_matrix(ir->array) - && (ir->array_index->as_constant() == NULL)) { -this->deref = ir; -return visit_stop; + if (is_array_or_matrix(ir->array) && + ir->array_index->as_constant() == NULL) { + this->deref = ir; + return visit_stop; } return visit_continue; @@ -222,8 +222,8 @@ struct assignment_generator */ ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); ir_assignment *const assignment = (is_write) -? new(mem_ctx) ir_assignment(element, variable, condition, write_mask) -: new(mem_ctx) ir_assignment(variable, element, condition); + ? 
new(mem_ctx) ir_assignment(element, variable, condition, write_mask) + : new(mem_ctx) ir_assignment(variable, element, condition); list->push_tail(assignment); } @@ -242,11 +242,11 @@ struct switch_generator void *mem_ctx; switch_generator(const TFunction& generator, ir_variable *index, - unsigned linear_sequence_max_length, - unsigned condition_components) +unsigned linear_sequence_max_length, +unsigned condition_components) : generator(generator), index(index), - linear_sequence_max_length(linear_sequence_max_length), - condition_components(condition_components) +linear_sequence_max_length(linear_sequence_max_length), +condition_components(condition_components) { this->mem_ctx = ralloc_parent(index); } @@ -266,10 +266,10 @@ struct switch_generator */
[Mesa-dev] [PATCH 02/22] glsl: Lower ifs to conditional-select instead of conditional-assign
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/lower_if_to_cond_assign.cpp | 22 --- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp index 0d6aa00..42163c3 100644 --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp @@ -189,22 +189,18 @@ move_block_to_cond_assign(void *mem_ctx, _mesa_set_search( set, assign->lhs->variable_referenced()) != NULL; -if (!assign->condition) { - if (assign_to_cv) { - assign->rhs = - new(mem_ctx) ir_expression(ir_binop_logic_and, -glsl_type::bool_type, -cond_expr->clone(mem_ctx, NULL), -assign->rhs); - } else { - assign->condition = cond_expr->clone(mem_ctx, NULL); - } -} else { - assign->condition = +if (assign_to_cv) { + assign->rhs = new(mem_ctx) ir_expression(ir_binop_logic_and, glsl_type::bool_type, cond_expr->clone(mem_ctx, NULL), - assign->condition); + assign->rhs); +} else { + assign->rhs = + new(mem_ctx) ir_expression(ir_triop_csel, + cond_expr->clone(mem_ctx, NULL), + assign->rhs, + assign->lhs->clone(mem_ctx, NULL)); } } } -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/22] glsl: Convert lower_variable_index_to_cond_assign to ir_builder
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/ir_optimization.h | 8 +- .../lower_variable_index_to_cond_assign.cpp | 158 +++--- .../glsl/lower_vec_index_to_cond_assign.cpp | 4 +- 3 files changed, 65 insertions(+), 105 deletions(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 0fbbf34..38fb549 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -170,9 +170,11 @@ bool lower_blend_equation_advanced(gl_linked_shader *shader); bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state); void propagate_invariance(exec_list *instructions); -ir_variable *compare_index_block(exec_list *instructions, ir_variable *index, - unsigned base, unsigned components, - void *mem_ctx); +namespace ir_builder { class ir_factory; }; + +ir_variable *compare_index_block(ir_builder::ir_factory , + ir_variable *index, + unsigned base, unsigned components); bool lower_64bit_integer_instructions(exec_list *instructions, unsigned what_to_lower); diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp index 9e2dd831..6fe4fe6 100644 --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -51,6 +51,10 @@ #include "ir_optimization.h" #include "compiler/glsl_types.h" #include "main/macros.h" +#include "program/prog_instruction.h" /* For SWIZZLE_ */ +#include "ir_builder.h" + +using namespace ir_builder; /** * Generate a comparison value for a block of indices @@ -70,20 +74,17 @@ * must be dereferenced per use. 
*/ ir_variable * -compare_index_block(exec_list *instructions, ir_variable *index, -unsigned base, unsigned components, void *mem_ctx) +compare_index_block(ir_factory , ir_variable *index, +unsigned base, unsigned components) { - ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); - assert(index->type->is_scalar()); assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT); assert(components >= 1 && components <= 4); - if (components > 1) { - const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; - broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); - } + ir_rvalue *const broadcast_index = components > 1 + ? swizzle(index, SWIZZLE_, components) + : operand(index).val; /* Compare the desired index value with the next block of four indices. */ @@ -95,23 +96,14 @@ compare_index_block(exec_list *instructions, ir_variable *index, test_indices_data.i[3] = base + 3; ir_constant *const test_indices = - new(mem_ctx) ir_constant(broadcast_index->type, _indices_data); + new(body.mem_ctx) ir_constant(broadcast_index->type, _indices_data); - ir_rvalue *const condition_val = - new(mem_ctx) ir_expression(ir_binop_equal, - glsl_type::bvec(components), - broadcast_index, - test_indices); + ir_rvalue *const condition_val = equal(broadcast_index, test_indices); - ir_variable *const condition = - new(mem_ctx) ir_variable(condition_val->type, - "dereference_condition", - ir_var_temporary); - instructions->push_tail(condition); + ir_variable *const condition = body.make_temp(condition_val->type, + "dereference_condition"); - ir_rvalue *const cond_deref = - new(mem_ctx) ir_dereference_variable(condition); - instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); + body.emit(assign(condition, condition_val)); return condition; } @@ -201,18 +193,13 @@ struct assignment_generator { } - void generate(unsigned i, ir_rvalue* condition, exec_list *list) const + void generate(unsigned i, 
ir_rvalue* condition, ir_factory ) const { - /* Just clone the rest of the deref chain when trying to get at the - * underlying variable. - */ - void *mem_ctx = ralloc_parent(base_ir); - /* Clone the old r-value in its entirety. Then replace any occurances of * the old variable index with the new constant index. */ - ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); - ir_constant *const index = new(mem_ctx) ir_constant(i); + ir_dereference *element = this->rvalue->clone(body.mem_ctx, NULL); + ir_constant
[Mesa-dev] [PATCH 00/22] Send ir_assignment::condition to the Upside Down
From: Ian Romanick I have also pushed this series to https://cgit.freedesktop.org/~idr/mesa/log/?h=remove-ir_assignment-condition I think -201 lines speaks for itself, but... The condition field pre-dates the ir_triop_csel operation by a few years. The idea was that any assignment could be made conditional so that platforms that didn't have flow control could replace an if-statement with a bunch of conditional assignments. However, handling this condition is really annoying in most optimization passes, so most passes bail as soon as a condition is encountered. Much later in development, ir_triop_csel was added. For the most part, a conditional assignment is equivalent to a conditional select where the "false" value is the destination variable. My main motivation for this series is that I didn't want to deal with ir_assignment::condition in the SPIR-V generator work. That I was able to delete a pile of code is a happy bonus. There are two odd bits that may warrant a bit closer examination: - Patch 6 changes the code that is generated for non-constant vector indexing, and that results in some shader-db changes. Most of the changes are small (+1 instruction in a program), but some are more substantial. - Patch 18 moves the 'foo = foo;' optimization from one optimization pass to another. If we just eliminate that optimization altogether, shader-db results, even on platforms that use NIR, are hurt quite substantially. I have not investigated why NIR isn't picking up the slack here. 
Ian Romanick (22): glsl: Fix coding standards issues in lower_if_to_cond_assign glsl: Lower ifs to conditional-select instead of conditional-assign glsl: Fix coding standards issues in lower_vec_index_to_cond_assign glsl: Return ir_variable from compare_index_block glsl: Convert lower_vec_index_to_cond_assign to using ir_builder glsl: Lower vector indexing to conditional-select instead of conditional-assign glsl: Fix coding standards issues in lower_variable_index_to_cond_assign glsl: Convert lower_variable_index_to_cond_assign to ir_builder glsl: Lower array indexing to conditional-select instead of conditional-assign glsl: Don't pass NULL to ir_assignment constructor when not necessary glsl/ast: Use ir_binop_equal instead of ir_binop_all_equal glsl/ast: Convert ast_case_label::hir to ir_builder glsl/ast: Explicitly track the set of case labels that occur after default glsl/ast: Generate a more compact expression to disable execution of default case glsl/ast: Use logical-or instead of conditional assignment to set fallthru_var glsl: Eliminate ir_builder assign overloads that have a condition glsl: Eliminate ir_assignment constructors that have a condition glsl: Move 'foo = foo;' optimization to opt_dead_code_local glsl: Kill ir_assignment::condition with fire glsl: Fix indentation left weird from the previous commit glsl: Remove spurious assertions glsl: Simplify ir_assignment::clone src/compiler/glsl/ast_function.cpp| 37 ++- src/compiler/glsl/ast_to_hir.cpp | 152 ++- src/compiler/glsl/glsl_to_nir.cpp | 16 +- src/compiler/glsl/ir.cpp | 18 +- src/compiler/glsl/ir.h| 11 +- src/compiler/glsl/ir_builder.cpp | 15 +- src/compiler/glsl/ir_builder.h| 2 - .../glsl/ir_builder_print_visitor.cpp | 4 - src/compiler/glsl/ir_clone.cpp| 14 +- src/compiler/glsl/ir_constant_expression.cpp | 12 +- .../glsl/ir_expression_flattening.cpp | 4 +- src/compiler/glsl/ir_hv_accept.cpp| 3 - src/compiler/glsl/ir_optimization.h | 8 +- src/compiler/glsl/ir_print_visitor.cpp| 3 - 
src/compiler/glsl/ir_reader.cpp | 3 +- src/compiler/glsl/ir_rvalue_visitor.cpp | 2 - src/compiler/glsl/loop_analysis.cpp | 3 +- src/compiler/glsl/loop_controls.cpp | 2 +- src/compiler/glsl/lower_discard.cpp | 4 +- src/compiler/glsl/lower_distance.cpp | 2 +- src/compiler/glsl/lower_if_to_cond_assign.cpp | 91 +++ src/compiler/glsl/lower_instructions.cpp | 4 +- src/compiler/glsl/lower_jumps.cpp | 11 +- src/compiler/glsl/lower_mat_op_to_vec.cpp | 2 +- .../glsl/lower_texture_projection.cpp | 2 +- .../lower_variable_index_to_cond_assign.cpp | 253 +++--- .../glsl/lower_vec_index_to_cond_assign.cpp | 105 src/compiler/glsl/lower_vector.cpp| 4 +- src/compiler/glsl/opt_array_splitting.cpp | 10 +- src/compiler/glsl/opt_constant_folding.cpp| 17 -- .../glsl/opt_constant_propagation.cpp | 3 - src/compiler/glsl/opt_constant_variable.cpp | 3 - src/compiler/glsl/opt_copy_propagation.cpp| 24 +- .../glsl/opt_copy_propagation_elements.cpp| 3 -
[Mesa-dev] [PATCH 12/22] glsl/ast: Convert ast_case_label::hir to ir_builder
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/ast_to_hir.cpp | 35 ++-- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 9d69c13..02b0726 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -6657,12 +6657,9 @@ ir_rvalue * ast_case_label::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) { - void *ctx = state; - - ir_dereference_variable *deref_fallthru_var = - new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var); + ir_factory body(instructions, state); - ir_rvalue *const true_val = new(ctx) ir_constant(true); + ir_variable *const fallthru_var = state->switch_state.is_fallthru_var; /* If not default case, ... */ if (this->test_value != NULL) { @@ -6670,7 +6667,8 @@ ast_case_label::hir(exec_list *instructions, * comparison of cached test expression value to case label. */ ir_rvalue *const label_rval = this->test_value->hir(instructions, state); - ir_constant *label_const = label_rval->constant_expression_value(ctx); + ir_constant *label_const = + label_rval->constant_expression_value(body.mem_ctx); if (!label_const) { YYLTYPE loc = this->test_value->get_location(); @@ -6680,7 +6678,7 @@ ast_case_label::hir(exec_list *instructions, "constant expression"); /* Stuff a dummy value in to allow processing to continue. 
*/ - label_const = new(ctx) ir_constant(0); + label_const = body.constant(0); } else { hash_entry *entry = _mesa_hash_table_search(state->switch_state.labels_ht, @@ -6707,7 +6705,7 @@ ast_case_label::hir(exec_list *instructions, ir_rvalue *label = label_const; ir_rvalue *deref_test_var = - new(ctx) ir_dereference_variable(state->switch_state.test_var); + new(body.mem_ctx) ir_dereference_variable(state->switch_state.test_var); /* * From GLSL 4.40 specification section 6.2 ("Selection"): @@ -6758,14 +6756,9 @@ ast_case_label::hir(exec_list *instructions, label->type = deref_test_var->type; } - ir_expression *test_cond = new(ctx) ir_expression(ir_binop_equal, -label, -deref_test_var); - - ir_assignment *set_fallthru_on_test = - new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); - - instructions->push_tail(set_fallthru_on_test); + body.emit(assign(fallthru_var, + body.constant(true), + equal(label, deref_test_var))); } else { /* default case */ if (state->switch_state.previous_default) { YYLTYPE loc = this->get_location(); @@ -6778,14 +6771,8 @@ ast_case_label::hir(exec_list *instructions, state->switch_state.previous_default = this; /* Set fallthru condition on 'run_default' bool. */ - ir_dereference_variable *deref_run_default = - new(ctx) ir_dereference_variable(state->switch_state.run_default); - - /* Set fallthru state. */ - ir_assignment *set_fallthru = - new(ctx) ir_assignment(deref_fallthru_var, true_val, deref_run_default); - - instructions->push_tail(set_fallthru); + body.emit(assign(fallthru_var, body.constant(true), + state->switch_state.run_default)); } /* Case statements do not have r-values. */ -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/22] glsl/ast: Generate a more compact expression to disable execution of default case
From: "\"Ian Romanick\""From: Ian Romanick Instead of generating a sequence like: run_default = true; if (i == 3) // some label that appears after default run_default = false; if (i == 4) // some label that appears after default run_default = false; ... if (run_default) { ... } generate something like: run_default = !((i == 3) || (i == 4) || ...); if (run_default) { ... } This eliminates a use of conditional assignments, and it enables the elimination of another. Signed-off-by: Ian Romanick --- src/compiler/glsl/ast_to_hir.cpp | 31 ++- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 1d74e24..5f751a4 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -6589,27 +6589,11 @@ ast_case_statement_list::hir(exec_list *instructions, * if default should be chosen or not. */ if (!default_case.is_empty()) { - - ir_rvalue *const true_val = new (state) ir_constant(true); - ir_dereference_variable *deref_run_default_var = - new(state) ir_dereference_variable(state->switch_state.run_default); - - /* Choose to run default case initially, following conditional - * assignments might change this. - */ - ir_assignment *const init_var = - new(state) ir_assignment(deref_run_default_var, true_val); - instructions->push_tail(init_var); - - /* Default case was the last one, no checks required. */ - if (after_default.is_empty()) { - instructions->append_list(_case); - return NULL; - } - struct hash_entry *entry; ir_factory body(instructions, state); + ir_expression *cmp = NULL; + hash_table_foreach(state->switch_state.labels_ht, entry) { const struct case_label *const l = (struct case_label *) entry->data; @@ -6623,12 +6607,17 @@ ast_case_statement_list::hir(exec_list *instructions, ? 
body.constant(unsigned(l->value)) : body.constant(int(l->value)); -body.emit(assign(state->switch_state.run_default, - body.constant(false), - equal(cnst, state->switch_state.test_var))); +cmp = cmp == NULL + ? equal(cnst, state->switch_state.test_var) + : logic_or(cmp, equal(cnst, state->switch_state.test_var)); } } + if (cmp != NULL) + body.emit(assign(state->switch_state.run_default, logic_not(cmp))); + else + body.emit(assign(state->switch_state.run_default, body.constant(true))); + /* Append default case and all cases after it. */ instructions->append_list(_case); instructions->append_list(_default); -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 18/22] glsl: Move 'foo = foo; ' optimization to opt_dead_code_local
From: "\"Ian Romanick\""From: Ian Romanick The optimization as done in opt_copy_propagation would have to be removed in the next patch, and doing so would lead to some substantial regressions. Signed-off-by: Ian Romanick Cc: Jason Ekstrand --- src/compiler/glsl/opt_copy_propagation.cpp | 19 +++ src/compiler/glsl/opt_dead_code_local.cpp | 11 +++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/compiler/glsl/opt_copy_propagation.cpp b/src/compiler/glsl/opt_copy_propagation.cpp index b72ae5a..b8ef0de0 100644 --- a/src/compiler/glsl/opt_copy_propagation.cpp +++ b/src/compiler/glsl/opt_copy_propagation.cpp @@ -348,18 +348,13 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir) ir_variable *lhs_var = ir->whole_variable_written(); ir_variable *rhs_var = ir->rhs->whole_variable_referenced(); - if ((lhs_var != NULL) && (rhs_var != NULL)) { - if (lhs_var == rhs_var) { -/* This is a dumb assignment, but we've conveniently noticed - * it here. Removing it now would mess up the loop iteration - * calling us. Just flag it to not execute, and someone else - * will clean up the mess. - */ -ir->condition = new(ralloc_parent(ir)) ir_constant(false); -this->progress = true; - } else if (lhs_var->data.mode != ir_var_shader_storage && - lhs_var->data.mode != ir_var_shader_shared && - lhs_var->data.precise == rhs_var->data.precise) { + /* Don't try to remove a dumb assignment of a variable to itself. Removing +* it now would mess up the loop iteration calling us. 
+*/ + if (lhs_var != NULL && rhs_var != NULL && lhs_var != rhs_var) { + if (lhs_var->data.mode != ir_var_shader_storage && + lhs_var->data.mode != ir_var_shader_shared && + lhs_var->data.precise == rhs_var->data.precise) { assert(lhs_var); assert(rhs_var); _mesa_hash_table_insert(acp, lhs_var, rhs_var); diff --git a/src/compiler/glsl/opt_dead_code_local.cpp b/src/compiler/glsl/opt_dead_code_local.cpp index a403879..3cbc441 100644 --- a/src/compiler/glsl/opt_dead_code_local.cpp +++ b/src/compiler/glsl/opt_dead_code_local.cpp @@ -173,6 +173,17 @@ process_assignment(void *lin_ctx, ir_assignment *ir, exec_list *assignments) bool progress = false; kill_for_derefs_visitor v(assignments); + if (ir->condition == NULL) { + /* If this is an assignment of the form "foo = foo;", remove the whole + * instruction and be done with it. + */ + const ir_variable *const lhs_var = ir->whole_variable_written(); + if (lhs_var != NULL && lhs_var == ir->rhs->whole_variable_referenced()) { + ir->remove(); + return true; + } + } + /* Kill assignment entries for things used to produce this assignment. */ ir->rhs->accept(); if (ir->condition) { -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 22/22] glsl: Simplify ir_assignment::clone
From: "\"Ian Romanick\""From: Ian Romanick Use the ir_assignment constructor that takes the write mask as a parameter. This skips some work that was done in the other constructor. Signed-off-by: Ian Romanick --- src/compiler/glsl/ir_clone.cpp | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/compiler/glsl/ir_clone.cpp b/src/compiler/glsl/ir_clone.cpp index e08f0c8..9cc962c 100644 --- a/src/compiler/glsl/ir_clone.cpp +++ b/src/compiler/glsl/ir_clone.cpp @@ -251,11 +251,9 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const ir_assignment * ir_assignment::clone(void *mem_ctx, struct hash_table *ht) const { - ir_assignment *cloned = - new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht), - this->rhs->clone(mem_ctx, ht)); - cloned->write_mask = this->write_mask; - return cloned; + return new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht), + this->rhs->clone(mem_ctx, ht), + this->write_mask); } ir_function * -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/3] glsl: make loop unrolling more like the nir unrolling path
The old code assumed that loop terminators will always be at the start of the loop, resulting in otherwise unrollable loops not being unrolled at all. For example the current code would unroll: int j = 0; do { if (j > 5) break; ... do stuff ... j++; } while (j < 4); But would fail to unroll the following as no iteration limit was calculated because it failed to find the terminator: int j = 0; do { ... do stuff ... j++; } while (j < 4); Also we would fail to unroll the following as we ended up calculating the iteration limit as 6 rather than 4. The unroll code then assumed we had 3 terminators rather the 2 as it wasn't able to determine that "if (j > 5)" was redundant. int j = 0; do { if (j > 5) break; ... do stuff ... if (bool(i)) break; j++; } while (j < 4); This patch changes this pass to be more like the NIR unrolling pass. With this change we handle loop terminators correctly and also handle cases where the terminators have instructions in their branches other than a break. V2: - fixed regression where loops with a break in else were never unrolled in v1. - fixed confusing/wrong naming of bools in complex unrolling. --- src/compiler/glsl/loop_analysis.cpp | 50 +-- src/compiler/glsl/loop_analysis.h | 5 +- src/compiler/glsl/loop_unroll.cpp | 172 3 files changed, 161 insertions(+), 66 deletions(-) diff --git a/src/compiler/glsl/loop_analysis.cpp b/src/compiler/glsl/loop_analysis.cpp index 78279844dc..5bf406e7ee 100644 --- a/src/compiler/glsl/loop_analysis.cpp +++ b/src/compiler/glsl/loop_analysis.cpp @@ -18,21 +18,21 @@ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ #include "compiler/glsl_types.h" #include "loop_analysis.h" #include "ir_hierarchical_visitor.h" -static bool is_loop_terminator(ir_if *ir); +static void try_add_loop_terminator(loop_variable_state *ls, ir_if *ir); static bool all_expression_operands_are_loop_constant(ir_rvalue *, hash_table *); static ir_rvalue *get_basic_induction_increment(ir_assignment *, hash_table *); /** * Find an initializer of a variable outside a loop * * Works backwards from the loop to find the pre-loop value of the variable. @@ -80,21 +80,21 @@ find_initial_value(ir_loop *loop, ir_variable *var) break; } } return NULL; } static int calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, - enum ir_expression_operation op) + enum ir_expression_operation op, bool continue_from_then) { if (from == NULL || to == NULL || increment == NULL) return -1; void *mem_ctx = ralloc_context(NULL); ir_expression *const sub = new(mem_ctx) ir_expression(ir_binop_sub, from->type, to, from); ir_expression *const div = @@ -147,22 +147,24 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, unreachable("Unsupported type for loop iterator."); } ir_expression *const mul = new(mem_ctx) ir_expression(ir_binop_mul, increment->type, iter, increment); ir_expression *const add = new(mem_ctx) ir_expression(ir_binop_add, mul->type, mul, from); - ir_expression *const cmp = + ir_expression *cmp = new(mem_ctx) ir_expression(op, glsl_type::bool_type, add, to); + if (continue_from_then) + cmp = new(mem_ctx) ir_expression(ir_unop_logic_not, cmp); ir_constant *const cmp_result = cmp->constant_expression_value(mem_ctx); assert(cmp_result != NULL); if (cmp_result->get_bool_component(0)) { iter_value += bias[i]; valid_loop = true; break; } } @@ -299,26 +301,28 @@ loop_variable_state::insert(ir_variable *var) lv->var = var; _mesa_hash_table_insert(this->var_hash, lv->var, lv); this->variables.push_tail(lv); return lv; } loop_terminator * -loop_variable_state::insert(ir_if *if_stmt) 
+loop_variable_state::insert(ir_if *if_stmt, bool continue_from_then) { void *mem_ctx = ralloc_parent(this); loop_terminator *t = new(mem_ctx) loop_terminator(); t->ir = if_stmt; + t->continue_from_then = continue_from_then; + this->terminators.push_tail(t); return t; } /** * If the given variable already is recorded in the state for this loop, * return the corresponding loop_variable object that records information * about it. @@ -461,24 +465,22 @@ loop_analysis::visit_leave(ir_loop *ir) return visit_continue; foreach_in_list(ir_instruction, node,
[Mesa-dev] [PATCH v2 1/3] glsl: don't drop instructions from unreachable terminators continue branch
These instructions will be executed on every iteration of the loop, so we cannot drop them. V2: - move removal of unreachable terminators from the terminator list to the same place they are removed from the IR as suggested by Nicolai. --- src/compiler/glsl/loop_analysis.h | 7 +++ src/compiler/glsl/loop_unroll.cpp | 28 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/compiler/glsl/loop_analysis.h b/src/compiler/glsl/loop_analysis.h index 8f82404694..99b6bf7563 100644 --- a/src/compiler/glsl/loop_analysis.h +++ b/src/compiler/glsl/loop_analysis.h @@ -27,20 +27,27 @@ #include "ir.h" #include "util/hash_table.h" /** * Analyze and classify all variables used in all loops in the instruction list */ extern class loop_state * analyze_loop_variables(exec_list *instructions); +static inline bool +is_break(ir_instruction *ir) +{ + return ir != NULL && ir->ir_type == ir_type_loop_jump && + ((ir_loop_jump *) ir)->is_break(); +} + extern bool unroll_loops(exec_list *instructions, loop_state *ls, const struct gl_shader_compiler_options *options); /** * Tracking for all variables used in a loop */ class loop_variable_state : public exec_node { diff --git a/src/compiler/glsl/loop_unroll.cpp b/src/compiler/glsl/loop_unroll.cpp index 7eea439454..358cbf10af 100644 --- a/src/compiler/glsl/loop_unroll.cpp +++ b/src/compiler/glsl/loop_unroll.cpp @@ -46,27 +46,20 @@ public: void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); loop_state *state; bool progress; const struct gl_shader_compiler_options *options; }; } /* anonymous namespace */ -static bool -is_break(ir_instruction *ir) -{ - return ir != NULL && ir->ir_type == ir_type_loop_jump -&& ((ir_loop_jump *) ir)->is_break(); -} - class loop_unroll_count : public ir_hierarchical_visitor { public: int nodes; bool unsupported_variable_indexing; bool array_indexed_by_induction_var_with_exact_iterations; /* If there are nested loops, the node count will be inaccurate. 
*/ bool nested_loop; loop_unroll_count(exec_list *list, loop_variable_state *ls, const struct gl_shader_compiler_options *options) @@ -326,30 +319,49 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) } } /* Remove the conditional break statements associated with all terminators * that are associated with a fixed iteration count, except for the one * associated with the limiting terminator--that one needs to stay, since * it terminates the loop. Exception: if the loop still has a normative * bound, then that terminates the loop, so we don't even need the limiting * terminator. */ - foreach_in_list(loop_terminator, t, >terminators) { + foreach_in_list_safe(loop_terminator, t, >terminators) { if (t->iterations < 0) continue; + exec_list *branch_instructions; if (t != ls->limiting_terminator) { + ir_instruction *ir_if_last = (ir_instruction *) +t->ir->then_instructions.get_tail(); + if (is_break(ir_if_last)) { +branch_instructions = >ir->else_instructions; + } else { +branch_instructions = >ir->then_instructions; +assert(is_break((ir_instruction *) +t->ir->else_instructions.get_tail())); + } + + exec_list copy_list; + copy_list.make_empty(); + clone_ir_list(ir, _list, branch_instructions); + + t->ir->insert_before(_list); t->ir->remove(); assert(ls->num_loop_jumps > 0); ls->num_loop_jumps--; + /* Also remove it from the terminator list */ + t->remove(); + this->progress = true; } } if (ls->limiting_terminator == NULL) { ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); /* If a loop has no induction variable and the last instruction is * a break, unroll the loop with a count of 1. This is the classic -- 2.13.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/3] glsl: check if induction var incremented before use in terminator
do-while loops can increment the starting value before the condition is checked. e.g. do { ndx++; } while (ndx < 3); This commit changes the code to detect this and reduces the iteration count by 1 if found. V2: fix terminator spelling Reviewed-by: Nicolai Hähnle Reviewed-by: Elie Tournier --- src/compiler/glsl/loop_analysis.cpp | 38 + 1 file changed, 38 insertions(+) diff --git a/src/compiler/glsl/loop_analysis.cpp b/src/compiler/glsl/loop_analysis.cpp index 81a07f78f8..78279844dc 100644 --- a/src/compiler/glsl/loop_analysis.cpp +++ b/src/compiler/glsl/loop_analysis.cpp @@ -164,20 +164,54 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, iter_value += bias[i]; valid_loop = true; break; } } ralloc_free(mem_ctx); return (valid_loop) ? iter_value : -1; } +static bool +incremented_before_terminator(ir_loop *loop, ir_variable *var, + ir_if *terminator) +{ + for (exec_node *node = loop->body_instructions.get_head(); +!node->is_tail_sentinel(); +node = node->get_next()) { + ir_instruction *ir = (ir_instruction *) node; + + switch (ir->ir_type) { + case ir_type_if: + if (ir->as_if() == terminator) +return false; + break; + + case ir_type_assignment: { + ir_assignment *assign = ir->as_assignment(); + ir_variable *assignee = assign->lhs->whole_variable_referenced(); + + if (assignee == var) { +assert(assign->condition == NULL); +return true; + } + + break; + } + + default: + break; + } + } + + unreachable("Unable to find induction variable"); +} /** * Record the fact that the given loop variable was referenced inside the loop. * * \arg in_assignee is true if the reference was on the LHS of an assignment. * * \arg in_conditional_code_or_nested_loop is true if the reference occurred * inside an if statement or a nested loop. 
* * \arg current_assignment is the ir_assignment node that the loop variable is @@ -575,20 +609,24 @@ loop_analysis::visit_leave(ir_loop *ir) ir_variable *var = counter->variable_referenced(); ir_rvalue *init = find_initial_value(ir, var); loop_variable *lv = ls->get(var); if (lv != NULL && lv->is_induction_var()) { t->iterations = calculate_iterations(init, limit, lv->increment, cmp); +if (incremented_before_terminator(ir, var, t->ir)) { + t->iterations--; +} + if (t->iterations >= 0 && (ls->limiting_terminator == NULL || t->iterations < ls->limiting_terminator->iterations)) { ls->limiting_terminator = t; } } break; } default: -- 2.13.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Revert "intel: Remove unused Kabylake pci ids"
On Wed, Sep 20, 2017 at 08:17:32PM +, Anuj Phogat wrote: > drm-intel is in favor of keeping the unused pci-id's which > are still listed in the h/w specs. To keep it uniform > across multiple gfx stack components, I'm reverting below > Mesa patches: > b2dae9f8fd310c19e66b161a7ee9845af78f73e0 > ebc5ccf3cc88990248695e833d9ff11e10d91240. > > Signed-off-by: Anuj Phogat> Cc: Matt Turner > Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi > --- > include/pci_ids/i965_pci_ids.h | 8 > 1 file changed, 8 insertions(+) > > diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h > index 655d579f0f..0dd01a4343 100644 > --- a/include/pci_ids/i965_pci_ids.h > +++ b/include/pci_ids/i965_pci_ids.h > @@ -145,16 +145,24 @@ CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics 505 > (Broxton)") > CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)") > CHIPSET(0x5902, kbl_gt1, "Intel(R) HD Graphics 610 (Kaby Lake GT1)") > CHIPSET(0x5906, kbl_gt1, "Intel(R) HD Graphics 610 (Kaby Lake GT1)") > +CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x5908, kbl_gt1, "Intel(R) Kabylake GT1") > CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > +CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > CHIPSET(0x5917, kbl_gt2, "Intel(R) UHD Graphics 620 (Kabylake GT2)") > CHIPSET(0x5912, kbl_gt2, "Intel(R) HD Graphics 630 (Kaby Lake GT2)") > CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kaby Lake GT2)") > +CHIPSET(0x591A, kbl_gt2, "Intel(R) HD Graphics P630 (Kaby Lake GT2)") > CHIPSET(0x591B, kbl_gt2, "Intel(R) HD Graphics 630 (Kaby Lake GT2)") > CHIPSET(0x591D, kbl_gt2, "Intel(R) HD Graphics P630 (Kaby Lake GT2)") > CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kaby Lake GT2)") > CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F") > +CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3") > CHIPSET(0x5926, kbl_gt3, "Intel(R) Iris Plus Graphics 640 (Kaby 
Lake GT3e)") > CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3e)") > +CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4") > CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)") > CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)") > CHIPSET(0x3E90, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") > -- > 2.13.5 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 19/22] glsl: Kill ir_assignment::condition with fire
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/glsl_to_nir.cpp | 16 ++ src/compiler/glsl/ir.cpp | 5 ++--- src/compiler/glsl/ir.h| 6 -- .../glsl/ir_builder_print_visitor.cpp | 4 src/compiler/glsl/ir_clone.cpp| 2 -- src/compiler/glsl/ir_constant_expression.cpp | 12 +-- src/compiler/glsl/ir_hv_accept.cpp| 3 --- src/compiler/glsl/ir_print_visitor.cpp| 3 --- src/compiler/glsl/ir_rvalue_visitor.cpp | 2 -- src/compiler/glsl/loop_analysis.cpp | 3 +-- src/compiler/glsl/loop_controls.cpp | 2 +- src/compiler/glsl/lower_distance.cpp | 2 +- .../lower_variable_index_to_cond_assign.cpp | 17 +-- .../glsl/lower_vec_index_to_cond_assign.cpp | 4 src/compiler/glsl/opt_array_splitting.cpp | 7 --- src/compiler/glsl/opt_constant_folding.cpp| 17 --- .../glsl/opt_constant_propagation.cpp | 3 --- src/compiler/glsl/opt_constant_variable.cpp | 3 --- src/compiler/glsl/opt_copy_propagation.cpp| 3 --- .../glsl/opt_copy_propagation_elements.cpp| 3 --- .../glsl/opt_dead_builtin_varyings.cpp| 1 - src/compiler/glsl/opt_dead_code_local.cpp | 21 +++ src/compiler/glsl/opt_structure_splitting.cpp | 7 ++- src/compiler/glsl/opt_tree_grafting.cpp | 3 +-- src/compiler/glsl/opt_vectorize.cpp | 3 +-- src/mesa/program/ir_to_mesa.cpp | 16 -- src/mesa/state_tracker/st_glsl_to_tgsi.cpp| 17 ++- 27 files changed, 26 insertions(+), 159 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 99df6e0..57b832f 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1253,13 +1253,7 @@ nir_visitor::visit(ir_assignment *ir) copy->variables[0] = evaluate_deref(>instr, ir->lhs); copy->variables[1] = evaluate_deref(>instr, ir->rhs); - if (ir->condition) { - nir_push_if(, evaluate_rvalue(ir->condition)); - nir_builder_instr_insert(, >instr); - nir_pop_if(, NULL); - } else { - nir_builder_instr_insert(, >instr); - } + nir_builder_instr_insert(, >instr); return; } @@ -1290,13 +1284,7 @@ 
nir_visitor::visit(ir_assignment *ir) store->variables[0] = nir_deref_var_clone(lhs_deref, store); store->src[0] = nir_src_for_ssa(src); - if (ir->condition) { - nir_push_if(, evaluate_rvalue(ir->condition)); - nir_builder_instr_insert(, >instr); - nir_pop_if(, NULL); - } else { - nir_builder_instr_insert(, >instr); - } + nir_builder_instr_insert(, >instr); } /* diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index 4cf322d..52f9133 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -151,7 +151,7 @@ ir_assignment::whole_variable_written() ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, unsigned write_mask) : ir_instruction(ir_type_assignment), lhs(lhs), rhs(rhs), - condition(NULL), write_mask(write_mask) + write_mask(write_mask) { if (lhs->type->is_scalar() || lhs->type->is_vector()) { int lhs_components = 0; @@ -165,8 +165,7 @@ ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, } ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs) - : ir_instruction(ir_type_assignment), lhs(NULL), rhs(rhs), - condition(NULL), write_mask(0) + : ir_instruction(ir_type_assignment), lhs(NULL), rhs(rhs), write_mask(0) { /* If the RHS is a vector type, assume that all components of the vector * type are being written to the LHS. The write mask comes from the RHS diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index 28a356a..d7f8630 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -1475,12 +1475,6 @@ public: ir_rvalue *rhs; /** -* Optional condition for the assignment. -*/ - ir_rvalue *condition; - - - /** * Component mask written * * For non-vector types in the LHS, this field will be zero. 
For vector diff --git a/src/compiler/glsl/ir_builder_print_visitor.cpp b/src/compiler/glsl/ir_builder_print_visitor.cpp index 3e30c5d..1f29562 100644 --- a/src/compiler/glsl/ir_builder_print_visitor.cpp +++ b/src/compiler/glsl/ir_builder_print_visitor.cpp @@ -503,8 +503,6 @@ ir_builder_print_visitor::visit_enter(ir_assignment *ir) if (s != visit_continue) return (s == visit_continue_with_parent) ? visit_continue : s; - assert(ir->condition == NULL); - const struct hash_entry *const he_lhs = _mesa_hash_table_search(index_map, ir->lhs); @@ -525,8 +523,6 @@
[Mesa-dev] [PATCH 06/22] glsl: Lower vector indexing to conditional-select instead of conditional-assign
From: "\"Ian Romanick\""From: Ian Romanick This changes the generated GLSL IR from something like bvec4 eq = equal(vec4(index), vec4(0, 1, 2, 3)); if (eq.x) lhs = rhs.x; if (eq.y) lhs = rhs.y; if (eq.z) lhs = rhs.z; if (eq.w) lhs = rhs.w; to something like bvec4 eq = equal(vec4(index), vec4(0, 1, 2, 3)); lhs = eq.x ? rhs.x : rhs.y; lhs = eq.z ? rhs.z : lhs; lhs = eq.w ? rhs.w : lhs; Now the comparison of the index with 1 is dead. shader-db changes: G4X / Iron Lake: total instructions in shared programs: 4511136 -> 4511200 (0.00%) instructions in affected programs: 6539 -> 6603 (0.98%) helped: 0 HURT: 64 total cycles in shared programs: 108337124 -> 108337252 (0.00%) cycles in affected programs: 112868 -> 112996 (0.11%) helped: 0 HURT: 64 Sandy Bridge: total instructions in shared programs: 9852966 -> 9853030 (0.00%) instructions in affected programs: 5550 -> 5614 (1.15%) helped: 0 HURT: 64 total cycles in shared programs: 139567270 -> 139567394 (0.00%) cycles in affected programs: 26992 -> 27116 (0.46%) helped: 3 HURT: 37 Ivybridge: total instructions in shared programs: 9119649 -> 9119713 (0.00%) instructions in affected programs: 5325 -> 5389 (1.20%) helped: 0 HURT: 64 total cycles in shared programs: 81708934 -> 81709020 (0.00%) cycles in affected programs: 10446 -> 10532 (0.82%) helped: 0 HURT: 18 Haswell: total instructions in shared programs: 8301165 -> 8301229 (0.00%) instructions in affected programs: 5325 -> 5389 (1.20%) helped: 0 HURT: 64 total cycles in shared programs: 79526732 -> 79526806 (0.00%) cycles in affected programs: 5262 -> 5336 (1.41%) helped: 0 HURT: 11 Broadwell and Skylake: total instructions in shared programs: 13511867 -> 13504225 (-0.06%) instructions in affected programs: 100680 -> 93038 (-7.59%) helped: 86 HURT: 0 total cycles in shared programs: 544335160 -> 540320834 (-0.74%) cycles in affected programs: 56615030 -> 52600704 (-7.09%) helped: 43 HURT: 36 total spills in shared programs: 85420 -> 84334 (-1.27%) spills in affected 
programs: 3099 -> 2013 (-35.04%) helped: 27 HURT: 0 total fills in shared programs: 88695 -> 87427 (-1.43%) fills in affected programs: 3850 -> 2582 (-32.94%) helped: 27 HURT: 0 The spill / fills helped were all in Dolphin uber shaders. In some cases this change was 86 -> 14. That accounts for a ton of cycles. I have not investigated any of these changes beyond this. Signed-off-by: Ian Romanick --- .../glsl/lower_vec_index_to_cond_assign.cpp | 29 +-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp index 926a493..2053342 100644 --- a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp @@ -112,9 +112,32 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ orig_vector->type->vector_elements, mem_ctx); - /* Generate a conditional move of each vector element to the temp. */ - for (unsigned i = 0; i < orig_vector->type->vector_elements; i++) - body.emit(assign(var, swizzle(value, i, 1), swizzle(cond, i, 1))); + /* The swizzle must be 0, 1, 2, or 3. Generate an initial conditional +* select like +* +*var = (i == 0) ? value.x : value.y; +* +* For vectors larger than 2 elements, generate additional conditional +* selects like: +* +*var = (i == 2) ? value.z : var; +*var = (i == 3) ? value.w : var; +*/ + body.emit(assign(var, csel(swizzle_x(cond), + swizzle_x(value), + swizzle_y(value)))); + + if (orig_vector->type->vector_elements > 2) { + body.emit(assign(var, csel(swizzle_z(cond), + swizzle_z(value), + var))); + } + + if (orig_vector->type->vector_elements > 3) { + body.emit(assign(var, csel(swizzle_w(cond), + swizzle_w(value), + var))); + } /* Put all of the new instructions in the IR stream before the old * instruction. -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 17/22] glsl: Eliminate ir_assignment constructors that have a condition
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/ast_function.cpp| 15 +++ src/compiler/glsl/ir.cpp | 19 ++- src/compiler/glsl/ir.h| 5 ++--- src/compiler/glsl/ir_builder.cpp | 1 - src/compiler/glsl/ir_clone.cpp| 8 ++-- src/compiler/glsl/ir_reader.cpp | 3 ++- src/compiler/glsl/lower_mat_op_to_vec.cpp | 2 +- src/compiler/glsl/lower_vector.cpp| 4 ++-- src/compiler/glsl/opt_array_splitting.cpp | 7 +++ 9 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp index 46a61e4..5e3ea5e 100644 --- a/src/compiler/glsl/ast_function.cpp +++ b/src/compiler/glsl/ast_function.cpp @@ -1134,8 +1134,7 @@ process_vec_mat_constructor(exec_list *instructions, assert(var->type->is_vector()); assert(i < 4); ir_dereference *lhs = new(ctx) ir_dereference_variable(var); - assignment = new(ctx) ir_assignment(lhs, rhs, NULL, - (unsigned)(1 << i)); + assignment = new(ctx) ir_assignment(lhs, rhs, 1U << i); } instructions->push_tail(assignment); @@ -1332,7 +1331,7 @@ emit_inline_vector_constructor(const glsl_type *type, assert(rhs->type == lhs->type); - ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL, mask); + ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, mask); instructions->push_tail(inst); } else { unsigned base_component = 0; @@ -1402,7 +1401,7 @@ emit_inline_vector_constructor(const glsl_type *type, ir_rvalue *rhs = new(ctx) ir_constant(rhs_type, ); ir_instruction *inst = -new(ctx) ir_assignment(lhs, rhs, NULL, constant_mask); +new(ctx) ir_assignment(lhs, rhs, constant_mask); instructions->push_tail(inst); } @@ -1436,7 +1435,7 @@ emit_inline_vector_constructor(const glsl_type *type, new(ctx) ir_swizzle(param, 0, 1, 2, 3, rhs_components); ir_instruction *inst = - new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); + new(ctx) ir_assignment(lhs, rhs, write_mask); instructions->push_tail(inst); } @@ -1487,7 +1486,7 @@ assign_to_matrix_column(ir_variable 
*var, unsigned column, unsigned row_base, /* Mask of fields to be written in the assignment. */ const unsigned write_mask = ((1U << count) - 1) << row_base; - return new(mem_ctx) ir_assignment(column_ref, src, NULL, write_mask); + return new(mem_ctx) ir_assignment(column_ref, src, write_mask); } @@ -1555,7 +1554,7 @@ emit_inline_matrix_constructor(const glsl_type *type, ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); - inst = new(ctx) ir_assignment(rhs_ref, first_param, NULL, 0x01); + inst = new(ctx) ir_assignment(rhs_ref, first_param, 0x01); instructions->push_tail(inst); /* Assign the temporary vector to each column of the destination matrix @@ -1704,7 +1703,7 @@ emit_inline_matrix_constructor(const glsl_type *type, } ir_instruction *inst = -new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); +new(ctx) ir_assignment(lhs, rhs, write_mask); instructions->push_tail(inst); } } else { diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index 49db56e..4cf322d 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -149,14 +149,10 @@ ir_assignment::whole_variable_written() } ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, -ir_rvalue *condition, unsigned write_mask) - : ir_instruction(ir_type_assignment) + unsigned write_mask) + : ir_instruction(ir_type_assignment), lhs(lhs), rhs(rhs), + condition(NULL), write_mask(write_mask) { - this->condition = condition; - this->rhs = rhs; - this->lhs = lhs; - this->write_mask = write_mask; - if (lhs->type->is_scalar() || lhs->type->is_vector()) { int lhs_components = 0; for (int i = 0; i < 4; i++) { @@ -168,13 +164,10 @@ ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, } } -ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, -ir_rvalue *condition) - : ir_instruction(ir_type_assignment) +ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs) + : ir_instruction(ir_type_assignment), lhs(NULL), rhs(rhs), + condition(NULL), 
write_mask(0) { - this->condition = condition; - this->rhs = rhs; - /* If the RHS is a vector type, assume that all components of the vector * type are being written to the LHS. The write mask comes from the
[Mesa-dev] [PATCH 03/22] glsl: Fix coding standards issues in lower_vec_index_to_cond_assign
From: "\"Ian Romanick\""From: Ian Romanick Mostly tabs-before-spaces, but there was some other trivium too. Signed-off-by: Ian Romanick --- .../glsl/lower_vec_index_to_cond_assign.cpp | 28 --- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp index ea8b592..597d852 100644 --- a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp @@ -50,8 +50,9 @@ namespace { class ir_vec_index_to_cond_assign_visitor : public ir_hierarchical_visitor { public: ir_vec_index_to_cond_assign_visitor() + : progress(false) { - progress = false; + /* empty */ } ir_rvalue *convert_vec_index_to_cond_assign(void *mem_ctx, @@ -91,8 +92,8 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ assert(orig_index->type == glsl_type::int_type || orig_index->type == glsl_type::uint_type); index = new(base_ir) ir_variable(orig_index->type, - "vec_index_tmp_i", - ir_var_temporary); +"vec_index_tmp_i", +ir_var_temporary); list.push_tail(index); deref = new(base_ir) ir_dereference_variable(index); assign = new(base_ir) ir_assignment(deref, orig_index, NULL); @@ -108,7 +109,7 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ /* Temporary where we store whichever value we swizzle out. */ var = new(base_ir) ir_variable(type, "vec_index_tmp_v", - ir_var_temporary); + ir_var_temporary); list.push_tail(var); /* Generate a single comparison condition "mask" for all of the components @@ -117,7 +118,7 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ ir_rvalue *const cond_deref = compare_index_block(, index, 0, orig_vector->type->vector_elements, - mem_ctx); + mem_ctx); /* Generate a conditional move of each vector element to the temp. 
*/ for (i = 0; i < orig_vector->type->vector_elements; i++) { @@ -129,8 +130,8 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ * underlying variable. */ ir_rvalue *swizzle = -new(base_ir) ir_swizzle(deref_value->clone(mem_ctx, NULL), -i, 0, 0, 0, 1); + new(base_ir) ir_swizzle(deref_value->clone(mem_ctx, NULL), + i, 0, 0, 0, 1); deref = new(base_ir) ir_dereference_variable(var); assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle); @@ -163,11 +164,8 @@ ir_vec_index_to_cond_assign_visitor::convert_vector_extract_to_cond_assign(ir_rv ir_visitor_status ir_vec_index_to_cond_assign_visitor::visit_enter(ir_expression *ir) { - unsigned int i; - - for (i = 0; i < ir->num_operands; i++) { + for (unsigned i = 0; i < ir->num_operands; i++) ir->operands[i] = convert_vector_extract_to_cond_assign(ir->operands[i]); - } return visit_continue; } @@ -189,9 +187,8 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) { ir->rhs = convert_vector_extract_to_cond_assign(ir->rhs); - if (ir->condition) { + if (ir->condition) ir->condition = convert_vector_extract_to_cond_assign(ir->condition); - } return visit_continue; } @@ -203,7 +200,7 @@ ir_vec_index_to_cond_assign_visitor::visit_enter(ir_call *ir) ir_rvalue *new_param = convert_vector_extract_to_cond_assign(param); if (new_param != param) { -param->replace_with(new_param); + param->replace_with(new_param); } } @@ -213,9 +210,8 @@ ir_vec_index_to_cond_assign_visitor::visit_enter(ir_call *ir) ir_visitor_status ir_vec_index_to_cond_assign_visitor::visit_enter(ir_return *ir) { - if (ir->value) { + if (ir->value) ir->value = convert_vector_extract_to_cond_assign(ir->value); - } return visit_continue; } -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/22] glsl: Eliminate ir_builder assign overloads that have a condition
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/ir_builder.cpp | 16 ++-- src/compiler/glsl/ir_builder.h | 2 -- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/src/compiler/glsl/ir_builder.cpp b/src/compiler/glsl/ir_builder.cpp index 8d61533..fd9881b 100644 --- a/src/compiler/glsl/ir_builder.cpp +++ b/src/compiler/glsl/ir_builder.cpp @@ -46,13 +46,13 @@ ir_factory::make_temp(const glsl_type *type, const char *name) } ir_assignment * -assign(deref lhs, operand rhs, operand condition, int writemask) +assign(deref lhs, operand rhs, int writemask) { void *mem_ctx = ralloc_parent(lhs.val); ir_assignment *assign = new(mem_ctx) ir_assignment(lhs.val, rhs.val, - condition.val, + NULL, writemask); return assign; @@ -64,18 +64,6 @@ assign(deref lhs, operand rhs) return assign(lhs, rhs, (1 << lhs.val->type->vector_elements) - 1); } -ir_assignment * -assign(deref lhs, operand rhs, int writemask) -{ - return assign(lhs, rhs, (ir_rvalue *) NULL, writemask); -} - -ir_assignment * -assign(deref lhs, operand rhs, operand condition) -{ - return assign(lhs, rhs, condition, (1 << lhs.val->type->vector_elements) - 1); -} - ir_return * ret(operand retval) { diff --git a/src/compiler/glsl/ir_builder.h b/src/compiler/glsl/ir_builder.h index ff1ff70..94de1ee 100644 --- a/src/compiler/glsl/ir_builder.h +++ b/src/compiler/glsl/ir_builder.h @@ -122,8 +122,6 @@ public: ir_assignment *assign(deref lhs, operand rhs); ir_assignment *assign(deref lhs, operand rhs, int writemask); -ir_assignment *assign(deref lhs, operand rhs, operand condition); -ir_assignment *assign(deref lhs, operand rhs, operand condition, int writemask); ir_return *ret(operand retval); -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/22] glsl: Don't pass NULL to ir_assignment constructor when not necessary
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/ast_function.cpp| 22 +-- .../glsl/ir_expression_flattening.cpp | 4 +--- src/compiler/glsl/lower_discard.cpp | 4 ++-- src/compiler/glsl/lower_instructions.cpp | 4 ++-- src/compiler/glsl/lower_jumps.cpp | 11 +- .../glsl/lower_texture_projection.cpp | 2 +- src/compiler/glsl/opt_function_inlining.cpp | 11 +- src/compiler/glsl/opt_structure_splitting.cpp | 4 +--- 8 files changed, 27 insertions(+), 35 deletions(-) diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp index d528ecc..46a61e4 100644 --- a/src/compiler/glsl/ast_function.cpp +++ b/src/compiler/glsl/ast_function.cpp @@ -1128,7 +1128,7 @@ process_vec_mat_constructor(exec_list *instructions, if (var->type->is_matrix()) { ir_rvalue *lhs = new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i)); - assignment = new(ctx) ir_assignment(lhs, rhs, NULL); + assignment = new(ctx) ir_assignment(lhs, rhs); } else { /* use writemask rather than index for vector */ assert(var->type->is_vector()); @@ -1264,7 +1264,7 @@ process_array_constructor(exec_list *instructions, ir_rvalue *lhs = new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i)); - ir_instruction *assignment = new(ctx) ir_assignment(lhs, rhs, NULL); + ir_instruction *assignment = new(ctx) ir_assignment(lhs, rhs); instructions->push_tail(assignment); i++; @@ -1549,8 +1549,7 @@ emit_inline_matrix_constructor(const glsl_type *type, ir_instruction *inst = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var), -new(ctx) ir_constant(rhs_var->type, ), -NULL); +new(ctx) ir_constant(rhs_var->type, )); instructions->push_tail(inst); ir_dereference *const rhs_ref = @@ -1583,7 +1582,7 @@ emit_inline_matrix_constructor(const glsl_type *type, ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i], type->vector_elements); - inst = new(ctx) ir_assignment(col_ref, rhs, NULL); + inst = new(ctx) ir_assignment(col_ref, rhs); 
instructions->push_tail(inst); } @@ -1596,7 +1595,7 @@ emit_inline_matrix_constructor(const glsl_type *type, ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1, type->vector_elements); - inst = new(ctx) ir_assignment(col_ref, rhs, NULL); + inst = new(ctx) ir_assignment(col_ref, rhs); instructions->push_tail(inst); } } else if (first_param->type->is_matrix()) { @@ -1650,7 +1649,7 @@ emit_inline_matrix_constructor(const glsl_type *type, ir_rvalue *const lhs = new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col)); -ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL); +ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs); instructions->push_tail(inst); } } @@ -1668,7 +1667,7 @@ emit_inline_matrix_constructor(const glsl_type *type, ir_dereference *const rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); ir_instruction *const inst = - new(ctx) ir_assignment(rhs_var_ref, first_param, NULL); + new(ctx) ir_assignment(rhs_var_ref, first_param); instructions->push_tail(inst); const unsigned last_row = MIN2(src_matrix->type->vector_elements, @@ -1731,7 +1730,7 @@ emit_inline_matrix_constructor(const glsl_type *type, ir_dereference *rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); - ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL); + ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs); instructions->push_tail(inst); do { @@ -1795,8 +1794,7 @@ emit_inline_record_constructor(const glsl_type *type, ir_rvalue *const rhs = ((ir_instruction *) node)->as_rvalue(); assert(rhs != NULL); - ir_instruction *const assign = - new(mem_ctx) ir_assignment(lhs, rhs, NULL); + ir_instruction *const assign = new(mem_ctx) ir_assignment(lhs, rhs); instructions->push_tail(assign); node = node->next; @@ -2158,7 +2156,7 @@ ast_function_expression::hir(exec_list *instructions, instructions->push_tail(var); instructions->push_tail( new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), - matrix, NULL)); +
[Mesa-dev] [PATCH 15/22] glsl/ast: Use logical-or instead of conditional assignment to set fallthru_var
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/ast_to_hir.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 5f751a4..e8bfef7 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -6773,8 +6773,7 @@ ast_case_label::hir(exec_list *instructions, } body.emit(assign(fallthru_var, - body.constant(true), - equal(label, deref_test_var))); + logic_or(fallthru_var, equal(label, deref_test_var)))); } else { /* default case */ if (state->switch_state.previous_default) { YYLTYPE loc = this->get_location(); @@ -6787,8 +6786,9 @@ ast_case_label::hir(exec_list *instructions, state->switch_state.previous_default = this; /* Set fallthru condition on 'run_default' bool. */ - body.emit(assign(fallthru_var, body.constant(true), - state->switch_state.run_default)); + body.emit(assign(fallthru_var, + logic_or(fallthru_var, +state->switch_state.run_default))); } /* Case statements do not have r-values. */ -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 20/22] glsl: Fix indentation left weird from the previous commit
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/opt_dead_code_local.cpp | 168 +++--- src/mesa/program/ir_to_mesa.cpp | 10 +- 2 files changed, 89 insertions(+), 89 deletions(-) diff --git a/src/compiler/glsl/opt_dead_code_local.cpp b/src/compiler/glsl/opt_dead_code_local.cpp index 1de281a..121605d 100644 --- a/src/compiler/glsl/opt_dead_code_local.cpp +++ b/src/compiler/glsl/opt_dead_code_local.cpp @@ -192,93 +192,93 @@ process_assignment(void *lin_ctx, ir_assignment *ir, exec_list *assignments) assert(var); /* Now, check if we did a whole-variable assignment. */ - ir_dereference_variable *deref_var = ir->lhs->as_dereference_variable(); + ir_dereference_variable *deref_var = ir->lhs->as_dereference_variable(); - /* If it's a vector type, we can do per-channel elimination of - * use of the RHS. + /* If it's a vector type, we can do per-channel elimination of +* use of the RHS. +*/ + if (deref_var && (deref_var->var->type->is_scalar() || + deref_var->var->type->is_vector())) { + + if (debug) + printf("looking for %s.0x%01x to remove\n", var->name, +ir->write_mask); + + foreach_in_list_safe(assignment_entry, entry, assignments) { + if (entry->lhs != var) +continue; + + /* Skip if the assignment we're trying to eliminate isn't a plain + * variable deref. */ + if (entry->ir->lhs->ir_type != ir_type_dereference_variable) +continue; + + int remove = entry->unused & ir->write_mask; + if (debug) { +printf("%s 0x%01x - 0x%01x = 0x%01x\n", + var->name, + entry->ir->write_mask, + remove, entry->ir->write_mask & ~remove); + } + if (remove) { +progress = true; + +if (debug) { + printf("rewriting:\n "); + entry->ir->print(); + printf("\n"); +} + +entry->ir->write_mask &= ~remove; +entry->unused &= ~remove; +if (entry->ir->write_mask == 0) { + /* Delete the dead assignment. 
*/ + entry->ir->remove(); + entry->remove(); +} else { + void *mem_ctx = ralloc_parent(entry->ir); + /* Reswizzle the RHS arguments according to the new +* write_mask. +*/ + unsigned components[4]; + unsigned channels = 0; + unsigned next = 0; + + for (int i = 0; i < 4; i++) { + if ((entry->ir->write_mask | remove) & (1 << i)) { + if (!(remove & (1 << i))) +components[channels++] = next; + next++; + } + } + + entry->ir->rhs = new(mem_ctx) ir_swizzle(entry->ir->rhs, +components, +channels); + if (debug) { + printf("to:\n "); + entry->ir->print(); + printf("\n"); + } +} + } + } + } else if (ir->whole_variable_written() != NULL) { + /* We did a whole-variable assignment. So, any instruction in + * the assignment list with the same LHS is dead. */ - if (deref_var && (deref_var->var->type->is_scalar() || - deref_var->var->type->is_vector())) { - -if (debug) - printf("looking for %s.0x%01x to remove\n", var->name, - ir->write_mask); - -foreach_in_list_safe(assignment_entry, entry, assignments) { - if (entry->lhs != var) - continue; - -/* Skip if the assignment we're trying to eliminate isn't a plain - * variable deref. */ -if (entry->ir->lhs->ir_type != ir_type_dereference_variable) - continue; - - int remove = entry->unused & ir->write_mask; - if (debug) { - printf("%s 0x%01x - 0x%01x = 0x%01x\n", - var->name, - entry->ir->write_mask, - remove, entry->ir->write_mask & ~remove); - } - if (remove) { - progress = true; - - if (debug) { - printf("rewriting:\n "); - entry->ir->print(); - printf("\n"); - } - - entry->ir->write_mask &= ~remove; - entry->unused &= ~remove; - if (entry->ir->write_mask == 0) { - /* Delete the dead assignment. */ - entry->ir->remove(); - entry->remove(); - }
[Mesa-dev] [PATCH 13/22] glsl/ast: Explicitly track the set of case labels that occur after default
From: "\"Ian Romanick\""From: Ian Romanick Previously the instruction stream was walked looking for comparisons with case-label values. This should generate nearly identical code. For at least fs-default-notlast-fallthrough.shader_test, the code is identical. This change will make later changes possible. Signed-off-by: Ian Romanick --- src/compiler/glsl/ast_to_hir.cpp | 71 ++-- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 02b0726..1d74e24 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -6376,13 +6376,28 @@ ast_selection_statement::hir(exec_list *instructions, } +struct case_label { + /** Value of the case label. */ + unsigned value; + + /** Does this label occur after the default? */ + bool after_default; + + /** +* AST for the case label. +* +* This is only used to generate error messages for duplicate labels. +*/ + ast_expression *ast; +}; + /* Used for detection of duplicate case values, compare * given contents directly. */ static bool compare_case_value(const void *a, const void *b) { - return *(unsigned *) a == *(unsigned *) b; + return ((struct case_label *) a)->value == ((struct case_label *) b)->value; } @@ -6392,7 +6407,7 @@ compare_case_value(const void *a, const void *b) static unsigned key_contents(const void *key) { - return *(unsigned *) key; + return ((struct case_label *) key)->value; } @@ -6592,24 +6607,26 @@ ast_case_statement_list::hir(exec_list *instructions, return NULL; } - foreach_in_list(ir_instruction, ir, _default) { - ir_assignment *assign = ir->as_assignment(); - - if (!assign) -continue; + struct hash_entry *entry; + ir_factory body(instructions, state); - /* Clone the check between case label and init expression. 
*/ - ir_expression *exp = (ir_expression*) assign->condition; - ir_expression *clone = exp->clone(state, NULL); + hash_table_foreach(state->switch_state.labels_ht, entry) { + const struct case_label *const l = (struct case_label *) entry->data; - ir_dereference_variable *deref_var = -new(state) ir_dereference_variable(state->switch_state.run_default); - ir_rvalue *const false_val = new (state) ir_constant(false); - - ir_assignment *const set_false = -new(state) ir_assignment(deref_var, false_val, clone); - - instructions->push_tail(set_false); + /* If the switch init-value is the value of one of the labels that + * occurs after the default case, disable execution of the default + * case. + */ + if (l->after_default) { +ir_constant *const cnst = + state->switch_state.test_var->type->base_type == GLSL_TYPE_UINT + ? body.constant(unsigned(l->value)) + : body.constant(int(l->value)); + +body.emit(assign(state->switch_state.run_default, + body.constant(false), + equal(cnst, state->switch_state.test_var))); + } } /* Append default case and all cases after it. 
*/ @@ -6682,19 +6699,29 @@ ast_case_label::hir(exec_list *instructions, } else { hash_entry *entry = _mesa_hash_table_search(state->switch_state.labels_ht, - (void *)(uintptr_t)_const->value.u[0]); + _const->value.u[0]); if (entry) { -ast_expression *previous_label = (ast_expression *) entry->data; +const struct case_label *const l = + (struct case_label *) entry->data; +const ast_expression *const previous_label = l->ast; YYLTYPE loc = this->test_value->get_location(); + _mesa_glsl_error(& loc, state, "duplicate case value"); loc = previous_label->get_location(); _mesa_glsl_error(& loc, state, "this is the previous case label"); } else { +struct case_label *l = ralloc(state->switch_state.labels_ht, + struct case_label); + +l->value = label_const->value.u[0]; +l->after_default = state->switch_state.previous_default != NULL; +l->ast = this->test_value; + _mesa_hash_table_insert(state->switch_state.labels_ht, -(void *)(uintptr_t)_const->value.u[0], -this->test_value); +_const->value.u[0], +l); } } -- 2.9.5 ___ mesa-dev mailing list
[Mesa-dev] [PATCH 11/22] glsl/ast: Use ir_binop_equal instead of ir_binop_all_equal
From: "\"Ian Romanick\""From: Ian Romanick The values being compared are scalars, so these are the same. While I'm here, simplify the run_default condition to just deref the flag (instead of comparing a scalar bool with true). There is a bit of extra change in this patch. When constructing an ir_binop_equal ir_expression, there is an assertion that the types are the same. There is no such assertion for ir_binop_all_equal, so passing glsl_type::uint_type with glsl_type::int_type was previously fine. A bunch of the code motion is to deal with that. Signed-off-by: Ian Romanick --- src/compiler/glsl/ast_to_hir.cpp | 39 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index c464549..9d69c13 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -6700,12 +6700,14 @@ ast_case_label::hir(exec_list *instructions, } } - ir_dereference_variable *deref_test_var = - new(ctx) ir_dereference_variable(state->switch_state.test_var); + /* Create an r-value version of the ir_constant label here (after we may + * have created a fake one in error cases) that can be passed to + * apply_implicit_conversion below. + */ + ir_rvalue *label = label_const; - ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, -label_const, -deref_test_var); + ir_rvalue *deref_test_var = + new(ctx) ir_dereference_variable(state->switch_state.test_var); /* * From GLSL 4.40 specification section 6.2 ("Selection"): @@ -6718,10 +6720,10 @@ ast_case_label::hir(exec_list *instructions, * uint (see section 4.1.10 “Implicit Conversions”) before the compare * is done." 
*/ - if (label_const->type != state->switch_state.test_var->type) { + if (label->type != state->switch_state.test_var->type) { YYLTYPE loc = this->test_value->get_location(); - const glsl_type *type_a = label_const->type; + const glsl_type *type_a = label->type; const glsl_type *type_b = state->switch_state.test_var->type; /* Check if int->uint implicit conversion is supported. */ @@ -6738,17 +6740,28 @@ ast_case_label::hir(exec_list *instructions, /* Conversion of the case label. */ if (type_a->base_type == GLSL_TYPE_INT) { if (!apply_implicit_conversion(glsl_type::uint_type, - test_cond->operands[0], state)) + label, state)) _mesa_glsl_error(, state, "implicit type conversion error"); } else { /* Conversion of the init-expression value. */ if (!apply_implicit_conversion(glsl_type::uint_type, - test_cond->operands[1], state)) + deref_test_var, state)) _mesa_glsl_error(, state, "implicit type conversion error"); } } + + /* If the implicit conversion was allowed, the types will already be + * the same. If the implicit conversion wasn't allowed, smash the + * type of the label anyway. This will prevent the expression + * constructor (below) from failing an assertion. + */ + label->type = deref_test_var->type; } + ir_expression *test_cond = new(ctx) ir_expression(ir_binop_equal, +label, +deref_test_var); + ir_assignment *set_fallthru_on_test = new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); @@ -6767,14 +6780,10 @@ ast_case_label::hir(exec_list *instructions, /* Set fallthru condition on 'run_default' bool. */ ir_dereference_variable *deref_run_default = new(ctx) ir_dereference_variable(state->switch_state.run_default); - ir_rvalue *const cond_true = new(ctx) ir_constant(true); - ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, -cond_true, -deref_run_default); - /* Set falltrhu state. */ + /* Set fallthru state. 
*/ ir_assignment *set_fallthru = - new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); + new(ctx) ir_assignment(deref_fallthru_var, true_val, deref_run_default); instructions->push_tail(set_fallthru); } -- 2.9.5 ___ mesa-dev mailing list
[Mesa-dev] [PATCH 09/22] glsl: Lower array indexing to conditional-select instead of conditional-assign
From: "\"Ian Romanick\""From: Ian Romanick Signed-off-by: Ian Romanick --- src/compiler/glsl/lower_variable_index_to_cond_assign.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp index 6fe4fe6..bfe6242 100644 --- a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -208,8 +208,8 @@ struct assignment_generator * array dereference. */ ir_assignment *const assignment = (is_write) - ? assign(element, this->var, condition, write_mask) - : assign(this->var, element, condition); + ? assign(element, csel(condition, this->var, element), write_mask) + : assign(this->var, csel(condition, element, this->var)); body.emit(assignment); } -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 21/22] glsl: Remove spurious assertions
From: "\"Ian Romanick\""From: Ian Romanick It's inside an if-statement that already checks that the variables are not NULL. Signed-off-by: Ian Romanick --- src/compiler/glsl/opt_copy_propagation.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/compiler/glsl/opt_copy_propagation.cpp b/src/compiler/glsl/opt_copy_propagation.cpp index 301a2af..ec8a0ed 100644 --- a/src/compiler/glsl/opt_copy_propagation.cpp +++ b/src/compiler/glsl/opt_copy_propagation.cpp @@ -352,8 +352,6 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir) if (lhs_var->data.mode != ir_var_shader_storage && lhs_var->data.mode != ir_var_shader_shared && lhs_var->data.precise == rhs_var->data.precise) { - assert(lhs_var); - assert(rhs_var); _mesa_hash_table_insert(acp, lhs_var, rhs_var); } } -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] glsl: Silence unused parameter warnings
On 09/20/2017 09:15 PM, Timothy Arceri wrote: > Fix sent: > > https://lists.freedesktop.org/archives/mesa-dev/2017-September/170304.html Ugh... which I already deleted from my inbox due to the tag. :( I'll bet this fixes https://bugs.freedesktop.org/show_bug.cgi?id=102910. That patch is Reviewed-by: Ian Romanick> On 21/09/17 11:09, Timothy Arceri wrote: >> Hi Ian, >> >> This series causes massive memory use in the glsl_to_tgsi pass for >> gallium drivers. For example with the following test: >> >> ./bin/shader_runner_gles3 >> tests/spec/glsl-es-3.00/execution/varying-struct-copy-return-vs.shader_test >> -auto >> >> >> I'm not sure whats going on yet because it crashes my machine so >> quickly. It seems like it may be getting stuck somewhere in this code: >> >> ==24131==by 0xB5E9A6F: >> glsl_to_tgsi_visitor::emit_asm(ir_instruction*, unsigned int, >> st_dst_reg, st_dst_reg, st_src_reg, st_src_reg, st_src_reg, >> st_src_reg) (st_glsl_to_tgsi.cpp:397) >> ==24131==by 0xB5EC9E6: emit_asm (st_glsl_to_tgsi.cpp:612) >> ==24131==by 0xB5EC9E6: glsl_to_tgsi_visitor::visit(ir_constant*) >> (st_glsl_to_tgsi.cpp:3058) >> ==24131==by 0xB5F51C3: glsl_to_tgsi_visitor::visit(ir_assignment*) >> (st_glsl_to_tgsi.cpp:2932) >> ==24131==by 0xB5E45D3: glsl_to_tgsi_visitor::visit(ir_function*) >> (st_glsl_to_tgsi.cpp:1185) >> ==24131==by 0xB6BD2D0: visit_exec_list(exec_list*, ir_visitor*) >> (ir.cpp:1900) >> ==24131==by 0xB5FE312: get_mesa_program_tgsi >> (st_glsl_to_tgsi.cpp:6559) >> ==24131==by 0xB5FE312: st_link_shader (st_glsl_to_tgsi.cpp:6906) >> ==24131==by 0xB622A0F: _mesa_glsl_link_shader (ir_to_mesa.cpp:3118) >> ==24131==by 0xB52D7CA: link_program (shaderapi.c:1171) >> ==24131==by 0xB52D7CA: link_program_error (shaderapi.c:1249) >> ==24131==by 0x4F0B65A: stub_glLinkProgram >> (piglit-dispatch-gen.c:33822) >> ==24131==by 0x407A67: link_and_use_shaders (shader_runner.c:1086) >> ==24131==by 0x410601: init_test (shader_runner.c:3855) >> >> >> Tim >> >> On 13/09/17 02:41, Ian Romanick 
wrote: >>> From: Ian Romanick >>> >>> glsl/ast_type.cpp: In function ‘void >>> merge_bindless_qualifier(YYLTYPE*, _mesa_glsl_parse_state*, const >>> ast_type_qualifier&, const ast_type_qualifier&)’: >>> glsl/ast_type.cpp:189:35: warning: unused parameter ‘loc’ >>> [-Wunused-parameter] >>> merge_bindless_qualifier(YYLTYPE *loc, >>> ^~~ >>> glsl/ast_type.cpp:191:52: warning: unused parameter ‘qualifier’ >>> [-Wunused-parameter] >>>const ast_type_qualifier , >>> ^ >>> glsl/ast_type.cpp:192:52: warning: unused parameter ‘new_qualifier’ >>> [-Wunused-parameter] >>>const ast_type_qualifier _qualifier) >>> ^ >>> >>> glsl/ir_constant_expression.cpp: In member function ‘virtual >>> ir_constant* ir_rvalue::constant_expression_value(void*, hash_table*)’: >>> glsl/ir_constant_expression.cpp:512:44: warning: unused parameter >>> ‘mem_ctx’ [-Wunused-parameter] >>> ir_rvalue::constant_expression_value(void *mem_ctx, struct >>> hash_table *) >>> ^~~ >>> glsl/ir_constant_expression.cpp: In member function ‘virtual >>> ir_constant* ir_texture::constant_expression_value(void*, hash_table*)’: >>> glsl/ir_constant_expression.cpp:705:45: warning: unused parameter >>> ‘mem_ctx’ [-Wunused-parameter] >>> ir_texture::constant_expression_value(void *mem_ctx, struct >>> hash_table *) >>> ^~~ >>> glsl/ir_constant_expression.cpp: In member function ‘virtual >>> ir_constant* ir_assignment::constant_expression_value(void*, >>> hash_table*)’: >>> glsl/ir_constant_expression.cpp:851:48: warning: unused parameter >>> ‘mem_ctx’ [-Wunused-parameter] >>> ir_assignment::constant_expression_value(void *mem_ctx, struct >>> hash_table *) >>> ^~~ >>> glsl/ir_constant_expression.cpp: In member function ‘virtual >>> ir_constant* ir_constant::constant_expression_value(void*, >>> hash_table*)’: >>> glsl/ir_constant_expression.cpp:859:46: warning: unused parameter >>> ‘mem_ctx’ [-Wunused-parameter] >>> ir_constant::constant_expression_value(void *mem_ctx, struct >>> hash_table *) >>>^~~ >>> >>> glsl/linker.cpp: 
In function ‘void >>> link_xfb_stride_layout_qualifiers(gl_context*, gl_shader_program*, >>> gl_linked_shader*, gl_shader**, unsigned int)’: >>> glsl/linker.cpp:1655:60: warning: unused parameter ‘linked_shader’ >>> [-Wunused-parameter] >>> struct gl_linked_shader >>> *linked_shader, >>> >>> ^ >>> glsl/linker.cpp: In function ‘void >>>
[Mesa-dev] [PATCH v2] Android: move libraries to /vendor
As part of Treble project in Android O, all the device specific files have to be located in a separate vendor partition. This is done by setting LOCAL_PROPRIETARY_MODULE (the name is misleading). This change will not break existing platforms without a vendor partition as it will just move files to /system/vendor. Signed-off-by: Rob Herring--- v2: - Set LOCAL_PROPRIETARY_MODULE globally. Thanks Tapani. Android.common.mk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Android.common.mk b/Android.common.mk index 6f70dd94a1f5..3447d34fd692 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -101,8 +101,9 @@ LOCAL_CFLAGS += -DHAVE_LIBDRM LOCAL_SHARED_LIBRARIES += libdrm endif -LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/system/lib/$(MESA_DRI_MODULE_REL_PATH)\" -LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/system/lib64/$(MESA_DRI_MODULE_REL_PATH)\" +LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib/$(MESA_DRI_MODULE_REL_PATH)\" +LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib64/$(MESA_DRI_MODULE_REL_PATH)\" +LOCAL_PROPRIETARY_MODULE := true # uncomment to keep the debug symbols #LOCAL_STRIP_MODULE := false -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] vulkan: enum generator: align function declarations/prototypes
From: Lionel LandwerlinSigned-off-by: Lionel Landwerlin Acked-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 42 +++--- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index df326d0..aa7001e 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -64,27 +64,27 @@ C_TEMPLATE = Template(textwrap.dedent(u"""\ % for enum in enums: -const char * -vk_${enum.name[2:]}_to_str(${enum.name} input) -{ -switch(input) { -% for v in enum.values: -% if v in FOREIGN_ENUM_VALUES: - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wswitch" -% endif -case ${v}: -return "${v}"; -% if v in FOREIGN_ENUM_VALUES: -#pragma GCC diagnostic pop - -% endif -% endfor -default: -unreachable("Undefined enum value."); -} +const char * +vk_${enum.name[2:]}_to_str(${enum.name} input) +{ +switch(input) { +% for v in enum.values: +% if v in FOREIGN_ENUM_VALUES: + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +% endif +case ${v}: +return "${v}"; +% if v in FOREIGN_ENUM_VALUES: +#pragma GCC diagnostic pop + +% endif +% endfor +default: +unreachable("Undefined enum value."); } +} %endfor"""), output_encoding='utf-8') @@ -102,7 +102,7 @@ H_TEMPLATE = Template(textwrap.dedent(u"""\ #include % for enum in enums: -const char * vk_${enum.name[2:]}_to_str(${enum.name} input); +const char * vk_${enum.name[2:]}_to_str(${enum.name} input); % endfor #endif"""), -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] vulkan: enum generator: sort enums by names
From: Lionel LandwerlinSigned-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index aa7001e..efe5d4f 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -185,13 +185,14 @@ def main(): efactory = EnumFactory(VkEnum) for filename in args.xml_files: parse_xml(efactory, filename) +enums=sorted(efactory.registry.values(), key=lambda e: e.name) for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.c')), (H_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.h'))]: with open(file_, 'wb') as f: f.write(template.render( file=os.path.basename(__file__), -enums=efactory.registry.values(), +enums=enums, copyright=COPYRIGHT, FOREIGN_ENUM_VALUES=FOREIGN_ENUM_VALUES)) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] vulkan: enum generator: make registry more flexible
From: Lionel LandwerlinIt will be used to store extension numbers as well. Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index efe5d4f..5281e89 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -115,18 +115,18 @@ FOREIGN_ENUM_VALUES = [ ] -class EnumFactory(object): +class NamedFactory(object): """Factory for creating enums.""" def __init__(self, type_): self.registry = {} self.type = type_ -def __call__(self, name): +def __call__(self, *args): try: -return self.registry[name] +return self.registry[args[0]] except KeyError: -n = self.registry[name] = self.type(name) +n = self.registry[args[0]] = self.type(*args) return n @@ -138,7 +138,7 @@ class VkEnum(object): self.values = values or [] -def parse_xml(efactory, filename): +def parse_xml(enum_factory, filename): """Parse the XML file. Accumulate results into the efactory. 
This parser is a memory efficient iterative XML parser that returns a list @@ -157,15 +157,15 @@ def parse_xml(efactory, filename): if event == 'end' and elem.tag == 'enums': type_ = elem.attrib.get('type') if type_ == 'enum': -enum = efactory(elem.attrib['name']) +enum = enum_factory(elem.attrib['name']) enum.values.extend([e.attrib['name'] for e in elem if e.tag == 'enum']) elif event == 'end' and elem.tag == 'extension': if elem.attrib['supported'] != 'vulkan': continue for e in elem.findall('.//enum[@extends][@offset]'): -enum = efactory(e.attrib['extends']) -enum.values.append(e.attrib['name']) +enum = enum_factory(e.attrib['extends']) +enum.values.append(e.attrib['name'],) root.clear() @@ -182,10 +182,10 @@ def main(): args = parser.parse_args() -efactory = EnumFactory(VkEnum) +enum_factory = NamedFactory(VkEnum) for filename in args.xml_files: -parse_xml(efactory, filename) -enums=sorted(efactory.registry.values(), key=lambda e: e.name) +parse_xml(enum_factory, filename) +enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.c')), (H_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.h'))]: -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] vulkan: enum generator: generate extension number defines
From: Lionel Landwerlin New extensions can introduce additional enums. Most of the new enums will have disjoint numbers from the initial enums. For example, new formats introduced by VK_IMG_format_pvrtc: VK_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, VK_FORMAT_ASTC_10x8_SRGB_BLOCK = 178, VK_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, VK_FORMAT_ASTC_10x10_SRGB_BLOCK = 180, VK_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG = 154000, VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG = 154001, VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG = 154002, VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG = 154003, VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG = 154004, VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG = 154005, VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG = 154006, VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG = 154007, It's obvious we can't have a single table for handling those anymore. Fortunately the enum values actually contain the number of the extension that introduced the new enums. So we can build an indirection table off the extension number and then index by subtracting the first enum of the format enum value. This change makes the extension number available in the generated enum code. 
Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand --- src/vulkan/util/gen_enum_to_str.py | 24 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index 5281e89..8f32102 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -101,6 +101,10 @@ H_TEMPLATE = Template(textwrap.dedent(u"""\ #include #include +% for ext in extensions: +#define _${ext.name}_number (${ext.number}) +% endfor + % for enum in enums: const char * vk_${enum.name[2:]}_to_str(${enum.name} input); % endfor @@ -130,6 +134,14 @@ class NamedFactory(object): return n +class VkExtension(object): +"""Simple struct-like class representing extensions""" + +def __init__(self, name, number): +self.name = name +self.number = number + + class VkEnum(object): """Simple struct-like class representing a single Vulkan Enum.""" @@ -138,8 +150,8 @@ class VkEnum(object): self.values = values or [] -def parse_xml(enum_factory, filename): -"""Parse the XML file. Accumulate results into the efactory. +def parse_xml(enum_factory, ext_factory, filename): +"""Parse the XML file. Accumulate results into the factories. This parser is a memory efficient iterative XML parser that returns a list of VkEnum objects. 
@@ -160,6 +172,8 @@ def parse_xml(enum_factory, filename): enum = enum_factory(elem.attrib['name']) enum.values.extend([e.attrib['name'] for e in elem if e.tag == 'enum']) +elif event == 'start' and elem.tag == 'extension': +ext_factory(elem.attrib['name'], int(elem.attrib['number'])) elif event == 'end' and elem.tag == 'extension': if elem.attrib['supported'] != 'vulkan': continue @@ -169,7 +183,6 @@ def parse_xml(enum_factory, filename): root.clear() - def main(): parser = argparse.ArgumentParser() parser.add_argument('--xml', required=True, @@ -183,9 +196,11 @@ def main(): args = parser.parse_args() enum_factory = NamedFactory(VkEnum) +ext_factory = NamedFactory(VkExtension) for filename in args.xml_files: -parse_xml(enum_factory, filename) +parse_xml(enum_factory, ext_factory, filename) enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) +extensions=sorted(ext_factory.registry.values(), key=lambda e: e.name) for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.c')), (H_TEMPLATE, os.path.join(args.outdir, 'vk_enum_to_str.h'))]: @@ -193,6 +208,7 @@ def main(): f.write(template.render( file=os.path.basename(__file__), enums=enums, +extensions=extensions, copyright=COPYRIGHT, FOREIGN_ENUM_VALUES=FOREIGN_ENUM_VALUES)) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] vulkan: enum generator: Generate entries for extended enums
--- src/vulkan/util/gen_enum_to_str.py | 50 +++--- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index e5f8964..57d12e6 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -68,15 +68,15 @@ C_TEMPLATE = Template(textwrap.dedent(u"""\ vk_${enum.name[2:]}_to_str(${enum.name} input) { switch(input) { -% for v in enum.values: -% if v in FOREIGN_ENUM_VALUES: +% for v in sorted(enum.values.keys()): +% if enum.values[v] in FOREIGN_ENUM_VALUES: #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch" % endif case ${v}: -return "${v}"; -% if v in FOREIGN_ENUM_VALUES: +return "${enum.values[v]}"; +% if enum.values[v] in FOREIGN_ENUM_VALUES: #pragma GCC diagnostic pop % endif @@ -133,6 +133,9 @@ class NamedFactory(object): n = self.registry[args[0]] = self.type(*args) return n +def get(self, name): +return self.registry.get(name) + class VkExtension(object): """Simple struct-like class representing extensions""" @@ -147,7 +150,20 @@ class VkEnum(object): def __init__(self, name, values=None): self.name = name -self.values = values or [] +# Maps numbers to names +self.values = values or dict() + +def add_value(self, name, value=None, + extension=None, offset=None, + error=False): +assert value is not None or extension is not None +if value is None: +value = 10 + (extension.number - 1) * 1000 + offset +if error: +value = -value + +if value not in self.values: +self.values[value] = name def parse_xml(enum_factory, ext_factory, filename): @@ -165,14 +181,30 @@ def parse_xml(enum_factory, ext_factory, filename): enum = enum_factory(enum_type.attrib['name']) for value in enum_type.findall('./enum'): -enum.values.append(value.attrib['name']) +enum.add_value(value.attrib['name'], + value=int(value.attrib['value'])) -for ext_elem in xml.findall('./extension'): +for ext_elem in xml.findall('./extensions/extension'): if ext_elem.attrib['supported'] 
!= 'vulkan': continue -ext_factory(ext_elem.attrib['name'], -int(ext_elem.attrib['number'])) +extension = ext_factory(ext_elem.attrib['name'], +int(ext_elem.attrib['number'])) + +for value in ext_elem.findall('./require/enum[@extends]'): +enum = enum_factory.get(value.attrib['extends']) +if enum is None: +continue +if 'value' in value.attrib: +enum.add_value(value.attrib['name'], + value=int(value.attrib['value'])) +else: +error = 'dir' in value.attrib and value.attrib['dir'] == '-' +enum.add_value(value.attrib['name'], + extension=extension, + offset=int(value.attrib['offset']), + error=error) + def main(): parser = argparse.ArgumentParser() -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102891] [radv] glitches on rpcs3 emulator (green zones)
https://bugs.freedesktop.org/show_bug.cgi?id=102891 --- Comment #5 from Bas Nieuwenhuizen--- IIRC that SpvCapabilityImageMSArray warning is always there with renderdoc though, so probably a red herring. -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] vulkan: enum generator: Stop using iterparse
While using iterparse is potentially a little more efficient, the Vulkan registry XML is not large and using regular element tree simplifies the parsing logic substantially. --- src/vulkan/util/gen_enum_to_str.py | 41 +++--- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/src/vulkan/util/gen_enum_to_str.py b/src/vulkan/util/gen_enum_to_str.py index 8f32102..e5f8964 100644 --- a/src/vulkan/util/gen_enum_to_str.py +++ b/src/vulkan/util/gen_enum_to_str.py @@ -157,31 +157,22 @@ def parse_xml(enum_factory, ext_factory, filename): of VkEnum objects. """ -with open(filename, 'rb') as f: -context = iter(et.iterparse(f, events=('start', 'end'))) - -# This gives the root element, since goal is to iterate over the -# elements without building a tree, this allows the root to be cleared -# (erase the elements) after the children have been processed. -_, root = next(context) - -for event, elem in context: -if event == 'end' and elem.tag == 'enums': -type_ = elem.attrib.get('type') -if type_ == 'enum': -enum = enum_factory(elem.attrib['name']) -enum.values.extend([e.attrib['name'] for e in elem -if e.tag == 'enum']) -elif event == 'start' and elem.tag == 'extension': -ext_factory(elem.attrib['name'], int(elem.attrib['number'])) -elif event == 'end' and elem.tag == 'extension': -if elem.attrib['supported'] != 'vulkan': -continue -for e in elem.findall('.//enum[@extends][@offset]'): -enum = enum_factory(e.attrib['extends']) -enum.values.append(e.attrib['name'],) - -root.clear() +xml = et.parse(filename) + +for enum_type in xml.findall('./enums'): +if enum_type.attrib.get('type') != 'enum': +continue + +enum = enum_factory(enum_type.attrib['name']) +for value in enum_type.findall('./enum'): +enum.values.append(value.attrib['name']) + +for ext_elem in xml.findall('./extension'): +if ext_elem.attrib['supported'] != 'vulkan': +continue + +ext_factory(ext_elem.attrib['name'], +int(ext_elem.attrib['number'])) def main(): parser = argparse.ArgumentParser() -- 
2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] mesa: GL_TEXTURE_BORDER_COLOR exists in OpenGL 1.0, so don't depend on GL_ARB_texture_border_clamp
On 09/20/2017 03:12 AM, Juan A. Suarez Romero wrote: > On Sat, 2017-07-08 at 02:03 +0300, Andres Gomez wrote: >> Ian, it looks like we could want this patch (and the others from the >> series when they land) in -stable (?) >> > > As we are preparing a new stable 17.1 release, gently pinging. > I completely forgot about this series... thanks for the reminder. :) > J.A. > >> On Tue, 2017-06-27 at 10:09 -0700, Ian Romanick wrote: >>> From: Ian Romanick>>> >>> On NV20 (and probably also on earlier NV GPUs that lack >>> GL_ARB_texture_border_clamp) fixes the following piglit tests: >>> >>> gl-1.0-beginend-coverage gltexparameter[if]{v,} >>> push-pop-texture-state >>> texwrap 1d >>> texwrap 1d proj >>> texwrap 2d proj >>> texwrap formats >>> >>> All told, 49 more tests pass on NV20 (10de:0201). >>> >>> No changes on Intel CI run or RV250 (1002:4c66). >>> >>> Signed-off-by: Ian Romanick >>> --- >>> src/mesa/main/texparam.c | 10 +- >>> 1 file changed, 9 insertions(+), 1 deletion(-) >>> >>> diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c >>> index 3c110de..857faf6 100644 >>> --- a/src/mesa/main/texparam.c >>> +++ b/src/mesa/main/texparam.c >>> @@ -736,8 +736,16 @@ set_tex_parameterf(struct gl_context *ctx, >>>break; >>> >>> case GL_TEXTURE_BORDER_COLOR: >>> + /* Border color exists in desktop OpenGL since 1.0 for GL_CLAMP. In >>> + * OpenGL ES 2.0+, it only exists in when >>> GL_OES_texture_border_clamp is >>> + * enabled. It is never available in OpenGL ES 1.x. >>> + * >>> + * FIXME: Every driver that supports GLES2 has this extension. Elide >>> + * the check? >>> + */ >>>if (ctx->API == API_OPENGLES || >>> - !ctx->Extensions.ARB_texture_border_clamp) >>> + (ctx->API == API_OPENGLES2 && >>> + !ctx->Extensions.ARB_texture_border_clamp)) >>> goto invalid_pname; >>> >>>if (!_mesa_target_allows_setting_sampler_parameters(texObj->Target)) > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 19/22] glsl: Kill ir_assignment::condition with fire
And I just pushed an update to my branch that rebases this on the merge of loop_analysis.cpp and loop_controls.cpp. On 09/21/2017 09:34 AM, Ian Romanick wrote: > From: "\"Ian Romanick\""> > From: Ian Romanick > > Signed-off-by: Ian Romanick > --- > src/compiler/glsl/glsl_to_nir.cpp | 16 ++ > src/compiler/glsl/ir.cpp | 5 ++--- > src/compiler/glsl/ir.h| 6 -- > .../glsl/ir_builder_print_visitor.cpp | 4 > src/compiler/glsl/ir_clone.cpp| 2 -- > src/compiler/glsl/ir_constant_expression.cpp | 12 +-- > src/compiler/glsl/ir_hv_accept.cpp| 3 --- > src/compiler/glsl/ir_print_visitor.cpp| 3 --- > src/compiler/glsl/ir_rvalue_visitor.cpp | 2 -- > src/compiler/glsl/loop_analysis.cpp | 3 +-- > src/compiler/glsl/loop_controls.cpp | 2 +- > src/compiler/glsl/lower_distance.cpp | 2 +- > .../lower_variable_index_to_cond_assign.cpp | 17 +-- > .../glsl/lower_vec_index_to_cond_assign.cpp | 4 > src/compiler/glsl/opt_array_splitting.cpp | 7 --- > src/compiler/glsl/opt_constant_folding.cpp| 17 --- > .../glsl/opt_constant_propagation.cpp | 3 --- > src/compiler/glsl/opt_constant_variable.cpp | 3 --- > src/compiler/glsl/opt_copy_propagation.cpp| 3 --- > .../glsl/opt_copy_propagation_elements.cpp| 3 --- > .../glsl/opt_dead_builtin_varyings.cpp| 1 - > src/compiler/glsl/opt_dead_code_local.cpp | 21 +++ > src/compiler/glsl/opt_structure_splitting.cpp | 7 ++- > src/compiler/glsl/opt_tree_grafting.cpp | 3 +-- > src/compiler/glsl/opt_vectorize.cpp | 3 +-- > src/mesa/program/ir_to_mesa.cpp | 16 -- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp| 17 ++- > 27 files changed, 26 insertions(+), 159 deletions(-) > > diff --git a/src/compiler/glsl/glsl_to_nir.cpp > b/src/compiler/glsl/glsl_to_nir.cpp > index 99df6e0..57b832f 100644 > --- a/src/compiler/glsl/glsl_to_nir.cpp > +++ b/src/compiler/glsl/glsl_to_nir.cpp > @@ -1253,13 +1253,7 @@ nir_visitor::visit(ir_assignment *ir) >copy->variables[0] = evaluate_deref(>instr, ir->lhs); >copy->variables[1] = evaluate_deref(>instr, ir->rhs); > > - if (ir->condition) { > 
- nir_push_if(, evaluate_rvalue(ir->condition)); > - nir_builder_instr_insert(, >instr); > - nir_pop_if(, NULL); > - } else { > - nir_builder_instr_insert(, >instr); > - } > + nir_builder_instr_insert(, >instr); >return; > } > > @@ -1290,13 +1284,7 @@ nir_visitor::visit(ir_assignment *ir) > store->variables[0] = nir_deref_var_clone(lhs_deref, store); > store->src[0] = nir_src_for_ssa(src); > > - if (ir->condition) { > - nir_push_if(, evaluate_rvalue(ir->condition)); > - nir_builder_instr_insert(, >instr); > - nir_pop_if(, NULL); > - } else { > - nir_builder_instr_insert(, >instr); > - } > + nir_builder_instr_insert(, >instr); > } > > /* > diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp > index 4cf322d..52f9133 100644 > --- a/src/compiler/glsl/ir.cpp > +++ b/src/compiler/glsl/ir.cpp > @@ -151,7 +151,7 @@ ir_assignment::whole_variable_written() > ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, > unsigned write_mask) > : ir_instruction(ir_type_assignment), lhs(lhs), rhs(rhs), > - condition(NULL), write_mask(write_mask) > + write_mask(write_mask) > { > if (lhs->type->is_scalar() || lhs->type->is_vector()) { >int lhs_components = 0; > @@ -165,8 +165,7 @@ ir_assignment::ir_assignment(ir_dereference *lhs, > ir_rvalue *rhs, > } > > ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs) > - : ir_instruction(ir_type_assignment), lhs(NULL), rhs(rhs), > - condition(NULL), write_mask(0) > + : ir_instruction(ir_type_assignment), lhs(NULL), rhs(rhs), write_mask(0) > { > /* If the RHS is a vector type, assume that all components of the vector > * type are being written to the LHS. The write mask comes from the RHS > diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h > index 28a356a..d7f8630 100644 > --- a/src/compiler/glsl/ir.h > +++ b/src/compiler/glsl/ir.h > @@ -1475,12 +1475,6 @@ public: > ir_rvalue *rhs; > > /** > -* Optional condition for the assignment. 
> -*/ > - ir_rvalue *condition; > - > - > - /** > * Component mask written > * > * For non-vector types in the LHS, this field will be zero. For vector > diff --git a/src/compiler/glsl/ir_builder_print_visitor.cpp > b/src/compiler/glsl/ir_builder_print_visitor.cpp > index 3e30c5d..1f29562 100644 > --- a/src/compiler/glsl/ir_builder_print_visitor.cpp > +++
Re: [Mesa-dev] [PATCH v2] Android: move libraries to /vendor
Reviewed-by: Tapani Pälli On 09/21/2017 06:28 PM, Rob Herring wrote: As part of Treble project in Android O, all the device specific files have to be located in a separate vendor partition. This is done by setting LOCAL_PROPRIETARY_MODULE (the name is misleading). This change will not break existing platforms without a vendor partition as it will just move files to /system/vendor. Signed-off-by: Rob Herring --- v2: - Set LOCAL_PROPRIETARY_MODULE globally. Thanks Tapani. Android.common.mk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Android.common.mk b/Android.common.mk index 6f70dd94a1f5..3447d34fd692 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -101,8 +101,9 @@ LOCAL_CFLAGS += -DHAVE_LIBDRM LOCAL_SHARED_LIBRARIES += libdrm endif -LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/system/lib/$(MESA_DRI_MODULE_REL_PATH)\" -LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/system/lib64/$(MESA_DRI_MODULE_REL_PATH)\" +LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib/$(MESA_DRI_MODULE_REL_PATH)\" +LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib64/$(MESA_DRI_MODULE_REL_PATH)\" +LOCAL_PROPRIETARY_MODULE := true # uncomment to keep the debug symbols #LOCAL_STRIP_MODULE := false ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] amd/addrlib: fix missing va_end() after va_copy()
On 21.09.2017 12:03, Eric Engestrom wrote: Hmm, just noticed the title should be fixed. Something like this? amd/addrlib: drop unnecessary va_copy() Makes sense, but I already pushed it... On Wednesday, 2017-09-20 14:48:46 +, Nicolai Hähnle wrote: From: Nicolai Hähnle There's no reason to use va_copy here. CID: 1418113 --- I have a slight preference for this variant. -- src/amd/addrlib/core/addrobject.cpp | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/amd/addrlib/core/addrobject.cpp b/src/amd/addrlib/core/addrobject.cpp index dcdb1bffc2b..452feb5fac0 100644 --- a/src/amd/addrlib/core/addrobject.cpp +++ b/src/amd/addrlib/core/addrobject.cpp @@ -209,29 +209,25 @@ VOID Object::operator delete( */ VOID Object::DebugPrint( const CHAR* pDebugString, ///< [in] Debug string ... ) const { #if DEBUG if (m_client.callbacks.debugPrint != NULL) { -va_list ap; - -va_start(ap, pDebugString); - ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); debugPrintInput.pDebugString = const_cast (pDebugString); debugPrintInput.hClient = m_client.handle; -va_copy(debugPrintInput.ap, ap); +va_start(debugPrintInput.ap, pDebugString); m_client.callbacks.debugPrint(); -va_end(ap); +va_end(debugPrintInput.ap); } #endif } } // Addr -- 2.11.0 -- Lerne, wie die Welt wirklich ist, Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/14] Patches for VA-API State Tracker Postproc
On 21/09/17 03:17, Leo Liu wrote: > On 09/20/2017 06:11 PM, Mark Thompson wrote: >> On 19/09/17 20:04, Leo Liu wrote: >>> This series are for VA-API State Tracker Postproc, including: >>> >>> Deinterlacing I video for transcode; >>> Scaling support in postproc for transcode; >>> Frame grabber in postproc >>> >>> Thanks Andy Furnissfor lots of testing on these. >>> >>> Leo Liu (14): >>> st/va/postproc: use video original size for postprocessing >>> vl/compositor: separate YUV part from shader video buffer function >>> vl/compositor: extend YUV deint function to do field deint >>> vl/compositor: add a new function for YUV deint >>> st/omx: use new vl_compositor_yuv_deint_full() to deint >>> st/va: use new vl_compositor_yuv_deint_full() to deint >>> vl/compositor: remove vl_compositor_yuv_deint() function >>> vl/compositor: add Bob top and bottom to YUV deint function >>> st/va/postproc: add a full NV12 deint support from buffer I to P >>> st/va: make internal func vlVaHandleSurfaceAllocate() call simpler >>> st/va/postproc: use progressive target buffer for scaling >>> vl/compositor: create RGB to YUV fragment shader >>> vl/compositor: convert RGB buffer to YUV with color conversion >>> st/va/postproc: implement the DRM prime grabber >>> >>> src/gallium/auxiliary/vl/vl_compositor.c | 263 >>> +- >>> src/gallium/auxiliary/vl/vl_compositor.h | 50 +++- >>> src/gallium/state_trackers/omx_bellagio/vid_dec.c | 11 +- >>> src/gallium/state_trackers/va/picture.c | 16 +- >>> src/gallium/state_trackers/va/postproc.c | 69 +- >>> src/gallium/state_trackers/va/surface.c | 7 +- >>> src/gallium/state_trackers/va/va_private.h | 2 +- >>> 7 files changed, 331 insertions(+), 87 deletions(-) >>> >> Looks good for import from a bit of testing so far (with the update today). >> >> >> Something funny going on with RGB upload cases? 
With ffmpeg: >> >> ./ffmpeg_g -y -i in.mp4 -an -vaapi_device /dev/dri/renderD129 -vf >> format=bgr0,hwupload,scale_vaapi=w=1920:h=1080:format=nv12 -c:v h264_vaapi >> -profile:v 578 -bf 0 out.mp4 >> >> it crashes a few lines into copying to the image. >> >> The mapping in vlVaMapBuffer() looks like: >> >> (gdb) p *buf->derived_surface.resource >> $9 = {reference = {count = 5}, screen = 0x57829010, width0 = 1920, >> height0 = 1088, depth0 = 1, array_size = 1, format = >> PIPE_FORMAT_B8G8R8X8_UNORM, target = PIPE_TEXTURE_2D, last_level = 0, >> nr_samples = 0, usage = 0, bind = 2097152, flags = 0, next = 0x0} >> (gdb) p *buf->derived_surface.transfer >> $8 = {resource = 0x57d8e2c0, level = 0, usage = PIPE_TRANSFER_WRITE, box >> = {x = 0, y = 0, z = 0, width = 1920, height = 1, depth = 1}, stride = 7680, >> layer_stride = 7680} >> >> height = 1 looks suspicious, like it's only mapping the first line? > Looks like the command line crashed at some point where is before you would > to go. i.e RGB->YUV in postproc. I'm not quite understanding what you mean. Do you crash at a different point rather than in the copy after mapping the the image to upload to? Backtrace? >> A general question for the whole driver: why are surfaces interlaced by >> default? > I think it's firmware preferred, and they are also good for deinterlacing. Can you be more specific? I agree that it is required for deinterlacing, but that isn't a particularly common case and will only become less so with time. E.g. is it somehow better to decode even progressive video to interlaced frames? That seems like it would have significantly worse locality of reference to me, but maybe the hardware does something special. >> >> I may be getting some things wrong here, but the relevant components which >> deal with surfaces that I see are: >> H >> * Decoder: can write either format, the stream type doesn't seem to matter >> (?). > Normally, HW decoder write to NV12, P016, and for Mjpeg it can do YUYV as > well. 
Stream type depends on codecs HW supports All in interlaced and progressive forms? I didn't consider it earlier, but the H.265 decoder seems to always produce progressive for me. >> * Encoder: can only accept progressive surfaces. >> * Deinterlacer: only works on interlaced surfaces (?). > Yes, if you would like to have a pretty picture for 'deinterlace_vappi=mode=3' >> * Scaler: can work on either. >> * Import: will pretty much always be progressive unless forced not to be >> (noone is going to make the interlaced format externally unless they get >> forced into it). > If the import usages are for encoder, it have to progressive, Typically it isn't directly for the encoder because few things directly produce the necessary formats - a postproc step for some colour-conversion is very likely to happen first. >> * Export: works for either, but interlaced is likely much harder for others >> to use. For some use for playback, see
[Mesa-dev] [PATCH] i965/screen: Check that given format is valid
CID: 1418110 Fixes: 939b53d3325 "i965/screen: Implement queryDmaBufFormatModifierAttirbs" CC: Jason Ekstrand Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_screen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index bc2bba00b6..f85d1ba51d 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -830,6 +830,8 @@ intel_query_format_modifier_attribs(__DRIscreen *dri_screen, { struct intel_screen *screen = dri_screen->driverPrivate; const struct intel_image_format *f = intel_image_format_lookup(fourcc); + if (f == NULL) + return false; if (!modifier_is_supported(>devinfo, f, 0, modifier)) return false; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa/st: fix infinite loops
On Wednesday, September 20, 2017 7:13:29 PM PDT Timothy Arceri wrote: > Fixes: 9ac8fece63a9 (glsl: Unify ir_constant::const_elements and ::components) > --- > src/mesa/program/ir_to_mesa.cpp| 2 +- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- > 2 files changed, 2 insertions(+), 2 deletions(-) Looks good to me. Reviewed-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102910 signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/screen: Check that given format is valid
Reviewed-by: Tapani Pälli On 09/21/2017 10:22 AM, Topi Pohjolainen wrote: CID: 1418110 Fixes: 939b53d3325 "i965/screen: Implement queryDmaBufFormatModifierAttirbs" CC: Jason Ekstrand Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_screen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index bc2bba00b6..f85d1ba51d 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -830,6 +830,8 @@ intel_query_format_modifier_attribs(__DRIscreen *dri_screen, { struct intel_screen *screen = dri_screen->driverPrivate; const struct intel_image_format *f = intel_image_format_lookup(fourcc); + if (f == NULL) + return false; if (!modifier_is_supported(>devinfo, f, 0, modifier)) return false; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa/st: fix infinite loops
On 21/09/17 16:55, Kenneth Graunke wrote: On Wednesday, September 20, 2017 7:13:29 PM PDT Timothy Arceri wrote: Fixes: 9ac8fece63a9 (glsl: Unify ir_constant::const_elements and ::components) --- src/mesa/program/ir_to_mesa.cpp| 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) Looks good to me. Reviewed-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102910 Thanks. I already pushed with Dylan's rb since it was using up 16GB within seconds and making my machine unresponsive. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] initial meson port
Matt Turnerwrites: > On Thu, Sep 21, 2017 at 5:06 AM, Jakob Bornecrantz > wrote: >> On Thu, Sep 21, 2017 at 2:20 AM, Eric Anholt wrote: >>> Dylan Baker writes: Results autotools : sh -c 535.34s user 30.33s system 310% cpu 3:02.05 total meson : sh -c 136.58s user 11.98s system 372% cpu 39.895 total >>> >>> I just want to point at these numbers again. meson is so transformative >>> for your normal build/test cycles that it's worth it even if we have to >>> duplicate source lists. I know these aren't quite representative >>> because of all of automake's checks that haven't been done for meson, >>> but here's what we had for the X server conversion: >>> >>> autotools: meson: >>> no-op build 0.83 0.49 >>> touch Makefile.am1.28 >>> touch configure.ac 16.68 >>> touch meson.build 2.92 >>> clean ccache build 16.74 1.44 >>> clean build 52.2427.84 >>> >>> Hopefully we can replace two of our build systems (hopefully android and >>> scons?) with this one, and then I think it will definitely be less >>> developer build system maintenance, even with duplicated source lists. >>> I'd be curious to hear what the vmware folks would need from meson in >>> order to drop scons, and I'd be willing to put in a good bit of work to >>> make it happen. >>> >>> Additionally, meson doesn't need the .hs listed in its source lists, so >>> these meson.builds are actually more verbose than we need and would drop >>> a huge source of our "fix up the build system" patches for automake's >>> stupid distcheck. >> >> Wasn't lacking distcheck support one of the arguments against moving >> to only a scons build when this was brought up all those years ago? >> Does Meson provide something similar, or do people just now get all >> of the source from git nowadays? > > Maybe that discussion was a before my time (or maybe I've just > forgotten) but I did all of the work to make "make dist" work in > ~2013. 
Building the tarballs and generating files like configure makes > sense given the workings and limitations of autotools. I'd definitely > be opposed to not making the tarballs with autotools' dist target > because since we've switched we haven't shipped a broken tarball once, > which was a common occurrence previously. > > With switching to Meson though, there's not the same need to generate > all sorts of things and include them in the tarball. We'd add > dependencies on python, mako, flex, and bison that we don't currently > require to build from a tarball, but I think that's an acceptable > cost. > > Just to preempt the question: as a (source-based) distribution > maintainer, I'm against just getting the code from git. There's lots > of distro infrastructure in place to mirror files and not any that I'm > aware of to handle git repos. Yeah. You still want tarballs, you just want the tarballs to be basically a snapshot of git so that everybody's using the same build system. signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] st/va: Implement vaExportSurfaceHandle()
On 20/09/17 09:14, Christian König wrote: > Am 20.09.2017 um 00:01 schrieb Mark Thompson: >> This is a new interface in libva2 to support wider use-cases of passing >> surfaces to external APIs. In particular, this allows export of NV12 and >> P010 surfaces. > > First of all thanks a lot for taking care of this. > >> Signed-off-by: Mark Thompson>> --- >> Trivial update for a minor change requested on libva side (1/2 identical). >> >> Still unsure on what to do about size and interlacing. I'll have a look at >> the postproc code just posted soon, though I think it's pretty much entirely >> orthogonal to this. > > Probably best to convert the interlaced representation into the progressive > form before exporting. I'd prefer not to do that, because a use-case of this is to be able to modify surfaces in-place. (E.g. decode, scale, export, blend something else onto the surface, fence, encode.) > Only alternative I can think of is to define new DRM > formats/modifiers/attributes, but then the application needs to be aware of > this as well. I'm not really sure what form that would need to take. The modifiers are meant for something else importing the surfaces, so what would support it and how? That would also require keeping the format part the same, I think (so an NV12 surface would still be an R8 plane and a GR88 plane, but with some modifier meaning it is actually present as two fields). It could work with the current construction by adding a way to indicate the surface is interlaced with separate fields. For NV12 it would then export four handles, one for each field of each plane. mpv at least should be able to handle this (there is already support for something similar for some nvidia vdpau cases), but I don't know how acceptable that would be to other users. Certainly it would be quite a lot harder to modify a surface in-place in a sensible way with that setup. 
I'm still unconvinced by the comments in the other thread about using interlaced surfaces by default - a lot of things would be easier if progressive were the default, and I have yet to find any case this actually fails in. Thanks, - Mark ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Mesa-announce] Mesa 17.1.10 release candidate
Hello list, The candidate for the Mesa 17.1.10 is now available. Currently we have: - 41 queued - 0 nominated (outstanding) - and 5 rejected patches This is the last release for the 17.1 series. In the current queue we have: In build and integration system, we add a dependency on libunwind when running make distcheck, as this is optional for libgallium but we want to catch any problem. As consequence, also force LLVM 3.3 in Travis when building Gallium ST Other, as this is the minimum required version we want to test. On the other hand, we link libmesautil into u_atomic_test, as this is required by platforms without particular atomic operations. In this sense, there's a patch to implement __sync_val_compare_and_swap_8, required by 32-bit PowerPC platforms. The state tracker received a couple of patches, one that fixes a dEQP- GLES31 test and another that fixes the usage of 64-bit unsigned integers when used for boolean comparisons. The platform part adds a missing __DRI_BACKGROUND_CALLABLE extension. Intel i965 driver gets a fix for a crash that can happen in Haswell when uploading a stencil texture with blorp. Intel ANV driver fixes a problem when copying descriptors. VC4 driver gets several use-after-free fixes. SWR driver fixes a use case that happens when attaching/dettaching/re- attaching surfaces, that leads to wrong behaviour. AMD drivers get a fix for various dEQP-GLES31 tests. Also, the radeonsi driver receives a couple of patches more that fixes different dEQP's OpenGL-ES tests. In the same way, RADV driver also gets a fix for several dEQP's Vulkan tests. Take a look at section "Mesa stable queue" for more information. Testing reports/general approval Any testing reports (or general approval of the state of the branch) will be greatly appreciated. The plan is to have 17.1.10 next Monday (25th of September), around or shortly after 15:00 GMT. If you have any questions or suggestions - be that about the current patch queue or otherwise, please go ahead. 
Trivial merge conflicts --- commit f348cd1430b7ca436bd83043c3f0155c4a045c1c Author: Nicolai Hähnleradeonsi: apply a mask to gl_SampleMaskIn in the PS prolog (cherry picked from commit 92c4277990799641d4986ce66a62366228713945) commit 21b223ebac2aba61bcf35201792c03ffca415afd Author: Jason Ekstrand i965/blorp: Set r8stencil_needs_update when writing stencil (cherry picked from commit a43d379000260485fc4b2b03b069aedc46879557) commit 5a1e8e29dfa9b8394914b55b23fd6f497411dbd4 Author: Nicolai Hähnle amd/common: round cube array slice in ac_prepare_cube_coords (cherry picked from commit e0af3bed2cb52b5d8cf1da08b42cf28bae131c76) commit e88c22f645663d80051da9fa0dd4abf533a04eba Author: Matt Turner util: Link libmesautil into u_atomic_test (cherry picked from commit d075a4089ec62c489c8a3423f82371bf85f2ea6c) commit 81d3b3ca168bf1ebe736bb994b20b4aeb2e3966e Author: Nicolai Hähnle radeonsi: fix array textures layer coordinate (cherry picked from commit 87f7c7bd6566dbd95214df9b058bca5e6573374e) Cheers, J.A. Mesa stable queue - Nominated (0) = Queued (41) === Alexandre Demers (1): osmesa: link with libunwind if enabled (v2) Andres Gomez (12): docs: add sha256 checksums for 17.1.9 cherry-ignore: add "st/mesa: skip draw calls with pipe_draw_info::count == 0" cherry-ignore: add "radv: use amdgpu_bo_va_op_raw." cherry-ignore: add "radv: use simpler indirect packet 3 if possible." cherry-ignore: add "radeonsi: don't always apply the PrimID instancing bug workaround on SI" cherry-ignore: add "intel/eu/validate: Look up types on demand in execution_type()" cherry-ignore: add "radv: gfx9 fixes" cherry-ignore: add "radv/gfx9: set mip0-depth correctly for 2d arrays/3d images" cherry-ignore: add "radv/gfx9: fix image resource handling." cherry-ignore: add "docs/egl: remove reference to EGL_DRIVERS_PATH" cherry-ignore: add "radv: Disable multilayer & multilevel DCC." cherry-ignore: add "radv: Don't allocate CMASK for linear images." 
Dave Airlie (2): radv/ac: bump params array for image atomic comp swap st/glsl->tgsi: fix u64 to bool comparisons. Emil Velikov (2): egl/x11/dri3: adding missing __DRI_BACKGROUND_CALLABLE extension automake: enable libunwind in `make distcheck' Eric Anholt (3): broadcom/vc4: Fix use-after-free for flushing when writing to a texture. broadcom/vc4: Fix use-after-free trying to mix a quad and tile clear. broadcom/vc4: Fix use-after-free when deleting a program. George Kyriazis (1): swr: invalidate attachment on transition change Gert Wollny (2): travis: force llvm-3.3 for "make Gallium ST Other"
Re: [Mesa-dev] [PATCH 5/6] vulkan: enum generator: Stop using iterparse
On Thu, Sep 21, 2017 at 10:34 AM, Dylan Bakerwrote: > Quoting Jason Ekstrand (2017-09-21 08:32:22) > > While using iterparse is potentially a little more efficient, the Vulkan > > registry XML is not large and using regular element tree simplifies the > > parsing logic substantially. > > --- > > src/vulkan/util/gen_enum_to_str.py | 41 +++--- > > > 1 file changed, 16 insertions(+), 25 deletions(-) > > > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > > index 8f32102..e5f8964 100644 > > --- a/src/vulkan/util/gen_enum_to_str.py > > +++ b/src/vulkan/util/gen_enum_to_str.py > > @@ -157,31 +157,22 @@ def parse_xml(enum_factory, ext_factory, filename): > > of VkEnum objects. > > """ > > > > -with open(filename, 'rb') as f: > > -context = iter(et.iterparse(f, events=('start', 'end'))) > > - > > -# This gives the root element, since goal is to iterate over the > > -# elements without building a tree, this allows the root to be > cleared > > -# (erase the elements) after the children have been processed. > > -_, root = next(context) > > - > > -for event, elem in context: > > -if event == 'end' and elem.tag == 'enums': > > -type_ = elem.attrib.get('type') > > -if type_ == 'enum': > > -enum = enum_factory(elem.attrib['name']) > > -enum.values.extend([e.attrib['name'] for e in elem > > -if e.tag == 'enum']) > > -elif event == 'start' and elem.tag == 'extension': > > -ext_factory(elem.attrib['name'], > int(elem.attrib['number'])) > > -elif event == 'end' and elem.tag == 'extension': > > -if elem.attrib['supported'] != 'vulkan': > > -continue > > -for e in elem.findall('.//enum[@extends][@offset]'): > > -enum = enum_factory(e.attrib['extends']) > > -enum.values.append(e.attrib['name'],) > > - > > -root.clear() > > +xml = et.parse(filename) > > + > > +for enum_type in xml.findall('./enums'): > > +if enum_type.attrib.get('type') != 'enum': > > +continue > > please do not use continue for iterating xlm. 
xpath is more than capable of > representing this in less code, and it's implemented in C so it will be > much > faster. > > for enum_type in xml.findall('./enums[@type="enum"]') > Done. > > + > > +enum = enum_factory(enum_type.attrib['name']) > > +for value in enum_type.findall('./enum'): > > +enum.values.append(value.attrib['name']) > > + > > +for ext_elem in xml.findall('./extension'): > > +if ext_elem.attrib['supported'] != 'vulkan': > > +continue > > please do the same thing here. > Done. > > + > > +ext_factory(ext_elem.attrib['name'], > > +int(ext_elem.attrib['number'])) > > > > def main(): > > parser = argparse.ArgumentParser() > > -- > > 2.5.0.400.gff86faf > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/5] util: Add tests for the string buffer
Fixed the missing newline at the end of this cpp file locally. This is the only patch left in the series without an RB. If there's no objections I plan on pushing this once I get an RB on this. Someone mind having a look at it? 2017-09-11 22:21 GMT+02:00 Thomas Helland: > More tests could probably be added, but this should cover > concatenation, resizing, clearing, formatted printing, > and checking the length, so it should be quite complete. > > V2: Address review feedback from Timothy, plus fixes >- Use a large enough char array >- Actually test the formatted appending >- Test that clear function resets string length > > V3: Port to gtest > > V4: Fix test makefile > Fix copyright header > Fix missing extern C > Use more appropriate name for C-file > Add tests for append_char > --- > configure.ac | 1 + > src/util/Makefile.am | 5 +- > src/util/tests/string_buffer/Makefile.am | 40 +++ > .../tests/string_buffer/string_buffer_test.cpp | 119 > + > 4 files changed, 164 insertions(+), 1 deletion(-) > create mode 100644 src/util/tests/string_buffer/Makefile.am > create mode 100644 src/util/tests/string_buffer/string_buffer_test.cpp > > diff --git a/configure.ac b/configure.ac > index d0d4c0dfd1..20727c7bb4 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -2924,6 +2924,7 @@ AC_CONFIG_FILES([Makefile > src/mesa/state_tracker/tests/Makefile > src/util/Makefile > src/util/tests/hash_table/Makefile > + src/util/tests/string_buffer/Makefile > src/util/xmlpool/Makefile > src/vulkan/Makefile]) > > diff --git a/src/util/Makefile.am b/src/util/Makefile.am > index 4512dc99d5..2b47143ad7 100644 > --- a/src/util/Makefile.am > +++ b/src/util/Makefile.am > @@ -19,7 +19,10 @@ > # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > # IN THE SOFTWARE. > > -SUBDIRS = xmlpool . tests/hash_table > +SUBDIRS = . 
\ > + xmlpool \ > + tests/hash_table \ > + tests/string_buffer > > include Makefile.sources > > diff --git a/src/util/tests/string_buffer/Makefile.am > b/src/util/tests/string_buffer/Makefile.am > new file mode 100644 > index 00..bd04d86349 > --- /dev/null > +++ b/src/util/tests/string_buffer/Makefile.am > @@ -0,0 +1,40 @@ > +# Copyright © 2017 Thomas Helland > +# > +# Permission is hereby granted, free of charge, to any person obtaining a > +# copy of this software and associated documentation files (the "Software"), > +# to deal in the Software without restriction, including without limitation > +# the rights to use, copy, modify, merge, publish, distribute, sublicense, > +# and/or sell copies of the Software, and to permit persons to whom the > +# Software is furnished to do so, subject to the following conditions: > +# > +# The above copyright notice and this permission notice (including the next > +# paragraph) shall be included in all copies or substantial portions of the > +# Software. > +# > +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > +# IN THE SOFTWARE. 
> + > +AM_CPPFLAGS = \ > + -I$(top_srcdir)/src \ > + -I$(top_srcdir)/include \ > + -I$(top_srcdir)/src/gtest/include \ > + $(PTHREAD_CFLAGS) \ > + $(DEFINES) > + > +TESTS = string_buffer_test > + > +check_PROGRAMS = $(TESTS) > + > +string_buffer_test_SOURCES = \ > + string_buffer_test.cpp > + > +string_buffer_test_LDADD = \ > + $(top_builddir)/src/gtest/libgtest.la \ > + $(top_builddir)/src/util/libmesautil.la \ > + $(PTHREAD_LIBS) \ > + $(DLOPEN_LIBS) > diff --git a/src/util/tests/string_buffer/string_buffer_test.cpp > b/src/util/tests/string_buffer/string_buffer_test.cpp > new file mode 100644 > index 00..e80ee8b135 > --- /dev/null > +++ b/src/util/tests/string_buffer/string_buffer_test.cpp > @@ -0,0 +1,119 @@ > +/* > + * Copyright © 2017 Thomas Helland > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following
[Mesa-dev] [PATCH 05/15] st/omx: use new vl_compositor_yuv_deint_full() to deint
v2: add dst rect to make sure no scale Acked-by: Christian König --- src/gallium/state_trackers/omx_bellagio/vid_dec.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/omx_bellagio/vid_dec.c b/src/gallium/state_trackers/omx_bellagio/vid_dec.c index 28741c0..f9fe19f 100644 --- a/src/gallium/state_trackers/omx_bellagio/vid_dec.c +++ b/src/gallium/state_trackers/omx_bellagio/vid_dec.c @@ -602,6 +602,7 @@ static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* /* re-allocate the progressive buffer */ omx_base_video_PortType *port; struct pipe_video_buffer templat = {}; +struct u_rect src_rect, dst_rect; port = (omx_base_video_PortType *) priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; @@ -614,8 +615,14 @@ static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* new_vbuf = priv->pipe->create_video_buffer(priv->pipe, ); /* convert the interlaced to the progressive */ -vl_compositor_yuv_deint(>cstate, >compositor, -input->pInputPortPrivate, new_vbuf); +src_rect.x0 = dst_rect.x0 = 0; +src_rect.x1 = dst_rect.x1 = templat.width; +src_rect.y0 = dst_rect.y0 = 0; +src_rect.y1 = dst_rect.y1 = templat.height; + +vl_compositor_yuv_deint_full(>cstate, >compositor, + input->pInputPortPrivate, new_vbuf, + _rect, _rect, VL_COMPOSITOR_WEAVE); /* set the progrssive buffer for next round */ vbuf->destroy(vbuf); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] glx: Sort the GLX extension bit enum and table
Not quite asciibetical: ARB, then EXT, then vendor, just like the GL extension enum just below. No functional change, but it bothered me. Signed-off-by: Adam Jackson--- src/glx/glxextensions.c | 22 +++--- src/glx/glxextensions.h | 24 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index 88bf0de3e6..6882e442fe 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -139,14 +139,17 @@ static const struct extension_info known_glx_extensions[] = { { GLX(ARB_framebuffer_sRGB),VER(0,0), Y, Y, N, N }, { GLX(ARB_get_proc_address),VER(1,4), Y, N, Y, N }, { GLX(ARB_multisample), VER(1,4), Y, Y, N, N }, - { GLX(ATI_pixel_format_float), VER(0,0), N, N, N, N }, + { GLX(EXT_buffer_age), VER(0,0), Y, N, N, Y }, + { GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, N }, + { GLX(EXT_create_context_es_profile), VER(0,0), Y, N, N, N }, + { GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N }, + { GLX(EXT_framebuffer_sRGB),VER(0,0), Y, Y, N, N }, { GLX(EXT_import_context), VER(0,0), Y, Y, N, N }, + { GLX(EXT_texture_from_pixmap), VER(0,0), Y, N, N, N }, { GLX(EXT_visual_info), VER(0,0), Y, Y, N, N }, { GLX(EXT_visual_rating), VER(0,0), Y, Y, N, N }, - { GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N }, - { GLX(EXT_framebuffer_sRGB),VER(0,0), Y, Y, N, N }, - { GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, N }, - { GLX(EXT_create_context_es_profile), VER(0,0), Y, N, N, N }, + { GLX(ATI_pixel_format_float), VER(0,0), N, N, N, N }, + { GLX(INTEL_swap_event),VER(0,0), Y, N, N, N }, { GLX(MESA_copy_sub_buffer),VER(0,0), Y, N, N, N }, { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N }, { GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y }, @@ -154,18 +157,15 @@ static const struct extension_info known_glx_extensions[] = { { GLX(NV_float_buffer), VER(0,0), N, N, N, N }, { GLX(OML_swap_method), VER(0,0), Y, Y, N, N }, { GLX(OML_sync_control),VER(0,0), Y, N, N, Y }, - { 
GLX(SGI_make_current_read), VER(1,3), Y, N, N, N }, - { GLX(SGI_swap_control),VER(0,0), Y, N, N, N }, - { GLX(SGI_video_sync), VER(0,0), Y, N, N, Y }, { GLX(SGIS_multisample),VER(0,0), Y, Y, N, N }, { GLX(SGIX_fbconfig), VER(1,3), Y, Y, N, N }, { GLX(SGIX_pbuffer),VER(1,3), Y, Y, N, N }, { GLX(SGIX_swap_barrier), VER(0,0), N, N, N, N }, { GLX(SGIX_swap_group), VER(0,0), N, N, N, N }, { GLX(SGIX_visual_select_group),VER(0,0), Y, Y, N, N }, - { GLX(EXT_texture_from_pixmap), VER(0,0), Y, N, N, N }, - { GLX(INTEL_swap_event),VER(0,0), Y, N, N, N }, - { GLX(EXT_buffer_age), VER(0,0), Y, N, N, Y }, + { GLX(SGI_make_current_read), VER(1,3), Y, N, N, N }, + { GLX(SGI_swap_control),VER(0,0), Y, N, N, N }, + { GLX(SGI_video_sync), VER(0,0), Y, N, N, Y }, { NULL } }; diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h index ff35dedd0b..6225742da5 100644 --- a/src/glx/glxextensions.h +++ b/src/glx/glxextensions.h @@ -43,14 +43,17 @@ enum ARB_fbconfig_float_bit, ARB_get_proc_address_bit, ARB_multisample_bit, - ATI_pixel_format_float_bit, + EXT_buffer_age_bit, + EXT_create_context_es2_profile_bit, + EXT_create_context_es_profile_bit, + EXT_fbconfig_packed_float_bit, + EXT_framebuffer_sRGB_bit, + EXT_import_context_bit, + EXT_texture_from_pixmap_bit, EXT_visual_info_bit, EXT_visual_rating_bit, - EXT_import_context_bit, - EXT_framebuffer_sRGB_bit, - EXT_fbconfig_packed_float_bit, - EXT_create_context_es_profile_bit, - EXT_create_context_es2_profile_bit, + ATI_pixel_format_float_bit, + INTEL_swap_event_bit, MESA_copy_sub_buffer_bit, MESA_depth_float_bit, MESA_multithread_makecurrent_bit, @@ -60,18 +63,15 @@ enum NV_float_buffer_bit, OML_swap_method_bit, OML_sync_control_bit, - SGI_make_current_read_bit, - SGI_swap_control_bit, - SGI_video_sync_bit, SGIS_multisample_bit, SGIX_fbconfig_bit, SGIX_pbuffer_bit, SGIX_swap_barrier_bit, SGIX_swap_group_bit, SGIX_visual_select_group_bit, - EXT_texture_from_pixmap_bit, - INTEL_swap_event_bit, - EXT_buffer_age_bit, + 
SGI_make_current_read_bit, + SGI_swap_control_bit, + SGI_video_sync_bit, }; /* From the GLX perspective, the ARB and EXT extensions are identical. Use a -- 2.13.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] glx: Implement GLX_EXT_no_config_context
This more or less ports EGL_KHR_no_config_context to GLX. This is currently a draft extension, under review at: https://github.com/KhronosGroup/OpenGL-Registry/pull/102 Signed-off-by: Adam Jackson--- src/glx/create_context.c | 37 +++-- src/glx/glxextensions.c | 1 + src/glx/glxextensions.h | 1 + 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/glx/create_context.c b/src/glx/create_context.c index 38e949ab4c..d855699743 100644 --- a/src/glx/create_context.c +++ b/src/glx/create_context.c @@ -47,21 +47,11 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, xcb_generic_error_t *err; xcb_void_cookie_t cookie; unsigned dummy_err = 0; + int screen = -1; - - if (dpy == NULL || cfg == NULL) - return NULL; - - /* This means that either the caller passed the wrong display pointer or -* one of the internal GLX data structures (probably the fbconfig) has an -* error. There is nothing sensible to do, so return an error. -*/ - psc = GetGLXScreenConfigs(dpy, cfg->screen); - if (psc == NULL) + if (dpy == NULL) return NULL; - assert(cfg->screen == psc->scr); - /* Count the number of attributes specified by the application. All * attributes appear in pairs, except the terminating None. */ @@ -70,6 +60,25 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, /* empty */ ; } + if (cfg) { + screen = cfg->screen; + } else { + int i; + for (i = 0; i < num_attribs; i++) + if (attrib_list[i * 2] == GLX_SCREEN) +screen = attrib_list[i * 2 + 1]; + } + + /* This means that either the caller passed the wrong display pointer or +* one of the internal GLX data structures (probably the fbconfig) has an +* error. There is nothing sensible to do, so return an error. +*/ + psc = GetGLXScreenConfigs(dpy, screen); + if (psc == NULL) + return NULL; + + assert(screen == psc->scr); + if (direct && psc->vtable->create_context_attribs) { /* GLX drops the error returned by the driver. The expectation is that * an error will also be returned by the server. 
The server's error @@ -104,8 +113,8 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config, cookie = xcb_glx_create_context_attribs_arb_checked(c, gc->xid, -cfg->fbconfigID, -cfg->screen, +cfg ? cfg->fbconfigID : 0, +screen, gc->share_xid, gc->isDirect, num_attribs, diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index 6882e442fe..64f8c2fe16 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -145,6 +145,7 @@ static const struct extension_info known_glx_extensions[] = { { GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N }, { GLX(EXT_framebuffer_sRGB),VER(0,0), Y, Y, N, N }, { GLX(EXT_import_context), VER(0,0), Y, Y, N, N }, + { GLX(EXT_no_config_context), VER(0,0), Y, Y, N, Y }, { GLX(EXT_texture_from_pixmap), VER(0,0), Y, N, N, N }, { GLX(EXT_visual_info), VER(0,0), Y, Y, N, N }, { GLX(EXT_visual_rating), VER(0,0), Y, Y, N, N }, diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h index 6225742da5..52d7dc08eb 100644 --- a/src/glx/glxextensions.h +++ b/src/glx/glxextensions.h @@ -49,6 +49,7 @@ enum EXT_fbconfig_packed_float_bit, EXT_framebuffer_sRGB_bit, EXT_import_context_bit, + EXT_no_config_context_bit, EXT_texture_from_pixmap_bit, EXT_visual_info_bit, EXT_visual_rating_bit, -- 2.13.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radv: Implement VK_AMD_rasterization_order
Pushed, thanks. On Mon, Sep 18, 2017 at 6:26 PM, Nicholas Miellwrote: > Tested with AMD's Anvil OutOfOrderRasterization demo on a RX 560. > > Signed-off-by: Nicholas Miell > --- > src/amd/vulkan/radv_device.c | 17 + > src/amd/vulkan/radv_pipeline.c | 10 +- > 2 files changed, 26 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index e6d595dfbe5..49536851bae 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -175,6 +175,14 @@ static const VkExtensionProperties > common_device_extensions[] = { > .specVersion = 1, > }, > }; > + > +static const VkExtensionProperties rasterization_order_extension[] ={ > + { > + .extensionName = VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME, > + .specVersion = 1, > + }, > +}; > + > static const VkExtensionProperties ext_sema_device_extensions[] = { > { > .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, > @@ -339,6 +347,15 @@ radv_physical_device_init(struct radv_physical_device > *device, > if (result != VK_SUCCESS) > goto fail; > > + if (device->rad_info.chip_class >= VI && device->rad_info.max_se >= > 2) { > + result = radv_extensions_register(instance, > + >extensions, > + rasterization_order_extension, > + > ARRAY_SIZE(rasterization_order_extension)); > + if (result != VK_SUCCESS) > + goto fail; > + } > + > if (device->rad_info.has_syncobj) { > result = radv_extensions_register(instance, > >extensions, > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index 91577402a2c..8f20e902800 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -33,6 +33,7 @@ > #include "nir/nir.h" > #include "nir/nir_builder.h" > #include "spirv/nir_spirv.h" > +#include "vk_util.h" > > #include > #include > @@ -1085,6 +1086,13 @@ radv_pipeline_init_multisample_state(struct > radv_pipeline *pipeline, > ms->pa_sc_mode_cntl_1 |= > EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); > } > > + const 
struct VkPipelineRasterizationStateRasterizationOrderAMD > *raster_order = > + vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, > PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD); > + if (raster_order && raster_order->rasterizationOrder == > VK_RASTERIZATION_ORDER_RELAXED_AMD) { > + ms->pa_sc_mode_cntl_1 |= > S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(1) | > + S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7); > + } > + > if (vkms) { > if (vkms->alphaToCoverageEnable) > blend->db_alpha_to_mask |= > S_028B70_ALPHA_TO_MASK_ENABLE(1); > @@ -1875,7 +1883,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, > !ps->info.fs.writes_sample_mask) > pipeline->graphics.blend.spi_shader_col_format = > V_028714_SPI_SHADER_32_R; > } > - > + > unsigned z_order; > pipeline->graphics.db_shader_control = 0; > if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory) > -- > 2.13.5 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] initial meson port
On Thu, Sep 21, 2017 at 5:06 AM, Jakob Bornecrantz wrote: > On Thu, Sep 21, 2017 at 2:20 AM, Eric Anholt wrote: >> Dylan Baker writes: >>> Results >>> autotools : sh -c 535.34s user 30.33s system 310% cpu 3:02.05 total >>> meson : sh -c 136.58s user 11.98s system 372% cpu 39.895 total >> >> I just want to point at these numbers again. meson is so transformative >> for your normal build/test cycles that it's worth it even if we have to >> duplicate source lists. I know these aren't quite representative >> because of all of automake's checks that haven't been done for meson, >> but here's what we had for the X server conversion: >> >> autotools: meson: >> no-op build 0.83 0.49 >> touch Makefile.am 1.28 >> touch configure.ac 16.68 >> touch meson.build 2.92 >> clean ccache build 16.74 1.44 >> clean build 52.24 27.84 >> >> Hopefully we can replace two of our build systems (hopefully android and >> scons?) with this one, and then I think it will definitely be less >> developer build system maintenance, even with duplicated source lists. >> I'd be curious to hear what the vmware folks would need from meson in >> order to drop scons, and I'd be willing to put in a good bit of work to >> make it happen. >> >> Additionally, meson doesn't need the .hs listed in its source lists, so >> these meson.builds are actually more verbose than we need and would drop >> a huge source of our "fix up the build system" patches for automake's >> stupid distcheck. > > Wasn't lacking distcheck support one of the arguments against moving > to only a scons build when this was brought up all those years ago? > Does Meson provide something similar, or do people just now get all > of the source from git nowadays? Maybe that discussion was before my time (or maybe I've just forgotten) but I did all of the work to make "make dist" work in ~2013. Building the tarballs and generating files like configure makes sense given the workings and limitations of autotools. 
I'd definitely be opposed to not making the tarballs with autotools' dist target because since we've switched we haven't shipped a broken tarball once, which was a common occurrence previously. With switching to Meson though, there's not the same need to generate all sorts of things and include them in the tarball. We'd add dependencies on python, mako, flex, and bison that we don't currently require to build from a tarball, but I think that's an acceptable cost. Just to preempt the question: as a (source-based) distribution maintainer, I'm against just getting the code from git. There's lots of distro infrastructure in place to mirror files and not any that I'm aware of to handle git repos. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 2/3] glsl: check if induction var incremented before use in terminator
Out of curiosity what about decrement and other shenanigans? --Michael Am 21.09.2017 um 12:55 schrieb Timothy Arceri: do-while loops can increment the starting value before the condition is checked. e.g. do { ndx++; } while (ndx < 3); This commit changes the code to detect this and reduces the iteration count by 1 if found. V2: fix terminator spelling Reviewed-by: Nicolai Hähnle Reviewed-by: Elie Tournier --- src/compiler/glsl/loop_analysis.cpp | 38 + 1 file changed, 38 insertions(+) diff --git a/src/compiler/glsl/loop_analysis.cpp b/src/compiler/glsl/loop_analysis.cpp index 81a07f78f8..78279844dc 100644 --- a/src/compiler/glsl/loop_analysis.cpp +++ b/src/compiler/glsl/loop_analysis.cpp @@ -164,20 +164,54 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, iter_value += bias[i]; valid_loop = true; break; } } ralloc_free(mem_ctx); return (valid_loop) ? iter_value : -1; } +static bool +incremented_before_terminator(ir_loop *loop, ir_variable *var, + ir_if *terminator) +{ + for (exec_node *node = loop->body_instructions.get_head(); +!node->is_tail_sentinel(); +node = node->get_next()) { + ir_instruction *ir = (ir_instruction *) node; + + switch (ir->ir_type) { + case ir_type_if: + if (ir->as_if() == terminator) +return false; + break; + + case ir_type_assignment: { + ir_assignment *assign = ir->as_assignment(); + ir_variable *assignee = assign->lhs->whole_variable_referenced(); + + if (assignee == var) { +assert(assign->condition == NULL); +return true; + } + + break; + } + + default: + break; + } + } + + unreachable("Unable to find induction variable"); +} /** * Record the fact that the given loop variable was referenced inside the loop. * * \arg in_assignee is true if the reference was on the LHS of an assignment. * * \arg in_conditional_code_or_nested_loop is true if the reference occurred * inside an if statement or a nested loop. 
* * \arg current_assignment is the ir_assignment node that the loop variable is @@ -575,20 +609,24 @@ loop_analysis::visit_leave(ir_loop *ir) ir_variable *var = counter->variable_referenced(); ir_rvalue *init = find_initial_value(ir, var); loop_variable *lv = ls->get(var); if (lv != NULL && lv->is_induction_var()) { t->iterations = calculate_iterations(init, limit, lv->increment, cmp); +if (incremented_before_terminator(ir, var, t->ir)) { + t->iterations--; +} + if (t->iterations >= 0 && (ls->limiting_terminator == NULL || t->iterations < ls->limiting_terminator->iterations)) { ls->limiting_terminator = t; } } break; } default: ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/6] vulkan: enum generator: make registry more flexible
Quoting Jason Ekstrand (2017-09-21 08:32:20) > From: Lionel Landwerlin> > It will be used to store extension numbers as well. > > Signed-off-by: Lionel Landwerlin > Reviewed-by: Jason Ekstrand > --- > src/vulkan/util/gen_enum_to_str.py | 22 +++--- > 1 file changed, 11 insertions(+), 11 deletions(-) > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > index efe5d4f..5281e89 100644 > --- a/src/vulkan/util/gen_enum_to_str.py > +++ b/src/vulkan/util/gen_enum_to_str.py > @@ -115,18 +115,18 @@ FOREIGN_ENUM_VALUES = [ > ] > > > -class EnumFactory(object): > +class NamedFactory(object): > """Factory for creating enums.""" > > def __init__(self, type_): > self.registry = {} > self.type = type_ > > -def __call__(self, name): > +def __call__(self, *args): This is pretty ugly and clunky. What about __call__(self, name, **kwargs), and pass kwargs directly to the type? > try: > -return self.registry[name] > +return self.registry[args[0]] > except KeyError: > -n = self.registry[name] = self.type(name) > +n = self.registry[args[0]] = self.type(*args) > return n > > > @@ -138,7 +138,7 @@ class VkEnum(object): > self.values = values or [] > > > -def parse_xml(efactory, filename): > +def parse_xml(enum_factory, filename): > """Parse the XML file. Accumulate results into the efactory. 
> > This parser is a memory efficient iterative XML parser that returns a > list > @@ -157,15 +157,15 @@ def parse_xml(efactory, filename): > if event == 'end' and elem.tag == 'enums': > type_ = elem.attrib.get('type') > if type_ == 'enum': > -enum = efactory(elem.attrib['name']) > +enum = enum_factory(elem.attrib['name']) > enum.values.extend([e.attrib['name'] for e in elem > if e.tag == 'enum']) > elif event == 'end' and elem.tag == 'extension': > if elem.attrib['supported'] != 'vulkan': > continue > for e in elem.findall('.//enum[@extends][@offset]'): > -enum = efactory(e.attrib['extends']) > -enum.values.append(e.attrib['name']) > +enum = enum_factory(e.attrib['extends']) > +enum.values.append(e.attrib['name'],) > > root.clear() > > @@ -182,10 +182,10 @@ def main(): > > args = parser.parse_args() > > -efactory = EnumFactory(VkEnum) > +enum_factory = NamedFactory(VkEnum) > for filename in args.xml_files: > -parse_xml(efactory, filename) > -enums=sorted(efactory.registry.values(), key=lambda e: e.name) > +parse_xml(enum_factory, filename) > +enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) > > for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.c')), > (H_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.h'))]: > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/6] vulkan: enum generator: generate extension number defines
Quoting Jason Ekstrand (2017-09-21 08:32:21) > From: Lionel Landwerlin> > New extensions can introduce additional enums. Most of the new enums > will have disjoint numbers from the initial enums. For example new > formats introduced by VK_IMG_format_pvrtc : > > VK_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, > VK_FORMAT_ASTC_10x8_SRGB_BLOCK = 178, > VK_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, > VK_FORMAT_ASTC_10x10_SRGB_BLOCK = 180, > VK_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, > VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, > VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, > VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, > VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG = 154000, > VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG = 154001, > VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG = 154002, > VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG = 154003, > VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG = 154004, > VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG = 154005, > VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG = 154006, > VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG = 154007, > > It's obvious we can't have a single table for handling those anymore. > > Fortunately the enum values actually contain the number of the > extension that introduced the new enums. So we can build an > indirection table off the extension number and then index by > subtracting the first enum of the the format enum value. > > This change makes the extension number available in the generated enum > code. 
> > Signed-off-by: Lionel Landwerlin > Reviewed-by: Jason Ekstrand > --- > src/vulkan/util/gen_enum_to_str.py | 24 > 1 file changed, 20 insertions(+), 4 deletions(-) > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > index 5281e89..8f32102 100644 > --- a/src/vulkan/util/gen_enum_to_str.py > +++ b/src/vulkan/util/gen_enum_to_str.py > @@ -101,6 +101,10 @@ H_TEMPLATE = Template(textwrap.dedent(u"""\ > #include > #include > > +% for ext in extensions: > +#define _${ext.name}_number (${ext.number}) > +% endfor > + > % for enum in enums: > const char * vk_${enum.name[2:]}_to_str(${enum.name} input); > % endfor > @@ -130,6 +134,14 @@ class NamedFactory(object): > return n > > > +class VkExtension(object): > +"""Simple struct-like class representing extensions""" > + > +def __init__(self, name, number): > +self.name = name > +self.number = number > + > + > class VkEnum(object): > """Simple struct-like class representing a single Vulkan Enum.""" > > @@ -138,8 +150,8 @@ class VkEnum(object): > self.values = values or [] > > > -def parse_xml(enum_factory, filename): > -"""Parse the XML file. Accumulate results into the efactory. > +def parse_xml(enum_factory, ext_factory, filename): > +"""Parse the XML file. Accumulate results into the factories. > > This parser is a memory efficient iterative XML parser that returns a > list > of VkEnum objects. > @@ -160,6 +172,8 @@ def parse_xml(enum_factory, filename): > enum = enum_factory(elem.attrib['name']) > enum.values.extend([e.attrib['name'] for e in elem > if e.tag == 'enum']) > +elif event == 'start' and elem.tag == 'extension': > +ext_factory(elem.attrib['name'], int(elem.attrib['number'])) It would be better to nest the tags under the event if event == 'start': if elem.tag == '...' 
> elif event == 'end' and elem.tag == 'extension': > if elem.attrib['supported'] != 'vulkan': > continue > @@ -169,7 +183,6 @@ def parse_xml(enum_factory, filename): > > root.clear() > > - > def main(): > parser = argparse.ArgumentParser() > parser.add_argument('--xml', required=True, > @@ -183,9 +196,11 @@ def main(): > args = parser.parse_args() > > enum_factory = NamedFactory(VkEnum) > +ext_factory = NamedFactory(VkExtension) > for filename in args.xml_files: > -parse_xml(enum_factory, filename) > +parse_xml(enum_factory, ext_factory, filename) > enums=sorted(enum_factory.registry.values(), key=lambda e: e.name) > +extensions=sorted(ext_factory.registry.values(), key=lambda e: e.name) > > for template, file_ in [(C_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.c')), > (H_TEMPLATE, os.path.join(args.outdir, > 'vk_enum_to_str.h'))]: > @@ -193,6 +208,7 @@ def main(): > f.write(template.render( > file=os.path.basename(__file__), > enums=enums, > +extensions=extensions, > copyright=COPYRIGHT, > FOREIGN_ENUM_VALUES=FOREIGN_ENUM_VALUES)) > > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing
Re: [Mesa-dev] [PATCH 5/6] vulkan: enum generator: Stop using iterparse
Quoting Jason Ekstrand (2017-09-21 08:32:22) > While using iterparse is potentially a little more efficient, the Vulkan > registry XML is not large and using regular element tree simplifies the > parsing logic substantially. > --- > src/vulkan/util/gen_enum_to_str.py | 41 > +++--- > 1 file changed, 16 insertions(+), 25 deletions(-) > > diff --git a/src/vulkan/util/gen_enum_to_str.py > b/src/vulkan/util/gen_enum_to_str.py > index 8f32102..e5f8964 100644 > --- a/src/vulkan/util/gen_enum_to_str.py > +++ b/src/vulkan/util/gen_enum_to_str.py > @@ -157,31 +157,22 @@ def parse_xml(enum_factory, ext_factory, filename): > of VkEnum objects. > """ > > -with open(filename, 'rb') as f: > -context = iter(et.iterparse(f, events=('start', 'end'))) > - > -# This gives the root element, since goal is to iterate over the > -# elements without building a tree, this allows the root to be > cleared > -# (erase the elements) after the children have been processed. > -_, root = next(context) > - > -for event, elem in context: > -if event == 'end' and elem.tag == 'enums': > -type_ = elem.attrib.get('type') > -if type_ == 'enum': > -enum = enum_factory(elem.attrib['name']) > -enum.values.extend([e.attrib['name'] for e in elem > -if e.tag == 'enum']) > -elif event == 'start' and elem.tag == 'extension': > -ext_factory(elem.attrib['name'], int(elem.attrib['number'])) > -elif event == 'end' and elem.tag == 'extension': > -if elem.attrib['supported'] != 'vulkan': > -continue > -for e in elem.findall('.//enum[@extends][@offset]'): > -enum = enum_factory(e.attrib['extends']) > -enum.values.append(e.attrib['name'],) > - > -root.clear() > +xml = et.parse(filename) > + > +for enum_type in xml.findall('./enums'): > +if enum_type.attrib.get('type') != 'enum': > +continue please do not use continue for iterating XML. XPath is more than capable of representing this in less code, and it's implemented in C so it will be much faster. 
for enum_type in xml.findall('./enums[@type="enum"]') > + > +enum = enum_factory(enum_type.attrib['name']) > +for value in enum_type.findall('./enum'): > +enum.values.append(value.attrib['name']) > + > +for ext_elem in xml.findall('./extension'): > +if ext_elem.attrib['supported'] != 'vulkan': > +continue please do the same thing here. > + > +ext_factory(ext_elem.attrib['name'], > +int(ext_elem.attrib['number'])) > > def main(): > parser = argparse.ArgumentParser() > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: Implement VK_ANDROID_native_buffer (v5)
Hi Chad; The build works ok now on Android-IA. There is still something wrong with 'exec async' though. It behaves differently with small/big apps but eventually I think it just starts to block .. somewhere. I still need the big hammer to set device->has_exec_async false to fix that. Please don't consider that to be a blocker though, we can easily carry such patch in Android-IA and debug it further. For this patch: Reviewed-by: Tapani PälliOn 09/19/2017 12:44 AM, Chad Versace wrote: This implementation is correct (afaict), but takes two shortcuts regarding the import/export of Android sync fds. Shortcut 1. When Android calls vkAcquireImageANDROID to import a sync fd into a VkSemaphore or VkFence, the driver instead simply blocks on the sync fd, then puts the VkSemaphore or VkFence into the signalled state. Thanks to implicit sync, this produces correct behavior (with extra latency overhead, perhaps) despite its ugliness. Shortcut 2. When Android calls vkQueueSignalReleaseImageANDROID to export a collection of wait semaphores as a sync fd, the driver instead submits the semaphores to the queue, then returns sync fd -1, which informs the caller that no additional synchronization is needed. Again, thanks to implicit sync, this produces correct behavior (with extra batch submission overhead) despite its ugliness. I chose to take the shortcuts instead of properly importing/exporting the sync fds for two reasons: Reason 1. I've already tested this patch with dEQP and with demos apps. It works. I wanted to get the tested patches into the tree now, and polish the implementation afterwards. Reason 2. I want to run this on a 3.18 kernel (gasp!). In 3.18, i915 supports neither Android's sync_fence, nor upstream's sync_file, nor drm_syncobj. Again, I tested these patches on Android with a 3.18 kernel and they work. I plan to quickly follow-up with patches that remove the shortcuts and properly import/export the sync fds. 
Testing === I tested with 64-bit ARC++ on a Skylake Chromebook and a 3.18 kernel. The following pass: a little spinning cube demo APK dEQP-VK.info.* dEQP-VK.api.smoke.* dEQP-VK.api.info.instance.* dEQP-VK.api.info.device.* dEQP-VK.api.wsi.android.* v2: - Reject VkNativeBufferANDROID if the dma-buf's size is too small for the VkImage. - Stop abusing VkNativeBufferANDROID by passing it to vkAllocateMemory during vkCreateImage. Instead, directly import its dma-buf during vkCreateImage with anv_bo_cache_import(). [for jekstrand] - Rebase onto Tapani's VK_EXT_debug_report changes. - Drop `CPPFLAGS += $(top_srcdir)/include/android`. The dir does not exist. v3: - Delete duplicate #include "anv_private.h". [per Tapani] - Try to fix the Android-IA build in Android.vulkan.mk by following Tapani's example. v4: - Unset EXEC_OBJECT_ASYNC and set EXEC_OBJECT_WRITE on the imported gralloc buffer, just as we do for all other winsys buffers in anv_wsi.c. [found by Tapani] v5: - Really fix the Android-IA build by ensuring that Android.vulkan.mk uses Mesa' vulkan.h and not Android's. Insert -I$(MESA_TOP)/include before -Iframeworks/native/vulkan/include. [for Tapani] - In vkAcquireImageANDROID, submit signal operations to the VkSemaphore and VkFence. 
[for zhou] Cc: Tapani Pälli Cc: Jason Ekstrand Cc: zhoucm1 --- src/intel/Android.vulkan.mk | 7 +- src/intel/Makefile.sources | 3 + src/intel/Makefile.vulkan.am| 2 + src/intel/vulkan/anv_android.c | 243 src/intel/vulkan/anv_device.c | 12 +- src/intel/vulkan/anv_entrypoints_gen.py | 10 +- src/intel/vulkan/anv_extensions.py | 1 + src/intel/vulkan/anv_image.c| 148 +-- src/intel/vulkan/anv_private.h | 1 + 9 files changed, 415 insertions(+), 12 deletions(-) create mode 100644 src/intel/vulkan/anv_android.c diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index e20b32b87c..b2d7d4e46c 100644 --- a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -28,6 +28,7 @@ VK_ENTRYPOINTS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_entrypoints_ge VK_EXTENSIONS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_extensions.py VULKAN_COMMON_INCLUDES := \ + $(MESA_TOP)/include \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include \ @@ -36,7 +37,8 @@ VULKAN_COMMON_INCLUDES := \ $(MESA_TOP)/src/vulkan/util \ $(MESA_TOP)/src/intel \ $(MESA_TOP)/include/drm-uapi \ - $(MESA_TOP)/src/intel/vulkan + $(MESA_TOP)/src/intel/vulkan \ + frameworks/native/vulkan/include # libmesa_anv_entrypoints with header and
Re: [Mesa-dev] [PATCH 3/3] glsl: make loop unrolling more like the nir unrolling path
On 18/09/17 21:43, Timothy Arceri wrote: On 18/09/17 20:50, Nicolai Hähnle wrote: On 14.09.2017 06:47, Timothy Arceri wrote: The old code incorrectly assumed that loop terminators will always be at the start of the loop. It really seems to be just luck that we haven't triggered any bugs here, for example if there is a loop terminator at the start of the loop we might actually ignore any other terminators that might be later in the loop because we break before checking all the instructions. Ignoring the other terminators might result in unrolling loops that we shouldn't be, or the wrong number of iterations being calculated etc. Incorrect analysis can also result in loops not being unrolled at all. For example the current code would unroll: int j = 0; do { if (j > 5) break; ... do stuff ... j++; } while (j < 4); But would fail to unroll the following as no iteration limit was calculated because it failed to find the terminator: int j = 0; do { ... do stuff ... j++; } while (j < 4); Also we would fail to unroll the following as we ended up calculating the iteration limit as 6 rather than 4. The unroll code then assumed we had 3 terminators rather the 2 as it wasn't able to determine that "if (j > 5)" was redundant. int j = 0; do { if (j > 5) break; ... do stuff ... if (bool(i)) break; j++; } while (j < 4); This patch changes this pass to be more like the NIR unrolling pass. With this change we handle loop terminators correctly and also handle cases where the terminators have instructions in their branches other than a break. Some of that code is a bit tricky. Could we perhaps normalize the loop terminators such that the break is always in the then-branch? I.e. flip then- and else-branches if required. This would simplify a lot of the case distinction. Additionally, it's not clear to me what the value-add of the complex loop at the end of visit_leave is. Couldn't we just unconditionally do the splicing of terminators into a chain of if-statements, so that loop { ... 
if (cond) { ... break } ... if (cond) { ... break } ... } becomes loop { ... if (cond) { ... break } else { ... if (cond) { ... break } else { ... // splice subsequent unrolled iterations here } } } regardless of which condition is the limiting terminator? Unrolling chains the if-statements even further, and constant propagation should take care of the rest. We already rely on constant propagation for cleanups anyway. Ok I see where the confusion was now. I forgot to rename the bools from an earlier attempt at reworking this. Also because of the crazy way this code worked before I'd broken unrolling loops with breaks in the else branch so we never tried to unroll them and the code I had in complex unrolling for these was just plain wrong. I've fixed everything up and I'm testing a V2 now. Should have it sent out shortly. I'm not sure what exactly the point is you are trying to make. This is exactly what the code does. The difference between the simple unroll and complex unroll is 1. you need to splice one if inside the other 2. you need a placeholder so you can insert the subsequent iteration. These are the only differences between the simple unroll, but trying combine them would just make the code even harder to follow IMO. Some other, more minor comments, below. --- src/compiler/glsl/loop_analysis.cpp | 7 +- src/compiler/glsl/loop_unroll.cpp | 177 2 files changed, 141 insertions(+), 43 deletions(-) diff --git a/src/compiler/glsl/loop_analysis.cpp b/src/compiler/glsl/loop_analysis.cpp index 8a0425d185..53372183dd 100644 --- a/src/compiler/glsl/loop_analysis.cpp +++ b/src/compiler/glsl/loop_analysis.cpp @@ -324,22 +324,20 @@ loop_analysis::visit_leave(ir_loop *ir) foreach_in_list(ir_instruction, node, >body_instructions) { /* Skip over declarations at the start of a loop. 
*/ if (node->as_variable()) continue; ir_if *if_stmt = ((ir_instruction *) node)->as_if(); if ((if_stmt != NULL) && is_loop_terminator(if_stmt)) ls->insert(if_stmt); - else - break; } foreach_in_list_safe(loop_variable, lv, &ls->variables) { /* Move variables that are already marked as being loop constant to * a separate list. These trivially don't need to be tested. */ if (lv->is_loop_constant()) { lv->remove(); ls->constants.push_tail(lv); @@ -644,25 +642,22 @@ get_basic_induction_increment(ir_assignment *ir, hash_table *var_hash) /** * Detect whether an if-statement is a loop terminating condition * * Detects if-statements of the form * * (if
Re: [Mesa-dev] Meson mesademos (Was: [RFC libdrm 0/2] Replace the build system with meson)
On Thu, Sep 21, 2017 at 1:53 PM, Jose Fonseca wrote: > On 12/04/17 18:57, Nirbheek Chauhan wrote: >> >> Hi Jose, >> >> On Wed, Apr 12, 2017 at 11:08 PM, Jose Fonseca >> wrote: >>> >>> One newbie question: what's your workflow to update a wrap patch? Can we >>> prototype changes locally without tarballing the patch? >>> >> >> Any changes you make in subproject directories will be kept as-is; >> Meson only initializes them and will not try to update them (yet), so >> you can apply your patches to the subproject directory manually for >> testing. >> >> In the future, we want to improve this workflow, of course. >> > > Hi Dylan, > > FYI I didn't forget about this. But I have to say that having to maintain > and fix these wrap modules separately from git is simply a huge turn off > every time I think about resuming this. > > I think it was a mistake to try to make meson wrap modules for 3rd party > dependencies. Especially when these wrap modules involve glue that's not > tracked in git, so it can't be easily revved, or shared across the people > working on this. If we simply had a way to consume built binaries like we > can easily do with cmake, I'm confident it would have been trivial to get > this going by now. > > But as it stands I don't think I can overcome this wall. Honestly, I don't want > to have to deal with porting glew/freeglut to meson just to get mesademos with > meson, and I really shouldn't have to. I don't want to build the world just > for mesademos. > That is not a requirement in general, only a requirement if you want to use glew/freeglut as a subproject (which requires meson build files). Meson will be able to find libraries stored anywhere on the system as long as you either: a) Set the correct env variables for the compiler library and include paths[1], or b) Set the `dirs:` keyword argument to cc.find_library() and set the right include paths, or c) Set PKG_CONFIG_PATH if your libraries ship pkg-config files This is very similar to cmake, fwict. 
However, if this doesn't work for you, we'd love to talk about how we can improve things. For instance, there were these proposals: https://github.com/mesonbuild/meson/issues/1525 and https://github.com/mesonbuild/meson/issues/1524, but we didn't get any feedback on whether they would actually be useful in real-world usage. Meson is (IMO) unusual in the build systems world in that it's not a static unchangeable upstream (ala cmake/autotools/scons), but is a FOSS project that you can interact with, so please talk to us. :) The main advantage of adding a wrap patch for a project is to speed up (and make it easy to have) integration builds that build the entire stack, but the most common usage of Meson is to build against pre-existing binaries. We could probably make the experience better on Windows, but we need feedback to do that! Cheers, Nirbheek 1. `LIB`/`INCLUDE` with MSVC and `LIBRARY_PATH`/`C_INCLUDE_PATH` for gcc/clang ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Meson mesademos (Was: [RFC libdrm 0/2] Replace the build system with meson)
On 12/04/17 18:57, Nirbheek Chauhan wrote: Hi Jose, On Wed, Apr 12, 2017 at 11:08 PM, Jose Fonseca wrote: One newbie question: what's your workflow to update a wrap patch? Can we prototype changes locally without tarballing the patch? Any changes you make in subproject directories will be kept as-is; Meson only initializes them and will not try to update them (yet), so you can apply your patches to the subproject directory manually for testing. In the future, we want to improve this workflow, of course. Hi Dylan, FYI I didn't forget about this. But I have to say that having to maintain and fix these wrap modules separately from git is simply a huge turn off every time I think about resuming this. I think it was a mistake to try to make meson wrap modules for 3rd party dependencies. Especially when these wrap modules involve glue that's not tracked in git, so it can't be easily revved, or shared across the people working on this. If we simply had a way to consume built binaries like we can easily do with cmake, I'm confident it would have been trivial to get this going by now. But as it stands I don't think I can overcome this wall. Honestly, I don't want to have to deal with porting glew/freeglut to meson just to get mesademos with meson, and I really shouldn't have to. I don't want to build the world just for mesademos. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] amd/addrlib: fix missing va_end() after va_copy()
Hmm, just noticed the title should be fixed. Something like this? > amd/addrlib: drop unnecessary va_copy() On Wednesday, 2017-09-20 14:48:46 +, Nicolai Hähnle wrote: > From: Nicolai Hähnle > > There's no reason to use va_copy here. > > CID: 1418113 > --- > I have a slight preference for this variant. > -- > src/amd/addrlib/core/addrobject.cpp | 8 ++-- > 1 file changed, 2 insertions(+), 6 deletions(-) > > diff --git a/src/amd/addrlib/core/addrobject.cpp > b/src/amd/addrlib/core/addrobject.cpp > index dcdb1bffc2b..452feb5fac0 100644 > --- a/src/amd/addrlib/core/addrobject.cpp > +++ b/src/amd/addrlib/core/addrobject.cpp > @@ -209,29 +209,25 @@ VOID Object::operator delete( > > > */ > VOID Object::DebugPrint( > const CHAR* pDebugString, ///< [in] Debug string > ... > ) const > { > #if DEBUG > if (m_client.callbacks.debugPrint != NULL) > { > -va_list ap; > - > -va_start(ap, pDebugString); > - > ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; > > debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); > debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString); > debugPrintInput.hClient = m_client.handle; > -va_copy(debugPrintInput.ap, ap); > +va_start(debugPrintInput.ap, pDebugString); > > m_client.callbacks.debugPrint(&debugPrintInput); > > -va_end(ap); > +va_end(debugPrintInput.ap); > } > #endif > } > > } // Addr > -- > 2.11.0 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev