Instead of walking the attribute descriptions and filling out the vertex
element packets in random order, we now do a pre-pass where we build an
array of descriptions and then walk the vertex elements in order.
This has the advantage of making the actual element setup clearer
because it's now three cases: 64-bit dummy, real attribute, and empty
element.  This also has the advantage that we no longer need to look at
double_inputs from the NIR shader and can base our element skipping on
the VkFormat in the attribute, which should be more robust.
---
 src/intel/vulkan/genX_pipeline.c | 178 +++++++++++++++++--------------
 1 file changed, 99 insertions(+), 79 deletions(-)

diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 297e9455b51..d3c77ecb89b 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -83,6 +83,8 @@ vertex_element_comp_control(enum isl_format format, unsigned comp)
    }
 }
 
+#define DUMMY_64BIT_ATTRIB ((const VkVertexInputAttributeDescription *)1)
+
 static void
 emit_vertex_input(struct anv_pipeline *pipeline,
                   const VkPipelineVertexInputStateCreateInfo *info)
@@ -91,18 +93,40 @@ emit_vertex_input(struct anv_pipeline *pipeline,
 
    /* Pull inputs_read out of the VS prog data */
    const uint64_t inputs_read = vs_prog_data->inputs_read;
-   const uint64_t double_inputs_read =
-      vs_prog_data->double_inputs_read & inputs_read;
    assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
    const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
-   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
+   const VkVertexInputAttributeDescription *attribs[MAX_VBS] = { };
+
+   uint32_t elem_count = __builtin_popcount(elements);
+   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *desc =
+         &info->pVertexAttributeDescriptions[i];
+      if ((elements & (1 << desc->location)) == 0)
+         continue; /* Binding unused */
+
+      /* Record the description for the second pass. */
+      attribs[desc->location] = desc;
+
+      enum isl_format format = anv_get_isl_format(&pipeline->device->info,
+                                                  desc->format,
+                                                  VK_IMAGE_ASPECT_COLOR_BIT,
+                                                  VK_IMAGE_TILING_LINEAR);
+
+      /* 64-bit 3 and 4-channel formats consume two slots in the shader but
+       * are only one VERTEX_ELEMENT_STATE.
+       */
+      if (isl_format_layouts[format].channels.b.bits == 64) {
+         assert(elements & (1 << (desc->location + 1)));
+         attribs[desc->location + 1] = DUMMY_64BIT_ATTRIB;
+         elem_count--;
+      }
+   }
+
    const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                 vs_prog_data->uses_instanceid ||
                                 vs_prog_data->uses_firstvertex ||
                                 vs_prog_data->uses_baseinstance;
 
-   uint32_t elem_count = __builtin_popcount(elements) -
-      __builtin_popcount(elements_double) / 2;
 
    const uint32_t total_elems =
       elem_count + needs_svgs_elem + vs_prog_data->uses_drawid;
@@ -117,80 +141,75 @@ emit_vertex_input(struct anv_pipeline *pipeline,
    if (!p)
       return;
 
-   for (uint32_t i = 0; i < total_elems; i++) {
-      /* The SKL docs for VERTEX_ELEMENT_STATE say:
-       *
-       *    "All elements must be valid from Element[0] to the last valid
-       *    element. (I.e. if Element[2] is valid then Element[1] and
-       *    Element[0] must also be valid)."
-       *
-       * The SKL docs for 3D_Vertex_Component_Control say:
-       *
-       *    "Don't store this component. (Not valid for Component 0, but can
-       *    be used for Component 1-3)."
-       *
-       * So we can't just leave a vertex element blank and hope for the best.
-       * We have to tell the VF hardware to put something in it; so we just
-       * store a bunch of zero.
-       *
-       * TODO: Compact vertex elements so we never end up with holes.
-       */
-      struct GENX(VERTEX_ELEMENT_STATE) element = {
-         .Valid = true,
-         .Component0Control = VFCOMP_STORE_0,
-         .Component1Control = VFCOMP_STORE_0,
-         .Component2Control = VFCOMP_STORE_0,
-         .Component3Control = VFCOMP_STORE_0,
-      };
-      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
-   }
-
-   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
-      const VkVertexInputAttributeDescription *desc =
-         &info->pVertexAttributeDescriptions[i];
-      enum isl_format format = anv_get_isl_format(&pipeline->device->info,
-                                                  desc->format,
-                                                  VK_IMAGE_ASPECT_COLOR_BIT,
-                                                  VK_IMAGE_TILING_LINEAR);
-
-      assert(desc->binding < MAX_VBS);
-
-      if ((elements & (1 << desc->location)) == 0)
-         continue; /* Binding unused */
-
-      uint32_t slot =
-         __builtin_popcount(elements & ((1 << desc->location) - 1)) -
-         DIV_ROUND_UP(__builtin_popcount(elements_double &
-                                        ((1 << desc->location) -1)), 2);
+   uint32_t loc, slot = 0;
+   for_each_bit(loc, elements) {
+      /* The previous VERTEX_ELEMENT_STATE takes two slots */
+      if (attribs[loc] == DUMMY_64BIT_ATTRIB)
+         continue;
 
-      struct GENX(VERTEX_ELEMENT_STATE) element = {
-         .VertexBufferIndex = desc->binding,
-         .Valid = true,
-         .SourceElementFormat = format,
-         .EdgeFlagEnable = false,
-         .SourceElementOffset = desc->offset,
-         .Component0Control = vertex_element_comp_control(format, 0),
-         .Component1Control = vertex_element_comp_control(format, 1),
-         .Component2Control = vertex_element_comp_control(format, 2),
-         .Component3Control = vertex_element_comp_control(format, 3),
-      };
-      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
+      struct GENX(VERTEX_ELEMENT_STATE) element;
+      if (attribs[loc]) {
+         const VkVertexInputAttributeDescription *desc = attribs[loc];
+         enum isl_format format = anv_get_isl_format(&pipeline->device->info,
+                                                     desc->format,
+                                                     VK_IMAGE_ASPECT_COLOR_BIT,
+                                                     VK_IMAGE_TILING_LINEAR);
+         element = (struct GENX(VERTEX_ELEMENT_STATE)) {
+            .VertexBufferIndex = desc->binding,
+            .Valid = true,
+            .SourceElementFormat = format,
+            .EdgeFlagEnable = false,
+            .SourceElementOffset = desc->offset,
+            .Component0Control = vertex_element_comp_control(format, 0),
+            .Component1Control = vertex_element_comp_control(format, 1),
+            .Component2Control = vertex_element_comp_control(format, 2),
+            .Component3Control = vertex_element_comp_control(format, 3),
+         };
 
 #if GEN_GEN >= 8
-      /* On Broadwell and later, we have a separate VF_INSTANCING packet
-       * that controls instancing.  On Haswell and prior, that's part of
-       * VERTEX_BUFFER_STATE which we emit later.
-       */
-      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
-         vfi.InstancingEnable = pipeline->vb[desc->binding].instanced;
-         vfi.VertexElementIndex = slot;
-         vfi.InstanceDataStepRate =
-            pipeline->vb[desc->binding].instance_divisor;
-      }
+         /* On Broadwell and later, we have a separate VF_INSTANCING packet
+          * that controls instancing.  On Haswell and prior, that's part of
+          * VERTEX_BUFFER_STATE which we emit later.
+          */
+         anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
+            vfi.InstancingEnable = pipeline->vb[desc->binding].instanced;
+            vfi.VertexElementIndex = slot;
+            vfi.InstanceDataStepRate =
+               pipeline->vb[desc->binding].instance_divisor;
+         }
 #endif
+      } else {
+         /* The SKL docs for VERTEX_ELEMENT_STATE say:
+          *
+          *    "All elements must be valid from Element[0] to the last valid
+          *    element. (I.e. if Element[2] is valid then Element[1] and
+          *    Element[0] must also be valid)."
+          *
+          * The SKL docs for 3D_Vertex_Component_Control say:
+          *
+          *    "Don't store this component. (Not valid for Component 0, but can
+          *    be used for Component 1-3)."
+          *
+          * So we can't just leave a vertex element blank and hope for the best.
+          * We have to tell the VF hardware to put something in it; so we just
+          * store a bunch of zero.
+          *
+          * TODO: Compact vertex elements so we never end up with holes.
+          */
+         element = (struct GENX(VERTEX_ELEMENT_STATE)) {
+            .Valid = true,
+            .Component0Control = VFCOMP_STORE_0,
+            .Component1Control = VFCOMP_STORE_0,
+            .Component2Control = VFCOMP_STORE_0,
+            .Component3Control = VFCOMP_STORE_0,
+         };
+      }
+      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
+      slot++;
    }
+   assert(slot == elem_count);
 
-   const uint32_t id_slot = elem_count;
+   const uint32_t id_slot = slot;
    if (needs_svgs_elem) {
       /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
        *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
@@ -219,7 +238,8 @@ emit_vertex_input(struct anv_pipeline *pipeline,
          .Component3Control = VFCOMP_STORE_IID,
 #endif
       };
-      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
+      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
+      slot++;
    }
 
 #if GEN_GEN >= 8
@@ -233,7 +253,6 @@ emit_vertex_input(struct anv_pipeline *pipeline,
    }
 #endif
 
-   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
    if (vs_prog_data->uses_drawid) {
       struct GENX(VERTEX_ELEMENT_STATE) element = {
          .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
@@ -244,16 +263,17 @@ emit_vertex_input(struct anv_pipeline *pipeline,
          .Component2Control = VFCOMP_STORE_0,
          .Component3Control = VFCOMP_STORE_0,
       };
-      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
-                                      &p[1 + drawid_slot * 2],
-                                      &element);
+      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
 
 #if GEN_GEN >= 8
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
-         vfi.VertexElementIndex = drawid_slot;
+         vfi.VertexElementIndex = slot;
       }
 #endif
+
+      slot++;
    }
+   assert(slot == total_elems);
 }
 
 void
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to