Signed-off-by: Rhys Perry <pendingchao...@gmail.com> --- src/gallium/drivers/nouveau/nvc0/mme/com9097.mme | 91 ++++++++++++---------- src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 64 ++++++++------- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 81 ++++++++++--------- 3 files changed, 133 insertions(+), 103 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme index 38c2e86843..0e5ad66f56 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme @@ -494,62 +494,75 @@ daic_runout_check: /* NVC0_3D_MACRO_QUERY_BUFFER_WRITE: * - * This is a combination macro for all of our query buffer object needs. - * It has the option to clamp results to a configurable amount, as well as + * This macro writes out a query's result into a resource. + * It has the options to clamp the result to a configurable amount and * to write out one or two words. * * We use the query engine to write out the values, and expect the query * address to point to the right place. * - * arg = clamp value (0 means unclamped). clamped means just 1 written value. - * parm[0] = LSB of end value - * parm[1] = MSB of end value - * parm[2] = LSB of start value - * parm[3] = MSB of start value - * parm[4] = desired sequence - * parm[5] = actual sequence - * parm[6] = query high address + * Also note that although the result availability is determined at the start, + * the macro only exits if the result is unavailable right before clamping. 
+ * + * arg = write64 | (clamp<<1) + * parm[0] = desired sequence + * parm[1] = actual sequence + * parm[2] = LSB of end value + * parm[3] = MSB of end value + * parm[4] = LSB of start value + * parm[5] = MSB of start value + * parm[6] = clamp value * parm[7] = query low address + * parm[8] = query high address */ .section #mme9097_query_buffer_write +/* determine result availability */ + parm $r2 + parm $r3 + mov $r6 (sub $r3 $r2) + mov $r6 (sbb 0x0 0x0) +/* calculate result and write high into $r3 and low into $r2 */ parm $r2 parm $r3 parm $r4 - parm $r5 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */ - parm $r6 - parm $r7 - mov $r6 (sub $r7 $r6) /* actual - desired */ - mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */ - parm $r7 - exit braz $r6 #qbw_ready - parm $r6 -qbw_ready: + parm $r5 mov $r2 (sub $r2 $r4) - braz $r1 #qbw_postclamp mov $r3 (sbb $r3 $r5) - branz annul $r3 #qbw_clamp - mov $r4 (sub $r1 $r2) - mov $r4 (sbb 0x0 0x0) - braz annul $r4 #qbw_postclamp -qbw_clamp: - mov $r2 $r1 -qbw_postclamp: - send $r7 - send $r6 + braz $r6 #qbw_available + parm $r4 /* clamp value */ + exit parm $r7 /* result not available - drain remaining parameters and exit */ + parm $r7 +qbw_available: + mov $r6 (extrinsrt 0x0 $r1 1 1 0) + braz annul $r6 #qbw_write + branz $r3 #qbw_doclamp /* clamp if the high word is set */ + mov $r7 (sub $r4 $r2) + mov $r7 (sbb 0x0 0x0) + braz annul $r7 #qbw_write +qbw_doclamp: + mov $r2 $r4 + mov $r3 0x0 +qbw_write: + parm $r5 + parm $r4 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */ + send $r4 + send $r5 send $r2 - branz $r1 #qbw_done - mov $r4 0x1000 - send (extrinsrt 0x0 $r4 0x0 0x10 0x10) + mov $r6 (extrinsrt 0x0 $r1 0 1 0) + braz $r6 #qbw_done + mov $r7 0x1000 + send (extrinsrt 0x0 $r7 0 16 16) + /* XXX: things seem to mess up if $r6 is replaced with 0x4 in the add */ + mov $r6 0x4 + mov $r5 (add $r5 $r6) + mov $r4 (adc $r4 0x0) maddr 0x16c0 /* QUERY_ADDRESS_HIGH */ - mov $r5 0x4 - mov $r6 (add $r6 $r5) - mov $r7 (adc $r7 0x0) - send 
$r7 - send $r6 + send $r4 + send $r5 send $r3 qbw_done: - exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10) - maddrsend 0x44 + exit send (extrinsrt 0x0 $r7 0 16 16) + maddrsend 0x44 /* SERIALIZE */ /* NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE: * diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h index 49c0891114..3ebfda47ee 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h @@ -336,41 +336,47 @@ uint32_t mme9097_draw_arrays_indirect_count[] = { uint32_t mme9097_query_buffer_write[] = { 0x00000201, 0x00000301, -/* 0x000b: qbw_ready */ - 0x00000401, - 0x05b00551, -/* 0x0012: qbw_clamp */ -/* 0x0013: qbw_postclamp */ - 0x00000601, - 0x00000701, - 0x0005be10, + 0x00049e10, +/* 0x000e: qbw_available */ 0x00060610, -/* 0x0020: qbw_done */ - 0x00000701, - 0x0000b087, - 0x00000601, + 0x00000201, +/* 0x0014: qbw_doclamp */ +/* 0x0016: qbw_write */ + 0x00000301, + 0x00000401, + 0x00000501, 0x00051210, - 0x0001c807, +/* 0x0026: qbw_done */ 0x00075b10, - 0x00011837, - 0x00048c10, - 0x00060410, - 0x0000a027, - 0x00000a11, - 0x00003841, - 0x00003041, + 0x00013007, + 0x00000401, + 0x00000781, + 0x00000701, + 0x00424612, + 0x0001f027, + 0x00011817, + 0x0004a710, + 0x00060710, + 0x0000f827, + 0x00002211, + 0x00000311, + 0x00000501, + 0x05b00451, + 0x00002041, + 0x00002841, 0x00001041, - 0x00028817, - 0x04000411, - 0x84010042, + 0x00404612, + 0x0002b007, + 0x04000711, + 0x8401c042, + 0x00010611, + 0x0001ad10, + 0x00022410, 0x05b00021, - 0x00010511, - 0x00017610, - 0x00023f10, - 0x00003841, - 0x00003041, + 0x00002041, + 0x00002841, 0x00001841, - 0x840100c2, + 0x8401c0c2, 0x00110071, }; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index db5f5092ba..835742bbc6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -381,6 +381,8 
@@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, struct nvc0_hw_query *hq = nvc0_hw_query(q); struct nv04_resource *buf = nv04_resource(resource); unsigned qoffset = 0, stride; + bool predicate = false; + uint32_t arg; assert(!hq->funcs || !hq->funcs->get_query_result); @@ -401,18 +403,27 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, return; } + switch (q->type) { + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + predicate = true; + break; + } + + arg = result_type >= PIPE_QUERY_TYPE_I64 ? 1 : 0; + /* Only clamp if the output is 32-bit or a predicate, we don't bother + * clamping 64-bit outputs */ + if ((result_type<PIPE_QUERY_TYPE_I64 || predicate) && index!=-1) + arg |= 1 << 1; + /* If the fence guarding this query has not been emitted, that makes a lot * of the following logic more complicated. */ if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED) nouveau_fence_emit(hq->fence); - /* We either need to compute a 32- or 64-bit difference between 2 values, - * and then store the result as either a 32- or 64-bit value. As such let's - * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit - * ones), and have one macro that clamps result to i32, u32, or just - * outputs the difference (no need to worry about 64-bit clamping). - */ if (hq->state != NVC0_HW_QUERY_STATE_READY) nvc0_hw_query_update(nvc0->screen->base.client, q); @@ -425,22 +436,20 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, nouveau_pushbuf_space(push, 32, 2, 0); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR); - BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 9); - switch (q->type) { - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* XXX what if 64-bit? 
*/ - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - PUSH_DATA(push, 0x00000001); - break; - default: - if (result_type == PIPE_QUERY_TYPE_I32) - PUSH_DATA(push, 0x7fffffff); - else if (result_type == PIPE_QUERY_TYPE_U32) - PUSH_DATA(push, 0xffffffff); - else - PUSH_DATA(push, 0x00000000); - break; + BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 10); + PUSH_DATA(push, arg); + + if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) { + PUSH_DATA(push, 0); + PUSH_DATA(push, 0); + } else if (hq->is64bit) { + PUSH_DATA(push, hq->fence->sequence); + nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + } else { + PUSH_DATA(push, hq->sequence); + nouveau_pushbuf_data(push, hq->bo, hq->offset, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); } switch (q->type) { @@ -460,6 +469,11 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, break; } + /* We need to compute the difference between 2 values, and then store the + * result as either a 32- or 64-bit value. As such let's treat all inputs + * as 64-bit (and just push an extra 0 for the 32-bit ones), and clamp + * the result to a limit if it's 32-bit or a predicate. 
+ */ if (hq->is64bit || qoffset) { nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset + 16 * index, 8 | NVC0_IB_ENTRY_1_NO_PREFETCH); @@ -480,20 +494,17 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, PUSH_DATA(push, 0); } - if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) { - PUSH_DATA(push, 0); - PUSH_DATA(push, 0); - } else if (hq->is64bit) { - PUSH_DATA(push, hq->fence->sequence); - nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0, - 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); - } else { - PUSH_DATA(push, hq->sequence); - nouveau_pushbuf_data(push, hq->bo, hq->offset, - 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); - } - PUSH_DATAh(push, buf->address + offset); + if (predicate) + PUSH_DATA(push, 0x00000001); + else if (result_type == PIPE_QUERY_TYPE_I32) + PUSH_DATA(push, 0x7fffffff); + else if (result_type == PIPE_QUERY_TYPE_U32) + PUSH_DATA(push, 0xffffffff); + else + PUSH_DATA(push, 0x00000000); + PUSH_DATA (push, buf->address + offset); + PUSH_DATAh(push, buf->address + offset); util_range_add(&buf->valid_buffer_range, offset, offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4)); -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev