Alyssa Rosenzweig <aly...@rosenzweig.io> writes:

> This patch implements the free Midgard shader toolchain: the assembler,
> the disassembler, and the NIR-based compiler. The assembler is a
> standalone inaccessible Python script for reference purposes. The
> disassembler and the compiler are implemented in C, accessible via the
> standalone `midgard_compiler` binary. Later patches will use these
> interfaces from the driver for online compilation.
>
> Signed-off-by: Alyssa Rosenzweig <aly...@rosenzweig.io>
> ---


> +static void
> +emit_load_const(compiler_context *ctx, nir_load_const_instr *instr)
> +{
> +        nir_ssa_def def = instr->def;
> +
> +        float *v = ralloc_array(NULL, float, 4);
> +        memcpy(v, &instr->value.f32, 4 * sizeof(float));
> +        _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v);
> +}

Looks like you leak the constants?  You could pass ctx->ssa_constants
instead of NULL and the allocation would be automatically freed.

> +static unsigned
> +nir_src_index(nir_src *src)
> +{
> +        if (src->is_ssa)
> +                return src->ssa->index;
> +        else
> +                return 4096 + src->reg.reg->index;
> +}
> +
> +static unsigned
> +nir_dest_index(nir_dest *dst)
> +{
> +        if (dst->is_ssa)
> +                return dst->ssa.index;
> +        else
> +                return 4096 + dst->reg.reg->index;
> +}

Instead of hardcoding 4096, use impl->ssa_index?

> +#define ALU_CASE(_components, nir, _op) \
> +     case nir_op_##nir: \
> +             components = _components; \
> +             op = midgard_alu_op_##_op; \
> +             break;
> +
> +static void
> +emit_alu(compiler_context *ctx, nir_alu_instr *instr)
> +{
> +        bool is_ssa = instr->dest.dest.is_ssa;
> +
> +        unsigned dest = nir_dest_index(&instr->dest.dest);
> +        unsigned nr_components = is_ssa ? 
> instr->dest.dest.ssa.num_components : 
> instr->dest.dest.reg.reg->num_components;
> +
> +        /* Most Midgard ALU ops have a 1:1 correspondance to NIR ops; these 
> are
> +         * supported. A few do not and are commented for now. Also, there 
> are a
> +         * number of NIR ops which Midgard does not support and need to be
> +         * lowered, also TODO. This switch block emits the opcode and calling
> +         * convention of the Midgard instruction; actual packing is done in
> +         * emit_alu below */
> +
> +        unsigned op, components;
> +
> +        switch (instr->op) {
> +                ALU_CASE(2, fadd, fadd);
> +                ALU_CASE(2, fmul, fmul);
> +                ALU_CASE(2, fmin, fmin);
> +                ALU_CASE(2, fmax, fmax);
> +                ALU_CASE(2, imin, imin);
> +                ALU_CASE(2, imax, imax);
> +                ALU_CASE(1, fmov, fmov);
> +                ALU_CASE(0, ffloor, ffloor);
> +                ALU_CASE(0, fceil, fceil);
> +                ALU_CASE(2, fdot3, fdot3);
> +                //ALU_CASE(2, fdot3r);
> +                ALU_CASE(2, fdot4, fdot4);
> +                //ALU_CASE(2, freduce);
> +                ALU_CASE(2, iadd, iadd);
> +                ALU_CASE(2, isub, isub);
> +                ALU_CASE(2, imul, imul);
> +
> +                /* XXX: Use fmov, not imov, since imov was causing major
> +                 * issues with texture precision? XXX research */
> +                ALU_CASE(1, imov, fmov);
> +
> +                ALU_CASE(2, feq, feq);
> +                ALU_CASE(2, fne, fne);
> +                ALU_CASE(2, flt, flt);
> +                ALU_CASE(2, ieq, ieq);
> +                ALU_CASE(2, ine, ine);
> +                ALU_CASE(2, ilt, ilt);
> +                //ALU_CASE(2, icsel, icsel);
> +                ALU_CASE(0, frcp, frcp);
> +                ALU_CASE(0, frsq, frsqrt);
> +                ALU_CASE(0, fsqrt, fsqrt);
> +                ALU_CASE(0, fexp2, fexp2);
> +                ALU_CASE(0, flog2, flog2);
> +
> +                ALU_CASE(3, f2i32, f2i);
> +                ALU_CASE(3, f2u32, f2u);
> +                ALU_CASE(3, i2f32, i2f);
> +                ALU_CASE(3, u2f32, u2f);
> +
> +                ALU_CASE(0, fsin, fsin);
> +                ALU_CASE(0, fcos, fcos);
> +
> +                ALU_CASE(2, iand, iand);
> +                ALU_CASE(2, ior, ior);
> +                ALU_CASE(2, ixor, ixor);
> +                ALU_CASE(0, inot, inot);
> +                ALU_CASE(2, ishl, ishl);
> +                ALU_CASE(2, ishr, iasr);
> +                ALU_CASE(2, ushr, ilsr);
> +                //ALU_CASE(2, ilsr, ilsr);
> +
> +                ALU_CASE(2, ball_fequal4, fball_eq);
> +                ALU_CASE(2, bany_fnequal4, fbany_neq);
> +                ALU_CASE(2, ball_iequal4, iball_eq);
> +                ALU_CASE(2, bany_inequal4, ibany_neq);

Looks like instead of encoding components here you could use
nir_op_info->num_inputs?

> +        nir_foreach_variable(var, &nir->uniforms) {
> +                if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) 
> continue;
> +
> +                unsigned length = glsl_get_aoa_size(var->type);
> +
> +                if (!length) {
> +                        length = glsl_get_length(var->type);
> +                }
> +
> +                if (!length) {
> +                        length = glsl_get_matrix_columns(var->type);
> +                }

This seems suspicious -- I don't have anything like this for my uniforms.

> +        if (ctx->stage == MESA_SHADER_VERTEX) {
> +                ctx->varying_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
> +
> +                /* First, collect the special varyings */
> +                nir_foreach_variable(var, &nir->outputs) {
> +                        if (var->data.location == VARYING_SLOT_POS) {
> +                                /* Set position first, always. It takes up 
> two
> +                                 * spots, the latter one is de facto unused 
> (at
> +                                 * least from the shader's perspective), we
> +                                 * just need to skip over the spot*/
> +
> +                                
> _mesa_hash_table_u64_insert(ctx->varying_nir_to_mdg, 
> var->data.driver_location + 1, (void *) ((uintptr_t) (0 + 1)));
> +                                ctx->varying_count = 
> MAX2(ctx->varying_count, 2);
> +                        } else if (var->data.location == VARYING_SLOT_PSIZ) {
> +                                /* Set point size second (third, see above) 
> */
> +                                
> _mesa_hash_table_u64_insert(ctx->varying_nir_to_mdg, 
> var->data.driver_location + 1, (void *) ((uintptr_t) (2 + 1)));
> +                                ctx->varying_count = 
> MAX2(ctx->varying_count, 3);
> +
> +                                program->writes_point_size = true;
> +                        }
> +                }

Using info.outputs_written might be nicer here.

> +
> +        /* Lower vars -- not I/O -- before epilogue */
> +
> +        NIR_PASS_V(nir, nir_lower_var_copies);
> +        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
> +        NIR_PASS_V(nir, nir_split_var_copies);
> +        NIR_PASS_V(nir, nir_lower_var_copies);
> +        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
> +        NIR_PASS_V(nir, nir_lower_var_copies);
> +        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
> +        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);

I'm skeptical that this many lower_var_copies() is needed :)

> diff --git a/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py 
> b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py
> new file mode 100644
> index 0000000000..44441727b7
> --- /dev/null
> +++ b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py

> +algebraic = [
> +    (('b2i32', a), ('iand@32', "a@32", 1)),
> +    (('isign', a), ('imin', ('imax', a, -1), 1)),

I need to steal your isign.

> +    (('fge', a, b), ('flt', b, a)),
> +
> +    # XXX: We have hw ops for this, just unknown atm..
> +    #(('fsign@32', a), ('i2f32@32', ('isign', ('f2i32@32', ('fmul', a, 
> 0x43800000)))))
> +    #(('fsign', a), ('fcsel', ('fge', a, 0), 1.0, ('fcsel', ('flt', a, 0.0), 
> -1.0, 0.0)))
> +    (('fsign', a), ('bcsel', ('fge', a, 0), 1.0, -1.0)),

Looks like your fsign never returns 0.0 like it should?


All of this is suggestions for future work.  I'm mostly glad to see the
driver coming into the tree at last.  Both patches are:

Acked-by: Eric Anholt <e...@anholt.net>

Attachment: signature.asc
Description: PGP signature

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to