On 3/12/20 7:58 AM, LIU Zhiwei wrote:
> +target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
> +        uint32_t desc)
> +{
> +    target_ulong cnt = 0;
> +    uint32_t mlen = vext_mlen(desc);
> +    uint32_t vm = vext_vm(desc);
> +    uint32_t vl = env->vl;
> +    int i;
> +
> +    for (i = 0; i < vl; i++) {
> +        if (vm || vext_elem_mask(v0, mlen, i)) {
> +            if (vext_elem_mask(vs2, mlen, i)) {
> +                cnt++;
> +            }
> +        }
> +    }
> +    return cnt;
> +}

This is ok as-is, so
Reviewed-by: Richard Henderson <richard.hender...@linaro.org>

But you can do better.

You could create an array, similar to Arm's pred_esz_masks[],
indexed by log2(mlen).
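
Something like this, assuming mlen spans 1..64 and the mask bit
for element i lives at bit i*mlen (a sketch; the values below are
derived from that layout, not taken from the patch):

    /* Indexed by log2(mlen); one bit set per mlen-bit mask
     * element, for mlen = 1, 2, 4, 8, 16, 32, 64.
     */
    static const uint64_t pred_mlen_masks[7] = {
        0xffffffffffffffffull, 0x5555555555555555ull,
        0x1111111111111111ull, 0x0101010101010101ull,
        0x0001000100010001ull, 0x0000000100000001ull,
        0x0000000000000001ull,
    };

Then the helper body can consume the mask 64 bits at a time: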

    mask = pred_mlen_masks[log2_mlen];
    /* n whole 64-bit words of mask bits, plus r leftover elements.
     * Plain masking avoids extract32 with zero length for mlen == 64.
     */
    n = vl >> (6 - log2_mlen);
    r = vl & ((64 >> log2_mlen) - 1);
    if (r) {
        /* Restrict the mask to the leftover elements.  */
        rmask = extract64(mask, 0, r << log2_mlen);
    } else {
        rmask = 0;
    }

    if (vm) {
        /* Unmasked: count the active bits of vs2 directly.  */
        for (i = 0; i < n; i++) {
            uint64_t j = ((uint64_t *)vs2)[i];
            cnt += ctpop64(j & mask);
        }
        if (rmask) {
            /* i == n here: the final, partial word.  */
            uint64_t j = ((uint64_t *)vs2)[i];
            cnt += ctpop64(j & rmask);
        }
    } else {
        /* Masked: AND in the v0 predicate word as well.  */
        for (i = 0; i < n; i++) {
            uint64_t j = ((uint64_t *)vs2)[i];
            uint64_t k = ((uint64_t *)v0)[i];
            cnt += ctpop64(j & k & mask);
        }
        if (rmask) {
            uint64_t j = ((uint64_t *)vs2)[i];
            uint64_t k = ((uint64_t *)v0)[i];
            cnt += ctpop64(j & k & rmask);
        }
    }
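
The payoff is one ctpop64 per 64 mask bits instead of two tests
per element.  For the sketch to compile it needs the usual setup;
a guess, reusing the patch's own helpers (ctz32 and ctpop64 are
from qemu/host-utils.h, extract64 from qemu/bitops.h; ctz32 works
here because mlen is a power of two):

    target_ulong cnt = 0;
    uint32_t log2_mlen = ctz32(vext_mlen(desc));
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint64_t mask, rmask;
    uint32_t n, r;
    int i;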


r~
