On 5/17/19 9:47 AM, Richard Henderson wrote:
>     first_equal = n;
>     first_zero = n;
>     for (i = n - 1; i >= 0; --i) {
>         if (data1 == data2) {
>             first_equal = i;
>         }
>         if (data1 == 0) {
>             first_zero = i;
>         }
>     }
> 
> // As an aside, there are bit tricks for the above,
> // but let's stay simple(r) for now.

What the hell, it's not /that/ tricky.


/*
 * Zero-element detector.
 *
 * M selects the element size: it has every bit set except the most
 * significant bit of each element (e.g. 0x7f replicated for bytes).
 * The result has the MSB of an element set when that element of A is
 * zero; every other bit of the result is clear.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t m)
{
    /* Adding M carries into an element's MSB iff any of its low bits is set. */
    uint64_t low_bits_set = (a & m) + m;
    /* Fold in each element's own MSB, then force all non-MSB bits to one. */
    uint64_t nonzero = low_bits_set | a | m;

    /* After inversion only the MSBs of all-zero elements survive. */
    return ~nonzero;
}

/*
 * Translates a pair of match masks into a byte offset.
 *
 * C0 covers the first eight bytes and C1 the second eight. The offset of
 * the first set bit, counted from the most significant bit of C0, is
 * converted to a byte index. When neither mask has a bit set the result
 * is 16; this assumes clz64(0) yields 64.
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    int bit_pos;

    if (c0) {
        bit_pos = clz64(c0);
    } else {
        /* No hit in the first half: continue counting past its 64 bits. */
        bit_pos = clz64(c1) + 64;
    }

    /* Eight bits per byte. */
    return bit_pos >> 3;
}

Use

  dup_const(MO_8, 0x7f)
  dup_const(MO_16, 0x7fff)
  dup_const(MO_32, 0x7fffffff)

for the M parameter for the different element sizes.

    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = zero_search(a0 ^ b0, m);
    e1 = zero_search(a1 ^ b1, m);
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, m);
        z1 = zero_search(a1, m);
        first_zero = match_index(z0, z1);
    ...


r~

Reply via email to