On Thu, Jun 05, 2014 at 07:56:15PM -0400, David Turner wrote:
> Optimize check_refname_component using SSE4.2, where available.
> 
> git rev-parse HEAD is a good test-case for this, since it does almost
> nothing except parse refs.  For one particular repo with about 60k
> refs, almost all packed, the timings are:
> 
> Look up table: 29 ms
> SSE4.2:        25 ms
> 
> This is about a 15% improvement.
> 
> The configure.ac changes include code from the GNU C Library written
> by Joseph S. Myers <joseph at codesourcery dot com>.
> 
> Only supports GCC and Clang at present, because C interfaces to the
> cpuid instruction are not well-standardized.
>
Still, SSE4.2 is not that useful; in most cases SSE2 is faster. Here I
think the difference will not be that big when it is correctly
implemented. Using SSE2 would also avoid the runtime checks.

For parallelisation you need to take an extra step and parallelize the
whole check rather than going component-by-component.

For detecting sequences, a faster way is to construct bitmasks with SSE2
so that you can combine them. This avoids the need for special-casing on
16-byte boundaries.

Below is an untested implementation, to which you could add a
bad-character check with SSE4.2 to speed it up. Are refs mostly
alphanumeric? If so, we could speed this up with a parallelized alnum
check, handling other characters in a slower path.


#include <stdint.h>
#include <emmintrin.h>

/* Lookup table indexed by byte value; a non-zero entry marks a rejected byte. */
char bad_table[256]; // TODO: entries are not filled in yet

/*
 * Scan the NUL-terminated byte string x against bad_table.
 *
 * Returns -1 as soon as a byte with a non-zero table entry is seen,
 * or 0 if the terminator is reached first.
 */
int bad_characters(unsigned char *x)
{
        unsigned char *cursor;

        for (cursor = x; *cursor != '\0'; cursor++) {
                if (bad_table[*cursor] != 0)
                        return -1;
        }

        return 0;
}

/*
 * Slow-path checker used by check_refname_skeleton(). It is not defined in
 * this snippet; the prototype is inferred from the call sites (called with a
 * char pointer, result compared against -2 and otherwise returned as int).
 * NOTE(review): presumably it examines the 16-byte chunk starting at x and
 * returns -2 for "no verdict, keep scanning" -- confirm against the real
 * definition.
 */
int bytewise_check(char *x);

/*
 * Scan the NUL-terminated string x, 16 bytes at a time, for the two-byte
 * sequences "..", "/.", "//", ".l" and "@{".
 *
 * Returns -1 if bad_characters() rejects the string or if one of the
 * sequences "..", "/.", "//" or "@{" is found before the terminator,
 * 0 if the terminator is reached first, or whatever bytewise_check()
 * returns when it is consulted and yields something other than -2.
 */
int check_refname_skeleton(char *x)
{
        enum { page_bytes = 4096, load_span = 17 };

        /* bad_characters() takes unsigned bytes; the cast only changes signedness. */
        if (bad_characters((unsigned char *) x))
                return -1;

        const __m128i zero   = _mm_setzero_si128();
        const __m128i slash  = _mm_set1_epi8('/');
        const __m128i dot    = _mm_set1_epi8('.');
        const __m128i char_l = _mm_set1_epi8('l');
        const __m128i at     = _mm_set1_epi8('@');
        const __m128i brace  = _mm_set1_epi8('{');

        while (1) {
                /*
                 * The vector path below reads 17 bytes (x .. x + 16). When
                 * fewer than 17 bytes remain in the current 4096-byte page,
                 * that read would spill into the next page, so hand the
                 * chunk to the bytewise path instead.
                 */
                if ((((uintptr_t) x) & (page_bytes - 1)) > page_bytes - load_span) {
                        int verdict = bytewise_check(x);

                        if (verdict != -2)
                                return verdict;

                        x += 16;
                        continue;
                }

                /* v1 is v0 shifted by one byte, so lane i pairs x[i] with x[i + 1]. */
                __m128i v0 = _mm_loadu_si128((__m128i *) x);
                __m128i v1 = _mm_loadu_si128((__m128i *) (x + 1));

                __m128i v0_dot   = _mm_cmpeq_epi8(v0, dot);
                __m128i v0_slash = _mm_cmpeq_epi8(v0, slash);
                __m128i v1_dot   = _mm_cmpeq_epi8(v1, dot);

                /* terminating 0 */
                __m128i result = _mm_cmpeq_epi8(v0, zero);

                /* sequence .. */
                result = _mm_or_si128(result, _mm_and_si128(v0_dot, v1_dot));

                /* sequence /. */
                result = _mm_or_si128(result, _mm_and_si128(v0_slash, v1_dot));

                /* sequence // */
                result = _mm_or_si128(result,
                                      _mm_and_si128(v0_slash,
                                                    _mm_cmpeq_epi8(v1, slash)));

                /* sequence .l */
                result = _mm_or_si128(result,
                                      _mm_and_si128(v0_dot,
                                                    _mm_cmpeq_epi8(v1, char_l)));

                /* sequence @{ */
                result = _mm_or_si128(result,
                                      _mm_and_si128(_mm_cmpeq_epi8(v0, at),
                                                    _mm_cmpeq_epi8(v1, brace)));

                /* One bit per lane; only the low 16 bits can be set. */
                unsigned int mask = (unsigned int) _mm_movemask_epi8(result);

                if (mask) {
                        /* Lowest set bit is the first interesting byte in this chunk. */
                        char *p = x + __builtin_ctz(mask);

                        if (!*p)
                                return 0;

                        if (p[0] == '.' && p[1] == 'l') {
                                /*
                                 * ".l" alone is not rejected here; the slow
                                 * path decides. NOTE(review): presumably this
                                 * is the start of a ".lock" test -- confirm.
                                 */
                                int verdict = bytewise_check(x);

                                if (verdict != -2)
                                        return verdict;
                        } else {
                                /* "..", "/.", "//" or "@{" */
                                return -1;
                        }
                }

                x += 16;
        }
}
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to