Hi,

On 2022-07-06 15:58:44 +0700, John Naylor wrote:
> From 82e13b6bebd85a152ededcfd75495c0c0f642354 Mon Sep 17 00:00:00 2001
> From: John Naylor <john.nay...@postgresql.org>
> Date: Wed, 6 Jul 2022 15:50:09 +0700
> Subject: [PATCH v4 4/4] Use vectorized lookahead in json_lex_string on x86
> 
> ---
>  src/common/jsonapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 48 insertions(+)
> 
> diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
> index 81e176ad8d..44e8ed2b2f 100644
> --- a/src/common/jsonapi.c
> +++ b/src/common/jsonapi.c
> @@ -24,6 +24,12 @@
>  #include "miscadmin.h"
>  #endif
>  
> +/*  WIP: put somewhere sensible and consider removing CRC from the names */
> +#if (defined (__x86_64__) || defined(_M_AMD64)) && 
> (defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK))
> +#include <nmmintrin.h>
> +#define USE_SSE2
> +#endif
> +
>  /*
>   * The context of the parser is maintained by the recursive descent
>   * mechanism, but is passed explicitly to the error reporting routine
> @@ -842,12 +848,54 @@ json_lex_string(JsonLexContext *lex)
>               }
>               else
>               {
> +#ifdef USE_SSE2
> +                     __m128i         block,
> +                                             has_backslash,
> +                                             has_doublequote,
> +                                             control,
> +                                             has_control,
> +                                             error_cum = _mm_setzero_si128();
> +                     const           __m128i backslash = _mm_set1_epi8('\\');
> +                     const           __m128i doublequote = 
> _mm_set1_epi8('"');
> +                     const           __m128i max_control = 
> _mm_set1_epi8(0x1F);
> +#endif
>                       /* start lookahead at current byte */
>                       char       *p = s;
>  
>                       if (hi_surrogate != -1)
>                               return JSON_UNICODE_LOW_SURROGATE;
>  
> +#ifdef USE_SSE2
> +                     while (p < end - sizeof(__m128i))
> +                     {
> +                             block = _mm_loadu_si128((const __m128i *) p);
> +
> +                             /* direct comparison to quotes and backslashes 
> */
> +                             has_backslash = _mm_cmpeq_epi8(block, 
> backslash);
> +                             has_doublequote = _mm_cmpeq_epi8(block, 
> doublequote);
> +
> +                             /*
> +                              * use saturation arithmetic to check for <= 
> highest control
> +                              * char
> +                              */
> +                             control = _mm_subs_epu8(block, max_control);
> +                             has_control = _mm_cmpeq_epi8(control, 
> _mm_setzero_si128());
> +
> +                             /*
> +                              * set bits in error_cum where the 
> corresponding lanes in has_*
> +                              * are set
> +                              */
> +                             error_cum = _mm_or_si128(error_cum, 
> has_backslash);
> +                             error_cum = _mm_or_si128(error_cum, 
> has_doublequote);
> +                             error_cum = _mm_or_si128(error_cum, 
> has_control);
> +
> +                             if (_mm_movemask_epi8(error_cum))
> +                                     break;
> +
> +                             p += sizeof(__m128i);
> +                     }
> +#endif                                                       /* USE_SSE2 */
> +
>                       while (p < end)
>                       {
>                               if (*p == '\\' || *p == '"')
> -- 
> 2.36.1
> 

I wonder if we can't abstract this at least a bit better. If we go that route
a bit further, then add another arch, this code will be pretty much
unreadable.

Greetings,

Andres Freund


Reply via email to