Optimize check_refname_component using SSE2 on x86_64.
git rev-parse HEAD is a good test-case for this, since it does almost
nothing except parse refs. For one particular repo with about 60k
refs, almost all packed, the timings are:
Look up table: 29 ms
SSE2: 23 ms
This cuts about 20% off of the runtime.
Ondřej Bílka nel...@seznam.cz suggested an SSE2 approach to the
substring searches, which netted a speed boost over the SSE4.2 code I
had initially written.
Signed-off-by: David Turner dtur...@twitter.com
---
git-compat-util.h | 11 +++
refs.c | 223 -
t/t5511-refspec.sh | 20 +
3 files changed, 236 insertions(+), 18 deletions(-)
diff --git a/git-compat-util.h b/git-compat-util.h
index f6d3a46..0aec58e 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -668,6 +668,17 @@ void git_qsort(void *base, size_t nmemb, size_t size,
#endif
#endif
+#if defined(__GNUC__) defined(__x86_64__)
+#include emmintrin.h
+/*
+ * This is the system memory page size; it's used so that we can read
+ * outside the bounds of an allocation without segfaulting.
+ */
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+#endif
+
#ifdef UNRELIABLE_FSTAT
#define fstat_is_reliable() 0
#else
diff --git a/refs.c b/refs.c
index 46139d2..5c2f68b 100644
--- a/refs.c
+++ b/refs.c
@@ -8,20 +8,22 @@
/*
* How to handle various characters in refnames:
* 0: An acceptable character for refs
- * 1: End-of-component
- * 2: ., look for a preceding . to reject .. in refs
- * 3: {, look for a preceding @ to reject @{ in refs
- * 4: A bad character: ASCII control characters, ~, ^, : or SP
+ * 1: \0: End-of-component and string
+ * 2: /: End-of-component
+ * 3: ., look for a preceding . to reject .. in refs
+ * 4: {, look for a preceding @ to reject @{ in refs
+ * 5: *, usually a bad character except, once as a wildcard
+ * 6: A bad character except * (see check_refname_component below)
*/
static unsigned char refname_disposition[256] = {
- 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
+ 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 3, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 6, 6
};
/*
@@ -33,8 +35,9 @@ static unsigned char refname_disposition[256] = {
* - any path component of it begins with ., or
* - it has double dots .., or
* - it has ASCII control character, ~, ^, : or SP, anywhere, or
- * - it ends with a /.
- * - it ends with .lock
+ * - it has pattern-matching notation *, ?, [, anywhere, or
+ * - it ends with a /, or
+ * - it ends with .lock, or
* - it contains a \ (backslash)
*/
static int check_refname_component(const char *refname, int flags)
@@ -46,17 +49,19 @@ static int check_refname_component(const char *refname, int
flags)
int ch = *cp 255;
unsigned char disp = refname_disposition[ch];
switch(disp) {
- case 1:
- goto out;
+ case 1: /* fall-through */
case 2:
+ goto out;
+ case 3:
if (last == '.')
return -1; /* Refname contains ... */
break;
- case 3:
+ case 4:
if (last == '@')
return -1; /* Refname contains @{. */
break;
- case 4:
+ case 5: /* fall-through */
+ case 6:
return -1;
}
last = ch;
@@ -79,14 +84,13 @@ out:
return cp - refname;
}
-int check_refname_format(const char *refname, int flags)
+static int check_refname_format_bytewise(const char *refname, int flags)
{
int component_len, component_count = 0;
if (!strcmp(refname, @))
/* Refname is a single character '@'. */
return -1;
-
while (1) {
/* We are at the start of a path component. */
component_len = check_refname_component(refname, flags);
@@ -115,6 +119,189 @@ int check_refname_format(const char *refname, int flags)
return 0;
}
+#if defined(__GNUC__) defined(__x86_64__)
+#define SSE_VECTOR_BYTES 16
+
+/* Vectorized version of check_refname_format. */
+int check_refname_format(const char