This is in preparation for the LZ77 matchfinder.  Note that
erofs_memcmp2() is still not very efficient.

Signed-off-by: Gao Xiang <[email protected]>
---
 include/erofs/defs.h     | 24 ++++++++++++++++++++++--
 include/erofs/internal.h |  2 --
 lib/dedupe.c             | 23 ++++++++++++++++++-----
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index e5aa23c..44af557 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -179,9 +179,29 @@ typedef int64_t         s64;
 #define __maybe_unused      __attribute__((__unused__))
 #endif
 
-static inline u32 get_unaligned_le32(const u8 *p)
+#define __packed __attribute__((__packed__))
+
+#define __get_unaligned_t(type, ptr) ({                                        \
+       const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);      \
+       __pptr->x;                                                              \
+})
+
+#define __put_unaligned_t(type, val, ptr) do {                                 \
+       struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);            \
+       __pptr->x = (val);                                                      \
+} while (0)
+
+#define get_unaligned(ptr)     __get_unaligned_t(typeof(*(ptr)), (ptr))
+#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+       return le32_to_cpu(__get_unaligned_t(__le32, p));
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
 {
-       return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+       __put_unaligned_t(__le32, cpu_to_le32(val), p);
 }
 
 /**
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index ab964d4..aad2115 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -17,8 +17,6 @@ extern "C"
 
 typedef unsigned short umode_t;
 
-#define __packed __attribute__((__packed__))
-
 #include "erofs_fs.h"
 #include <fcntl.h>
 #include <sys/types.h> /* for off_t definition */
diff --git a/lib/dedupe.c b/lib/dedupe.c
index 0a69b8f..17da452 100644
--- a/lib/dedupe.c
+++ b/lib/dedupe.c
@@ -11,12 +11,14 @@
 unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
                            unsigned long sz)
 {
+       const unsigned long *a1, *a2;
        unsigned long n = sz;
 
-       if (sz >= sizeof(long) && ((long)s1 & (sizeof(long) - 1)) ==
-                       ((long)s2 & (sizeof(long) - 1))) {
-               const unsigned long *a1, *a2;
+       if (sz < sizeof(long))
+               goto out_bytes;
 
+       if (((long)s1 & (sizeof(long) - 1)) ==
+                       ((long)s2 & (sizeof(long) - 1))) {
                while ((long)s1 & (sizeof(long) - 1)) {
                        if (*s1 != *s2)
                                break;
@@ -34,9 +36,20 @@ unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
                        ++a2;
                        sz -= sizeof(long);
                }
-               s1 = (const u8 *)a1;
-               s2 = (const u8 *)a2;
+       } else {
+               a1 = (const unsigned long *)s1;
+               a2 = (const unsigned long *)s2;
+               do {
+                       if (get_unaligned(a1) != get_unaligned(a2))
+                               break;
+                       ++a1;
+                       ++a2;
+                       sz -= sizeof(long);
+               } while (sz >= sizeof(long));
        }
+       s1 = (const u8 *)a1;
+       s2 = (const u8 *)a2;
+out_bytes:
        while (sz) {
                if (*s1 != *s2)
                        break;
-- 
2.24.4

Reply via email to