[dpdk-dev] [PATCH v3 08/14] acl: arm64: acl implementation using NEON gcc intrinsic

2015-11-06 Thread Jerin Jacob
verified with testacl and acl_autotest applications on arm64 architecture.

Signed-off-by: Jerin Jacob 
---
 app/test-acl/main.c   |   4 +
 lib/librte_acl/Makefile   |   5 +
 lib/librte_acl/acl.h  |   4 +
 lib/librte_acl/acl_run_neon.c |  46 +++
 lib/librte_acl/acl_run_neon.h | 289 ++
 lib/librte_acl/rte_acl.c  |  25 
 lib/librte_acl/rte_acl.h  |   1 +
 7 files changed, 374 insertions(+)
 create mode 100644 lib/librte_acl/acl_run_neon.c
 create mode 100644 lib/librte_acl/acl_run_neon.h

diff --git a/app/test-acl/main.c b/app/test-acl/main.c
index 72ce83c..0b0c093 100644
--- a/app/test-acl/main.c
+++ b/app/test-acl/main.c
@@ -101,6 +101,10 @@ static const struct acl_alg acl_alg[] = {
.name = "avx2",
.alg = RTE_ACL_CLASSIFY_AVX2,
},
+   {
+   .name = "neon",
+   .alg = RTE_ACL_CLASSIFY_NEON,
+   },
 };

 static struct {
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index 7a1cf8a..27f91d5 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -48,9 +48,14 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += rte_acl.c
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_bld.c
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_gen.c
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
+ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
+else
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c
+endif

 CFLAGS_acl_run_sse.o += -msse4.1
+CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized

 #
 # If the compiler supports AVX2 instructions,
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index eb4930c..09d6784 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -230,6 +230,10 @@ int
 rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
uint32_t *results, uint32_t num, uint32_t categories);

+int
+rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
+   uint32_t *results, uint32_t num, uint32_t categories);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/lib/librte_acl/acl_run_neon.c b/lib/librte_acl/acl_run_neon.c
new file mode 100644
index 000..b014451
--- /dev/null
+++ b/lib/librte_acl/acl_run_neon.c
@@ -0,0 +1,46 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run_neon.h"
+
+int
+rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories)
+{
+   if (likely(num >= 8))
+   return search_neon_8(ctx, data, results, num, categories);
+   else if (num >= 4)
+   return search_neon_4(ctx, data, results, num, categories);
+   else
+   return rte_acl_classify_scalar(ctx, data, results, num,
+   categories);
+}
diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h
new file mode 100644
index 000..cf7c57f
--- /dev/null
+++ b/lib/librte_acl/acl_run_neon.h
@@ -0,0 +1,289 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must 

[dpdk-dev] [PATCH v3 08/14] acl: arm64: acl implementation using NEON gcc intrinsic

2015-11-06 Thread Ananyev, Konstantin


> -Original Message-
> From: Jerin Jacob [mailto:jerin.jacob at caviumnetworks.com]
> Sent: Friday, November 06, 2015 9:40 AM
> To: dev at dpdk.org
> Cc: thomas.monjalon at 6wind.com; Hunt, David; viktorin at rehivetech.com; 
> Ananyev, Konstantin; Jerin Jacob
> Subject: [dpdk-dev] [PATCH v3 08/14] acl: arm64: acl implementation using 
> NEON gcc intrinsic
> 
> verified with testacl and acl_autotest applications on arm64 architecture.
> 
> Signed-off-by: Jerin Jacob 
> ---

Didn't test it on ARM, but from x86 perspective all seems ok. 

Acked-by: Konstantin Ananyev