Re: [PATCH v9 1/4] unicode: Add utf8_casefold_hash

2020-06-23 Thread Eric Biggers
On Tue, Jun 23, 2020 at 09:33:38PM -0700, Daniel Rosenberg wrote:
> This adds a case insensitive hash function to allow taking the hash
> without needing to allocate a casefolded copy of the string.

It would be helpful to add a few more details in this commit message.
Somewhat along the lines of: ->d_hash() for casefolding currently allocates
memory, and it needs to use GFP_ATOMIC because ->d_hash() is called in rcu-walk
mode. This is unreliable and inefficient; this patch allows solving that
problem by removing the need to allocate memory.

- Eric


Re: [PATCH v9 1/4] unicode: Add utf8_casefold_hash

2020-06-23 Thread Gabriel Krisman Bertazi
Daniel Rosenberg writes:

> This adds a case insensitive hash function to allow taking the hash
> without needing to allocate a casefolded copy of the string.
>
> Signed-off-by: Daniel Rosenberg 
> ---
>  fs/unicode/utf8-core.c  | 23 ++-
>  include/linux/unicode.h |  3 +++
>  2 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
> index 2a878b739115d..90656b9980720 100644
> --- a/fs/unicode/utf8-core.c
> +++ b/fs/unicode/utf8-core.c
> @@ -6,6 +6,7 @@
>  #include <linux/parser.h>
>  #include <linux/errno.h>
>  #include <linux/unicode.h>
> +#include <linux/stringhash.h>
>  
>  #include "utf8n.h"
>  
> @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const 
> struct qstr *str,
>   }
>   return -EINVAL;
>  }
> -
>  EXPORT_SYMBOL(utf8_casefold);
>  
> +int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
> +struct qstr *str)
> +{
> + const struct utf8data *data = utf8nfdicf(um->version);
> + struct utf8cursor cur;
> + int c;
> + unsigned long hash = init_name_hash(salt);
> +
> + if (utf8ncursor(&cur, data, str->name, str->len) < 0)
> + return -EINVAL;
> +
> + while ((c = utf8byte(&cur))) {
> + if (c < 0)
> + return c;

Return -EINVAL here to match other unicode functions, since utf8byte
will return -1 on a binary blob, which doesn't make sense for this.

Other than that, looks good to me.

Reviewed-by: Gabriel Krisman Bertazi 

> + hash = partial_name_hash((unsigned char)c, hash);
> + }
> + str->hash = end_name_hash(hash);
> + return 0;
> +}
> +EXPORT_SYMBOL(utf8_casefold_hash);
> +
>  int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
>  unsigned char *dest, size_t dlen)
>  {
> diff --git a/include/linux/unicode.h b/include/linux/unicode.h
> index 990aa97d80496..74484d44c7554 100644
> --- a/include/linux/unicode.h
> +++ b/include/linux/unicode.h
> @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const 
> struct qstr *str,
>  int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
> unsigned char *dest, size_t dlen);
>  
> +int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
> +struct qstr *str);
> +
>  struct unicode_map *utf8_load(const char *version);
>  void utf8_unload(struct unicode_map *um);

-- 
Gabriel Krisman Bertazi


[PATCH v9 1/4] unicode: Add utf8_casefold_hash

2020-06-23 Thread Daniel Rosenberg
This adds a case insensitive hash function to allow taking the hash
without needing to allocate a casefolded copy of the string.

Signed-off-by: Daniel Rosenberg 
---
 fs/unicode/utf8-core.c  | 23 ++-
 include/linux/unicode.h |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115d..90656b9980720 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -6,6 +6,7 @@
 #include <linux/parser.h>
 #include <linux/errno.h>
 #include <linux/unicode.h>
+#include <linux/stringhash.h>
 
 #include "utf8n.h"
 
@@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const 
struct qstr *str,
}
return -EINVAL;
 }
-
 EXPORT_SYMBOL(utf8_casefold);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str)
+{
+   const struct utf8data *data = utf8nfdicf(um->version);
+   struct utf8cursor cur;
+   int c;
+   unsigned long hash = init_name_hash(salt);
+
+   if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+   return -EINVAL;
+
+   while ((c = utf8byte(&cur))) {
+   if (c < 0)
+   return c;
+   hash = partial_name_hash((unsigned char)c, hash);
+   }
+   str->hash = end_name_hash(hash);
+   return 0;
+}
+EXPORT_SYMBOL(utf8_casefold_hash);
+
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
   unsigned char *dest, size_t dlen)
 {
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 990aa97d80496..74484d44c7554 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct 
qstr *str,
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
  unsigned char *dest, size_t dlen);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str);
+
 struct unicode_map *utf8_load(const char *version);
 void utf8_unload(struct unicode_map *um);
 
-- 
2.27.0.111.gc72c7da667-goog