Re: [PATCH v9 1/4] unicode: Add utf8_casefold_hash
On Tue, Jun 23, 2020 at 09:33:38PM -0700, Daniel Rosenberg wrote: > This adds a case insensitive hash function to allow taking the hash > without needing to allocate a casefolded copy of the string. It would be helpful to add a few more details in this commit message, somewhat along the lines of: ->d_hash() for casefolding currently allocates memory, and it needs to use GFP_ATOMIC because ->d_hash() is called in rcu-walk mode. This is unreliable and inefficient; this patch allows solving that problem by removing the need to allocate memory. - Eric
Re: [PATCH v9 1/4] unicode: Add utf8_casefold_hash
Daniel Rosenberg writes: > This adds a case insensitive hash function to allow taking the hash > without needing to allocate a casefolded copy of the string. > > Signed-off-by: Daniel Rosenberg > --- > fs/unicode/utf8-core.c | 23 ++- > include/linux/unicode.h | 3 +++ > 2 files changed, 25 insertions(+), 1 deletion(-) > > diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c > index 2a878b739115d..90656b9980720 100644 > --- a/fs/unicode/utf8-core.c > +++ b/fs/unicode/utf8-core.c > @@ -6,6 +6,7 @@ > #include > #include > #include > +#include > > #include "utf8n.h" > > @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const > struct qstr *str, > } > return -EINVAL; > } > - > EXPORT_SYMBOL(utf8_casefold); > > +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, > +struct qstr *str) > +{ > + const struct utf8data *data = utf8nfdicf(um->version); > + struct utf8cursor cur; > + int c; > + unsigned long hash = init_name_hash(salt); > + > + if (utf8ncursor(&cur, data, str->name, str->len) < 0) > + return -EINVAL; > + > + while ((c = utf8byte(&cur))) { > + if (c < 0) > + return c; Return -EINVAL here to match other unicode functions, since utf8byte will return -1 on a binary blob, which doesn't make sense for this. Other than that, looks good to me. 
Reviewed-by: Gabriel Krisman Bertazi > + hash = partial_name_hash((unsigned char)c, hash); > + } > + str->hash = end_name_hash(hash); > + return 0; > +} > +EXPORT_SYMBOL(utf8_casefold_hash); > + > int utf8_normalize(const struct unicode_map *um, const struct qstr *str, > unsigned char *dest, size_t dlen) > { > diff --git a/include/linux/unicode.h b/include/linux/unicode.h > index 990aa97d80496..74484d44c7554 100644 > --- a/include/linux/unicode.h > +++ b/include/linux/unicode.h > @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const > struct qstr *str, > int utf8_casefold(const struct unicode_map *um, const struct qstr *str, > unsigned char *dest, size_t dlen); > > +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, > +struct qstr *str); > + > struct unicode_map *utf8_load(const char *version); > void utf8_unload(struct unicode_map *um); -- Gabriel Krisman Bertazi
[PATCH v9 1/4] unicode: Add utf8_casefold_hash
This adds a case insensitive hash function to allow taking the hash without needing to allocate a casefolded copy of the string. Signed-off-by: Daniel Rosenberg --- fs/unicode/utf8-core.c | 23 ++- include/linux/unicode.h | 3 +++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 2a878b739115d..90656b9980720 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "utf8n.h" @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, } return -EINVAL; } - EXPORT_SYMBOL(utf8_casefold); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur; + int c; + unsigned long hash = init_name_hash(salt); + + if (utf8ncursor(&cur, data, str->name, str->len) < 0) + return -EINVAL; + + while ((c = utf8byte(&cur))) { + if (c < 0) + return c; + hash = partial_name_hash((unsigned char)c, hash); + } + str->hash = end_name_hash(hash); + return 0; +} +EXPORT_SYMBOL(utf8_casefold_hash); + int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d80496..74484d44c7554 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um); -- 2.27.0.111.gc72c7da667-goog