Hi Thierry,

On Wed, Oct 25, 2017 at 01:02:18PM +0200, Thierry Fournier wrote:
> Hi,
> 
> This is a patch for lua which adds HAProxy internal regexes support.

Thanks but I'm having a doubt regarding the risk that it introduces
bugs :

> +static int hlua_regex_match(struct lua_State *L)
> +{
> +     struct my_regex *regex;
> +     const char *str;
> +     size_t len;
> +     regmatch_t pmatch[20];
> +     int ret;
> +     int i;
> +
> +     regex = hlua_check_regex(L, 1);
> +     str = luaL_checklstring(L, 2, &len);

As you can see above, luaL_checklstring() returns a const char*, which
is also what your <str> variable is, so it clearly means that the result
is *not* supposed to be modified.

> +     ret = regex_exec_match2(regex, (char *)str, len, 20, pmatch, 0);

And this forced cast to R/W is there to allow regex_exec_match2() to
explicitly modify this string. The function prototype is quite clear
about it :

  /* This function apply regex. It take a "char *" ans length as input. The
   * <subject> can be modified during the processing. If the function doesn't
   * match, it returns false, else it returns true.
   * When it is compiled with standard POSIX regex or PCRE, this function add
   * a temporary null chracters at the end of the <subject>. The <subject> must
   * have a real length of <length> + 1. Currently the only supported flag is
   * REG_NOTBOL.
   */
  int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
                        size_t nmatch, regmatch_t pmatch[], int flags) {
  (...)
        char old_char = subject[length];
        int match;
  
        flags &= REG_NOTBOL;
        subject[length] = 0;
        match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
        subject[length] = old_char;
        if (match == REG_NOMATCH)
                return 0;

This happens in the case where the PCRE is not used and the libc's regex lib
is used instead. So I think it's a bit dangerous to proceed like this. First,
you never know if the byte you modify is allocated or not. Malloc() usually
returns multiple of 8 or 16 bytes, if your string is exactly 16 bytes, you
may very well temporarily overwrite the next byte which is part of a pointer
belonging to the malloc list. It's possible also that the Lua compiler will
sometimes return real constants (I don't know when, for example maybe when a
subject string matches a keywords that is part of the language and declared
in the .text section) and this will crash. Also with threads it can be tricky
as well because a trailing zero will be added during the time the regex is
executed.

As a rule of thumb, a read-only variable pointer must never ever be forced
read-write using a cast, because even if it works for you right now, the
const modifier offers guarantees to other developers about what they can do,
and such guarantees are randomly violated by such code trying to prevent the
compiler from detecting the problem.

I think the best thing to do simply is to memcpy() the subject string into
the trash (which is not used here). As subjects are supposed to be short
most of the time, you will not even notice any performance impact.

As a bonus I'm attaching your current patch in which I fixed a few typos in
the commit message and the doc :-)

Thanks,
Willy
>From 31904278dc8ee68c015538cb1a91e246b1e5d1ea Mon Sep 17 00:00:00 2001
From: Thierry FOURNIER <[email protected]>
Date: Wed, 25 Oct 2017 12:59:51 +0200
Subject: MINOR: hlua: Add regex class

This patch simply brings HAProxy internal regex system to the Lua API.
Lua doesn't embed regexes, now it inherits from the regexes compiled
with haproxy.
---
 doc/lua-api/index.rst |  64 +++++++++++++++++++++++++++++++
 include/types/hlua.h  |   1 +
 src/hlua_fcn.c        | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+)

diff --git a/doc/lua-api/index.rst b/doc/lua-api/index.rst
index 96367f6..dceab01 100644
--- a/doc/lua-api/index.rst
+++ b/doc/lua-api/index.rst
@@ -1890,6 +1890,70 @@ Socket class
   :param class_socket socket: Is the manipulated Socket.
   :param integer value: The timeout value.
 
+.. _regex_class:
+
+Regex class
+===========
+
+.. js:class:: Regex
+
+  This class allows the usage of HAProxy regexes because classic lua doesn't
+  provides regexes. This class inherits the HAProxy compilation options, so the
+  regexes can be libc regex, pcre regex or pcre JIT regex.
+
+  The expression matching number is limited to 20 per regex. The only available
+  option is case sensitive.
+
+  Because regexes compilation is a heavy process, it is better to define all
+  your regexes in the **body context** and use it during the runtime.
+
+.. code-block:: lua
+
+  -- Create the regex
+  st, regex = Regex.new("needle (..) (...)", true);
+
+  -- Check compilation errors
+  if st == false then
+    print "error: " .. regex
+  end
+
+  -- Match the regexes
+  print(regex:exec("Looking for a needle in the haystack")) -- true
+  print(regex:exec("Lokking for a cat in the haystack"))    -- false
+
+  -- Extract words
+  st, list = regex:match("Looking for a needle in the haystack")
+  print(st)      -- true
+  print(list[1]) -- needle in the
+  print(list[2]) -- in
+  print(list[3]) -- the
+
+.. js:function:: Regex.new(regex, case_sensitive)
+
+  Create and compile a regex.
+
+  :param string regex: The regular expression according with the libc or pcre
+    standard
+  :param boolean case_sensitive: Match is case sensitive or not.
+  :returns: boolean status and :ref:`regex_class` or string containing fail 
reason.
+
+.. js:function:: Regex.exec(regex, str)
+
+  Execute the regex.
+
+  :param class_regex regex: A :ref:`regex_class` object.
+  :param string str: The input string will be compared with the compiled regex.
+  :returns: a boolean status according with the match result.
+
+.. js:function:: Regex.match(regex, str)
+
+  Execute the regex and return matched expressions.
+
+  :param class_map map: A :ref:`regex_class` object.
+  :param string str: The input string will be compared with the compiled regex.
+  :returns: a boolean status according with the match result, and
+    a table containing all the string matched in order of declaration.
+
 .. _map_class:
 
 Map class
diff --git a/include/types/hlua.h b/include/types/hlua.h
index bff29d0..74dcf00 100644
--- a/include/types/hlua.h
+++ b/include/types/hlua.h
@@ -24,6 +24,7 @@
 #define CLASS_PROXY        "Proxy"
 #define CLASS_SERVER       "Server"
 #define CLASS_LISTENER     "Listener"
+#define CLASS_REGEX        "Regex"
 
 struct stream;
 
diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c
index 9a7e657..c37e2a9 100644
--- a/src/hlua_fcn.c
+++ b/src/hlua_fcn.c
@@ -36,6 +36,7 @@ static int class_concat_ref;
 static int class_proxy_ref;
 static int class_server_ref;
 static int class_listener_ref;
+static int class_regex_ref;
 
 #define STATS_LEN (MAX((int)ST_F_TOTAL_FIELDS, (int)INF_TOTAL_FIELDS))
 
@@ -1076,6 +1077,90 @@ int hlua_match_addr(lua_State *L)
        return 1;
 }
 
+static struct my_regex *hlua_check_regex(lua_State *L, int ud)
+{
+       return (hlua_checkudata(L, ud, class_regex_ref));
+}
+
+static int hlua_regex_comp(struct lua_State *L)
+{
+       struct my_regex *regex;
+       const char *str;
+       int cs;
+       char *err;
+
+       str = luaL_checkstring(L, 1);
+       luaL_argcheck(L, lua_isboolean(L, 2), 2, NULL);
+       cs = lua_toboolean(L, 2);
+
+       regex = lua_newuserdata(L, sizeof(*regex));
+
+       err = NULL;
+       if (!regex_comp(str, regex, cs, 1, &err)) {
+               lua_pushboolean(L, 0); /* status error */
+               lua_pushstring(L, err); /* Reason */
+               free(err);
+               return 2;
+       }
+
+       lua_pushboolean(L, 1); /* Status ok */
+
+       /* Create object */
+       lua_newtable(L);
+       lua_pushvalue(L, -3); /* Get the userdata pointer. */
+       lua_rawseti(L, -2, 0);
+       lua_rawgeti(L, LUA_REGISTRYINDEX, class_regex_ref);
+       lua_setmetatable(L, -2);
+       return 2;
+}
+
+static int hlua_regex_exec(struct lua_State *L)
+{
+       struct my_regex *regex;
+       const char *str;
+       size_t len;
+
+       regex = hlua_check_regex(L, 1);
+       str = luaL_checklstring(L, 2, &len);
+
+       lua_pushboolean(L, regex_exec2(regex, (char *)str, len));
+
+       return 1;
+}
+
+static int hlua_regex_match(struct lua_State *L)
+{
+       struct my_regex *regex;
+       const char *str;
+       size_t len;
+       regmatch_t pmatch[20];
+       int ret;
+       int i;
+
+       regex = hlua_check_regex(L, 1);
+       str = luaL_checklstring(L, 2, &len);
+
+       ret = regex_exec_match2(regex, (char *)str, len, 20, pmatch, 0);
+       lua_pushboolean(L, ret);
+       lua_newtable(L);
+       if (ret) {
+               for (i = 0; i < 20 && pmatch[i].rm_so != -1; i++) {
+                       lua_pushlstring(L, str + pmatch[i].rm_so, 
pmatch[i].rm_eo - pmatch[i].rm_so);
+                       lua_rawseti(L, -2, i + 1);
+               }
+       }
+       return 2;
+}
+
+static int hlua_regex_free(struct lua_State *L)
+{
+       struct my_regex *regex;
+
+       regex = hlua_check_regex(L, 1);
+       regex_free(regex);
+       return 0;
+}
+
 int hlua_fcn_reg_core_fcn(lua_State *L)
 {
        if (!hlua_concat_init(L))
@@ -1092,6 +1177,24 @@ int hlua_fcn_reg_core_fcn(lua_State *L)
        hlua_class_function(L, "match_addr", hlua_match_addr);
        hlua_class_function(L, "tokenize", hlua_tokenize);
 
+       /* Create regex object. */
+       lua_newtable(L);
+       hlua_class_function(L, "new", hlua_regex_comp);
+
+       lua_newtable(L); /* The metatable. */
+       lua_pushstring(L, "__index");
+       lua_newtable(L);
+       hlua_class_function(L, "exec", hlua_regex_exec);
+       hlua_class_function(L, "match", hlua_regex_match);
+       lua_rawset(L, -3); /* -> META["__index"] = TABLE */
+       hlua_class_function(L, "__gc", hlua_regex_free);
+
+       lua_pushvalue(L, -1); /* Duplicate the metatable reference. */
+       class_regex_ref = hlua_register_metatable(L, CLASS_REGEX);
+
+       lua_setmetatable(L, -2);
+       lua_setglobal(L, CLASS_REGEX); /* Create global object called Regex */
+
        /* Create listener object. */
        lua_newtable(L);
        lua_pushstring(L, "__index");
-- 
1.7.12.1

Reply via email to