Hello.

I hope this will be useful. Now that local variables are real JSON containers, why not extract all submatches at once? I've given it a try and it works. The patch is based on the current master-var-refactor; I am sorry for the mess. I will redo it if needed once the refactoring is done.

I've extended re_extract to achieve this behaviour. Creating a new function (re_extract_all) just for this would lead to code duplication. On request, the modified function returns a container with all submatches. Submatches in the container are identified by index, where "0" is the whole match. The new behaviour is selected by setting the 'submatch' parameter to any negative number. For non-negative numbers, it works as it used to. Example:

# re_extract(expr, re, match, submatch, no-found)

    set $.str = "Testing 31 32 33 and so on";
    set $.var = re_extract($.str, "^([[:alnum:]]+) ([0-9]+) ([0-9]+)", 0, -1, "nothing");
    set $!matches = $.var;            # container here
    set $!match2 = $.var!2;          # 2nd submatch

And here is the output of all these variables (with some magic template):

_local_ { "str": "Testing 31 32 33 and so on", "var": { "0": "Testing 31 32", "1": "Testing", "2": "31", "3": "32" } } _msg_ { "matches": { "0": "Testing 31 32", "1": "Testing", "2": "31", "3": "32" }, "match2": "31" }


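By the way, this patch also fixes a buffer overflow bug in re_extract: the old bounds check used '>' instead of '>=', so a submatch number of 50 was accepted and indexed one element past the end of the 50-element pmatch array.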


--
Pavel Levshin

diff --git a/grammar/rainerscript.c b/grammar/rainerscript.c
index 3295c9c..32e68d5 100644
--- a/grammar/rainerscript.c
+++ b/grammar/rainerscript.c
@@ -1334,12 +1334,14 @@ finalize_it:
        RETiRet;
 }
 
+#define RE_EXTRACT_MAX_PMATCH 50
+
 static inline void
 doFunc_re_extract(struct cnffunc *func, struct var *ret, void* usrptr)
 {
-       size_t submatchnbr;
+       short submatchnbr;
        short matchnbr;
-       regmatch_t pmatch[50];
+       regmatch_t pmatch[RE_EXTRACT_MAX_PMATCH];
        int bMustFree;
        es_str_t *estr;
        char *str;
@@ -1350,7 +1352,11 @@ doFunc_re_extract(struct cnffunc *func, struct var *ret, 
void* usrptr)
        uchar bFound = 0;
        iOffs = 0;
        sbool bHadNoMatch = 0;
-
+       short i;
+       char submatchStr[3]; // just enough to place 0..49
+       char *cstr;
+       struct json_object *json;
+       
        cnfexprEval(func->expr[0], &r[0], usrptr);
        /* search string is already part of the compiled regex, so we don't
         * need it here!
@@ -1359,8 +1365,8 @@ doFunc_re_extract(struct cnffunc *func, struct var *ret, 
void* usrptr)
        cnfexprEval(func->expr[3], &r[3], usrptr);
        str = (char*) var2CString(&r[0], &bMustFree);
        matchnbr = (short) var2Number(&r[2], NULL);
-       submatchnbr = (size_t) var2Number(&r[3], NULL);
-       if(submatchnbr > sizeof(pmatch)/sizeof(regmatch_t)) {
+       submatchnbr = (short) var2Number(&r[3], NULL);
+       if(submatchnbr >= RE_EXTRACT_MAX_PMATCH) {
                DBGPRINTF("re_extract() submatch %d is too large\n", 
submatchnbr);
                bHadNoMatch = 1;
                goto finalize_it;
@@ -1372,7 +1378,7 @@ doFunc_re_extract(struct cnffunc *func, struct var *ret, 
void* usrptr)
        while(!bFound) {
                int iREstat;
                iREstat = regexp.regexec(func->funcdata, (char*)(str + iOffs),
-                                        submatchnbr+1, pmatch, 0);
+                                       RE_EXTRACT_MAX_PMATCH, pmatch, 0);
                dbgprintf("re_extract: regexec return is %d\n", iREstat);
                if(iREstat == 0) {
                        if(pmatch[0].rm_so == -1) {
@@ -1396,16 +1402,52 @@ doFunc_re_extract(struct cnffunc *func, struct var 
*ret, void* usrptr)
                bHadNoMatch = 1;
                goto finalize_it;
        } else {
-               /* Match- but did it match the one we wanted? */
-               /* we got no match! */
-               if(pmatch[submatchnbr].rm_so == -1) {
-                       bHadNoMatch = 1;
-                       goto finalize_it;
-               }
-               /* OK, we have a usable match - we now need to malloc pB */
-               iLenBuf = pmatch[submatchnbr].rm_eo - pmatch[submatchnbr].rm_so;
-               estr = es_newStrFromBuf(str + iOffs + pmatch[submatchnbr].rm_so,
+               if (submatchnbr >= 0) {
+                       /* Match- but did it match the one we wanted? */
+                       /* we got no match! */
+                       if(pmatch[submatchnbr].rm_so == -1) {
+                               bHadNoMatch = 1;
+                               goto finalize_it;
+                       }
+                       /* OK, we have a usable match - we now need to malloc 
pB */
+                       iLenBuf = pmatch[submatchnbr].rm_eo - 
pmatch[submatchnbr].rm_so;
+                       estr = es_newStrFromBuf(str + iOffs + 
pmatch[submatchnbr].rm_so,
                                        iLenBuf);
+                       ret->datatype = 'S';
+                       ret->d.estr = estr;
+               } else { /* extract all submatches */
+                       ret->datatype = 'J';
+                       ret->d.json = json_object_new_object();
+                       if (ret->d.json == NULL) {
+                               DBGPRINTF("re_extract() unable to create json 
object\n");
+                               bHadNoMatch = 1;
+                               goto finalize_it;
+                       }
+                       for (i = 0; i < RE_EXTRACT_MAX_PMATCH; i++) {
+                               if (pmatch[i].rm_so == -1) {
+                                       break;
+                               }
+                               iLenBuf = pmatch[i].rm_eo - pmatch[i].rm_so;
+                               cstr = strndup(str + iOffs + pmatch[i].rm_so,
+                                               iLenBuf);
+                               if (cstr == NULL) {
+                                       DBGPRINTF("re_extract() unable to 
execute strdup\n");
+                                       bHadNoMatch = 1;
+                                       goto finalize_it;
+                               }
+                               DBGPRINTF("re_extract() sub %d '%s'\n", i, 
cstr);
+                               json = json_object_new_string(cstr);
+                               free(cstr);
+                               if (json == NULL) {
+                                       DBGPRINTF("re_extract() unable to 
create json from string\n");
+                                       bHadNoMatch = 1;
+                                       goto finalize_it;
+                               }
+                               snprintf(&submatchStr[0], sizeof(submatchStr), 
"%d", i);
+                               json_object_object_add(ret->d.json, 
&submatchStr[0], json);
+                       }
+               }
+
        }
 
        if(bMustFree) free(str);
@@ -1420,9 +1462,9 @@ finalize_it:
                 * for r[4]. We pass it to the caller, which in turn frees it.
                 * This saves us doing one unnecessary memory alloc & write.
                 */
+               ret->datatype = 'S';
+               ret->d.estr = estr;
        }
-       ret->datatype = 'S';
-       ret->d.estr = estr;
        return;
 }
 
_______________________________________________
rsyslog mailing list
http://lists.adiscon.net/mailman/listinfo/rsyslog
http://www.rsyslog.com/professional-services/
What's up with rsyslog? Follow https://twitter.com/rgerhards
NOTE WELL: This is a PUBLIC mailing list, posts are ARCHIVED by a myriad of 
sites beyond our control. PLEASE UNSUBSCRIBE and DO NOT POST if you DON'T LIKE 
THAT.

Reply via email to