https://git.reactos.org/?p=reactos.git;a=commitdiff;h=0695ecbfd641306f9912af865010dbb0ccf20fbd

commit 0695ecbfd641306f9912af865010dbb0ccf20fbd
Author:     Hermès Bélusca-Maïto <[email protected]>
AuthorDate: Sun Jul 26 20:15:25 2020 +0200
Commit:     Hermès Bélusca-Maïto <[email protected]>
CommitDate: Mon Sep 21 03:31:01 2020 +0200

    [CMD] FOR: Additional Windows' CMD compatibility "fixes" for FOR /F token parsing command.
    
    This compatibility behaviour implements the buggy behaviour of FOR /F
    token parsing that can be observed in Windows' CMD, and that is tested
    by the cmd_winetests.
    It is enabled by the MSCMD_FOR_QUIRKS define, and can be disabled at
    compile time by removing that define.
    
    It fixes additional cmd_winetests, in concert with commit cb2a9c31.
    
    Explanation of the implemented buggy behaviour
    ==============================================
    
    In principle, the "tokens=x,y,m-n[*]" option describes a list of token
    numbers (each must be between 1 and 31) that will be assigned into
    variables. In theory, this option is not cumulative: only the latest
    'tokens=' specification should be taken into account.
    
    However, things are not that simple in practice. First, not all of the
    "tokens=" option state is reset when more than one specification is
    provided. Second, when a token range is specified, e.g. "1-5", Windows'
    CMD silently ignores ranges whose endpoints are not in increasing
    order. Thus, for example, the range "5-1" is ignored without error.
    Third, token numbers strictly greater than 31 are silently ignored,
    and if they appear in a range, the whole range is ignored.
    
    Another bug is the following one: suppose that the 'tokens'
    specification reads:
      "tokens=1-5,1-30" , or: "tokens=1-5,3" ,
    i.e. it contains more than one range, and the ranges overlap. Then the
    actual total number of variables will not be the size of the largest
    range, but the sum of the sizes of all the ranges instead.
    Thus, in the first example, a total of 5 + 30 == 35 variables (> 31) is
    allocated, while in the second example, a total of 5 + 1 == 6 variables
    is allocated, even if they won't all store data !!
    In the first example, only the first 30 FOR variables will be used, and
    the 5 others will contain an empty string. In the second example, only
    the first 5 FOR variables will be used, and the other one will be empty.
    
    As a consequence, the fixed-size "Variables" buffer cannot always be
    used, since it can contain at most 32 variables.
    
    Last but not least, when more than one "tokens=" specification is
    provided, for example:
      "tokens=1-31 tokens=1-20"
    a total number of 31 FOR variables (because 31 is the max of 31 and 20)
    is allocated, **but** only 20 are actually used, and the 11 others
    return an empty string.
    
    And in the specification: "tokens=1-31,* tokens=1-20", a total of
    31 + 1 + 20 = 52 variables is initialized, but only the first 20 will
    be used, and no "remaining-line" token (the '*' one) is used.
---
 base/shell/cmd/for.c | 165 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 135 insertions(+), 30 deletions(-)

diff --git a/base/shell/cmd/for.c b/base/shell/cmd/for.c
index 300588bed31..151be0ca2c8 100644
--- a/base/shell/cmd/for.c
+++ b/base/shell/cmd/for.c
@@ -32,6 +32,10 @@
 
 #include "precomp.h"
 
+/* Enable this define for "buggy" Windows' CMD FOR-command compatibility.
+ * Currently, this enables the buggy behaviour of FOR /F token parsing. */
+#define MSCMD_FOR_QUIRKS
+
 
 /* FOR is a special command, so this function is only used for showing help 
now */
 INT cmd_for(LPTSTR param)
@@ -121,23 +125,27 @@ static LPTSTR ReadFileContents(FILE *InputFile, TCHAR 
*Buffer)
 static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR *Buffer)
 {
     LPTSTR Delims = _T(" \t");
-    LPTSTR DelimsEndPtr = NULL;
+    PTCHAR DelimsEndPtr = NULL;
     TCHAR  DelimsEndChr = _T('\0');
     TCHAR Eol = _T(';');
     INT SkipLines = 0;
-    DWORD Tokens = (1 << 1);
-    BOOL RemainderVar = FALSE;
+    DWORD TokensMask = (1 << 1);
+#ifdef MSCMD_FOR_QUIRKS
+    DWORD NumTokens = 1;
+    DWORD RemainderVar = 0;
+#else
+    DWORD NumTokens = 0;
+#endif
     TCHAR StringQuote = _T('"');
     TCHAR CommandQuote = _T('\'');
     LPTSTR Variables[32];
-    TCHAR *Start, *End;
-    INT i;
+    PTCHAR Start, End;
     INT Ret = 0;
 
     if (Cmd->For.Params)
     {
         TCHAR Quote = 0;
-        TCHAR *Param = Cmd->For.Params;
+        PTCHAR Param = Cmd->For.Params;
         if (*Param == _T('"') || *Param == _T('\''))
             Quote = *Param++;
 
@@ -161,7 +169,7 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
                 {
                     if (*Param == _T(' '))
                     {
-                        TCHAR *FirstSpace = Param;
+                        PTCHAR FirstSpace = Param;
                         Param += _tcsspn(Param, _T(" "));
                         /* Exclude trailing spaces if this is not the last 
parameter */
                         if (*Param && *Param != Quote)
@@ -197,24 +205,59 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
             }
             else if (_tcsnicmp(Param, _T("tokens="), 7) == 0)
             {
+#ifdef MSCMD_FOR_QUIRKS
+                DWORD NumToksInSpec = 0; // Number of tokens in this 
specification.
+#endif
                 Param += 7;
-                /* tokens=x,y,m-n: List of token numbers (must be between
-                 * 1 and 31) that will be assigned into variables. */
-                Tokens = 0;
+                /*
+                 * tokens=x,y,m-n: List of token numbers (must be between 1 
and 31)
+                 * that will be assigned into variables. This option does not 
cumulate:
+                 * only the latest 'tokens=' specification is taken into 
account.
+                 *
+                 * NOTE: In MSCMD_FOR_QUIRKS mode, for Windows' CMD 
compatibility,
+                 * not all the tokens-state is reset. This leads to subtle 
bugs.
+                 */
+                TokensMask = 0;
+#ifdef MSCMD_FOR_QUIRKS
+                NumToksInSpec = 0;
+                // Windows' CMD compatibility: bug: the asterisk-token's 
position is not reset!
+                // RemainderVar = 0;
+#else
+                NumTokens = 0;
+#endif
+
                 while (*Param && *Param != Quote && *Param != _T('*'))
                 {
                     INT First = _tcstol(Param, &Param, 0);
                     INT Last = First;
+#ifdef MSCMD_FOR_QUIRKS
                     if (First < 1)
+#else
+                    if ((First < 1) || (First > 31))
+#endif
                         goto error;
                     if (*Param == _T('-'))
                     {
                         /* It's a range of tokens */
                         Last = _tcstol(Param + 1, &Param, 0);
-                        if (Last < First || Last > 31)
+#ifdef MSCMD_FOR_QUIRKS
+                        /* Ignore the range if the endpoints are not in 
correct order */
+                        if (Last < 1)
+#else
+                        if ((Last < First) || (Last > 31))
+#endif
                             goto error;
                     }
-                    Tokens |= (2 << Last) - (1 << First);
+#ifdef MSCMD_FOR_QUIRKS
+                    /* Ignore the range if the endpoints are not in correct 
order */
+                    if ((First <= Last) && (Last <= 31))
+                    {
+#endif
+                        TokensMask |= (2 << Last) - (1 << First);
+#ifdef MSCMD_FOR_QUIRKS
+                        NumToksInSpec += (Last - First + 1);
+                    }
+#endif
 
                     if (*Param != _T(','))
                         break;
@@ -222,12 +265,19 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
                 }
                 /* With an asterisk at the end, an additional variable
                  * will be created to hold the remainder of the line
-                 * (after the last token specified). */
+                 * (after the last specified token). */
                 if (*Param == _T('*'))
                 {
-                    RemainderVar = TRUE;
+#ifdef MSCMD_FOR_QUIRKS
+                    RemainderVar = ++NumToksInSpec;
+#else
+                    ++NumTokens;
+#endif
                     Param++;
                 }
+#ifdef MSCMD_FOR_QUIRKS
+                NumTokens = max(NumTokens, NumToksInSpec);
+#endif
             }
             else if (_tcsnicmp(Param, _T("useback"), 7) == 0)
             {
@@ -248,12 +298,31 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
         }
     }
 
+#ifdef MSCMD_FOR_QUIRKS
+    /* Windows' CMD compatibility: use the wrongly evaluated number of tokens 
*/
+    fc->varcount = NumTokens;
+    /* Allocate a large enough variables array if needed */
+    if (NumTokens <= ARRAYSIZE(Variables))
+    {
+        fc->values = Variables;
+    }
+    else
+    {
+        fc->values = cmd_alloc(fc->varcount * sizeof(*fc->values));
+        if (!fc->values)
+        {
+            error_out_of_memory();
+            return 1;
+        }
+    }
+#else
     /* Count how many variables will be set: one for each token,
-     * plus maybe one for the remainder */
-    fc->varcount = RemainderVar;
-    for (i = 1; i < 32; i++)
-        fc->varcount += (Tokens >> i & 1);
+     * plus maybe one for the remainder. */
+    fc->varcount = NumTokens;
+    for (NumTokens = 1; NumTokens < 32; ++NumTokens)
+        fc->varcount += (TokensMask >> NumTokens) & 1;
     fc->values = Variables;
+#endif
 
     if (*List == StringQuote || *List == CommandQuote)
     {
@@ -267,7 +336,7 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
     End = List;
     while (!ExitingOrGoto(Cmd) && GetNextElement(&Start, &End))
     {
-        FILE *InputFile;
+        FILE* InputFile;
         LPTSTR FullInput, In, NextLine;
         INT Skip;
     single_element:
@@ -280,13 +349,18 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
         }
         else if (*Start == CommandQuote && End[-1] == CommandQuote)
         {
-            /* Read input from a command */
+            /*
+             * Read input from a command. We let the CRT do the ANSI/UNICODE 
conversion.
+             * NOTE: Should we do that, or instead read in binary mode and
+             * do the conversion by ourselves, using *OUR* current codepage??
+             */
             End[-1] = _T('\0');
             InputFile = _tpopen(Start + 1, _T("r"));
             if (!InputFile)
             {
                 error_bad_command(Start + 1);
-                return 1;
+                Ret = 1;
+                goto Quit;
             }
             FullInput = ReadFileContents(InputFile, Buffer);
             _pclose(InputFile);
@@ -302,7 +376,8 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
             if (!InputFile)
             {
                 error_sfile_not_found(Start);
-                return 1;
+                Ret = 1;
+                goto Quit;
             }
             FullInput = ReadFileContents(InputFile, Buffer);
             fclose(InputFile);
@@ -311,7 +386,8 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
         if (!FullInput)
         {
             error_out_of_memory();
-            return 1;
+            Ret = 1;
+            goto Quit;
         }
 
         /* Patch the delimiters string */
@@ -326,8 +402,13 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
              !ExitingOrGoto(Cmd) && (In != NULL);
              In = NextLine)
         {
-            DWORD RemainingTokens = Tokens;
-            LPTSTR *CurVar = Variables;
+            DWORD RemainingTokens = TokensMask;
+            LPTSTR* CurVar = fc->values;
+
+            ZeroMemory(fc->values, fc->varcount * sizeof(*fc->values));
+#ifdef MSCMD_FOR_QUIRKS
+            NumTokens = fc->varcount;
+#endif
 
             NextLine = _tcschr(In, _T('\n'));
             if (NextLine)
@@ -341,11 +422,19 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
             if (*In == Eol)
                 continue;
 
-            while ((RemainingTokens >>= 1) != 0)
+            /* Loop as long as we have not reached the end of
+             * the line, and that we have tokens available.
+             * A maximum of 31 tokens will be enumerated. */
+            while (*In && ((RemainingTokens >>= 1) != 0))
             {
                 /* Save pointer to this token in a variable if requested */
                 if (RemainingTokens & 1)
+                {
+#ifdef MSCMD_FOR_QUIRKS
+                    --NumTokens;
+#endif
                     *CurVar++ = In;
+                }
                 /* Find end of token */
                 In += _tcscspn(In, Delims);
                 /* NULL-terminate it and advance to next token */
@@ -355,11 +444,21 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
                     In += _tcsspn(In, Delims);
                 }
             }
-            /* Save pointer to remainder of line */
-            *CurVar = In;
 
-            /* Don't run unless the line had enough tokens to fill at least 
one variable */
-            if (*Variables[0])
+            /* Save pointer to remainder of the line if we need to do so */
+            if (*In)
+#ifdef MSCMD_FOR_QUIRKS
+            if (RemainderVar && (fc->varcount - NumTokens + 1 == RemainderVar))
+#endif
+            {
+                /* NOTE: This sets fc->values[0] at least, if no tokens
+                 * were initialized so far, since CurVar is initialized
+                 * originally to point to fc->values. */
+                *CurVar = In;
+            }
+
+            /* Don't run unless we have at least one variable filled */
+            if (fc->values[0])
                 Ret = RunInstance(Cmd);
         }
 
@@ -370,6 +469,12 @@ static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR 
*Buffer)
         cmd_free(FullInput);
     }
 
+Quit:
+#ifdef MSCMD_FOR_QUIRKS
+    if (fc->values && (fc->values != Variables))
+        cmd_free(fc->values);
+#endif
+
     return Ret;
 }
 

Reply via email to