I have removed from TCC all the cases of illegal/undefined C that I
know about, apart from one, which is particularly horrible: the way a
CString is copied into a token string, which is an int array: see
tok_str_add2. On a 64-bit architecture the pointers end up misaligned,
so ASan gives lots of warnings. On a 64-bit architecture that required
memory accesses to be correctly aligned it would not work at all.

Here is a patch to cure the problem by putting the struct CString into
struct CValue instead. According to Valgrind, TCC does not leak memory
with this patch, though that's almost a miracle because it's very
unclear who is responsible for freeing the "data" in a string. Still,
unless someone has a better idea of how to do this, and is willing to
implement that better idea, I think this patch should go in. Opinions?

Edmund
diff --git a/TODO b/TODO
index eb846d9..623592f 100644
--- a/TODO
+++ b/TODO
@@ -34,7 +34,6 @@ Portability:
 
 - it is assumed that int is 32-bit and sizeof(int) == 4
 - int is used when host or target size_t would make more sense
-- struct CString is written into an int array and ends up misaligned
 - TCC handles target floating-point (fp) values using the host's fp
   arithmetic, which is simple and fast but may lead to exceptions
   and inaccuracy and wrong representations when cross-compiling
diff --git a/i386-asm.c b/i386-asm.c
index bd0825b..68a7861 100644
--- a/i386-asm.c
+++ b/i386-asm.c
@@ -330,7 +330,7 @@ static void parse_operand(TCCState *s1, Operand *op)
                 next();
                 if (tok != TOK_PPNUM)
                     goto reg_error;
-                p = tokc.cstr->data;
+                p = tokc.str.data;
                 reg = p[0] - '0';
                 if ((unsigned)reg >= 8 || p[1] != '\0')
                     goto reg_error;
diff --git a/tcc.h b/tcc.h
index 05e3965..b2ec24e 100644
--- a/tcc.h
+++ b/tcc.h
@@ -400,7 +400,11 @@ typedef union CValue {
     double d;
     float f;
     uint64_t i;
-    struct CString *cstr;
+    struct {
+        int size;
+        const void *data;
+        void *data_allocated;
+    } str;
     int tab[LDOUBLE_SIZE/4];
 } CValue;
 
diff --git a/tccasm.c b/tccasm.c
index 044db83..28e07fd 100644
--- a/tccasm.c
+++ b/tccasm.c
@@ -44,7 +44,7 @@ static void asm_expr_unary(TCCState *s1, ExprValue *pe)
 
     switch(tok) {
     case TOK_PPNUM:
-        p = tokc.cstr->data;
+        p = tokc.str.data;
         n = strtoul(p, (char **)&p, 0);
         if (*p == 'b' || *p == 'f') {
             /* backward or forward label */
@@ -378,7 +378,7 @@ static void asm_parse_directive(TCCState *s1)
             uint64_t vl;
             const char *p;
 
-            p = tokc.cstr->data;
+            p = tokc.str.data;
             if (tok != TOK_PPNUM) {
             error_constant:
                 tcc_error("64 bit constant");
@@ -523,8 +523,8 @@ static void asm_parse_directive(TCCState *s1)
             for(;;) {
                 if (tok != TOK_STR)
                     expect("string constant");
-                p = tokc.cstr->data;
-                size = tokc.cstr->size;
+                p = tokc.str.data;
+                size = tokc.str.size;
                 if (t == TOK_ASM_ascii && size > 0)
                     size--;
                 for(i = 0; i < size; i++)
@@ -565,7 +565,7 @@ static void asm_parse_directive(TCCState *s1)
             next();
 
             if (tok == TOK_STR)
-                pstrcat(filename, sizeof(filename), tokc.cstr->data);
+                pstrcat(filename, sizeof(filename), tokc.str.data);
             else
                 pstrcat(filename, sizeof(filename), get_tok_str(tok, NULL));
 
@@ -583,7 +583,7 @@ static void asm_parse_directive(TCCState *s1)
             next();
 
             if (tok == TOK_STR)
-                pstrcat(ident, sizeof(ident), tokc.cstr->data);
+                pstrcat(ident, sizeof(ident), tokc.str.data);
             else
                 pstrcat(ident, sizeof(ident), get_tok_str(tok, NULL));
 
@@ -629,7 +629,7 @@ static void asm_parse_directive(TCCState *s1)
             next();
             skip(',');
             if (tok == TOK_STR) {
-                newtype = tokc.cstr->data;
+                newtype = tokc.str.data;
             } else {
                 if (tok == '@' || tok == '%')
                     skip(tok);
@@ -655,7 +655,7 @@ static void asm_parse_directive(TCCState *s1)
             sname[0] = '\0';
             while (tok != ';' && tok != TOK_LINEFEED && tok != ',') {
                 if (tok == TOK_STR)
-                    pstrcat(sname, sizeof(sname), tokc.cstr->data);
+                    pstrcat(sname, sizeof(sname), tokc.str.data);
                 else
                     pstrcat(sname, sizeof(sname), get_tok_str(tok, NULL));
                 next();
@@ -768,7 +768,7 @@ static int tcc_assemble_internal(TCCState *s1, int 
do_preprocess)
         } else if (tok == TOK_PPNUM) {
             const char *p;
             int n;
-            p = tokc.cstr->data;
+            p = tokc.str.data;
             n = strtoul(p, (char **)&p, 10);
             if (*p != '\0')
                 expect("':'");
@@ -970,8 +970,8 @@ static void parse_asm_operands(ASMOperand *operands, int 
*nb_operands_ptr,
             }
             if (tok != TOK_STR)
                 expect("string constant");
-            op->constraint = tcc_malloc(tokc.cstr->size);
-            strcpy(op->constraint, tokc.cstr->data);
+            op->constraint = tcc_malloc(tokc.str.size);
+            strcpy(op->constraint, tokc.str.data);
             next();
             skip('(');
             gexpr();
@@ -1038,7 +1038,7 @@ ST_FUNC void asm_instr(void)
                     for(;;) {
                         if (tok != TOK_STR)
                             expect("string constant");
-                        asm_clobber(clobber_regs, tokc.cstr->data);
+                        asm_clobber(clobber_regs, tokc.str.data);
                         next();
                         if (tok == ',') {
                             next();
diff --git a/tccgen.c b/tccgen.c
index 3b5ad75..3f75a02 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -2759,7 +2759,7 @@ static void parse_attribute(AttributeDef *ad)
             skip('(');
             if (tok != TOK_STR)
                 expect("section name");
-            ad->section = find_section(tcc_state, (char *)tokc.cstr->data);
+            ad->section = find_section(tcc_state, (char *)tokc.str.data);
             next();
             skip(')');
             break;
@@ -2769,7 +2769,7 @@ static void parse_attribute(AttributeDef *ad)
             if (tok != TOK_STR)
                 expect("alias(\"target\")");
             ad->alias_target = /* save string as token, for later */
-              tok_alloc((char*)tokc.cstr->data, tokc.cstr->size-1)->tok;
+              tok_alloc((char*)tokc.str.data, tokc.str.size-1)->tok;
             next();
             skip(')');
             break;
@@ -2778,13 +2778,13 @@ static void parse_attribute(AttributeDef *ad)
             skip('(');
             if (tok != TOK_STR)
                 expect("visibility(\"default|hidden|internal|protected\")");
-           if (!strcmp (tokc.cstr->data, "default"))
+           if (!strcmp (tokc.str.data, "default"))
                ad->a.visibility = STV_DEFAULT;
-           else if (!strcmp (tokc.cstr->data, "hidden"))
+           else if (!strcmp (tokc.str.data, "hidden"))
                ad->a.visibility = STV_HIDDEN;
-           else if (!strcmp (tokc.cstr->data, "internal"))
+           else if (!strcmp (tokc.str.data, "internal"))
                ad->a.visibility = STV_INTERNAL;
-           else if (!strcmp (tokc.cstr->data, "protected"))
+           else if (!strcmp (tokc.str.data, "protected"))
                ad->a.visibility = STV_PROTECTED;
            else
                 expect("visibility(\"default|hidden|internal|protected\")");
@@ -3376,7 +3376,7 @@ ST_FUNC void parse_asm_str(CString *astr)
     cstr_new(astr);
     while (tok == TOK_STR) {
         /* XXX: add \0 handling too ? */
-        cstr_cat(astr, tokc.cstr->data);
+        cstr_cat(astr, tokc.str.data);
         next();
     }
     cstr_ccat(astr, '\0');
@@ -5509,14 +5509,12 @@ static void decl_initializer(CType *type, Section *sec, 
unsigned long c,
             ) || (tok == TOK_STR && (t1->t & VT_BTYPE) == VT_BYTE)) {
             while (tok == TOK_STR || tok == TOK_LSTR) {
                 int cstr_len, ch;
-                CString *cstr;
 
-                cstr = tokc.cstr;
                 /* compute maximum number of chars wanted */
                 if (tok == TOK_STR)
-                    cstr_len = cstr->size;
+                    cstr_len = tokc.str.size;
                 else
-                    cstr_len = cstr->size / sizeof(nwchar_t);
+                    cstr_len = tokc.str.size / sizeof(nwchar_t);
                 cstr_len--;
                 nb = cstr_len;
                 if (n >= 0 && nb > (n - array_length))
@@ -5528,13 +5526,13 @@ static void decl_initializer(CType *type, Section *sec, 
unsigned long c,
                        string in global variable, we handle it
                        specifically */
                     if (sec && tok == TOK_STR && size1 == 1) {
-                        memcpy(sec->data + c + array_length, cstr->data, nb);
+                        memcpy(sec->data + c + array_length, tokc.str.data, 
nb);
                     } else {
                         for(i=0;i<nb;i++) {
                             if (tok == TOK_STR)
-                                ch = ((unsigned char *)cstr->data)[i];
+                                ch = ((unsigned char *)tokc.str.data)[i];
                             else
-                                ch = ((nwchar_t *)cstr->data)[i];
+                                ch = ((nwchar_t *)tokc.str.data)[i];
                             init_putv(t1, sec, c + (array_length + i) * size1,
                                       ch, EXPR_VAL);
                         }
diff --git a/tccpp.c b/tccpp.c
index 4dba954..0fd90a5 100644
--- a/tccpp.c
+++ b/tccpp.c
@@ -279,7 +279,6 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv)
 {
     static char buf[STRING_MAX_SIZE + 1];
     static CString cstr_buf;
-    CString *cstr;
     char *p;
     int i, len;
 
@@ -314,20 +313,19 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv)
         break;
     case TOK_PPNUM:
     case TOK_PPSTR:
-        return (char*)cv->cstr->data;
+        return (char*)cv->str.data;
     case TOK_LSTR:
         cstr_ccat(&cstr_buf, 'L');
     case TOK_STR:
-        cstr = cv->cstr;
         cstr_ccat(&cstr_buf, '\"');
         if (v == TOK_STR) {
-            len = cstr->size - 1;
+            len = cv->str.size - 1;
             for(i=0;i<len;i++)
-                add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
+                add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]);
         } else {
-            len = (cstr->size / sizeof(nwchar_t)) - 1;
+            len = (cv->str.size / sizeof(nwchar_t)) - 1;
             for(i=0;i<len;i++)
-                add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]);
+                add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]);
         }
         cstr_ccat(&cstr_buf, '\"');
         cstr_ccat(&cstr_buf, '\0');
@@ -929,21 +927,13 @@ static void tok_str_add2(TokenString *s, int t, CValue 
*cv)
     case TOK_STR:
     case TOK_LSTR:
         {
-            int nb_words;
-            CString cstr;
-
-            nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
+            /* Insert the string into the int array. */
+            size_t nb_words =
+                1 + (cv->str.size + sizeof(int) - 1) / sizeof(int);
             while ((len + nb_words) > s->allocated_len)
                 str = tok_str_realloc(s);
-            /* XXX: Insert the CString into the int array.
-               It may end up incorrectly aligned. */
-            cstr.data = 0;
-            cstr.size = cv->cstr->size;
-            cstr.data_allocated = 0;
-            cstr.size_allocated = cstr.size;
-            memcpy(str + len, &cstr, sizeof(CString));
-            memcpy((char *)(str + len) + sizeof(CString),
-                   cv->cstr->data, cstr.size);
+            str[len] = cv->str.size;
+            memcpy(&str[len + 1], cv->str.data, cv->str.size);
             len += nb_words;
         }
         break;
@@ -1012,10 +1002,10 @@ static inline void TOK_GET(int *t, const int **pp, 
CValue *cv)
     case TOK_LSTR:
     case TOK_PPNUM:
     case TOK_PPSTR:
-        /* XXX: Illegal cast: the pointer p may not be correctly aligned! */
-        cv->cstr = (CString *)p;
-        cv->cstr->data = (char *)p + sizeof(CString);
-        p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
+        cv->str.size = *p++;
+        cv->str.data = p;
+        cv->str.data_allocated = 0;
+        p += (cv->str.size + sizeof(int) - 1) / sizeof(int);
         break;
     case TOK_CDOUBLE:
     case TOK_CLLONG:
@@ -1452,7 +1442,7 @@ static void pragma_parse(TCCState *s1)
             goto pragma_err;
         if (next(), tok != TOK_STR)
             goto pragma_err;
-        v = tok_alloc(tokc.cstr->data, tokc.cstr->size - 1)->tok;
+        v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok;
         if (next(), tok != ')')
             goto pragma_err;
         if (t == TOK_push_macro) {
@@ -1528,7 +1518,7 @@ static void pragma_parse(TCCState *s1)
         skip(',');
         if (tok != TOK_STR)
             goto pragma_err;
-        file = tcc_strdup((char *)tokc.cstr->data);
+        file = tcc_strdup((char *)tokc.str.data);
         dynarray_add((void ***)&s1->pragma_libs, &s1->nb_pragma_libs, file);
         next();
         if (tok != ')')
@@ -1616,7 +1606,7 @@ ST_FUNC void preprocess(int is_bof)
                     include_syntax:
                         tcc_error("'#include' expects \"FILENAME\" or 
<FILENAME>");
                     }
-                    pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
+                    pstrcat(buf, sizeof(buf), (char *)tokc.str.data);
                     next();
                 }
                 c = '\"';
@@ -1775,7 +1765,7 @@ include_done:
         }
         break;
     case TOK_PPNUM:
-        n = strtoul((char*)tokc.cstr->data, &q, 10);
+        n = strtoul((char*)tokc.str.data, &q, 10);
         goto _line_num;
     case TOK_LINE:
         next();
@@ -1787,7 +1777,7 @@ _line_num:
         next();
         if (tok != TOK_LINEFEED) {
             if (tok == TOK_STR)
-                pstrcpy(file->filename, sizeof(file->filename), (char 
*)tokc.cstr->data);
+                pstrcpy(file->filename, sizeof(file->filename), (char 
*)tokc.str.data);
             else if (parse_flags & PARSE_FLAG_ASM_FILE)
                 break;
             else
@@ -1990,7 +1980,9 @@ void parse_string(const char *s, int len)
             tok = TOK_LCHAR;
         }
     } else {
-        tokc.cstr = &tokcstr;
+        tokc.str.size = tokcstr.size;
+        tokc.str.data = tokcstr.data;
+        tokc.str.data_allocated = tokcstr.data_allocated;
         if (!is_long)
             tok = TOK_STR;
         else
@@ -2536,7 +2528,9 @@ maybe_newline:
         }
         /* We add a trailing '\0' to ease parsing */
         cstr_ccat(&tokcstr, '\0');
-        tokc.cstr = &tokcstr;
+        tokc.str.size = tokcstr.size;
+        tokc.str.data = tokcstr.data;
+        tokc.str.data_allocated = tokcstr.data_allocated;
         tok = TOK_PPNUM;
         break;
 
@@ -2571,7 +2565,9 @@ maybe_newline:
         p = parse_pp_string(p, c, &tokcstr);
         cstr_ccat(&tokcstr, c);
         cstr_ccat(&tokcstr, '\0');
-        tokc.cstr = &tokcstr;
+        tokc.str.size = tokcstr.size;
+        tokc.str.data = tokcstr.data;
+        tokc.str.data_allocated = tokcstr.data_allocated;
         tok = TOK_PPSTR;
         break;
 
@@ -2804,9 +2800,11 @@ static int *macro_arg_subst(Sym **nested_list, const int 
*macro_str, Sym *args)
                 printf("\nstringize: <%s>\n", (char *)cstr.data);
 #endif
                 /* add string */
-                cval.cstr = &cstr;
+                cval.str.size = cstr.size;
+                cval.str.data = cstr.data;
+                cval.str.data_allocated = cstr.data_allocated;
                 tok_str_add2(&str, TOK_PPSTR, &cval);
-                cstr_free(cval.cstr);
+                tcc_free(cval.str.data_allocated);
             } else {
         bad_stringy:
                 expect("macro parameter after '#'");
@@ -2970,7 +2968,9 @@ static int macro_subst_tok(
         cstr_new(&cstr);
         cstr_cat(&cstr, cstrval);
         cstr_ccat(&cstr, '\0');
-        cval.cstr = &cstr;
+        cval.str.size = cstr.size;
+        cval.str.data = cstr.data;
+        cval.str.data_allocated = cstr.data_allocated;
         tok_str_add2(tok_str, t1, &cval);
         cstr_free(&cstr);
     } else {
@@ -3300,10 +3300,10 @@ ST_FUNC void next(void)
     /* convert preprocessor tokens into C tokens */
     if (tok == TOK_PPNUM) {
         if  (parse_flags & PARSE_FLAG_TOK_NUM)
-            parse_number((char *)tokc.cstr->data);
+            parse_number((char *)tokc.str.data);
     } else if (tok == TOK_PPSTR) {
         if (parse_flags & PARSE_FLAG_TOK_STR)
-            parse_string((char *)tokc.cstr->data, tokc.cstr->size - 1);
+            parse_string((char *)tokc.str.data, tokc.str.size - 1);
     }
 }
 
_______________________________________________
Tinycc-devel mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/tinycc-devel

Reply via email to