I have removed from TCC all the cases of illegal/undefined C that I
know about, apart from one, which is particularly horrible: the way a
CString is copied into a token string, which is an int array: see
tok_str_add2. On a 64-bit architecture the pointers end up misaligned,
so ASan gives lots of warnings. On a 64-bit architecture that required
memory accesses to be correctly aligned it would not work at all.
Here is a patch to cure the problem by putting the struct CString into
struct CValue instead. According to Valgrind, TCC does not leak memory
with this patch, though that's almost a miracle because it's very
unclear who is responsible for freeing the "data" in a string. Still,
unless someone has a better idea of how to do this, and is willing to
implement that better idea, I think this patch should go in. Opinions?
Edmund
diff --git a/TODO b/TODO
index eb846d9..623592f 100644
--- a/TODO
+++ b/TODO
@@ -34,7 +34,6 @@ Portability:
- it is assumed that int is 32-bit and sizeof(int) == 4
- int is used when host or target size_t would make more sense
-- struct CString is written into an int array and ends up misaligned
- TCC handles target floating-point (fp) values using the host's fp
arithmetic, which is simple and fast but may lead to exceptions
and inaccuracy and wrong representations when cross-compiling
diff --git a/i386-asm.c b/i386-asm.c
index bd0825b..68a7861 100644
--- a/i386-asm.c
+++ b/i386-asm.c
@@ -330,7 +330,7 @@ static void parse_operand(TCCState *s1, Operand *op)
next();
if (tok != TOK_PPNUM)
goto reg_error;
- p = tokc.cstr->data;
+ p = tokc.str.data;
reg = p[0] - '0';
if ((unsigned)reg >= 8 || p[1] != '\0')
goto reg_error;
diff --git a/tcc.h b/tcc.h
index 05e3965..b2ec24e 100644
--- a/tcc.h
+++ b/tcc.h
@@ -400,7 +400,11 @@ typedef union CValue {
double d;
float f;
uint64_t i;
- struct CString *cstr;
+ struct {
+ int size;
+ const void *data;
+ void *data_allocated;
+ } str;
int tab[LDOUBLE_SIZE/4];
} CValue;
diff --git a/tccasm.c b/tccasm.c
index 044db83..28e07fd 100644
--- a/tccasm.c
+++ b/tccasm.c
@@ -44,7 +44,7 @@ static void asm_expr_unary(TCCState *s1, ExprValue *pe)
switch(tok) {
case TOK_PPNUM:
- p = tokc.cstr->data;
+ p = tokc.str.data;
n = strtoul(p, (char **)&p, 0);
if (*p == 'b' || *p == 'f') {
/* backward or forward label */
@@ -378,7 +378,7 @@ static void asm_parse_directive(TCCState *s1)
uint64_t vl;
const char *p;
- p = tokc.cstr->data;
+ p = tokc.str.data;
if (tok != TOK_PPNUM) {
error_constant:
tcc_error("64 bit constant");
@@ -523,8 +523,8 @@ static void asm_parse_directive(TCCState *s1)
for(;;) {
if (tok != TOK_STR)
expect("string constant");
- p = tokc.cstr->data;
- size = tokc.cstr->size;
+ p = tokc.str.data;
+ size = tokc.str.size;
if (t == TOK_ASM_ascii && size > 0)
size--;
for(i = 0; i < size; i++)
@@ -565,7 +565,7 @@ static void asm_parse_directive(TCCState *s1)
next();
if (tok == TOK_STR)
- pstrcat(filename, sizeof(filename), tokc.cstr->data);
+ pstrcat(filename, sizeof(filename), tokc.str.data);
else
pstrcat(filename, sizeof(filename), get_tok_str(tok, NULL));
@@ -583,7 +583,7 @@ static void asm_parse_directive(TCCState *s1)
next();
if (tok == TOK_STR)
- pstrcat(ident, sizeof(ident), tokc.cstr->data);
+ pstrcat(ident, sizeof(ident), tokc.str.data);
else
pstrcat(ident, sizeof(ident), get_tok_str(tok, NULL));
@@ -629,7 +629,7 @@ static void asm_parse_directive(TCCState *s1)
next();
skip(',');
if (tok == TOK_STR) {
- newtype = tokc.cstr->data;
+ newtype = tokc.str.data;
} else {
if (tok == '@' || tok == '%')
skip(tok);
@@ -655,7 +655,7 @@ static void asm_parse_directive(TCCState *s1)
sname[0] = '\0';
while (tok != ';' && tok != TOK_LINEFEED && tok != ',') {
if (tok == TOK_STR)
- pstrcat(sname, sizeof(sname), tokc.cstr->data);
+ pstrcat(sname, sizeof(sname), tokc.str.data);
else
pstrcat(sname, sizeof(sname), get_tok_str(tok, NULL));
next();
@@ -768,7 +768,7 @@ static int tcc_assemble_internal(TCCState *s1, int
do_preprocess)
} else if (tok == TOK_PPNUM) {
const char *p;
int n;
- p = tokc.cstr->data;
+ p = tokc.str.data;
n = strtoul(p, (char **)&p, 10);
if (*p != '\0')
expect("':'");
@@ -970,8 +970,8 @@ static void parse_asm_operands(ASMOperand *operands, int
*nb_operands_ptr,
}
if (tok != TOK_STR)
expect("string constant");
- op->constraint = tcc_malloc(tokc.cstr->size);
- strcpy(op->constraint, tokc.cstr->data);
+ op->constraint = tcc_malloc(tokc.str.size);
+ strcpy(op->constraint, tokc.str.data);
next();
skip('(');
gexpr();
@@ -1038,7 +1038,7 @@ ST_FUNC void asm_instr(void)
for(;;) {
if (tok != TOK_STR)
expect("string constant");
- asm_clobber(clobber_regs, tokc.cstr->data);
+ asm_clobber(clobber_regs, tokc.str.data);
next();
if (tok == ',') {
next();
diff --git a/tccgen.c b/tccgen.c
index 3b5ad75..3f75a02 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -2759,7 +2759,7 @@ static void parse_attribute(AttributeDef *ad)
skip('(');
if (tok != TOK_STR)
expect("section name");
- ad->section = find_section(tcc_state, (char *)tokc.cstr->data);
+ ad->section = find_section(tcc_state, (char *)tokc.str.data);
next();
skip(')');
break;
@@ -2769,7 +2769,7 @@ static void parse_attribute(AttributeDef *ad)
if (tok != TOK_STR)
expect("alias(\"target\")");
ad->alias_target = /* save string as token, for later */
- tok_alloc((char*)tokc.cstr->data, tokc.cstr->size-1)->tok;
+ tok_alloc((char*)tokc.str.data, tokc.str.size-1)->tok;
next();
skip(')');
break;
@@ -2778,13 +2778,13 @@ static void parse_attribute(AttributeDef *ad)
skip('(');
if (tok != TOK_STR)
expect("visibility(\"default|hidden|internal|protected\")");
- if (!strcmp (tokc.cstr->data, "default"))
+ if (!strcmp (tokc.str.data, "default"))
ad->a.visibility = STV_DEFAULT;
- else if (!strcmp (tokc.cstr->data, "hidden"))
+ else if (!strcmp (tokc.str.data, "hidden"))
ad->a.visibility = STV_HIDDEN;
- else if (!strcmp (tokc.cstr->data, "internal"))
+ else if (!strcmp (tokc.str.data, "internal"))
ad->a.visibility = STV_INTERNAL;
- else if (!strcmp (tokc.cstr->data, "protected"))
+ else if (!strcmp (tokc.str.data, "protected"))
ad->a.visibility = STV_PROTECTED;
else
expect("visibility(\"default|hidden|internal|protected\")");
@@ -3376,7 +3376,7 @@ ST_FUNC void parse_asm_str(CString *astr)
cstr_new(astr);
while (tok == TOK_STR) {
/* XXX: add \0 handling too ? */
- cstr_cat(astr, tokc.cstr->data);
+ cstr_cat(astr, tokc.str.data);
next();
}
cstr_ccat(astr, '\0');
@@ -5509,14 +5509,12 @@ static void decl_initializer(CType *type, Section *sec,
unsigned long c,
) || (tok == TOK_STR && (t1->t & VT_BTYPE) == VT_BYTE)) {
while (tok == TOK_STR || tok == TOK_LSTR) {
int cstr_len, ch;
- CString *cstr;
- cstr = tokc.cstr;
/* compute maximum number of chars wanted */
if (tok == TOK_STR)
- cstr_len = cstr->size;
+ cstr_len = tokc.str.size;
else
- cstr_len = cstr->size / sizeof(nwchar_t);
+ cstr_len = tokc.str.size / sizeof(nwchar_t);
cstr_len--;
nb = cstr_len;
if (n >= 0 && nb > (n - array_length))
@@ -5528,13 +5526,13 @@ static void decl_initializer(CType *type, Section *sec,
unsigned long c,
string in global variable, we handle it
specifically */
if (sec && tok == TOK_STR && size1 == 1) {
- memcpy(sec->data + c + array_length, cstr->data, nb);
+ memcpy(sec->data + c + array_length, tokc.str.data,
nb);
} else {
for(i=0;i<nb;i++) {
if (tok == TOK_STR)
- ch = ((unsigned char *)cstr->data)[i];
+ ch = ((unsigned char *)tokc.str.data)[i];
else
- ch = ((nwchar_t *)cstr->data)[i];
+ ch = ((nwchar_t *)tokc.str.data)[i];
init_putv(t1, sec, c + (array_length + i) * size1,
ch, EXPR_VAL);
}
diff --git a/tccpp.c b/tccpp.c
index 4dba954..0fd90a5 100644
--- a/tccpp.c
+++ b/tccpp.c
@@ -279,7 +279,6 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv)
{
static char buf[STRING_MAX_SIZE + 1];
static CString cstr_buf;
- CString *cstr;
char *p;
int i, len;
@@ -314,20 +313,19 @@ ST_FUNC const char *get_tok_str(int v, CValue *cv)
break;
case TOK_PPNUM:
case TOK_PPSTR:
- return (char*)cv->cstr->data;
+ return (char*)cv->str.data;
case TOK_LSTR:
cstr_ccat(&cstr_buf, 'L');
case TOK_STR:
- cstr = cv->cstr;
cstr_ccat(&cstr_buf, '\"');
if (v == TOK_STR) {
- len = cstr->size - 1;
+ len = cv->str.size - 1;
for(i=0;i<len;i++)
- add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
+ add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]);
} else {
- len = (cstr->size / sizeof(nwchar_t)) - 1;
+ len = (cv->str.size / sizeof(nwchar_t)) - 1;
for(i=0;i<len;i++)
- add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]);
+ add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]);
}
cstr_ccat(&cstr_buf, '\"');
cstr_ccat(&cstr_buf, '\0');
@@ -929,21 +927,13 @@ static void tok_str_add2(TokenString *s, int t, CValue
*cv)
case TOK_STR:
case TOK_LSTR:
{
- int nb_words;
- CString cstr;
-
- nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
+ /* Insert the string into the int array. */
+ size_t nb_words =
+ 1 + (cv->str.size + sizeof(int) - 1) / sizeof(int);
while ((len + nb_words) > s->allocated_len)
str = tok_str_realloc(s);
- /* XXX: Insert the CString into the int array.
- It may end up incorrectly aligned. */
- cstr.data = 0;
- cstr.size = cv->cstr->size;
- cstr.data_allocated = 0;
- cstr.size_allocated = cstr.size;
- memcpy(str + len, &cstr, sizeof(CString));
- memcpy((char *)(str + len) + sizeof(CString),
- cv->cstr->data, cstr.size);
+ str[len] = cv->str.size;
+ memcpy(&str[len + 1], cv->str.data, cv->str.size);
len += nb_words;
}
break;
@@ -1012,10 +1002,10 @@ static inline void TOK_GET(int *t, const int **pp,
CValue *cv)
case TOK_LSTR:
case TOK_PPNUM:
case TOK_PPSTR:
- /* XXX: Illegal cast: the pointer p may not be correctly aligned! */
- cv->cstr = (CString *)p;
- cv->cstr->data = (char *)p + sizeof(CString);
- p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
+ cv->str.size = *p++;
+ cv->str.data = p;
+ cv->str.data_allocated = 0;
+ p += (cv->str.size + sizeof(int) - 1) / sizeof(int);
break;
case TOK_CDOUBLE:
case TOK_CLLONG:
@@ -1452,7 +1442,7 @@ static void pragma_parse(TCCState *s1)
goto pragma_err;
if (next(), tok != TOK_STR)
goto pragma_err;
- v = tok_alloc(tokc.cstr->data, tokc.cstr->size - 1)->tok;
+ v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok;
if (next(), tok != ')')
goto pragma_err;
if (t == TOK_push_macro) {
@@ -1528,7 +1518,7 @@ static void pragma_parse(TCCState *s1)
skip(',');
if (tok != TOK_STR)
goto pragma_err;
- file = tcc_strdup((char *)tokc.cstr->data);
+ file = tcc_strdup((char *)tokc.str.data);
dynarray_add((void ***)&s1->pragma_libs, &s1->nb_pragma_libs, file);
next();
if (tok != ')')
@@ -1616,7 +1606,7 @@ ST_FUNC void preprocess(int is_bof)
include_syntax:
tcc_error("'#include' expects \"FILENAME\" or
<FILENAME>");
}
- pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
+ pstrcat(buf, sizeof(buf), (char *)tokc.str.data);
next();
}
c = '\"';
@@ -1775,7 +1765,7 @@ include_done:
}
break;
case TOK_PPNUM:
- n = strtoul((char*)tokc.cstr->data, &q, 10);
+ n = strtoul((char*)tokc.str.data, &q, 10);
goto _line_num;
case TOK_LINE:
next();
@@ -1787,7 +1777,7 @@ _line_num:
next();
if (tok != TOK_LINEFEED) {
if (tok == TOK_STR)
- pstrcpy(file->filename, sizeof(file->filename), (char
*)tokc.cstr->data);
+ pstrcpy(file->filename, sizeof(file->filename), (char
*)tokc.str.data);
else if (parse_flags & PARSE_FLAG_ASM_FILE)
break;
else
@@ -1990,7 +1980,9 @@ void parse_string(const char *s, int len)
tok = TOK_LCHAR;
}
} else {
- tokc.cstr = &tokcstr;
+ tokc.str.size = tokcstr.size;
+ tokc.str.data = tokcstr.data;
+ tokc.str.data_allocated = tokcstr.data_allocated;
if (!is_long)
tok = TOK_STR;
else
@@ -2536,7 +2528,9 @@ maybe_newline:
}
/* We add a trailing '\0' to ease parsing */
cstr_ccat(&tokcstr, '\0');
- tokc.cstr = &tokcstr;
+ tokc.str.size = tokcstr.size;
+ tokc.str.data = tokcstr.data;
+ tokc.str.data_allocated = tokcstr.data_allocated;
tok = TOK_PPNUM;
break;
@@ -2571,7 +2565,9 @@ maybe_newline:
p = parse_pp_string(p, c, &tokcstr);
cstr_ccat(&tokcstr, c);
cstr_ccat(&tokcstr, '\0');
- tokc.cstr = &tokcstr;
+ tokc.str.size = tokcstr.size;
+ tokc.str.data = tokcstr.data;
+ tokc.str.data_allocated = tokcstr.data_allocated;
tok = TOK_PPSTR;
break;
@@ -2804,9 +2800,11 @@ static int *macro_arg_subst(Sym **nested_list, const int
*macro_str, Sym *args)
printf("\nstringize: <%s>\n", (char *)cstr.data);
#endif
/* add string */
- cval.cstr = &cstr;
+ cval.str.size = cstr.size;
+ cval.str.data = cstr.data;
+ cval.str.data_allocated = cstr.data_allocated;
tok_str_add2(&str, TOK_PPSTR, &cval);
- cstr_free(cval.cstr);
+ tcc_free(cval.str.data_allocated);
} else {
bad_stringy:
expect("macro parameter after '#'");
@@ -2970,7 +2968,9 @@ static int macro_subst_tok(
cstr_new(&cstr);
cstr_cat(&cstr, cstrval);
cstr_ccat(&cstr, '\0');
- cval.cstr = &cstr;
+ cval.str.size = cstr.size;
+ cval.str.data = cstr.data;
+ cval.str.data_allocated = cstr.data_allocated;
tok_str_add2(tok_str, t1, &cval);
cstr_free(&cstr);
} else {
@@ -3300,10 +3300,10 @@ ST_FUNC void next(void)
/* convert preprocessor tokens into C tokens */
if (tok == TOK_PPNUM) {
if (parse_flags & PARSE_FLAG_TOK_NUM)
- parse_number((char *)tokc.cstr->data);
+ parse_number((char *)tokc.str.data);
} else if (tok == TOK_PPSTR) {
if (parse_flags & PARSE_FLAG_TOK_STR)
- parse_string((char *)tokc.cstr->data, tokc.cstr->size - 1);
+ parse_string((char *)tokc.str.data, tokc.str.size - 1);
}
}
_______________________________________________
Tinycc-devel mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/tinycc-devel