Hi,
I'm looking for comments on this patch, which fixes a locale data corruption
issue when cross-compiling from an x86_64 host to an m68k (ColdFire) target.
gen_ldc runs on the host and computes structure member offsets using
offsetof(). These offsets are baked into locale_data.c as integer literals.
When cross-compiling for a target with different alignment requirements
and pointer sizes, the structure layout differs:
x86_64 host: int32_t alignment = 4 bytes, size_t = 8 bytes
m68k target: int32_t alignment = 2 bytes, size_t = 4 bytes
This causes locale data corruption - for example, decimal_point points
to the wrong memory (showing 0x03 instead of '.').
After some testing and experimentation, I have the following patch, which:
- Uses uint32_t instead of size_t for lc_common_tbl_offsets
- Uses #pragma pack(2) to force consistent structure layout
I'd appreciate comments, as this is certainly not the proper fix :-). The #pragma
pack(2) is m68k-specific; other 32-bit targets with 4-byte alignment
(ARM, MIPS, etc.) may not need it. Also, using uint32_t assumes a
32-bit target; 64-bit targets would need size_t.
I'm open to suggestions on how to make this more generic!
Thanks,
JM
From e778c8faec7881095c241e1cf880243a551fe942 Mon Sep 17 00:00:00 2001
From: Jean-Michel Hautbois <[email protected]>
Date: Mon, 1 Dec 2025 12:08:39 +0100
Subject: [RFC PATCH] locale: fix cross-compilation structure layout mismatch
The locale data generator (gen_ldc) runs on the host and computes
structure member offsets using offsetof(). These offsets are baked
into locale_data.c as integer literals.
When cross-compiling for a target with different alignment requirements,
the structure layout differs:
- x86_64 host: int32_t alignment = 4 bytes, size_t = 8 bytes
- m68k target: int32_t alignment = 2 bytes, size_t = 4 bytes
This causes offset values to be wrong for the target, resulting in
corrupted locale data (e.g., decimal_point pointing to wrong memory,
showing 0x03 instead of 0x2e '.').
Fix by:
1. Using uint32_t instead of size_t for lc_common_tbl_offsets
2. Using #pragma pack(2) to force 2-byte alignment for the structure,
ensuring consistent layout between host and target
Signed-off-by: Jean-Michel Hautbois <[email protected]>
---
extra/locale/gen_ldc.c | 17 +++++++++++++++--
extra/locale/locale_mmap.h | 7 ++++++-
libc/misc/locale/locale.c | 2 +-
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/extra/locale/gen_ldc.c b/extra/locale/gen_ldc.c
index 5f454026f..effed4d9d 100644
--- a/extra/locale/gen_ldc.c
+++ b/extra/locale/gen_ldc.c
@@ -69,7 +69,7 @@
offsetof(__locale_mmap_t, __PASTE3(lc_,X,_data)) \
-static const size_t common_tbl_offsets[__LOCALE_DATA_CATEGORIES*4] = {
+static const uint32_t common_tbl_offsets[__LOCALE_DATA_CATEGORIES*4] = {
COMMON_OFFSETS(ctype),
COMMON_OFFSETS(numeric),
COMMON_OFFSETS(monetary),
@@ -78,6 +78,19 @@ static const size_t common_tbl_offsets[__LOCALE_DATA_CATEGORIES*4] = {
COMMON_OFFSETS(messages),
};
+void out_u32(FILE *f, const uint32_t *p, size_t n, char *comment)
+{
+ size_t i;
+
+ fprintf(f, "{\t/* %s */", comment);
+ for (i = 0 ; i < n ; i++) {
+ if (!(i & 3)) {
+ fprintf(f, "\n\t");
+ }
+ fprintf(f, "%#010x, ", p[i]);
+ }
+ fprintf(f, "\n},\n");
+}
void out_uc(FILE *f, const unsigned char *p, size_t n, char *comment)
{
@@ -269,7 +282,7 @@ int main(int argc, char **argv)
out_uc(lso, co_buf, __LOCALE_DATA_CATEGORIES, "lc_common_item_offsets_LEN");
}
- out_size_t(lso, common_tbl_offsets, __LOCALE_DATA_CATEGORIES * 4, "lc_common_tbl_offsets");
+ out_u32(lso, common_tbl_offsets, __LOCALE_DATA_CATEGORIES * 4, "lc_common_tbl_offsets");
/* offsets from start of locale_mmap_t */
/* rows, item_offsets, item_idx, data */
diff --git a/extra/locale/locale_mmap.h b/extra/locale/locale_mmap.h
index d0ae9af1a..6f81c96fe 100644
--- a/extra/locale/locale_mmap.h
+++ b/extra/locale/locale_mmap.h
@@ -19,6 +19,9 @@
#undef __PASTE3
#define __PASTE3(A,B,C) A ## B ## C
+/* Force 2-byte alignment to match m68k ABI and ensure consistent layout */
+#pragma pack(push, 2)
+
#define __LOCALE_DATA_COMMON_MMAP(X) \
unsigned char __PASTE3(lc_,X,_data)[__PASTE3(__lc_,X,_data_LEN)];
@@ -72,7 +75,7 @@ typedef struct {
const uint16_t collate_data[__lc_collate_data_LEN];
unsigned char lc_common_item_offsets_LEN[__LOCALE_DATA_CATEGORIES];
- size_t lc_common_tbl_offsets[__LOCALE_DATA_CATEGORIES * 4];
+ uint32_t lc_common_tbl_offsets[__LOCALE_DATA_CATEGORIES * 4];
/* offsets from start of locale_mmap_t */
/* rows, item_offsets, item_idx, data */
@@ -88,4 +91,6 @@ typedef struct {
#endif
} __locale_mmap_t;
+#pragma pack(pop)
+
extern const __locale_mmap_t *__locale_mmap;
diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c
index d555f5da6..e7832ec50 100644
--- a/libc/misc/locale/locale.c
+++ b/libc/misc/locale/locale.c
@@ -514,7 +514,7 @@ int attribute_hidden _locale_set_l(const unsigned char *p, __locale_t base)
{
const char **x;
unsigned char *s = base->cur_locale + 1;
- const size_t *stp;
+ const uint32_t *stp;
const unsigned char *r;
const uint16_t *io;
const uint16_t *ii;
--
2.39.5
_______________________________________________
devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]