Hi,
    
I'm looking for comments on this patch that fixes a locale data corruption 
issue when cross-compiling from x86_64 host to m68k (ColdFire) target.
    
gen_ldc runs on the host and computes structure member offsets using
offsetof(). These offsets are baked into locale_data.c as integer literals.
    
When cross-compiling for a target with different alignment requirements
and pointer sizes, the structure layout differs:
    
x86_64 host: int32_t alignment = 4 bytes, size_t = 8 bytes
m68k target: int32_t alignment = 2 bytes, size_t = 4 bytes
    
This causes locale data corruption - for example, decimal_point pointing
to wrong memory (showing 0x03 instead of '.').
    
After some testing and experimentation, I have the following patch, which:
- Uses uint32_t instead of size_t for lc_common_tbl_offsets
- Uses #pragma pack(2) to force consistent structure layout
    
I would appreciate comments, as this is certainly not the proper fix :-).
The #pragma pack(2) is m68k-specific; other 32-bit targets with 4-byte
alignment (ARM, MIPS, etc.) may not need it. Also, using uint32_t assumes
a 32-bit target; 64-bit targets would need size_t.
    
I'm open to suggestions on how to make this more generic!
    
Thanks,
JM
>From e778c8faec7881095c241e1cf880243a551fe942 Mon Sep 17 00:00:00 2001
From: Jean-Michel Hautbois <[email protected]>
Date: Mon, 1 Dec 2025 12:08:39 +0100
Subject: [RFC PATCH] locale: fix cross-compilation structure layout mismatch

The locale data generator (gen_ldc) runs on the host and computes
structure member offsets using offsetof(). These offsets are baked
into locale_data.c as integer literals.

When cross-compiling for a target with different alignment requirements
and pointer sizes, the structure layout differs:
- x86_64 host: int32_t alignment = 4 bytes, size_t = 8 bytes
- m68k target: int32_t alignment = 2 bytes, size_t = 4 bytes

This causes offset values to be wrong for the target, resulting in
corrupted locale data (e.g., decimal_point pointing to wrong memory,
showing 0x03 instead of 0x2e '.').

Fix by:
1. Using uint32_t instead of size_t for lc_common_tbl_offsets
2. Using #pragma pack(2) to force 2-byte alignment for the structure,
   ensuring consistent layout between host and target

Signed-off-by: Jean-Michel Hautbois <[email protected]>
---
 extra/locale/gen_ldc.c     | 17 +++++++++++++++--
 extra/locale/locale_mmap.h |  7 ++++++-
 libc/misc/locale/locale.c  |  2 +-
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/extra/locale/gen_ldc.c b/extra/locale/gen_ldc.c
index 5f454026f..effed4d9d 100644
--- a/extra/locale/gen_ldc.c
+++ b/extra/locale/gen_ldc.c
@@ -69,7 +69,7 @@
 	offsetof(__locale_mmap_t, __PASTE3(lc_,X,_data)) \
 
 
-static const size_t common_tbl_offsets[__LOCALE_DATA_CATEGORIES*4] = {
+static const uint32_t common_tbl_offsets[__LOCALE_DATA_CATEGORIES*4] = {
 	COMMON_OFFSETS(ctype),
 	COMMON_OFFSETS(numeric),
 	COMMON_OFFSETS(monetary),
@@ -78,6 +78,19 @@ static const size_t common_tbl_offsets[__LOCALE_DATA_CATEGORIES*4] = {
 	COMMON_OFFSETS(messages),
 };
 
+void out_u32(FILE *f, const uint32_t *p, size_t n, char *comment)
+{
+	size_t i;
+	/* Write p[0..n-1] to f as a brace-enclosed, comma-separated list labelled with comment. */
+	fprintf(f, "{\t/* %s */", comment);
+	for (i = 0 ; i < n ; i++) {
+		if (!(i & 3)) {	/* every fourth value starts a new tab-indented row */
+			fprintf(f, "\n\t");
+		}
+		fprintf(f, "%#010x, ", p[i]);	/* hex with the '#' flag, zero-padded to width 10 */
+	}
+	fprintf(f, "\n},\n");
+}
 
 void out_uc(FILE *f, const unsigned char *p, size_t n, char *comment)
 {
@@ -269,7 +282,7 @@ int main(int argc, char **argv)
 		out_uc(lso, co_buf, __LOCALE_DATA_CATEGORIES, "lc_common_item_offsets_LEN");
 	}
 
-	out_size_t(lso, common_tbl_offsets, __LOCALE_DATA_CATEGORIES * 4, "lc_common_tbl_offsets");
+	out_u32(lso, common_tbl_offsets, __LOCALE_DATA_CATEGORIES * 4, "lc_common_tbl_offsets");
 	/* offsets from start of locale_mmap_t */
 	/* rows, item_offsets, item_idx, data */
 
diff --git a/extra/locale/locale_mmap.h b/extra/locale/locale_mmap.h
index d0ae9af1a..6f81c96fe 100644
--- a/extra/locale/locale_mmap.h
+++ b/extra/locale/locale_mmap.h
@@ -19,6 +19,9 @@
 #undef __PASTE3
 #define __PASTE3(A,B,C) A ## B ## C
 
+/* Force 2-byte alignment to match m68k ABI and ensure consistent layout */
+#pragma pack(push, 2)
+
 #define __LOCALE_DATA_COMMON_MMAP(X) \
 	unsigned char   __PASTE3(lc_,X,_data)[__PASTE3(__lc_,X,_data_LEN)];
 
@@ -72,7 +75,7 @@ typedef struct {
 	const uint16_t collate_data[__lc_collate_data_LEN];
 
 	unsigned char lc_common_item_offsets_LEN[__LOCALE_DATA_CATEGORIES];
-	size_t lc_common_tbl_offsets[__LOCALE_DATA_CATEGORIES * 4];
+	uint32_t lc_common_tbl_offsets[__LOCALE_DATA_CATEGORIES * 4];
 	/* offsets from start of locale_mmap_t */
 	/* rows, item_offsets, item_idx, data */
 
@@ -88,4 +91,6 @@ typedef struct {
 #endif
 } __locale_mmap_t;
 
+#pragma pack(pop)
+
 extern const __locale_mmap_t *__locale_mmap;
diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c
index d555f5da6..e7832ec50 100644
--- a/libc/misc/locale/locale.c
+++ b/libc/misc/locale/locale.c
@@ -514,7 +514,7 @@ int attribute_hidden _locale_set_l(const unsigned char *p, __locale_t base)
 {
 	const char **x;
 	unsigned char *s = base->cur_locale + 1;
-	const size_t *stp;
+	const uint32_t *stp;
 	const unsigned char *r;
 	const uint16_t *io;
 	const uint16_t *ii;
-- 
2.39.5

_______________________________________________
devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to