Changeset: e3930423f7df for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e3930423f7df
Added Files:
misc/bash/uniwidthtab.sh
Modified Files:
common/utils/mutf8.c
Branch: default
Log Message:
Add script to create charwidth table.
diffs (73 lines):
diff --git a/common/utils/mutf8.c b/common/utils/mutf8.c
--- a/common/utils/mutf8.c
+++ b/common/utils/mutf8.c
@@ -19,6 +19,7 @@ struct interval {
int width;
};
+/* this table was created using the script uniwidthtab.sh */
static const struct interval intervals[] = {
/* sorted list of non-overlapping ranges:
* ranges with width==0 represent all codepoints with
diff --git a/misc/bash/uniwidthtab.sh b/misc/bash/uniwidthtab.sh
new file mode 100755
--- /dev/null
+++ b/misc/bash/uniwidthtab.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: MPL-2.0
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Copyright 2024 MonetDB Foundation;
+# Copyright August 2008 - 2023 MonetDB B.V.;
+# Copyright 1997 - July 2008 CWI.
+
+{
+ sed -n -e '/# Mn/d' \
+ -e 's/^\([0-9A-F][0-9A-F]*\)\.\.\([0-9A-F][0-9A-F]*\) *; *[FW].*/0x\1
0x\2/p' \
+ -e 's/^\([0-9A-F][0-9A-F]*\) *; [FW].*/0x\1 0x\1/p' \
+ < /usr/share/unicode/ucd/EastAsianWidth.txt | {
+ while read line; do
+ line=($line)
+ f=$((${line[0]%}))
+ l=$((${line[1]%}))
+ if [[ -n $prevl ]]; then
+ if (($prevl+1 == $f)); then
+ prevl=$l
+ else
+ printf '\t{ 0x%05X, 0x%05X, 2 },\n' $prevf $prevl
+ prevf=$f
+ prevl=$l
+ fi
+ else
+ prevf=$f
+ prevl=$l
+ fi
+ done
+ printf '\t{ 0x%05X, 0x%05X, 2 },\n' $prevf $prevl
+ }
+
+ sed -n '/^00AD/d;s/^\([0-9A-F][0-9A-F]*\);[^;]*;\(Me\|Mn\|Cf\);.*/0x\1/p' \
+ < /usr/share/unicode/ucd/UnicodeData.txt | {
+ while read line; do
+ u=$(($line))
+ if [[ -n $prevf ]]; then
+ if (($prevl+1 == $u)); then
+ prevl=$u
+ else
+ printf '\t{ 0x%05X, 0x%05X, 0 },\n' $prevf $prevl
+ prevf=$u
+ prevl=$u
+ fi
+ else
+ prevf=$u
+ prevl=$u
+ fi
+ done
+ printf '\t{ 0x%05X, 0x%05X, 0 },\n' $prevf $prevl
+ }
+} | sort | sed 's/0x0\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\)/0x\1/g'
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]