#!/usr/bin/awk -f
#
# Reads a property data file (PropList.txt, per the header this script
# prints) and writes, for every property named in array_names, a C array
# "static const uint32_t <Property>[]" holding the matching code points.

# mystrtonum(str) -- convert a "0x"-prefixed hexadecimal string to a number.
# adopted from http://www.gnu.org/software/gawk/manual/html_node/Strtonum-Function.html#Strtonum-Function
#
# Returns the numeric value, or the string "NOT-A-HEX-NUMBER" when str is
# not of the form 0x<hex digits>.
# The names after the wide gap in the parameter list are locals; callers
# pass only str.
function mystrtonum(str,    n, ret, i, c, k) {
    if (str ~ /^0[xX][[:xdigit:]]+$/) {
        str = substr(str, 3)    # lop off leading 0x
        n = length(str)
        ret = 0
        for (i = 1; i <= n; i++) {
            c = tolower(substr(str, i, 1))
            # index() returns 0 if c not in string,
            # includes c == "0"
            k = index("123456789abcdef", c)
            ret = ret * 16 + k
        }
    } else {
        ret = "NOT-A-HEX-NUMBER"
    }
    return ret
}

# add_hexrange(start, end) -- expand an inclusive range of hex code points.
#
# start and end are hexadecimal strings WITHOUT the "0x" prefix.
# Returns the decimal values start..end joined by single spaces.
# Returns "" when either bound is not hexadecimal or when start > end.
# from, to, i and result are locals; callers pass only start and end.
function add_hexrange(start, end,    from, to, i, result) {
    from = mystrtonum("0x" start)
    to = mystrtonum("0x" end)

    # A non-numeric bound would turn "i <= to" into a string comparison,
    # which can loop forever; refuse such input up front.
    if (from == "NOT-A-HEX-NUMBER" || to == "NOT-A-HEX-NUMBER")
        return ""

    result = ""
    for (i = from; i <= to; i++) {
        if (i == from)
            result = i
        else
            result = result " " i
    }
    return result
}

BEGIN {
    FS = " "
    # array_names[1..name_count] lists the properties to extract; the
    # count lets the rules below walk them in this fixed order.
    name_count = split("White_Space Dash Terminal_Punctuation STerm Pattern_White_Space", array_names)
}

# Collect code points per property; skip comment and empty lines.
$0 !~ /^#/ && NF != 0 {
    # cycle over array_names and do the math
    for (idx = 1; idx <= name_count; idx++) {
        # Unanchored match of "; <name>" anywhere in the record.
        if ($0 ~ ("; " array_names[idx])) {
            if ($1 ~ /\.\./) {
                # "AAAA..BBBB": split on the literal ".." separator.
                split($1, bounds, /\.\./)
                array[idx] = array[idx] " " add_hexrange(bounds[1], bounds[2])
            } else {
                array[idx] = array[idx] " " mystrtonum("0x" $1)
            }
        }
    }
}

# Emit one C array per property, eight values per line.
END {
    print "/* This file is automatically generated by word-break-data.awk from PropList.txt */"
    for (idx = 1; idx <= name_count; idx++) {
        n = split(array[idx], integers)
        print "static const uint32_t " array_names[idx] "[]= {"
        for (i = 1; i <= n; i++) {
            # Every 8th value starts a new tab-indented line.
            if (i == 1)
                lead = "\t"
            else if ((i - 1) % 8 == 0)
                lead = "\n\t"
            else
                lead = ""
            # The first value always gets ", " (even when it is the only
            # one); afterwards only non-final values do.
            trail = (i == 1 || i != n) ? ", " : ""
            printf("%s0x%05X%s", lead, integers[i], trail)
        }
        print "\n};"
    }
}