Attached are several files: the patch itself, a derived header generated from a file that is part of the patch, and the tool used to derive that header from the mapping file.
I've had this toy kicking around for a decade or so. If it were part of some project, it might belong in libiberty, or maybe gnulib, or maybe coreutils. Uncertain what to do with it, I've kept it basically hidden. The idea is to specify the character classes for a particular program or group of programs and emit the classification table. Zack did this for GCC. There are some ad-hoc methods in coreutils, including in fmt. I chose fmt as the example because it was especially trivial and would still demonstrate the idea. So it's all attached for your amusement. It probably needs a better name.

These are the four classes needed by fmt.c:

    open   "(['\""     <<<<=== backtick removed, per recent discussions
    close  ")]'\""
    period ".?!"
    punct  "\x21-\x7E" -"a-zA-Z0-9"

Cheers - Bruce

    char-mapper/
    char-mapper/cm-opt.c
    char-mapper/map-text.c
    char-mapper/mk-str2enum.sh
    char-mapper/char-mapper.c
    char-mapper/map-text.def
    char-mapper/mk-opt-table.sh
    char-mapper/char-mapper.h
    char-mapper/Makefile
    char-mapper/test.sh
    char-mapper/map-text.h
    char-mapper/build-html.sh
    char-mapper/cm-opt.h
    char-mapper/MakeRules
>From 62fd69a3d3a547cc3579484571b2abcb2fcd668a Mon Sep 17 00:00:00 2001 From: Bruce Korb <[email protected]> Date: Sun, 15 Apr 2012 12:08:53 -0700 Subject: [PATCH] fmt: use generated char classifications * lib/fmt-class.map: file describing the character classes used by fmt. * lib/fmt.c: use fmt-class.h in preference to roll-your-own * src/Makefile.am: add rule to derive fmt-class.h from fmt-class.map --- .gitignore | 1 + src/.gitignore | 1 + src/Makefile.am | 45 +++++++++++++++++++++++---------------------- src/fmt-class.map | 31 +++++++++++++++++++++++++++++++ src/fmt.c | 17 +++++------------ 5 files changed, 61 insertions(+), 34 deletions(-) create mode 100644 src/fmt-class.map diff --git a/.gitignore b/.gitignore index 383361b..f5e35dc 100644 --- a/.gitignore +++ b/.gitignore @@ -75,6 +75,7 @@ /lib/ref-del.sed /lib/selinux /lib/signal.h +/lib/spawn.h /lib/stamp-h1 /lib/stdalign.h /lib/stdio.h diff --git a/src/.gitignore b/src/.gitignore index 9c4c9b7..0e5e47f 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -114,3 +114,4 @@ wheel.h who whoami yes +fmt-class.h diff --git a/src/Makefile.am b/src/Makefile.am index 06ab615..8e59801 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -468,30 +468,27 @@ ginstall_SOURCES = install.c prog-fprintf.c $(copy_sources) # This is for the '[' program. Automake transliterates '[' to '_'. 
__SOURCES = lbracket.c -cp_SOURCES = cp.c $(copy_sources) -dir_SOURCES = ls.c ls-dir.c -vdir_SOURCES = ls.c ls-vdir.c -id_SOURCES = id.c group-list.c -groups_SOURCES = groups.c group-list.c -ls_SOURCES = ls.c ls-ls.c -ln_SOURCES = ln.c relpath.c relpath.h -chown_SOURCES = chown.c chown-core.c -chgrp_SOURCES = chgrp.c chown-core.c -kill_SOURCES = kill.c operand2sig.c +arch_SOURCES = uname.c uname-arch.c +chgrp_SOURCES = chgrp.c chown-core.c +chown_SOURCES = chown.c chown-core.c +cp_SOURCES = cp.c $(copy_sources) +df_SOURCES = df.c find-mount-point.c +dir_SOURCES = ls.c ls-dir.c +fmt_SOURCES = fmt.c fmt-class.h +groups_SOURCES = groups.c group-list.c +id_SOURCES = id.c group-list.c +kill_SOURCES = kill.c operand2sig.c +ln_SOURCES = ln.c relpath.c relpath.h +ls_SOURCES = ls.c ls-ls.c +mkdir_SOURCES = mkdir.c prog-fprintf.c +mv_SOURCES = mv.c remove.c $(copy_sources) realpath_SOURCES = realpath.c relpath.c relpath.h +rm_SOURCES = rm.c remove.c +rmdir_SOURCES = rmdir.c prog-fprintf.c +stat_SOURCES = stat.c find-mount-point.c timeout_SOURCES = timeout.c operand2sig.c - -mv_SOURCES = mv.c remove.c $(copy_sources) -rm_SOURCES = rm.c remove.c - -mkdir_SOURCES = mkdir.c prog-fprintf.c -rmdir_SOURCES = rmdir.c prog-fprintf.c - -df_SOURCES = df.c find-mount-point.c -stat_SOURCES = stat.c find-mount-point.c - -uname_SOURCES = uname.c uname-uname.c -arch_SOURCES = uname.c uname-arch.c +uname_SOURCES = uname.c uname-uname.c +vdir_SOURCES = ls.c ls-vdir.c md5sum_CPPFLAGS = -DHASH_ALGO_MD5=1 $(AM_CPPFLAGS) sha1sum_SOURCES = md5sum.c @@ -526,6 +523,10 @@ dircolors.h: dcgen dircolors.hin $(AM_V_at)chmod a-w $@-t $(AM_V_at)mv $@-t $@ +BUILT_SOURCES += fmt-class.h +fmt-class.h : fmt-class.map + char-mapper fmt-class.map + wheel_size = 5 BUILT_SOURCES += wheel-size.h diff --git a/src/fmt-class.map b/src/fmt-class.map new file mode 100644 index 0000000..2aee676 --- /dev/null +++ b/src/fmt-class.map @@ -0,0 +1,31 @@ + +%guard +%file fmt-class.h +%backup + +%comment + This file contains 
the character classifications used by fmt + for identifying quoted strings and sentence terminators. + The table is static scope, so %guard is empty. + + This is part of GNU fmt -- a simple text formatter. + Copyright (C) 1994-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +% + +open "(['\"" +close ")]'\"" +period ".?!" +punct "\x21-\x7E" -"a-zA-Z0-9" diff --git a/src/fmt.c b/src/fmt.c index 308b645..e0bbc22 100644 --- a/src/fmt.c +++ b/src/fmt.c @@ -113,12 +113,7 @@ typedef long int COST; #define MAXWORDS 1000 #define MAXCHARS 5000 - -/* Extra ctype(3)-style macros. */ - -#define isopen(c) (strchr ("(['`\"", c) != NULL) -#define isclose(c) (strchr (")]'\"", c) != NULL) -#define isperiod(c) (strchr (".?!", c) != NULL) +#include "fmt-class.h" /* Size of a tab stop, for expansion on input and re-introduction on output. */ @@ -773,13 +768,11 @@ check_punctuation (WORD *w) { char const *start = w->text; char const *finish = start + (w->length - 1); - unsigned char fin = *finish; - w->paren = isopen (*start); - w->punct = !! 
ispunct (fin); - while (start < finish && isclose (*finish)) - finish--; - w->period = isperiod (*finish); + w->paren = IS_OPEN_CHAR (*start); + w->punct = IS_PUNCT_CHAR (*finish); + finish = SPN_CLOSE_BACK (start, start + w->length); + w->period = (finish > start) && IS_PERIOD_CHAR (finish[-1]); } /* Flush part of the paragraph to make room. This function is called on -- 1.7.7
/*
 *  4 bit character mapping generated 04/15/12 12:02:15
 *
 *  This file contains the character classifications used by fmt
 *  for identifying quoted strings and sentence terminators.
 *  The table is static scope, so %guard is empty.
 *
 *  This is part of GNU fmt -- a simple text formatter.
 *  Copyright (C) 1994-2012 Free Software Foundation, Inc.
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef FMT_CLASS_H_GUARD
#define FMT_CLASS_H_GUARD 1

#ifdef HAVE_CONFIG_H
# if defined(HAVE_INTTYPES_H)
#  include <inttypes.h>
# elif defined(HAVE_STDINT_H)
#  include <stdint.h>
# elif !defined(HAVE_UINT8_T)
   typedef unsigned char uint8_t;
# endif /* HAVE_*INT*_H header */
#else /* not HAVE_CONFIG_H -- */
# include <inttypes.h>
#endif /* HAVE_CONFIG_H */

/* strlen() is called by the *_BACK scanners below; include its prototype
   here so this header does not depend on what the includer pulled in. */
#include <string.h>

#if 0 /* mapping specification source (from fmt-class.map) */
//
// %guard
// %file    fmt-class.h
// %backup
//
// %comment -- see above
// %
//
// open    "(['\""
// close   ")]'\""
// period  ".?!"
// punct   "\x21-\x7E" -"a-zA-Z0-9"
//
#endif /* 0 -- mapping spec. source */

/* One bit per character class: 0x01 open, 0x02 close, 0x04 period,
   0x08 punct. */
typedef uint8_t fmt_class_mask_t;

/*
 * For each class FOO there are five macros:
 *   IS_FOO_CHAR(c)      nonzero if c belongs to the class
 *   SPN_FOO_CHARS(s)    skip forward over leading class members of string s
 *   BRK_FOO_CHARS(s)    skip forward to the first class member of string s
 *   SPN_FOO_BACK(s, e)  starting at e, step back over trailing class members
 *   BRK_FOO_BACK(s, e)  starting at e, step back to just past the last member
 *
 * Every macro argument is parenthesized before it is cast, so expression
 * arguments are converted as a whole.
 */
#define IS_OPEN_CHAR(c)        is_fmt_class_char((char)(c), 0x01)
#define SPN_OPEN_CHARS(s)      spn_fmt_class_chars((char *)(s), 0x01)
#define BRK_OPEN_CHARS(s)      brk_fmt_class_chars((char *)(s), 0x01)
#define SPN_OPEN_BACK(s, e)    spn_fmt_class_back((char *)(s), (char *)(e), 0x01)
#define BRK_OPEN_BACK(s, e)    brk_fmt_class_back((char *)(s), (char *)(e), 0x01)

#define IS_CLOSE_CHAR(c)       is_fmt_class_char((char)(c), 0x02)
#define SPN_CLOSE_CHARS(s)     spn_fmt_class_chars((char *)(s), 0x02)
#define BRK_CLOSE_CHARS(s)     brk_fmt_class_chars((char *)(s), 0x02)
#define SPN_CLOSE_BACK(s, e)   spn_fmt_class_back((char *)(s), (char *)(e), 0x02)
#define BRK_CLOSE_BACK(s, e)   brk_fmt_class_back((char *)(s), (char *)(e), 0x02)

#define IS_PERIOD_CHAR(c)      is_fmt_class_char((char)(c), 0x04)
#define SPN_PERIOD_CHARS(s)    spn_fmt_class_chars((char *)(s), 0x04)
#define BRK_PERIOD_CHARS(s)    brk_fmt_class_chars((char *)(s), 0x04)
#define SPN_PERIOD_BACK(s, e)  spn_fmt_class_back((char *)(s), (char *)(e), 0x04)
#define BRK_PERIOD_BACK(s, e)  brk_fmt_class_back((char *)(s), (char *)(e), 0x04)

#define IS_PUNCT_CHAR(c)       is_fmt_class_char((char)(c), 0x08)
#define SPN_PUNCT_CHARS(s)     spn_fmt_class_chars((char *)(s), 0x08)
#define BRK_PUNCT_CHARS(s)     brk_fmt_class_chars((char *)(s), 0x08)
#define SPN_PUNCT_BACK(s, e)   spn_fmt_class_back((char *)(s), (char *)(e), 0x08)
#define BRK_PUNCT_BACK(s, e)   brk_fmt_class_back((char *)(s), (char *)(e), 0x08)

/* Class membership bits, indexed by character code 0..127. */
static fmt_class_mask_t const fmt_class_table[128] = {
  /*NUL*/ 0x00, /*x01*/ 0x00, /*x02*/ 0x00, /*x03*/ 0x00,
  /*x04*/ 0x00, /*x05*/ 0x00, /*x06*/ 0x00, /*BEL*/ 0x00,
  /* BS*/ 0x00, /* HT*/ 0x00, /* NL*/ 0x00, /* VT*/ 0x00,
  /* FF*/ 0x00, /* CR*/ 0x00, /*x0E*/ 0x00, /*x0F*/ 0x00,
  /*x10*/ 0x00, /*x11*/ 0x00, /*x12*/ 0x00, /*x13*/ 0x00,
  /*x14*/ 0x00, /*x15*/ 0x00, /*x16*/ 0x00, /*x17*/ 0x00,
  /*x18*/ 0x00, /*x19*/ 0x00, /*x1A*/ 0x00, /*ESC*/ 0x00,
  /*x1C*/ 0x00, /*x1D*/ 0x00, /*x1E*/ 0x00, /*x1F*/ 0x00,
  /*   */ 0x00, /* ! */ 0x0C, /* " */ 0x0B, /* # */ 0x08,
  /* $ */ 0x08, /* % */ 0x08, /* & */ 0x08, /* ' */ 0x0B,
  /* ( */ 0x09, /* ) */ 0x0A, /* * */ 0x08, /* + */ 0x08,
  /* , */ 0x08, /* - */ 0x08, /* . */ 0x0C, /* / */ 0x08,
  /* 0 */ 0x00, /* 1 */ 0x00, /* 2 */ 0x00, /* 3 */ 0x00,
  /* 4 */ 0x00, /* 5 */ 0x00, /* 6 */ 0x00, /* 7 */ 0x00,
  /* 8 */ 0x00, /* 9 */ 0x00, /* : */ 0x08, /* ; */ 0x08,
  /* < */ 0x08, /* = */ 0x08, /* > */ 0x08, /* ? */ 0x0C,
  /* @ */ 0x08, /* A */ 0x00, /* B */ 0x00, /* C */ 0x00,
  /* D */ 0x00, /* E */ 0x00, /* F */ 0x00, /* G */ 0x00,
  /* H */ 0x00, /* I */ 0x00, /* J */ 0x00, /* K */ 0x00,
  /* L */ 0x00, /* M */ 0x00, /* N */ 0x00, /* O */ 0x00,
  /* P */ 0x00, /* Q */ 0x00, /* R */ 0x00, /* S */ 0x00,
  /* T */ 0x00, /* U */ 0x00, /* V */ 0x00, /* W */ 0x00,
  /* X */ 0x00, /* Y */ 0x00, /* Z */ 0x00, /* [ */ 0x09,
  /* \ */ 0x08, /* ] */ 0x0A, /* ^ */ 0x08, /* _ */ 0x08,
  /* ` */ 0x08, /* a */ 0x00, /* b */ 0x00, /* c */ 0x00,
  /* d */ 0x00, /* e */ 0x00, /* f */ 0x00, /* g */ 0x00,
  /* h */ 0x00, /* i */ 0x00, /* j */ 0x00, /* k */ 0x00,
  /* l */ 0x00, /* m */ 0x00, /* n */ 0x00, /* o */ 0x00,
  /* p */ 0x00, /* q */ 0x00, /* r */ 0x00, /* s */ 0x00,
  /* t */ 0x00, /* u */ 0x00, /* v */ 0x00, /* w */ 0x00,
  /* x */ 0x00, /* y */ 0x00, /* z */ 0x00, /* { */ 0x08,
  /* | */ 0x08, /* } */ 0x08, /* ~ */ 0x08, /*x7F*/ 0x00
};

/* Return 1 if CH has any of the class bits in MASK set, else 0.
   Characters whose unsigned value is 128 or more are outside the
   table and belong to no class. */
static inline int
is_fmt_class_char(char ch, fmt_class_mask_t mask)
{
  unsigned int ix = (unsigned char)ch;
  return ((ix < 128) && ((fmt_class_table[ix] & mask) != 0));
}

/* Return a pointer to the first character of the NUL-terminated string P
   that is NOT in the classes of MASK (possibly the terminating NUL). */
static inline char *
spn_fmt_class_chars(char * p, fmt_class_mask_t mask)
{
  while ((*p != '\0') && is_fmt_class_char(*p, mask))
    p++;
  return p;
}

/* Return a pointer to the first character of the NUL-terminated string P
   that IS in the classes of MASK, or to the terminating NUL if none is. */
static inline char *
brk_fmt_class_chars(char * p, fmt_class_mask_t mask)
{
  while ((*p != '\0') && (! is_fmt_class_char(*p, mask)))
    p++;
  return p;
}

/* Scan backward from E toward S while the character before the cursor is
   in the classes of MASK; return the resulting cursor (S if every scanned
   character matched).
   Special case: when S == E the scan starts at the NUL terminator of S,
   so S must be NUL-terminated in that case. */
static inline char *
spn_fmt_class_back(char * s, char * e, fmt_class_mask_t mask)
{
  if (s == e)
    e += strlen(e);
  while ((e > s) && is_fmt_class_char(e[-1], mask))
    e--;
  return e;
}

/* Scan backward from E toward S while the character before the cursor is
   NOT in the classes of MASK; return the resulting cursor, which points
   just past the last class member found (S if there is none).
   Special case: when S == E the scan starts at the NUL terminator of S,
   so S must be NUL-terminated in that case. */
static inline char *
brk_fmt_class_back(char * s, char * e, fmt_class_mask_t mask)
{
  if (s == e)
    e += strlen(e);
  while ((e > s) && (! is_fmt_class_char(e[-1], mask)))
    e--;
  return e;
}

#endif /* FMT_CLASS_H_GUARD */
char-mapper.txz
Description: application/xz-compressed-tar
