Attached are several files: the patch itself, a header derived from the
mapping file that is part of the patch, and the tool used to derive
that header from the mapping file.

I've had this toy kicking around for a decade or so.  If it were
part of some project, it might be libiberty or maybe gnulib or
maybe coreutils.  Uncertain what to do with it, I've kept it
basically hidden.  The idea is to specify character classes for
a particular program or group of programs and emit the classification
table.  Zack did it for GCC.  There are some ad-hoc methods in
coreutils, including fmt.  I chose fmt as the example because it
was especially trivial and would still demonstrate the idea.

So it's all attached for your amusement.  Probably needs a better name.

These are the four classes needed by fmt.c:

open        "(['\"" <<<<=== backtick removed, per recent discussions
close       ")]'\""
period      ".?!"
punct       "\x21-\x7E" -"a-zA-Z0-9"

Cheers - Bruce

char-mapper/
char-mapper/cm-opt.c
char-mapper/map-text.c
char-mapper/mk-str2enum.sh
char-mapper/char-mapper.c
char-mapper/map-text.def
char-mapper/mk-opt-table.sh
char-mapper/char-mapper.h
char-mapper/Makefile
char-mapper/test.sh
char-mapper/map-text.h
char-mapper/build-html.sh
char-mapper/cm-opt.h
char-mapper/MakeRules
>From 62fd69a3d3a547cc3579484571b2abcb2fcd668a Mon Sep 17 00:00:00 2001
From: Bruce Korb <[email protected]>
Date: Sun, 15 Apr 2012 12:08:53 -0700
Subject: [PATCH] fmt: use generated char classifications

* src/fmt-class.map: New file, describing the character classes used by fmt.
* src/fmt.c: Use fmt-class.h in preference to the roll-your-own macros.
* src/Makefile.am: Add a rule to derive fmt-class.h from fmt-class.map.
---
 .gitignore        |    1 +
 src/.gitignore    |    1 +
 src/Makefile.am   |   45 +++++++++++++++++++++++----------------------
 src/fmt-class.map |   31 +++++++++++++++++++++++++++++++
 src/fmt.c         |   17 +++++------------
 5 files changed, 61 insertions(+), 34 deletions(-)
 create mode 100644 src/fmt-class.map

diff --git a/.gitignore b/.gitignore
index 383361b..f5e35dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -75,6 +75,7 @@
 /lib/ref-del.sed
 /lib/selinux
 /lib/signal.h
+/lib/spawn.h
 /lib/stamp-h1
 /lib/stdalign.h
 /lib/stdio.h
diff --git a/src/.gitignore b/src/.gitignore
index 9c4c9b7..0e5e47f 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -114,3 +114,4 @@ wheel.h
 who
 whoami
 yes
+fmt-class.h
diff --git a/src/Makefile.am b/src/Makefile.am
index 06ab615..8e59801 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -468,30 +468,27 @@ ginstall_SOURCES = install.c prog-fprintf.c $(copy_sources)
 # This is for the '[' program.  Automake transliterates '[' to '_'.
 __SOURCES = lbracket.c
 
-cp_SOURCES = cp.c $(copy_sources)
-dir_SOURCES = ls.c ls-dir.c
-vdir_SOURCES = ls.c ls-vdir.c
-id_SOURCES = id.c group-list.c
-groups_SOURCES = groups.c group-list.c
-ls_SOURCES = ls.c ls-ls.c
-ln_SOURCES = ln.c relpath.c relpath.h
-chown_SOURCES = chown.c chown-core.c
-chgrp_SOURCES = chgrp.c chown-core.c
-kill_SOURCES = kill.c operand2sig.c
+arch_SOURCES 	= uname.c uname-arch.c
+chgrp_SOURCES 	= chgrp.c chown-core.c
+chown_SOURCES 	= chown.c chown-core.c
+cp_SOURCES 	= cp.c $(copy_sources)
+df_SOURCES 	= df.c find-mount-point.c
+dir_SOURCES 	= ls.c ls-dir.c
+fmt_SOURCES 	= fmt.c fmt-class.h
+groups_SOURCES 	= groups.c group-list.c
+id_SOURCES 	= id.c group-list.c
+kill_SOURCES 	= kill.c operand2sig.c
+ln_SOURCES 	= ln.c relpath.c relpath.h
+ls_SOURCES 	= ls.c ls-ls.c
+mkdir_SOURCES 	= mkdir.c prog-fprintf.c
+mv_SOURCES 	= mv.c remove.c $(copy_sources)
 realpath_SOURCES = realpath.c relpath.c relpath.h
+rm_SOURCES 	= rm.c remove.c
+rmdir_SOURCES 	= rmdir.c prog-fprintf.c
+stat_SOURCES 	= stat.c find-mount-point.c
 timeout_SOURCES = timeout.c operand2sig.c
-
-mv_SOURCES = mv.c remove.c $(copy_sources)
-rm_SOURCES = rm.c remove.c
-
-mkdir_SOURCES = mkdir.c prog-fprintf.c
-rmdir_SOURCES = rmdir.c prog-fprintf.c
-
-df_SOURCES = df.c find-mount-point.c
-stat_SOURCES = stat.c find-mount-point.c
-
-uname_SOURCES = uname.c uname-uname.c
-arch_SOURCES = uname.c uname-arch.c
+uname_SOURCES 	= uname.c uname-uname.c
+vdir_SOURCES 	= ls.c ls-vdir.c
 
 md5sum_CPPFLAGS = -DHASH_ALGO_MD5=1 $(AM_CPPFLAGS)
 sha1sum_SOURCES = md5sum.c
@@ -526,6 +523,10 @@ dircolors.h: dcgen dircolors.hin
 	$(AM_V_at)chmod a-w $@-t
 	$(AM_V_at)mv $@-t $@
 
+BUILT_SOURCES += fmt-class.h
+fmt-class.h : fmt-class.map
+	char-mapper fmt-class.map
+
 wheel_size = 5
 
 BUILT_SOURCES += wheel-size.h
diff --git a/src/fmt-class.map b/src/fmt-class.map
new file mode 100644
index 0000000..2aee676
--- /dev/null
+++ b/src/fmt-class.map
@@ -0,0 +1,31 @@
+
+%guard
+%file           fmt-class.h
+%backup
+
+%comment
+  This file contains the character classifications used by fmt
+  for identifying quoted strings and sentence terminators.
+  The table is static scope, so %guard is empty.
+
+  This is part of GNU fmt -- a simple text formatter.
+  Copyright (C) 1994-2012 Free Software Foundation, Inc.
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+%
+
+open        "(['\""
+close       ")]'\""
+period      ".?!"
+punct       "\x21-\x7E" -"a-zA-Z0-9"
diff --git a/src/fmt.c b/src/fmt.c
index 308b645..e0bbc22 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -113,12 +113,7 @@ typedef long int COST;
 
 #define MAXWORDS	1000
 #define MAXCHARS	5000
-
-/* Extra ctype(3)-style macros.  */
-
-#define isopen(c)	(strchr ("(['`\"", c) != NULL)
-#define isclose(c)	(strchr (")]'\"", c) != NULL)
-#define isperiod(c)	(strchr (".?!", c) != NULL)
+#include "fmt-class.h"
 
 /* Size of a tab stop, for expansion on input and re-introduction on
    output.  */
@@ -773,13 +768,11 @@ check_punctuation (WORD *w)
 {
   char const *start = w->text;
   char const *finish = start + (w->length - 1);
-  unsigned char fin = *finish;
 
-  w->paren = isopen (*start);
-  w->punct = !! ispunct (fin);
-  while (start < finish && isclose (*finish))
-    finish--;
-  w->period = isperiod (*finish);
+  w->paren  = IS_OPEN_CHAR   (*start);
+  w->punct  = IS_PUNCT_CHAR  (*finish);
+  finish    = SPN_CLOSE_BACK (start, start + w->length);
+  w->period = (finish > start) && IS_PERIOD_CHAR (finish[-1]);
 }
 
 /* Flush part of the paragraph to make room.  This function is called on
-- 
1.7.7

/*
 *  4 bit character mapping generated 04/15/12 12:02:15
 *
 *  This file contains the character classifications used by fmt
 *  for identifying quoted strings and sentence terminators.
 *  The table is static scope, so %guard is empty.
 *
 *  This is part of GNU fmt -- a simple text formatter.
 *  Copyright (C) 1994-2012 Free Software Foundation, Inc.
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef FMT_CLASS_H_GUARD
#define FMT_CLASS_H_GUARD 1

/*
 * Bring uint8_t into scope (it backs fmt_class_mask_t).  When a config
 * header has been processed, honor its HAVE_* results and fall back to a
 * local typedef if neither integer header nor uint8_t is available;
 * otherwise just assume <inttypes.h> exists.
 */
#ifdef HAVE_CONFIG_H
# if defined(HAVE_INTTYPES_H)
#   include <inttypes.h>

# elif defined(HAVE_STDINT_H)
#   include <stdint.h>

# elif !defined(HAVE_UINT8_T)
        typedef unsigned char   uint8_t;
# endif /* HAVE_*INT*_H header */

#else /* not HAVE_CONFIG_H -- */
# include <inttypes.h>
#endif /* HAVE_CONFIG_H */

/* strlen() is called by the spn_/brk_fmt_class_back helpers in this header,
   so include its declaration here rather than relying on the includer.  */
#include <string.h>

#if 0 /* mapping specification source (from fmt-class.map) */
// 
// %guard
// %file           fmt-class.h
// %backup
// 
// %comment -- see above
// %
// 
// open        "(['\""
// close       ")]'\""
// period      ".?!"
// punct       "\x21-\x7E" -"a-zA-Z0-9"
//
#endif /* 0 -- mapping spec. source */


/* A set of fmt character classes, one bit per class:
   0x01 open, 0x02 close, 0x04 period, 0x08 punct.  */
typedef uint8_t fmt_class_mask_t;

/*
 * Per-class convenience wrappers around the generic helpers:
 *
 *   IS_<class>_CHAR(c)     nonzero if character c is in the class
 *   SPN_<class>_CHARS(s)   scan forward from s over characters in the class
 *   BRK_<class>_CHARS(s)   scan forward from s to the first class character
 *   SPN_<class>_BACK(s,e)  scan backward from e (not before s) over class
 *                          characters
 *   BRK_<class>_BACK(s,e)  scan backward from e (not before s) until the
 *                          preceding character is in the class
 *
 * Every argument is parenthesized before it is cast, so that an argument
 * such as "start + len" or "cond ? a : b" is converted as a whole instead
 * of having the cast bind to its first operand only.
 */
#define  IS_OPEN_CHAR( _c)     is_fmt_class_char((char)(_c), 0x01)
#define SPN_OPEN_CHARS(_s)    spn_fmt_class_chars((char *)(_s), 0x01)
#define BRK_OPEN_CHARS(_s)    brk_fmt_class_chars((char *)(_s), 0x01)
#define SPN_OPEN_BACK(s,e)    spn_fmt_class_back((char *)(s), (char *)(e), 0x01)
#define BRK_OPEN_BACK(s,e)    brk_fmt_class_back((char *)(s), (char *)(e), 0x01)
#define  IS_CLOSE_CHAR( _c)    is_fmt_class_char((char)(_c), 0x02)
#define SPN_CLOSE_CHARS(_s)   spn_fmt_class_chars((char *)(_s), 0x02)
#define BRK_CLOSE_CHARS(_s)   brk_fmt_class_chars((char *)(_s), 0x02)
#define SPN_CLOSE_BACK(s,e)   spn_fmt_class_back((char *)(s), (char *)(e), 0x02)
#define BRK_CLOSE_BACK(s,e)   brk_fmt_class_back((char *)(s), (char *)(e), 0x02)
#define  IS_PERIOD_CHAR( _c)   is_fmt_class_char((char)(_c), 0x04)
#define SPN_PERIOD_CHARS(_s)  spn_fmt_class_chars((char *)(_s), 0x04)
#define BRK_PERIOD_CHARS(_s)  brk_fmt_class_chars((char *)(_s), 0x04)
#define SPN_PERIOD_BACK(s,e)  spn_fmt_class_back((char *)(s), (char *)(e), 0x04)
#define BRK_PERIOD_BACK(s,e)  brk_fmt_class_back((char *)(s), (char *)(e), 0x04)
#define  IS_PUNCT_CHAR( _c)    is_fmt_class_char((char)(_c), 0x08)
#define SPN_PUNCT_CHARS(_s)   spn_fmt_class_chars((char *)(_s), 0x08)
#define BRK_PUNCT_CHARS(_s)   brk_fmt_class_chars((char *)(_s), 0x08)
#define SPN_PUNCT_BACK(s,e)   spn_fmt_class_back((char *)(s), (char *)(e), 0x08)
#define BRK_PUNCT_BACK(s,e)   brk_fmt_class_back((char *)(s), (char *)(e), 0x08)

/*
 * Class membership for each of the 128 character codes 0x00-0x7F, indexed
 * by character code.  Each entry is the OR of the class bits the character
 * belongs to, using the same masks as the IS_*_CHAR macros:
 *   0x01 open, 0x02 close, 0x04 period, 0x08 punct.
 * For example '!' is 0x0C (period | punct) and '"' is 0x0B
 * (open | close | punct).
 */
static fmt_class_mask_t const fmt_class_table[128] = {
  /*NUL*/ 0x00, /*x01*/ 0x00, /*x02*/ 0x00, /*x03*/ 0x00,
  /*x04*/ 0x00, /*x05*/ 0x00, /*x06*/ 0x00, /*BEL*/ 0x00,
  /* BS*/ 0x00, /* HT*/ 0x00, /* NL*/ 0x00, /* VT*/ 0x00,
  /* FF*/ 0x00, /* CR*/ 0x00, /*x0E*/ 0x00, /*x0F*/ 0x00,
  /*x10*/ 0x00, /*x11*/ 0x00, /*x12*/ 0x00, /*x13*/ 0x00,
  /*x14*/ 0x00, /*x15*/ 0x00, /*x16*/ 0x00, /*x17*/ 0x00,
  /*x18*/ 0x00, /*x19*/ 0x00, /*x1A*/ 0x00, /*ESC*/ 0x00,
  /*x1C*/ 0x00, /*x1D*/ 0x00, /*x1E*/ 0x00, /*x1F*/ 0x00,
  /*   */ 0x00, /* ! */ 0x0C, /* " */ 0x0B, /* # */ 0x08,
  /* $ */ 0x08, /* % */ 0x08, /* & */ 0x08, /* ' */ 0x0B,
  /* ( */ 0x09, /* ) */ 0x0A, /* * */ 0x08, /* + */ 0x08,
  /* , */ 0x08, /* - */ 0x08, /* . */ 0x0C, /* / */ 0x08,
  /* 0 */ 0x00, /* 1 */ 0x00, /* 2 */ 0x00, /* 3 */ 0x00,
  /* 4 */ 0x00, /* 5 */ 0x00, /* 6 */ 0x00, /* 7 */ 0x00,
  /* 8 */ 0x00, /* 9 */ 0x00, /* : */ 0x08, /* ; */ 0x08,
  /* < */ 0x08, /* = */ 0x08, /* > */ 0x08, /* ? */ 0x0C,
  /* @ */ 0x08, /* A */ 0x00, /* B */ 0x00, /* C */ 0x00,
  /* D */ 0x00, /* E */ 0x00, /* F */ 0x00, /* G */ 0x00,
  /* H */ 0x00, /* I */ 0x00, /* J */ 0x00, /* K */ 0x00,
  /* L */ 0x00, /* M */ 0x00, /* N */ 0x00, /* O */ 0x00,
  /* P */ 0x00, /* Q */ 0x00, /* R */ 0x00, /* S */ 0x00,
  /* T */ 0x00, /* U */ 0x00, /* V */ 0x00, /* W */ 0x00,
  /* X */ 0x00, /* Y */ 0x00, /* Z */ 0x00, /* [ */ 0x09,
  /* \ */ 0x08, /* ] */ 0x0A, /* ^ */ 0x08, /* _ */ 0x08,
  /* ` */ 0x08, /* a */ 0x00, /* b */ 0x00, /* c */ 0x00,
  /* d */ 0x00, /* e */ 0x00, /* f */ 0x00, /* g */ 0x00,
  /* h */ 0x00, /* i */ 0x00, /* j */ 0x00, /* k */ 0x00,
  /* l */ 0x00, /* m */ 0x00, /* n */ 0x00, /* o */ 0x00,
  /* p */ 0x00, /* q */ 0x00, /* r */ 0x00, /* s */ 0x00,
  /* t */ 0x00, /* u */ 0x00, /* v */ 0x00, /* w */ 0x00,
  /* x */ 0x00, /* y */ 0x00, /* z */ 0x00, /* { */ 0x08,
  /* | */ 0x08, /* } */ 0x08, /* ~ */ 0x08, /*x7F*/ 0x00
};
/*
 * Report whether CH belongs to at least one of the classes selected by MASK.
 * CH is interpreted as an unsigned byte; values of 128 and above lie outside
 * the table and are never members of any class.  Returns 1 or 0.
 */
static inline int
is_fmt_class_char(char ch, fmt_class_mask_t mask)
{
    unsigned char const byte = (unsigned char)ch;

    if (byte >= 128)
        return 0;

    return (fmt_class_table[byte] & mask) ? 1 : 0;
}

/*
 * Skip forward over the leading run of characters in P that belong to a
 * class selected by MASK.  Returns a pointer to the first character that is
 * not in the class, or to the terminating NUL if the run reaches it.
 */
static inline char *
spn_fmt_class_chars(char * p, fmt_class_mask_t mask)
{
    char * scan = p;

    for (;;) {
        if (*scan == '\0')
            break;
        if (! is_fmt_class_char(*scan, mask))
            break;
        scan++;
    }
    return scan;
}

/*
 * Search forward in P for the first character that belongs to a class
 * selected by MASK.  Returns a pointer to that character, or to the
 * terminating NUL if the string contains no such character.
 */
static inline char *
brk_fmt_class_chars(char * p, fmt_class_mask_t mask)
{
    char * scan = p;

    for (;;) {
        if (*scan == '\0')
            break;
        if (is_fmt_class_char(*scan, mask))
            break;
        scan++;
    }
    return scan;
}

/*
 * Walk backward from E, never going before S, over the trailing run of
 * characters that belong to a class selected by MASK.  When E equals S the
 * end is taken to be the terminating NUL of the string at S.  Returns the
 * resulting end pointer: the character before it (if any remains) is not in
 * the class.
 */
static inline char *
spn_fmt_class_back(char * s, char * e, fmt_class_mask_t mask)
{
    char * end = (e == s) ? e + strlen(e) : e;

    while (end > s) {
        if (! is_fmt_class_char(end[-1], mask))
            break;
        end--;
    }
    return end;
}

/*
 * Walk backward from E, never going before S, over the trailing run of
 * characters that do NOT belong to a class selected by MASK.  When E equals
 * S the end is taken to be the terminating NUL of the string at S.  Returns
 * the resulting end pointer: either S, or a position whose preceding
 * character is in the class.
 */
static inline char *
brk_fmt_class_back(char * s, char * e, fmt_class_mask_t mask)
{
    char * end = (e == s) ? e + strlen(e) : e;

    while (end > s) {
        if (is_fmt_class_char(end[-1], mask))
            break;
        end--;
    }
    return end;
}
#endif /* FMT_CLASS_H_GUARD */

Attachment: char-mapper.txz
Description: application/xz-compressed-tar

Reply via email to