Module Name: src
Committed By: christos
Date: Thu Jan 14 20:41:47 UTC 2016
Modified Files:
src/lib/libc/regex: Makefile.inc regex.3
Added Files:
src/lib/libc/regex: regsub.c
Log Message:
add reg{,a}sub
To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/lib/libc/regex/Makefile.inc
cvs rdiff -u -r1.22 -r1.23 src/lib/libc/regex/regex.3
cvs rdiff -u -r0 -r1.1 src/lib/libc/regex/regsub.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/lib/libc/regex/Makefile.inc
diff -u src/lib/libc/regex/Makefile.inc:1.7 src/lib/libc/regex/Makefile.inc:1.8
--- src/lib/libc/regex/Makefile.inc:1.7 Thu Nov 13 21:04:46 1997
+++ src/lib/libc/regex/Makefile.inc Thu Jan 14 15:41:47 2016
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile.inc,v 1.7 1997/11/14 02:04:46 mrg Exp $
+# $NetBSD: Makefile.inc,v 1.8 2016/01/14 20:41:47 christos Exp $
# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
# regex sources
@@ -6,9 +6,9 @@
CPPFLAGS+=-DPOSIX_MISTAKE
-SRCS+= regcomp.c regerror.c regexec.c regfree.c
+SRCS+= regcomp.c regerror.c regexec.c regfree.c regsub.c
MAN+= regex.3 re_format.7
MLINKS+=regex.3 regcomp.3 regex.3 regexec.3 regex.3 regerror.3 \
- regex.3 regfree.3
+ regex.3 regfree.3 regex.3 regsub.3 regex.3 regasub.3
Index: src/lib/libc/regex/regex.3
diff -u src/lib/libc/regex/regex.3:1.22 src/lib/libc/regex/regex.3:1.23
--- src/lib/libc/regex/regex.3:1.22 Mon May 16 23:35:38 2011
+++ src/lib/libc/regex/regex.3 Thu Jan 14 15:41:47 2016
@@ -1,4 +1,4 @@
-.\" $NetBSD: regex.3,v 1.22 2011/05/17 03:35:38 enami Exp $
+.\" $NetBSD: regex.3,v 1.23 2016/01/14 20:41:47 christos Exp $
.\"
.\" Copyright (c) 1992, 1993, 1994
.\" The Regents of the University of California. All rights reserved.
@@ -65,7 +65,7 @@
.\"
.\" @(#)regex.3 8.4 (Berkeley) 3/20/94
.\"
-.Dd December 29, 2003
+.Dd January 8, 2016
.Dt REGEX 3
.Os
.Sh NAME
@@ -73,7 +73,9 @@
.Nm regcomp ,
.Nm regexec ,
.Nm regerror ,
-.Nm regfree
+.Nm regfree ,
+.Nm regasub ,
+.Nm regsub
.Nd regular-expression library
.Sh LIBRARY
.Lb libc
@@ -87,6 +89,10 @@
.Fn regerror "int errcode" "const regex_t * restrict preg" "char * restrict errbuf" "size_t errbuf_size"
.Ft void
.Fn regfree "regex_t *preg"
+.Ft ssize_t
+.Fn regsub "char *buf" "size_t bufsiz" "const char *sub" "const regmatch_t *rm" "const char *str"
+.Ft ssize_t
+.Fn regasub "char **buf" "const char *sub" "const regmatch_t *rm" "const char *str"
.Sh DESCRIPTION
These routines implement
.St -p1003.2-92
@@ -466,6 +472,46 @@ is undefined.
None of these functions references global variables except for tables
of constants;
all are safe for use from multiple threads if the arguments are safe.
+.Pp
+The
+.Fn regsub
+and
+.Fn regasub
+functions perform substitutions using
+.Xr sed 1 Ns -like
+syntax.
+They return the length of the string that would have been created
+if there were enough space, or
+.Dv \-1
+on error, setting
+.Va errno .
+The result
+is placed in
+.Fa buf
+which is user-supplied in
+.Fn regsub
+and dynamically allocated in
+.Fn regasub .
+The
+.Fa sub
+argument contains a substitution string which may refer to the first
+9 matched subexpressions of the regular expression using
+.Dq \e<n>
+to refer to the nth matched
+item, or
+.Dq &
+(which is equivalent to
+.Dq \e0 )
+to refer to the full match.
+The
+.Fa rm
+array must be at least 10 elements long, and should contain the result
+of the matches from a previous
+.Fn regexec
+call.
+The
+.Fa str
+argument contains the source string to apply the transformation to.
.Sh IMPLEMENTATION CHOICES
There are a number of decisions that
.St -p1003.2-92
@@ -576,6 +622,13 @@ Originally written by Henry Spencer.
Altered for inclusion in the
.Bx 4.4
distribution.
+.Pp
+The
+.Fn regsub
+and
+.Fn regasub
+functions appeared in
+.Nx 8 .
.Sh BUGS
There is one known functionality bug.
The implementation of internationalization is incomplete:
Added files:
Index: src/lib/libc/regex/regsub.c
diff -u /dev/null src/lib/libc/regex/regsub.c:1.1
--- /dev/null Thu Jan 14 15:41:47 2016
+++ src/lib/libc/regex/regsub.c Thu Jan 14 15:41:47 2016
@@ -0,0 +1,162 @@
+/* $NetBSD: regsub.c,v 1.1 2016/01/14 20:41:47 christos Exp $ */
+
+/*-
+ * Copyright (c) 2015 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: regsub.c,v 1.1 2016/01/14 20:41:47 christos Exp $");
+
+#include <sys/param.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+
+struct str { /* output accumulator shared by the add*() helpers below */
+ char *s_ptr; /* start of the output buffer */
+ size_t s_max; /* capacity of s_ptr, in bytes */
+ size_t s_len; /* bytes produced so far; keeps counting when data did not fit */
+ int s_fixed; /* non-zero: caller-supplied buffer, never realloc'ed */
+};
+
+#define REINCR 64 /* initial regasub() buffer size and slack added on each growth */
+
+/*
+ * Make sure that more than `len' bytes are available at offset s_len
+ * of the buffer described by `s'.
+ *
+ * A fixed (caller-supplied) buffer is never resized; a dynamic one is
+ * grown with realloc() by len + REINCR bytes.
+ *
+ * Returns 0 when the space is available, -1 when it is not.  On -1 the
+ * buffer pointer and s_max are left untouched, so s_max always
+ * describes the real size of s_ptr.
+ */
+static int
+addspace(struct str *s, size_t len)
+{
+	size_t grow, nmax;
+	void *v;
+
+	if (s->s_len < s->s_max) {
+		if (s->s_max - s->s_len > len)
+			return 0;
+	} else if (s->s_len != 0) {
+		/*
+		 * The callers advance s_len even when nothing could be
+		 * stored, so s_len may already be at or past s_max.
+		 * Subtracting here would wrap around and pretend there
+		 * is plenty of room; also, part of the output has been
+		 * dropped by now, so stay failed instead of appending
+		 * after a hole.
+		 */
+		return -1;
+	}
+
+	if (s->s_fixed)
+		return -1;
+
+	/* Unsigned arithmetic wraps; reject sizes that overflowed. */
+	grow = len + REINCR;
+	if (grow < len)
+		return -1;
+	nmax = s->s_max + grow;
+	if (nmax < grow)
+		return -1;
+
+	v = realloc(s->s_ptr, nmax);
+	if (v == NULL)
+		return -1;
+
+	/* Only record the new capacity once the buffer really has it. */
+	s->s_ptr = v;
+	s->s_max = nmax;
+
+	return 0;
+}
+
+/*
+ * Append the character `c' to `s'.
+ *
+ * When addspace() reports no room the character is dropped but s_len
+ * is still advanced, so s_len ends up as the length the output would
+ * have had.
+ *
+ * A NUL is treated as the terminator: it is not counted in s_len, and
+ * the last byte of the buffer is set to NUL as well so that truncated
+ * output is still terminated.
+ */
+static void
+addchar(struct str *s, int c)
+{
+	if (addspace(s, 1) == -1)
+		s->s_len++;
+	else
+		s->s_ptr[s->s_len++] = c;
+	if (c == 0) {
+		--s->s_len;
+		/*
+		 * A zero-sized buffer has no last byte; s_max - 1 would
+		 * wrap around and store far outside the buffer.
+		 */
+		if (s->s_max != 0)
+			s->s_ptr[s->s_max - 1] = c;
+	}
+}
+
+/*
+ * Append `len' bytes starting at `buf' to `s'.
+ *
+ * The bytes are copied only when addspace() reports room for them;
+ * s_len is advanced by `len' in either case.
+ */
+static void
+addnstr(struct str *s, const char *buf, size_t len)
+{
+	const int room = addspace(s, len);
+
+	if (room != -1)
+		(void)memcpy(&s->s_ptr[s->s_len], buf, len);
+
+	s->s_len += len;
+}
+
+/*
+ * Prepare `s' for output of up to `len' bytes.
+ *
+ * A non-NULL `buf' is used as is and marked fixed; with a NULL `buf'
+ * a buffer of `len' bytes is obtained from malloc() and marked
+ * growable.
+ *
+ * Returns 0 on success, -1 when no buffer is available.
+ */
+static int
+initstr(struct str *s, char *buf, size_t len)
+{
+	if (buf != NULL) {
+		s->s_ptr = buf;
+		s->s_fixed = 1;
+	} else {
+		s->s_ptr = malloc(len);
+		s->s_fixed = 0;
+	}
+	s->s_max = len;
+	s->s_len = 0;
+
+	if (s->s_ptr == NULL)
+		return -1;
+	return 0;
+}
+
+/*
+ * Common worker for regsub() and regasub().
+ *
+ * Walks the template `sub' and builds the output:
+ *   &        is replaced by the text of match 0,
+ *   \<digit> is replaced by the text of match <digit>,
+ *   \\ , \&  produce a literal backslash / ampersand,
+ *   anything else is copied unchanged.
+ * Match text is taken from `str' using the offsets in `rm'; a match
+ * whose rm_so or rm_eo is -1 contributes nothing.
+ *
+ * *buf and len are handed to initstr(): a non-NULL *buf is a fixed
+ * buffer of len bytes, a NULL *buf requests an allocated buffer whose
+ * address is stored in *buf on success.
+ *
+ * Returns the accumulated length (s_len).  Returns -1 if initstr()
+ * fails, or if the allocated buffer ended up too small, in which case
+ * it is freed.
+ */
+static ssize_t
+regsub1(char **buf, size_t len, const char *sub,
+    const regmatch_t *rm, const char *str)
+{
+	struct str out;
+	const char *p;
+	ssize_t idx;
+	char ch;
+
+	if (initstr(&out, *buf, len) == -1)
+		return -1;
+
+	for (p = sub; *p != '\0';) {
+		ch = *p++;
+
+		/* Work out which match, if any, this position refers to. */
+		idx = -1;
+		if (ch == '&')
+			idx = 0;
+		else if (ch == '\\' && isdigit((unsigned char)*p))
+			idx = *p++ - '0';
+
+		if (idx != -1) {
+			const regmatch_t *m = &rm[idx];
+
+			if (m->rm_so != -1 && m->rm_eo != -1)
+				addnstr(&out, str + m->rm_so,
+				    (size_t)(m->rm_eo - m->rm_so));
+			continue;
+		}
+
+		/* An escaped backslash or ampersand stands for itself. */
+		if (ch == '\\' && (*p == '\\' || *p == '&'))
+			ch = *p++;
+		addchar(&out, ch);
+	}
+
+	addchar(&out, '\0');
+
+	if (out.s_fixed)
+		return out.s_len;
+
+	if (out.s_len >= out.s_max) {
+		free(out.s_ptr);
+		return -1;
+	}
+	*buf = out.s_ptr;
+	return out.s_len;
+}
+
+/*
+ * Perform the substitution described by `sub' into the caller-supplied
+ * buffer `buf' of `len' bytes, taking match text from `str' according
+ * to `rm'.
+ *
+ * Returns the value of regsub1(): the accumulated output length, or -1
+ * on failure.
+ */
+ssize_t
+regsub(char *buf, size_t len, const char *sub, const regmatch_t *rm,
+    const char *str)
+{
+	char *tmp;
+	ssize_t rv;
+
+	if (buf != NULL)
+		return regsub1(&buf, len, sub, rm, str);
+
+	/*
+	 * With a NULL buffer regsub1() allocates the output itself.  The
+	 * caller has no way to receive that pointer through this
+	 * interface, so build into a temporary, release it, and report
+	 * only the length instead of leaking the allocation.
+	 */
+	tmp = NULL;
+	rv = regsub1(&tmp, REINCR, sub, rm, str);
+	free(tmp);
+	return rv;
+}
+
+/*
+ * Perform the substitution described by `sub' into a buffer allocated
+ * by regsub1(), starting at REINCR bytes.
+ *
+ * *buf is cleared before the call, so it is NULL unless regsub1()
+ * stores a result buffer in it.  Returns the value of regsub1().
+ */
+ssize_t
+regasub(char **buf, const char *sub, const regmatch_t *rm, const char *str)
+{
+	ssize_t rv;
+
+	*buf = NULL;
+	rv = regsub1(buf, REINCR, sub, rm, str);
+
+	return rv;
+}