Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-04 Thread Steve Schnepp
2010/9/3 Jilles Tjoelker jil...@stack.nl:
 This patch assumes that the file descriptor is discarded afterwards (its
 position does not matter). Therefore the very common construct
  while read x; do
    ...
  done
 stops working.

Ohh.. thanks for that, I didn't see it.

Actually while read x continues to work.
But reopening the file doesn't as in :

read a b  datafile
echo ${a} ${b}
read a b  datafile
echo ${a} ${b}

I attached an updated patch that corrects this pb by discarding the
buffer when opening a new file.
I also put everything in new files (bufreadcmd.c  .h), in order to
ease its understanding.

--
Steve Schnepp
http://blog.pwkf.org/



 A possible fix is to check first if the input supports seeking. If it
 does, use the buffering and at the end of the line seek backwards for
 the number of bytes remaining in the buffer. If it does not, read one
 byte at a time.

 --
 Jilles Tjoelker



--
Steve Schnepp
http://blog.pwkf.org/
Common subdirectories: dash-0.5.4/src/bltin and dash-0.5.4-patched/src/bltin
diff -puN dash-0.5.4/src/bufreadcmd.c dash-0.5.4-patched/src/bufreadcmd.c
--- dash-0.5.4/src/bufreadcmd.c	1970-01-01 01:00:00.0 +0100
+++ dash-0.5.4-patched/src/bufreadcmd.c	2010-09-04 12:31:46.0 +0200
@@ -0,0 +1,59 @@
+/*
+ * Offers a buffered read builtin
+ */
+
+#include unistd.h
+#include stdlib.h
+
+#include bufreadcmd.h
+
+#ifdef BUF_READ_BUILTIN_DISABLED
+int dup2_wrapper(int old, int new) {
+	return dup2(old, new);
+}
+int read_stdin_bufferred(char *c) {
+	return read(0, buffer, 1);
+}
+#else // BUF_READ_BUILTIN_DISABLED
+
+/*
+ * Reads from fd 0, with a CHUNK_READ_SIZE,
+ * but emitting one char at a time
+ */
+#define CHUNK_READ_SIZE 32
+static char buffer[CHUNK_READ_SIZE];
+static int buffer_offset = 0;
+static int buffer_len = 0;
+
+int read_stdin_bufferred(char* c) {
+   if (buffer_len == 0) {
+   // No caracter left, resetting buffer  read some more
+   buffer_offset = 0;
+   buffer_len = read(0, buffer, CHUNK_READ_SIZE);
+
+   if (buffer_len == 0) {
+   // Nothing to read anymore
+   return 0;
+   }
+   }
+
+   // Still some character left
+   *c = buffer[buffer_offset++];
+   buffer_len--;
+
+   return 1;
+}
+
+static void _flush_readcmd(int fd) {
+	if (fd == 0) { 
+		// Flush the buffer, discarding its content
+		buffer_len = 0;
+	}
+}
+
+/* Intercept dup2() calls */
+int dup2_wrapper(int old, int new) {
+	_flush_readcmd(new);
+	return dup2(old, new);
+}
+#endif // BUF_READ_BUILTIN_DISABLED
diff -puN dash-0.5.4/src/bufreadcmd.h dash-0.5.4-patched/src/bufreadcmd.h
--- dash-0.5.4/src/bufreadcmd.h	1970-01-01 01:00:00.0 +0100
+++ dash-0.5.4-patched/src/bufreadcmd.h	2010-09-04 12:23:53.0 +0200
@@ -0,0 +1,3 @@
+/* Used for flushing the readcmd read() buffer */
+int dup2_wrapper(int to, int from);
+int read_stdin_bufferred(char *c);
Common subdirectories: dash-0.5.4/src/.deps and dash-0.5.4-patched/src/.deps
diff -puN dash-0.5.4/src/eval.c dash-0.5.4-patched/src/eval.c
--- dash-0.5.4/src/eval.c	2007-07-13 10:26:42.0 +0200
+++ dash-0.5.4-patched/src/eval.c	2010-09-04 12:24:55.0 +0200
@@ -64,6 +64,8 @@
 #include myhistedit.h
 #endif
 
+#include bufreadcmd.h
+
 
 /* flags in argument to evaltree */
 #define EV_EXIT 01		/* exit after evaluating tree */
@@ -543,11 +545,12 @@ evalpipe(union node *n, int flags)
 close(pip[0]);
 			}
 			if (prevfd  0) {
-dup2(prevfd, 0);
+dup2_wrapper(prevfd, 0);
+
 close(prevfd);
 			}
 			if (pip[1]  1) {
-dup2(pip[1], 1);
+dup2_wrapper(pip[1], 1);
 close(pip[1]);
 			}
 			evaltreenr(lp-n, flags);
@@ -625,7 +628,7 @@ evalbackcmd(union node *n, struct backcm
 			FORCEINTON;
 			close(pip[0]);
 			if (pip[1] != 1) {
-dup2(pip[1], 1);
+dup2_wrapper(pip[1], 1);
 close(pip[1]);
 			}
 			eflag = 0;
diff -puN dash-0.5.4/src/Makefile dash-0.5.4-patched/src/Makefile
--- dash-0.5.4/src/Makefile	2010-09-04 13:06:05.0 +0200
+++ dash-0.5.4-patched/src/Makefile	2010-09-04 13:08:26.0 +0200
@@ -57,7 +57,7 @@ am__objects_1 = alias.$(OBJEXT) arith_yy
 	miscbltin.$(OBJEXT) mystring.$(OBJEXT) options.$(OBJEXT) \
 	parser.$(OBJEXT) redir.$(OBJEXT) show.$(OBJEXT) trap.$(OBJEXT) \
 	output.$(OBJEXT) printf.$(OBJEXT) system.$(OBJEXT) \
-	test.$(OBJEXT) times.$(OBJEXT) var.$(OBJEXT)
+	test.$(OBJEXT) times.$(OBJEXT) var.$(OBJEXT) bufreadcmd.$(OBJEXT)
 am_dash_OBJECTS = $(am__objects_1) arith.$(OBJEXT)
 dash_OBJECTS = $(am_dash_OBJECTS)
 dash_DEPENDENCIES = builtins.o init.o nodes.o signames.o syntax.o
@@ -169,14 +169,14 @@ dash_CFILES = \
 	alias.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \
 	histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \
 	mystring.c options.c parser.c redir.c show.c trap.c output.c \
-	bltin/printf.c system.c bltin/test.c bltin/times.c var.c
+	bltin/printf.c system.c bltin/test.c bltin/times.c var.c bufreadcmd.c
 
 

Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-04 Thread Jilles Tjoelker
On Sat, Sep 04, 2010 at 08:20:33PM +0200, Steve Schnepp wrote:
 2010/9/3 Jilles Tjoelker jil...@stack.nl:
  This patch assumes that the file descriptor is discarded afterwards (its
  position does not matter). Therefore the very common construct
   while read x; do
     ...
   done
  stops working.

 Ohh.. thanks for that, I didn't see it.

 Actually while read x continues to work.
 But reopening the file doesn't as in :

 read a b  datafile
 echo ${a} ${b}
 read a b  datafile
 echo ${a} ${b}

You're right, it's even stranger than I expected.

 I attached an updated patch that corrects this pb by discarding the
 buffer when opening a new file.

This discarding is still bad as it throws away valid data if the open
file description is shared. This happens if stdin is redirected inside a
while read... loop.

Furthermore, I think constructions like
  read x; cat
and
  read x; (read y); read z
should keep working. This requires that the input's file position be
synced whenever another process may see it (fork/exit). Due to the
highly dynamic character of the shell and the common use of fd 0, this
probably means that you can't do better than syncing after each read
builtin. (For example, 'read' could be overridden with a function after
the third line.)

Another thought:
  exec 30; read x; read y 3
or even
  sh -c 'read x; read y 3' 30
Different file descriptors may refer to the same open file description
and the shell may not know this.

-- 
Jilles Tjoelker
--
To unsubscribe from this list: send the line unsubscribe dash in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html