Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-04 Thread Steve Schnepp
2010/9/3 Jilles Tjoelker jil...@stack.nl:
 This patch assumes that the file descriptor is discarded afterwards (its
 position does not matter). Therefore the very common construct
  while read x; do
    ...
  done
 stops working.

Ohh.. thanks for that, I didn't see it.

Actually while read x continues to work.
But reopening the file doesn't as in :

read a b  datafile
echo ${a} ${b}
read a b  datafile
echo ${a} ${b}

I attached an updated patch that corrects this pb by discarding the
buffer when opening a new file.
I also put everything in new files (bufreadcmd.c  .h), in order to
ease its understanding.

--
Steve Schnepp
http://blog.pwkf.org/



 A possible fix is to check first if the input supports seeking. If it
 does, use the buffering and at the end of the line seek backwards for
 the number of bytes remaining in the buffer. If it does not, read one
 byte at a time.

 --
 Jilles Tjoelker



--
Steve Schnepp
http://blog.pwkf.org/
Common subdirectories: dash-0.5.4/src/bltin and dash-0.5.4-patched/src/bltin
diff -puN dash-0.5.4/src/bufreadcmd.c dash-0.5.4-patched/src/bufreadcmd.c
--- dash-0.5.4/src/bufreadcmd.c	1970-01-01 01:00:00.0 +0100
+++ dash-0.5.4-patched/src/bufreadcmd.c	2010-09-04 12:31:46.0 +0200
@@ -0,0 +1,59 @@
+/*
+ * Offers a buffered read builtin
+ */
+
+#include unistd.h
+#include stdlib.h
+
+#include bufreadcmd.h
+
+#ifdef BUF_READ_BUILTIN_DISABLED
+int dup2_wrapper(int old, int new) {
+	return dup2(old, new);
+}
+int read_stdin_bufferred(char *c) {
+	return read(0, buffer, 1);
+}
+#else // BUF_READ_BUILTIN_DISABLED
+
+/*
+ * Reads from fd 0, with a CHUNK_READ_SIZE,
+ * but emitting one char at a time
+ */
+#define CHUNK_READ_SIZE 32
+static char buffer[CHUNK_READ_SIZE];
+static int buffer_offset = 0;
+static int buffer_len = 0;
+
+int read_stdin_bufferred(char* c) {
+   if (buffer_len == 0) {
+   // No caracter left, resetting buffer  read some more
+   buffer_offset = 0;
+   buffer_len = read(0, buffer, CHUNK_READ_SIZE);
+
+   if (buffer_len == 0) {
+   // Nothing to read anymore
+   return 0;
+   }
+   }
+
+   // Still some character left
+   *c = buffer[buffer_offset++];
+   buffer_len--;
+
+   return 1;
+}
+
+static void _flush_readcmd(int fd) {
+	if (fd == 0) { 
+		// Flush the buffer, discarding its content
+		buffer_len = 0;
+	}
+}
+
+/* Intercept dup2() calls */
+int dup2_wrapper(int old, int new) {
+	_flush_readcmd(new);
+	return dup2(old, new);
+}
+#endif // BUF_READ_BUILTIN_DISABLED
diff -puN dash-0.5.4/src/bufreadcmd.h dash-0.5.4-patched/src/bufreadcmd.h
--- dash-0.5.4/src/bufreadcmd.h	1970-01-01 01:00:00.0 +0100
+++ dash-0.5.4-patched/src/bufreadcmd.h	2010-09-04 12:23:53.0 +0200
@@ -0,0 +1,3 @@
+/* Used for flushing the readcmd read() buffer */
+int dup2_wrapper(int to, int from);
+int read_stdin_bufferred(char *c);
Common subdirectories: dash-0.5.4/src/.deps and dash-0.5.4-patched/src/.deps
diff -puN dash-0.5.4/src/eval.c dash-0.5.4-patched/src/eval.c
--- dash-0.5.4/src/eval.c	2007-07-13 10:26:42.0 +0200
+++ dash-0.5.4-patched/src/eval.c	2010-09-04 12:24:55.0 +0200
@@ -64,6 +64,8 @@
 #include myhistedit.h
 #endif
 
+#include bufreadcmd.h
+
 
 /* flags in argument to evaltree */
 #define EV_EXIT 01		/* exit after evaluating tree */
@@ -543,11 +545,12 @@ evalpipe(union node *n, int flags)
 close(pip[0]);
 			}
 			if (prevfd  0) {
-dup2(prevfd, 0);
+dup2_wrapper(prevfd, 0);
+
 close(prevfd);
 			}
 			if (pip[1]  1) {
-dup2(pip[1], 1);
+dup2_wrapper(pip[1], 1);
 close(pip[1]);
 			}
 			evaltreenr(lp-n, flags);
@@ -625,7 +628,7 @@ evalbackcmd(union node *n, struct backcm
 			FORCEINTON;
 			close(pip[0]);
 			if (pip[1] != 1) {
-dup2(pip[1], 1);
+dup2_wrapper(pip[1], 1);
 close(pip[1]);
 			}
 			eflag = 0;
diff -puN dash-0.5.4/src/Makefile dash-0.5.4-patched/src/Makefile
--- dash-0.5.4/src/Makefile	2010-09-04 13:06:05.0 +0200
+++ dash-0.5.4-patched/src/Makefile	2010-09-04 13:08:26.0 +0200
@@ -57,7 +57,7 @@ am__objects_1 = alias.$(OBJEXT) arith_yy
 	miscbltin.$(OBJEXT) mystring.$(OBJEXT) options.$(OBJEXT) \
 	parser.$(OBJEXT) redir.$(OBJEXT) show.$(OBJEXT) trap.$(OBJEXT) \
 	output.$(OBJEXT) printf.$(OBJEXT) system.$(OBJEXT) \
-	test.$(OBJEXT) times.$(OBJEXT) var.$(OBJEXT)
+	test.$(OBJEXT) times.$(OBJEXT) var.$(OBJEXT) bufreadcmd.$(OBJEXT)
 am_dash_OBJECTS = $(am__objects_1) arith.$(OBJEXT)
 dash_OBJECTS = $(am_dash_OBJECTS)
 dash_DEPENDENCIES = builtins.o init.o nodes.o signames.o syntax.o
@@ -169,14 +169,14 @@ dash_CFILES = \
 	alias.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \
 	histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \
 	mystring.c options.c parser.c redir.c show.c trap.c output.c \
-	bltin/printf.c system.c bltin/test.c bltin/times.c var.c
+	bltin/printf.c system.c bltin/test.c bltin/times.c var.c bufreadcmd.c
 
 

Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-04 Thread Jilles Tjoelker
On Sat, Sep 04, 2010 at 08:20:33PM +0200, Steve Schnepp wrote:
 2010/9/3 Jilles Tjoelker jil...@stack.nl:
  This patch assumes that the file descriptor is discarded afterwards (its
  position does not matter). Therefore the very common construct
   while read x; do
     ...
   done
  stops working.

 Ohh.. thanks for that, I didn't see it.

 Actually while read x continues to work.
 But reopening the file doesn't as in :

 read a b  datafile
 echo ${a} ${b}
 read a b  datafile
 echo ${a} ${b}

You're right, it's even stranger than I expected.

 I attached an updated patch that corrects this pb by discarding the
 buffer when opening a new file.

This discarding is still bad as it throws away valid data if the open
file description is shared. This happens if stdin is redirected inside a
while read... loop.

Furthermore, I think constructions like
  read x; cat
and
  read x; (read y); read z
should keep working. This requires that the input's file position be
synced whenever another process may see it (fork/exit). Due to the
highly dynamic character of the shell and the common use of fd 0, this
probably means that you can't do better than syncing after each read
builtin. (For example, 'read' could be overridden with a function after
the third line.)

Another thought:
  exec 30; read x; read y 3
or even
  sh -c 'read x; read y 3' 30
Different file descriptors may refer to the same open file description
and the shell may not know this.

-- 
Jilles Tjoelker
--
To unsubscribe from this list: send the line unsubscribe dash in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-03 Thread Steve Schnepp
2010/9/2 Jilles Tjoelker jil...@stack.nl:

Thanks for your prompt reply.

 Note that a change in the file between the single-byte reads will cause
 an inconsistent value to be read. This is also the case with regular
 files on a filesystem, so it is acceptable.

Are you implying that:
- if the procfs is made to support char per char reads, dash reading
an inconsistent value is actually a feature ?
- buffering should, therefore, always be explicit ?

On a side note, the whole procfs seems to be designed around one
unique page read if possible (1x 4K).
I think it does so in order to be able to vastly simplify its
usage/implementation by kernel modules.

 If single-byte reads are really unacceptable, then the proper way to
 read these files needs to be documented, and clear violations that will
 not work properly should cause an error (in this case, this means that
 reading one byte from offset 0 should fail like reading one byte from
 offset 1 does).

+1 for the proper way to read these files needs to be documented and
I also think that emitting an error would be better than silently
returning erroneous data. [ EOVERFLOW is coming to my mind ]

--
Steve Schnepp
http://blog.pwkf.org/
--
To unsubscribe from this list: send the line unsubscribe dash in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-02 Thread Steve Schnepp
2010/9/1 Steve Schnepp steve.schn...@gmail.com:
 conforming to POSIX isn't a realistic option, would it be possible to
 have a workaround that doesn't involve an external tool like cat(1) ?

Hi, I just hacked  attached a little patch away to be able to solve this case.
Feel free to reply with your comments.

NB: I just targeted dash-0.5.5.1, but it might apply to any version.

--
Steve Schnepp
http://blog.pwkf.org/


chunked_bltin_read.diff
Description: Binary data