Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-04 Thread Steve Schnepp
2010/9/3 Jilles Tjoelker :
> This patch assumes that the file descriptor is discarded afterwards (its
> position does not matter). Therefore the very common construct
>  while read x; do
>    ...
>  done
> stops working.

Ohh.. thanks for that, I didn't see it.

Actually "while read x" continues to work.
But "reopening the file" doesn't as in :

read a b < datafile
echo ${a} ${b}
read a b < datafile
echo ${a} ${b}

I attached an updated patch that corrects this pb by discarding the
buffer when opening a new file.
I also put everything in new files (bufreadcmd.c & .h), in order to
ease its understanding.

--
Steve Schnepp
http://blog.pwkf.org/


>
> A possible fix is to check first if the input supports seeking. If it
> does, use the buffering and at the end of the line seek backwards for
> the number of bytes remaining in the buffer. If it does not, read one
> byte at a time.
>
> --
> Jilles Tjoelker
>


--
Steve Schnepp
http://blog.pwkf.org/
Common subdirectories: dash-0.5.4/src/bltin and dash-0.5.4-patched/src/bltin
diff -puN dash-0.5.4/src/bufreadcmd.c dash-0.5.4-patched/src/bufreadcmd.c
--- dash-0.5.4/src/bufreadcmd.c	1970-01-01 01:00:00.0 +0100
+++ dash-0.5.4-patched/src/bufreadcmd.c	2010-09-04 12:31:46.0 +0200
@@ -0,0 +1,59 @@
+/*
+ * Offers a buffered read builtin
+ */
+
+#include 
+#include 
+
+#include "bufreadcmd.h"
+
+#ifdef BUF_READ_BUILTIN_DISABLED
+int dup2_wrapper(int old, int new) {
+	return dup2(old, new);
+}
+int read_stdin_bufferred(char *c) {
+	return read(0, buffer, 1);
+}
+#else // BUF_READ_BUILTIN_DISABLED
+
+/*
+ * Reads from fd 0, with a CHUNK_READ_SIZE,
+ * but emitting one char at a time
+ */
+#define CHUNK_READ_SIZE 32
+static char buffer[CHUNK_READ_SIZE];
+static int buffer_offset = 0;
+static int buffer_len = 0;
+
+int read_stdin_bufferred(char* c) {
+   if (buffer_len == 0) {
+   // No caracter left, resetting buffer & read some more
+   buffer_offset = 0;
+   buffer_len = read(0, buffer, CHUNK_READ_SIZE);
+
+   if (buffer_len == 0) {
+   // Nothing to read anymore
+   return 0;
+   }
+   }
+
+   // Still some character left
+   *c = buffer[buffer_offset++];
+   buffer_len--;
+
+   return 1;
+}
+
+static void _flush_readcmd(int fd) {
+	if (fd == 0) { 
+		// Flush the buffer, discarding its content
+		buffer_len = 0;
+	}
+}
+
+/* Intercept dup2() calls */
+int dup2_wrapper(int old, int new) {
+	_flush_readcmd(new);
+	return dup2(old, new);
+}
+#endif // BUF_READ_BUILTIN_DISABLED
diff -puN dash-0.5.4/src/bufreadcmd.h dash-0.5.4-patched/src/bufreadcmd.h
--- dash-0.5.4/src/bufreadcmd.h	1970-01-01 01:00:00.0 +0100
+++ dash-0.5.4-patched/src/bufreadcmd.h	2010-09-04 12:23:53.0 +0200
@@ -0,0 +1,3 @@
+/* Used for flushing the readcmd read() buffer */
+int dup2_wrapper(int to, int from);
+int read_stdin_bufferred(char *c);
Common subdirectories: dash-0.5.4/src/.deps and dash-0.5.4-patched/src/.deps
diff -puN dash-0.5.4/src/eval.c dash-0.5.4-patched/src/eval.c
--- dash-0.5.4/src/eval.c	2007-07-13 10:26:42.0 +0200
+++ dash-0.5.4-patched/src/eval.c	2010-09-04 12:24:55.0 +0200
@@ -64,6 +64,8 @@
 #include "myhistedit.h"
 #endif
 
+#include "bufreadcmd.h"
+
 
 /* flags in argument to evaltree */
 #define EV_EXIT 01		/* exit after evaluating tree */
@@ -543,11 +545,12 @@ evalpipe(union node *n, int flags)
 close(pip[0]);
 			}
 			if (prevfd > 0) {
-dup2(prevfd, 0);
+dup2_wrapper(prevfd, 0);
+
 close(prevfd);
 			}
 			if (pip[1] > 1) {
-dup2(pip[1], 1);
+dup2_wrapper(pip[1], 1);
 close(pip[1]);
 			}
 			evaltreenr(lp->n, flags);
@@ -625,7 +628,7 @@ evalbackcmd(union node *n, struct backcm
 			FORCEINTON;
 			close(pip[0]);
 			if (pip[1] != 1) {
-dup2(pip[1], 1);
+dup2_wrapper(pip[1], 1);
 close(pip[1]);
 			}
 			eflag = 0;
diff -puN dash-0.5.4/src/Makefile dash-0.5.4-patched/src/Makefile
--- dash-0.5.4/src/Makefile	2010-09-04 13:06:05.0 +0200
+++ dash-0.5.4-patched/src/Makefile	2010-09-04 13:08:26.0 +0200
@@ -57,7 +57,7 @@ am__objects_1 = alias.$(OBJEXT) arith_yy
 	miscbltin.$(OBJEXT) mystring.$(OBJEXT) options.$(OBJEXT) \
 	parser.$(OBJEXT) redir.$(OBJEXT) show.$(OBJEXT) trap.$(OBJEXT) \
 	output.$(OBJEXT) printf.$(OBJEXT) system.$(OBJEXT) \
-	test.$(OBJEXT) times.$(OBJEXT) var.$(OBJEXT)
+	test.$(OBJEXT) times.$(OBJEXT) var.$(OBJEXT) bufreadcmd.$(OBJEXT)
 am_dash_OBJECTS = $(am__objects_1) arith.$(OBJEXT)
 dash_OBJECTS = $(am_dash_OBJECTS)
 dash_DEPENDENCIES = builtins.o init.o nodes.o signames.o syntax.o
@@ -169,14 +169,14 @@ dash_CFILES = \
 	alias.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \
 	histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \
 	mystring.c options.c parser.c redir.c show.c trap.c output.c \
-	

Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-03 Thread Steve Schnepp
2010/9/2 Jilles Tjoelker :

Thanks for your prompt reply.

> Note that a change in the file between the single-byte reads will cause
> an inconsistent value to be read. This is also the case with regular
> files on a filesystem, so it is acceptable.

Are you implying that:
- if the procfs is made to support char per char reads, dash reading
an inconsistent value is actually a feature ?
- buffering should, therefore, always be explicit ?

On a side note, the whole procfs seems to be designed around one
unique page read if possible (1x 4K).
I think it does so in order to be able to vastly simplify its
usage/implementation by kernel modules.

> If single-byte reads are really unacceptable, then the proper way to
> read these files needs to be documented, and clear violations that will
> not work properly should cause an error (in this case, this means that
> reading one byte from offset 0 should fail like reading one byte from
> offset 1 does).

+1 for "the proper way to read these files needs to be documented" and
I also think that emitting an error would be better than silently
returning erroneous data. [ EOVERFLOW is coming to my mind ]

--
Steve Schnepp
http://blog.pwkf.org/
--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-02 Thread Steve Schnepp
2010/9/1 Steve Schnepp :
> conforming to POSIX isn't a realistic option, would it be possible to
> have a workaround that doesn't involve an external tool like cat(1) ?

Hi, I just hacked & attached a little patch away to be able to solve this case.
Feel free to reply with your comments.

NB: I just targeted dash-0.5.5.1, but it might apply to any version.

--
Steve Schnepp
http://blog.pwkf.org/


chunked_bltin_read.diff
Description: Binary data


read() builtin doesn't read integer value /proc files (but bash's does)

2010-09-01 Thread Steve Schnepp
Hi, I opened bug 595063 on the debian BTS [1] and I was suggested to
resend the email upstream.

So I copied the body of the bug below :

dash's read() builtin seems to read the underlying file 1 char at a
time. This doesn't work with some files under /proc, since procfs isn't
fully POSIX compliant.

With bash it works :

$ bash -c 'read MAX < /proc/sys/kernel/pid_max; echo $MAX'
32768

With dash it only reads the first character :

$ dash -c 'read MAX < /proc/sys/kernel/pid_max; echo $MAX'
3

If we use the cat(1) external program it works :

$ dash -c 'MAX=$(cat /proc/sys/kernel/pid_max); echo $MAX'
32768

After a little digging, it only appears on files that contains just an
integer value. When asked to read with a non-null offset (*ppos != 0),
__do_proc_dointvec() just returns 0 (meaning an EOF) as shown on [2].

I'm aware that the issue isn't strictly a dash one, since it has the
right to read one character at a time. But since fixing procfs to be
conforming to POSIX isn't a realistic option, would it be possible to
have a workaround that doesn't involve an external tool like cat(1) ?

[1] http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=595063
[2] http://lxr.linux.no/#linux+v2.6.32/kernel/sysctl.c#L2371
--
Steve Schnepp
http://blog.pwkf.org/
--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html