lør, 17 10 2009 kl. 18:29 +0200, skrev Eric Chassande-Mottin:
> please do your tests with this version instead. it includes
> the headerline prop and the '%*' dummy specifier.
> those are the basic functionalities of the
> original textread we want to have in octave.
> the others are kind of gadget, i think.
The code seemed to handle whatever I threw at it, so I went ahead and
did a vectorisation of it. There are still a few spots (marked with XXX
in the code) that I'd like to see improved, but otherwise I think this
is quite good.
I'm attaching the code for comments. It should be noted that Matlab has
a 'strread' function that does the same thing as 'textread' except it
works on strings instead of files. So, I changed the code to behave like
'strread' and created a simple wrapper around this for 'textread'.
Should I replace the current version with this one?
Søren
## Copyright (C) 2009 Eric Chassande-Mottin, CNRS (France)
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, see
## <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{a}, @var{b}, @dots{}] =} strread (@var{str}, @var{format})
## @deftypefnx {Function File} {[@var{a}, @var{b}, @dots{}] =} strread (@var{str}, @var{format}, @var{prop}, @var{value})
## Read data from a string.
## The string @var{format} describes the different columns of @var{str}.
## It may contain the following specifiers:
## @table @code
## @item %s
## for a string,
##
## @item %d,%f
## for a double, floating-point or integer number and
##
## @item %*
## to ignore a column.
## @end table
##
## For example, the string
##
## @example
## @group
## @var{str} = "\
## Bunny Bugs 5.5\n\
## Duck Daffy -7.5e-5\n\
## Penguin Tux 6"
## @end group
## @end example
##
## can be read using
##
## @example
## @code{[a,b,c] = strread(@var{str}, "%s %s %f").}
## @end example
##
## Currently implemented @var{prop} arguments are:
## @itemize
## @item "headerlines":
## @var{value} represents the number of header lines to skip.
## @item "commentstyle":
## @var{value} is the style and can be
## @itemize
## @item "shell": comment specifier is #
## @item "c": comments are delimited by /* and */
## @item "c++": comment specifier is //
## @item "matlab": comment specifier is %
## @end itemize
## @end itemize
##
## @seealso{textread, load, dlmread, fscanf}
## @end deftypefn
function varargout = strread (str, formatstr = "%f", varargin)
  ## Read whitespace-separated data from the string STR according to
  ## FORMATSTR and return one output per non-skipped format specifier.
  ##
  ## Inputs:
  ##   str        character string holding the data.
  ##   formatstr  character string of specifiers: "%s" (string), "%d" or
  ##              "%f" (number), "%*" (skip the column).  Defaults to "%f".
  ##   varargin   property/value pairs: "commentstyle" ("c", "c++", "shell"
  ##              or "matlab") and "headerlines" (number of lines to skip).
  ##
  ## Outputs:
  ##   "%s" columns are returned as transposed cell arrays of strings and
  ##   "%d"/"%f" columns as transposed numeric arrays (converted with
  ##   str2double).  Short columns are padded with "" or 0 respectively.
  ##
  ## Errors are raised for non-string STR/FORMATSTR, an unpaired property,
  ## an incomplete or unsupported specifier, and a mismatch between the
  ## number of outputs and the number of non-skipped specifiers.

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif

  if (! ischar (str) || ! ischar (formatstr))
    error ("strread: first and second input arguments must be strings");
  endif

  if (mod (length (varargin), 2) != 0)
    error ("strread: properties must be given as property/value pairs");
  endif

  ## Parse options
  comment_flag = false;
  header_skip = 0;
  numeric_fill_value = 0; # XXX: the user cannot set this
  for n = 1:2:length (varargin)
    prop = varargin{n};
    val = varargin{n+1};
    switch (prop)
      case "commentstyle"
        comment_flag = true;
        switch (val)
          case "c"
            comment_specif = {"/*", "*/"};
          case "c++"
            comment_specif = {"//", "\n"};
          case "shell"
            comment_specif = {"#", "\n"};
          case "matlab"
            comment_specif = {"%", "\n"};
          otherwise
            ## Without a known style there are no delimiters to search
            ## for, so comment removal must stay disabled.
            comment_flag = false;
            warning ("strread: unknown comment style '%s'", val);
        endswitch
      case "headerlines"
        header_skip = val;
      otherwise
        warning ("strread: unknown option '%s'", prop);
    endswitch
  endfor

  ## Parse format string
  idx = strfind (formatstr, "%")';
  if (! isempty (idx) && idx(end) == length (formatstr))
    error ("strread: incomplete format specifier at end of format string");
  endif
  ## One row per specifier: the "%" and the character following it
  specif = formatstr ([idx, idx+1]);
  nspecif = length (idx);
  idx_star = strfind (formatstr, "%*");
  nfields = length (idx) - length (idx_star);
  ## A call without an output variable is accepted for a single field so
  ## that the result can be returned in 'ans'.
  if (nargout != nfields && ! (nargout == 0 && nfields == 1))
    error ("strread: the number of output variables must match that of format specifiers");
  endif

  ## Remove comments (XXX: can this be done in a smarter way?)
  if (comment_flag)
    opener = comment_specif{1};
    closer = comment_specif{2};
    cstart = strfind (str, opener);
    cstop = strfind (str, closer);
    ## A newline that ends a line comment is kept so that the tokens on
    ## either side of the comment are not fused together.
    keep_closer = strcmp (closer, "\n");
    keep = true (size (str));
    for k = 1:length (cstart)
      a = cstart(k);
      ## The closer must start after the complete opener
      stop = cstop(find (cstop >= a + length (opener), 1));
      if (isempty (stop))
        ## Unterminated comment: it runs to the end of the string
        b = length (str);
      elseif (keep_closer)
        b = stop - 1;
      else
        b = stop + length (closer) - 1;
      endif
      keep(a:b) = false;
    endfor
    str = str(keep);
  endif

  ## Split 'str' into lines
  str = split_by (str, "\n");

  ## Skip headers
  str = str(header_skip+1:end);

  ## Split 'str' into words (XXX: can this be done smarter?)
  tmp = sprintf ("%s ", str{:});
  words = split_by (tmp, " ");
  num_words = numel (words);
  num_lines = ceil (num_words / nspecif);

  ## For each specifier, take every nspecif-th word starting at its column
  k = 1;
  for m = 1:nspecif
    data = words(m:nspecif:end);

    ## Map to format
    switch (specif(m, :))
      case "%s"
        data(end+1:num_lines) = {""};
        varargout{k} = data';
        k++;
      case {"%d", "%f"}
        data = str2double (data);
        data(end+1:num_lines) = numeric_fill_value;
        varargout{k} = data.';
        k++;
      case "%*"
        ## skipped column: produces no output
      otherwise
        error ("strread: unsupported format specifier '%s'", specif(m, :));
    endswitch
  endfor
endfunction
function out = split_by (text, sep)
  ## Cut TEXT into pieces at the separator SEP and strip leading and
  ## trailing whitespace from every piece.
  ## NOTE(review): the third argument 'true' is presumably strsplit's
  ## "strip empty fields" flag in the Octave version this targets -- confirm.
  pieces = strsplit (text, sep, true);
  out = strtrim (pieces);
endfunction
%!test
%! ## Shell-style comments are removed; the three remaining words are
%! ## cycled over two specifiers, so the string column is padded with "".
%! str = "# comment\n# comment\n1 2 3";
%! [a, b] = strread (str, '%d %s', 'commentstyle', 'shell');
%! assert (a, [1; 3]);
%! assert (b, {"2"; ""});
%!test
%! ## Round trip of ten "number letter" lines through '%f %s'.
%! str = '';
%! a = rand (10, 1);
%! b = char (round (65 + 20 * rand (10, 1)));
%! for k = 1:10
%! str = sprintf ('%s %.6f %s\n', str, a (k), b (k));
%! endfor
%! [aa, bb] = strread (str, '%f %s');
%! ## Numbers were printed with six decimals, hence the tolerance.
%! assert (a, aa, 1e-5);
%! assert (cellstr (b), bb);
%!test
%! ## Same data, but the string column is skipped with '%*'.
%! str = '';
%! a = rand (10, 1);
%! b = char (round (65 + 20 * rand (10, 1)));
%! for k = 1:10
%! str = sprintf ('%s %.6f %s\n', str, a (k), b (k));
%! endfor
%! aa = strread (str, '%f %*s');
%! assert (a, aa, 1e-5);
%!test
%! ## A C-style comment spanning two lines is removed before parsing.
%! str = sprintf ('/* this is\nacomment*/ 1 2 3');
%! a = strread (str, '%f', 'commentstyle', 'c');
%! assert (a, [1; 2; 3]);
## Copyright (C) 2009 Eric Chassande-Mottin, CNRS (France)
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, see
## <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{a}, @var{b}, @dots{}] =} textread (@var{filename}, @var{format})
## @deftypefnx {Function File} {[@var{a}, @var{b}, @dots{}] =} textread (@var{filename}, @var{format}, @var{prop}, @var{value})
## Read data from a text file.
## The string @var{format} describes the different columns of the text file.
## It may contain the following specifiers:
## @table @code
## @item %s
## for a string,
##
## @item %d,%f
## for a double, floating-point or integer number and
##
## @item %*
## to ignore a column.
## @end table
##
## For example, the textfile containing
##
## @example
## @group
## Bunny Bugs 5.5
## Duck Daffy -7.5e-5
## Penguin Tux 6
## @end group
## @end example
##
## can be read using
##
## @example
## @code{[a,b,c] = textread("test.txt", "%s %s %f").}
## @end example
##
## Currently implemented @var{prop} arguments are:
## @itemize
## @item "headerlines":
## @var{value} represents the number of header lines to skip.
## @item "commentstyle":
## @var{value} is the style and can be
## @itemize
## @item "shell": comment specifier is #
## @item "c": comments are delimited by /* and */
## @item "c++": comment specifier is //
## @item "matlab": comment specifier is %
## @end itemize
## @end itemize
##
## @seealso{strread, load, dlmread, fscanf}
## @end deftypefn
function varargout = textread (filename, formatstr = "%f", varargin)
  ## Read the whole text file FILENAME into a string and parse it with
  ## strread.
  ##
  ## Inputs:
  ##   filename   name of the file to read (character string).
  ##   formatstr  format string passed on to strread; defaults to "%f".
  ##   varargin   property/value pairs passed on to strread unchanged.
  ##
  ## Outputs:
  ##   whatever strread returns for the requested number of outputs.
  ##
  ## Errors are raised when FILENAME or FORMATSTR is not a string or when
  ## the file cannot be opened for reading.

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif

  if (! ischar (filename) || ! ischar (formatstr))
    error ("textread: first and second input arguments must be strings");
  endif

  ## Read file
  fid = fopen (filename, "r");
  if (fid == -1)
    error ("textread: could not open '%s' for reading", filename);
  endif

  ## Close the file even if reading fails, so the handle is not leaked
  unwind_protect
    str = char (fread (fid, "char")');
  unwind_protect_cleanup
    fclose (fid);
  end_unwind_protect

  ## Call strread to make it do the real work
  [varargout{1:nargout}] = strread (str, formatstr, varargin{:});
endfunction
------------------------------------------------------------------------------
Come build with us! The BlackBerry(R) Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay
ahead of the curve. Join us from November 9 - 12, 2009. Register now!
http://p.sf.net/sfu/devconference
_______________________________________________
Octave-dev mailing list
Octave-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/octave-dev