pc Wrote:

> dsimcha Wrote:
> 
> > == Quote from pc ([email protected])'s article
> > > Is there a way to make the functions in std.string, such as replace, 
> > > pure? Many pure functions are going to want to use these. Also, could
> > > some of them be executable at compile time?
> > > For me, using D2.032, this did not compile.
> > > pure string replaceXX(string str){
> > >   return replace(str,"XX","X");
> > > }
> > > If I am missing something, help!
> > 
> > For a function in D to pass the compiler checks for purity, it must only 
> > call
> > functions that are *marked as* being pure.  If a function is not marked as 
> > pure
> > but is de facto pure, it won't work.  For example:
> > 
> > uint nPlus2(uint n) pure {
> >     return nPlus1( nPlus1( n));  // Not pure.
> > }
> > 
> > uint nPlus1(uint n) {
> >     return n + 1;
> > }
> > 
> > Many functions that are, in fact, pure, have not been annotated as such yet 
> > in
> > Phobos, since pure was implemented fairly recently.  If you want to help 
> > out, this
> > is fairly low hanging fruit.
> > 
> > Also, purity is very restrictive right now and is designed partly with 
> > thread
> > safety in mind.  A function that truly has no side effects from an 
> > observable
> > behavior in a single thread point of view won't necessarily pass the 
> > compiler as pure:
> > 
> > __gshared uint foo;
> > 
> > /* wasteTime() is impure even though it has no observable side
> >  * effects in a single thread because it still (at least
> >  * temporarily) manipulates global state, and thus could
> >  * cause problems in multithreaded code.  Furthermore, even if
> >  * it were thread safe, it would be hard to prove for all but
> >  * the simplest cases that functions like these have no
> >  * observable side effects.*/
> > void wasteTime() pure {  // Won't compile.
> >    foo++;
> >    foo--;
> > }
> 
> Thank you for the helpful comments.
> 
> Re helping out, I would like to help, but at this stage I feel that I need to 
> learn much much more before I can be of any use. (I am a recently retired 
> international income tax consultant). If I get up to speed, I will certainly 
> help.
> 
> I was thinking that it would be good if std.string was completely templated 
> to work for char, wchar and dchar (My main hobby is learning Chinese, so I 
> have an interest in unicode.) I also thought the functions should be pure. 
> The first step in this direction, and to learn D2, was to write
> 
>       immutable(T)[][] csvSplit(T)(immutable(T)[], T sep=',', T quote='"');
> 
> This worked out pretty well for string, wstring and dstring (copy attached). 
> I take no credit for anything clever in the code (it's all based on a Lisp 
> program written by Alain Picard that is available on the web -- it was by far 
> the easiest to understand).
> 
> Here's the catch -- I could not make csvSplit pure. The inner functions were 
> referencing csvSplit's local variables.  I think that the problem only occurs 
> in templates. The following isolates the issue:
> 
> 
> 
> import std.stdio;
> 
> /*
>   ATTEMPT TO USE NESTED "PURE" FUNCTIONS IN A TEMPLATE.
> 
>   All works fine unless you uncomment the third line in main. If you
>   do, dmd 2.032 yields:
> 
>   pure.d(35): Error: pure nested function 'bar' cannot access mutable
>   data 'fooState'
> 
>   pure.d(36): Error: pure nested function 'bar' cannot access mutable
>   data 'y'
> 
>   pure.d(47): Error: template instance pure.fooPT!(char) error
>   instantiating
> */
> 
> 
> //"pure" inner function, with concrete types - ok
> pure string foo(string x, string y){
>   
>   string fooState;
> 
>   string bar(string x){
>     fooState = "hello ";
>     return x ~ y;
>   }
> 
>   return fooState ~ bar(x);
> }
> 
> //potentially pure (?) templated version not labeled as pure - ok
> immutable(T)[] fooT(T)(immutable(T)[] x, immutable(T)[] y){
> 
>   immutable(T)[] fooState;
> 
>   immutable(T)[] bar(immutable(T)[] x){
>     fooState = "hello ";
>     return x ~ y;
>   }
> 
>   return fooState ~ bar(x);
> 
> }
> 
> //attempt to make templated version pure - no dice
> pure immutable(T)[] fooPT(T)(immutable(T)[] x, immutable(T)[] y){
> 
>   immutable(T)[] fooState;
> 
>   immutable(T)[] bar(immutable(T)[] x){
>     fooState = "hello ";
>     return x ~ y;
>   }
> 
>   return fooState ~ bar(x);
> 
> }
> 
> 
> void main(){
>   writeln(foo("p", "c"));
>   writeln(fooT("p", "c"));
>   //writeln(fooPT("p", "c"));
> 
> 
// Alain Picard -- the  states
// figure how to acknowledge
// put in my string util module
// 

module dcsv;
import std.string;
version(unittest){import std.conv;}

private enum {OUTSIDE_FIELD, IN_FIELD, IN_QUOTED_FIELD, AFTER_ENDING_QUOTE}


/* csvSplit
 *
 * Splits a line into its csv formatted fields.
 * Strips leading and trailing whitespace (space, tab) from fields,
 * unless quoted; whitespace inside an unquoted field is kept.
 * Inside a quoted field a doubled quote char stands for one quote char.
 * The line can be string, wstring or dstring.
 *
 * Params:
 *   line         = the text to split (one record, no line terminator
 *                  handling is done here)
 *   fieldSepChar = field separator; may itself be a space or a tab, in
 *                  which case it is treated as a separator, not as
 *                  strippable whitespace
 *   quoteChar    = quote character
 *
 * Returns: the fields in order. A line holding only whitespace yields
 *          no fields; a trailing separator yields a final "" field.
 *
 * Throws: Exception when a quote appears inside an unquoted field, when
 *         a quoted field is never closed, or when a closing quote is
 *         followed by anything but whitespace, a separator or a quote.
 */ 

public immutable(T)[][] csvSplit(T)(immutable(T)[] line, 
                                    T fieldSepChar=',',
                                    T quoteChar = '"')
{
  alias immutable(T)[] tstring;
  tstring nullField = "";
  int state = OUTSIDE_FIELD;
  // Positions are size_t (the type of line.length) so the code also
  // compiles where size_t is wider than int.
  size_t fieldBeg;
  size_t charPos;
  // Set once every character has been consumed; the parse functions
  // then perform their end-of-line action instead of reading line[charPos].
  bool atEnd = false;
  tstring[] fields;
  bool fieldHasDoubleQuoteChars;
  size_t numTrailingWhitespaceChars;

  // Replaces every pair of adjacent quote chars in str by a single one.
  tstring reduceDoubles(tstring str){
    tstring ret;
    bool afterQuote = false;
    foreach(T c;str){
      if (c==quoteChar) {
        if (afterQuote){
          afterQuote = false;
          continue; //second quote of a pair: drop it
        }
        afterQuote = true;
      }
      ret ~= c;
    }
    return ret;
  }

  // Space and tab are strippable whitespace, unless the caller chose
  // one of them as quote or separator char.
  bool isWhitespace(T c){
    return (c != quoteChar) && (c != fieldSepChar) && (c==' ' || c=='\t');
  }

  // Emits line[fieldBeg..end] minus the trailing whitespace counted so
  // far, and goes back to looking for the start of the next field.
  void putField (size_t end){
    end -= numTrailingWhitespaceChars;
    tstring str = line[fieldBeg..end];
    if (fieldHasDoubleQuoteChars)
      str = reduceDoubles(str);
    fields ~= str;
    state = OUTSIDE_FIELD;
  }

  bool parseOutsideField(){
    if (atEnd) {
      if (fields.length > 0) //skip all blank lines
        fields ~= nullField; //emit last "" field
      return false;
    }
    fieldHasDoubleQuoteChars = false;
    T c = line[charPos];
    if (isWhitespace(c)){} //just skip it
    else if (c == fieldSepChar)
      fields ~= nullField; // emit "" field
    else if (c == quoteChar){
      state = IN_QUOTED_FIELD;
      fieldBeg = charPos + 1; //content starts after the opening quote
      numTrailingWhitespaceChars = 0;
    }
    else {
      state = IN_FIELD;
      fieldBeg = charPos;
      numTrailingWhitespaceChars = 0;
    }
    return true;
  }

  bool parseInField(){
    //in the midst of an unquoted field
    if (atEnd) {
      putField(line.length);
      return false;
    }
    T c = line[charPos];
    if (isWhitespace(c))
      numTrailingWhitespaceChars++;
    else if (c == fieldSepChar)
      putField(charPos);
    else if (c == quoteChar)
      throw new Exception("Unexpected quote in unquoted field");
    else
      //an ordinary char: whitespace counted so far was inside the
      //field, not trailing, so it must not be stripped
      numTrailingWhitespaceChars = 0;
    return true;
  }

  bool parseInQuotedField(){
    //in the midst of a quoted field
    if (atEnd)
      throw new Exception("Unbalanced initial \".");
    T c = line[charPos];
    if (c == quoteChar)
      state = AFTER_ENDING_QUOTE;
    return true;
  }

  bool parseAfterEndingQuote(){
    if (atEnd) {
      putField(line.length-1); //-1 leaves out the closing quote
      return false;
    }
    T c = line[charPos];
    if (c == quoteChar){
      fieldHasDoubleQuoteChars = true; //false alarm
      state = IN_QUOTED_FIELD; //continue parsing quoted field
    }    
    else if (c == fieldSepChar)
      putField(charPos-1); //-1 leaves out the closing quote
    else if (isWhitespace(c))
      numTrailingWhitespaceChars++;
    else 
      throw new Exception("Unexpected char after end of quoted field.");
    return true;
  }

  // Runs the handler of the current state; false means "stop scanning".
  bool parse(){
    switch (state){
    case OUTSIDE_FIELD:
      return parseOutsideField();
    case IN_FIELD:
      return parseInField();
    case IN_QUOTED_FIELD:
      return parseInQuotedField();
    case AFTER_ENDING_QUOTE:
      return parseAfterEndingQuote();
    default:
      return false;
    }
  }

  while (charPos<line.length && parse()){
    charPos++;
  }
  //one extra step so the current state can flush or reject what is pending
  atEnd = true;
  parse();
  return fields;
}

version(unittest){

  // Renders the fields csvSplit finds in line as "|f1|f2|...|", so a
  // whole split result can be compared against one literal.
  immutable(T)[] joinfields(T)(immutable(T)[] line){
    immutable(T)[] rendered = "|";
    foreach(field; csvSplit(line)){
      rendered ~= field;
      rendered ~= "|";
    }
    return rendered;
  }

  // Checks that line splits into the fields spelled out in joined, for
  // the string, wstring and dstring instantiations in turn. The
  // original (narrow) line is the assert message in every case.
  void csvAssert(string line, string joined){
    assert(joinfields(line)==joined, line);
    assert(joinfields(to!(wstring)(line))==to!(wstring)(joined), line);
    assert(joinfields(to!(dstring)(line))==to!(dstring)(joined), line);
  }
}


unittest {
  // Each row: a raw csv line, then its expected fields rendered as
  // "|field|field|...|". Rows are checked in the order listed.
  string[][] expectations = [
    [`,`,`|||`],
    [`,a`,`||a|`],
    [`a,`,`|a||`],
    [`a`,`|a|`],
    [`a, `,`|a||`],
    [` a,`,`|a||`],
    [`a,,`,`|a|||`],
    [`a,"b b"`,`|a|b b|`],
    [`a,"b b" `,`|a|b b|`],
    [`a, "b b"`,`|a|b b|`],
    [`a,"b""c""b" `,`|a|b"c"b|`],
    [`a,"b""c""" `,`|a|b"c"|`],
    [`a,"""c""b" `,`|a|"c"b|`],
  ];
  foreach (row; expectations)
    csvAssert(row[0], row[1]);
}

I added a DOS version and a little benchmark program using csvSplit. Also, note 
that this is a draft.
// csvSplit.d
//
// Based on csv-parser, a Lisp program written by Alain Picard
// It appears in the Common Lisp Directory 
// http://members.optusnet.com.au/apicard/csv-parser.lisp
// Any good idea in this code is Alain's.
// Any errors are mine.
// 
// DRAFT FOR DISCUSSION PURPOSES ONLY - NOT COMPLETE
// COMMENTS SOLICITED -ESPECIALLY RE THIS LICENSE STUFF
//
// This software is "as is", and has no warranty of any kind.  The
// author assumes no responsibility for the consequences of any use
// of this software.
//
// To the extent I have added anything, do whatever you want with
// it. To the extent that the software is a modification of csv-parser,
// it is subject to its license (LGPL). See 
// http://members.optusnet.com.au/apicard/csv-parser.lisp
// for a copy of the license.
//


module dcsv;
import std.string;
version(unittest){import std.conv;}

private enum {OUTSIDE_FIELD, IN_FIELD, IN_QUOTED_FIELD, AFTER_ENDING_QUOTE}


/* csvSplit
 *
 * Splits a line into its csv formatted fields.
 * Strips leading and trailing whitespace (space, tab) from fields,
 * unless quoted; whitespace inside an unquoted field is kept.
 * Inside a quoted field a doubled quote char stands for one quote char.
 * The line can be string, wstring or dstring.
 *
 * Params:
 *   line         = the text to split (one record, no line terminator
 *                  handling is done here)
 *   fieldSepChar = field separator; may itself be a space or a tab, in
 *                  which case it is treated as a separator, not as
 *                  strippable whitespace
 *   quoteChar    = quote character
 *
 * Returns: the fields in order. A line holding only whitespace yields
 *          no fields; a trailing separator yields a final "" field.
 *
 * Throws: Exception when a quote appears inside an unquoted field, when
 *         a quoted field is never closed, or when a closing quote is
 *         followed by anything but whitespace, a separator or a quote.
 */ 

public immutable(T)[][] csvSplit(T)(immutable(T)[] line, 
                                    T fieldSepChar=',',
                                    T quoteChar = '"')
{
  alias immutable(T)[] tstring;
  tstring nullField = "";
  int state = OUTSIDE_FIELD;
  // Positions are size_t (the type of line.length) so the code also
  // compiles where size_t is wider than int.
  size_t fieldBeg;
  size_t charPos;
  // Set once every character has been consumed; the parse functions
  // then perform their end-of-line action instead of reading line[charPos].
  bool atEnd = false;
  tstring[] fields;
  bool fieldHasDoubleQuoteChars;
  size_t numTrailingWhitespaceChars;

  // Replaces every pair of adjacent quote chars in str by a single one.
  tstring reduceDoubles(tstring str){
    tstring ret;
    bool afterQuote = false;
    foreach(T c;str){
      if (c==quoteChar) {
        if (afterQuote){
          afterQuote = false;
          continue; //second quote of a pair: drop it
        }
        afterQuote = true;
      }
      ret ~= c;
    }
    return ret;
  }

  // Space and tab are strippable whitespace, unless the caller chose
  // one of them as quote or separator char.
  bool isWhitespace(T c){
    return (c != quoteChar) && (c != fieldSepChar) && (c==' ' || c=='\t');
  }

  // Emits line[fieldBeg..end] minus the trailing whitespace counted so
  // far, and goes back to looking for the start of the next field.
  void putField (size_t end){
    end -= numTrailingWhitespaceChars;
    tstring str = line[fieldBeg..end];
    if (fieldHasDoubleQuoteChars)
      str = reduceDoubles(str);
    fields ~= str;
    state = OUTSIDE_FIELD;
  }

  bool parseOutsideField(){
    if (atEnd) {
      if (fields.length > 0) //skip all blank lines
        fields ~= nullField; //emit last "" field
      return false;
    }
    fieldHasDoubleQuoteChars = false;
    T c = line[charPos];
    if (isWhitespace(c)){} //just skip it
    else if (c == fieldSepChar)
      fields ~= nullField; // emit "" field
    else if (c == quoteChar){
      state = IN_QUOTED_FIELD;
      fieldBeg = charPos + 1; //content starts after the opening quote
      numTrailingWhitespaceChars = 0;
    }
    else {
      state = IN_FIELD;
      fieldBeg = charPos;
      numTrailingWhitespaceChars = 0;
    }
    return true;
  }

  bool parseInField(){
    //in the midst of an unquoted field
    if (atEnd) {
      putField(line.length);
      return false;
    }
    T c = line[charPos];
    if (isWhitespace(c))
      numTrailingWhitespaceChars++;
    else if (c == fieldSepChar)
      putField(charPos);
    else if (c == quoteChar)
      throw new Exception("Unexpected quote in unquoted field");
    else
      //an ordinary char: whitespace counted so far was inside the
      //field, not trailing, so it must not be stripped
      numTrailingWhitespaceChars = 0;
    return true;
  }

  bool parseInQuotedField(){
    //in the midst of a quoted field
    if (atEnd)
      throw new Exception("Unbalanced initial \".");
    T c = line[charPos];
    if (c == quoteChar)
      state = AFTER_ENDING_QUOTE;
    return true;
  }

  bool parseAfterEndingQuote(){
    if (atEnd) {
      putField(line.length-1); //-1 leaves out the closing quote
      return false;
    }
    T c = line[charPos];
    if (c == quoteChar){
      fieldHasDoubleQuoteChars = true; //false alarm
      state = IN_QUOTED_FIELD; //continue parsing quoted field
    }    
    else if (c == fieldSepChar)
      putField(charPos-1); //-1 leaves out the closing quote
    else if (isWhitespace(c))
      numTrailingWhitespaceChars++;
    else 
      throw new Exception("Unexpected char after end of quoted field.");
    return true;
  }

  // Runs the handler of the current state; false means "stop scanning".
  bool parse(){
    switch (state){
    case OUTSIDE_FIELD:
      return parseOutsideField();
    case IN_FIELD:
      return parseInField();
    case IN_QUOTED_FIELD:
      return parseInQuotedField();
    case AFTER_ENDING_QUOTE:
      return parseAfterEndingQuote();
    default:
      return false;
    }
  }

  while (charPos<line.length && parse()){
    charPos++;
  }
  //one extra step so the current state can flush or reject what is pending
  atEnd = true;
  parse();
  return fields;
}

version(unittest){

  // Renders the fields csvSplit finds in line as "|f1|f2|...|", so a
  // whole split result can be compared against one literal.
  immutable(T)[] joinfields(T)(immutable(T)[] line){
    immutable(T)[] rendered = "|";
    foreach(field; csvSplit(line)){
      rendered ~= field;
      rendered ~= "|";
    }
    return rendered;
  }

  // Checks that line splits into the fields spelled out in joined, for
  // the string, wstring and dstring instantiations in turn. The
  // original (narrow) line is the assert message in every case.
  void csvAssert(string line, string joined){
    assert(joinfields(line)==joined, line);
    assert(joinfields(to!(wstring)(line))==to!(wstring)(joined), line);
    assert(joinfields(to!(dstring)(line))==to!(dstring)(joined), line);
  }
}


unittest {
  // Each row: a raw csv line, then its expected fields rendered as
  // "|field|field|...|". Rows are checked in the order listed.
  string[][] expectations = [
    [`,`,`|||`],
    [`,a`,`||a|`],
    [`a,`,`|a||`],
    [`a`,`|a|`],
    [`a, `,`|a||`],
    [` a,`,`|a||`],
    [`a,,`,`|a|||`],
    [`a,"b b"`,`|a|b b|`],
    [`a,"b b" `,`|a|b b|`],
    [`a, "b b"`,`|a|b b|`],
    [`a,"b""c""b" `,`|a|b"c"b|`],
    [`a,"b""c""" `,`|a|b"c"|`],
    [`a,"""c""b" `,`|a|"c"b|`],
  ];
  foreach (row; expectations)
    csvAssert(row[0], row[1]);
}

import std.stdio;
import std.date;
//import dcsv;
//import fun_dcsv;
import pure_dcsv;

// Benchmark body: splits, 1000 times over, a sample line whose quoted
// fields contain no doubled quote chars. The results are discarded.
void test1(){
  string sample = q"[1,2,3,abc,"abc","qaqqbqqcq",12/31/06]";
  foreach (pass; 0 .. 1000)
    csvSplit(sample);
}
// Benchmark body: splits, 1000 times over, a sample line whose last
// quoted field is full of doubled quote chars. The results are discarded.
void test2(){
  string sample = q"[1,2,3,abc,"abc","""a""""b""""c""",12/31/06]";
  foreach (pass; 0 .. 1000)
    csvSplit(sample);
}

// Runs each benchmark body 1000 times through benchmark and prints
// whatever benchmark reports for it.
void main(){
  writeln("Unittest me");

  auto timing = benchmark!(test1)(1000);
  writeln("benchmarked test1: ", timing);

  timing = benchmark!(test2)(1000);
  writeln("benchmarked test2: ", timing);
}

Reply via email to