Hello, Manuel,
I have made some additional improvements to the CSV library. I found that the 
way in which I was using (possibly abusing) the lalr parser resulted in more 
memory being allocated than needed. By calling it for every record in a csv 
file, I was repeatedly reallocating the lalr stack. Since my parsing needs are 
simple, I rewrote it and obtained a significant performance improvement (~4x) 
on my test file. I introduced one change in semantics, which I don't believe 
should bother anyone. Now, when we see a line consisting of just a newline, the 
empty list is returned instead of a list containing an empty string. The patch 
is attached.
Best Regards,Joseph Donaldson

--- bigloo4.3b/api/csv/src/Llib/csv.scm	2017-08-03 00:11:25.000000000 -0700
+++ bigloo4.3b_mod/api/csv/src/Llib/csv.scm	2017-08-12 13:05:46.149711507 -0700
@@ -35,39 +35,42 @@
 
 (define +psv-lexer+ (make-csv-lexer #\| #\"))
 
-;*---------------------------------------------------------------------*/
-;*    +csv-parser+ ...                                                 */
-;*---------------------------------------------------------------------*/
-(define +csv-parser+
-   (lalr-grammar (separator text)
-		 
-      (fields
-	 ((field)
-	  (list field))
-	 ((field separator fields)
-	  (cons field fields)))
-      
-      (field
-	 (()
-	  "")
-	 ((text field)
-	  (string-append text field)))))
-		 
+
+
+(define +csv-unspecified+ '(#unspecified))
+
 ;*---------------------------------------------------------------------*/
 ;*    read-csv-record ...                                              */
 ;*---------------------------------------------------------------------*/
 (define (read-csv-record in #!optional (lexer +csv-lexer+))
-   (if (input-port? in)
-       (let ((pc (peek-char in)))
-	  (if (eof-object? pc)
-	      pc
-	      (read/lalrp +csv-parser+ lexer in
-		 (lambda (x) (or (eof-object? x) (eq? x 'newline))))))
-       (raise
-	  (instantiate::&io-port-error
-	     (proc "read-csv-record")
-	     (msg "invalid input port")
-	     (obj in)))))
+   (when (not (input-port? in))
+      (raise (instantiate::&io-port-error (proc "read-csv-record")
+                                          (msg "invalid input port")
+                                          (obj in))))
+   (let loop ((token (read/rp lexer in))
+              (last-token +csv-unspecified+)
+              (res '()))
+      (cond ((or (eq? token 'newline)
+                 (eof-object? token))
+             (if (and (eof-object? token)
+                      (eq? last-token +csv-unspecified+))
+                 #eof-object
+                  (reverse! res)))
+            ((and (pair? token)
+                  (eq? (car token) 'text))
+             (loop (read/rp lexer in)
+                (car token)
+                (if (eq? last-token 'text)
+                    (cons (string-append (car res) (cdr token)) (cdr res))
+                    (cons (cdr token) res))))
+            ((eq? token 'separator)
+             (loop (read/rp lexer in)
+                'separator
+                res))
+            (else
+             (loop (read/rp lexer in)
+                'text
+                res)))))
 
 ;*---------------------------------------------------------------------*/
 ;*    read-csv-records ...                                             */
--- bigloo4.3b/api/csv/src/Llib/csv.sch	2017-08-03 00:11:25.000000000 -0700
+++ bigloo4.3b_mod/api/csv/src/Llib/csv.sch	2017-08-12 13:07:49.452368223 -0700
@@ -13,31 +13,32 @@
 ;*    make-csv-lexer                                                   */
 ;*---------------------------------------------------------------------*/
 (define-macro (make-csv-lexer sep quot)
-   (if (and (char? sep) (char? quot))
+   (if (and (char? sep)
+	    (char? quot))
        `(regular-grammar ((quote ,quot)
 			  (separator ,sep))
 	   (quote
 	    (let loop ((curr (read-char (the-port)))
-		       (res ""))
+		       (res '()))
 	       (cond ((eof-object? curr)
-		      (raise (instantiate::&io-parse-error
-				(proc "csv lexer")
-				(msg "failed to parse")
-				(obj curr))))
+		      (raise (instantiate::&io-parse-error (proc "lexer")
+							   (msg "failed to parse fail")
+							   (obj curr))))
 		     ((and (char=? curr ,quot)
 			   (not (eof-object? (peek-char (the-port))))
 			   (char=? (peek-char (the-port)) ,quot))
 		      (read-char (the-port))
 		      (loop (read-char (the-port))
-			 (string-append res (string ,quot))))
+			 (cons ,quot res)))
 		     ((char=? curr ,quot)
-		      (cons 'text res))
+		      (cons 'text  (list->string (reverse! res))))
 		     (else
 		      (loop (read-char (the-port))
-			 (string-append res (string curr)))))))
+			    (cons curr res))))))
 	   (separator
 	    'separator)
-	   ((or (: #\return #\newline) #\newline)
+	   ((or (: #\return #\newline)
+		#\newline)
 	    'newline)
 	   ((+ (out quote separator #\return #\newline))
 	    (cons 'text (the-string)))
@@ -45,7 +46,5 @@
 	    (let ((c (the-failure)))
 	       (if (eof-object? c)
 		   c
-		   (error "csv-lexer" "Illegal character" c)))))
-       (error "csv-lexer"
-	  "separator and quote must be a single character"
-	  (list sep quot))))
+		   (error 'csv-lexer "Illegal character" c)))))
+   (error 'csv-lexer "separator and quote must be a single character" (list sep quot))))
\ No newline at end of file
--- bigloo4.3b/api/csv/recette/recette.scm	2017-08-03 00:11:25.000000000 -0700
+++ bigloo4.3b_mod/api/csv/recette/recette.scm	2017-08-12 13:10:16.164896480 -0700
@@ -155,8 +155,8 @@
 	 (close-input-port in)))
    :result (lambda (v)
 	      (if (eq? v 'result)
-		  '("")
-		  (csv-record=? v '("")))))
+		  '()
+		  (csv-record=? v '()))))
 
 (define-test empty
    (let* ((test-string "")

Reply via email to