Hi Bigloo users.
The attached patch speeds up reading large expressions that contain
many instances of sharing (via the notation of SRFI-38).
Read time for a 40 MB expression (180 MB without sharing) with
around 500.000 sharings:
before: 5.20 sec
after: 0.37 sec
(Times are averages measured on an Intel i7-4790K; data on an SSD;
IO caches cleared befor each run: echo 3 > /proc/sys/vm/drop_caches )
To use the patch, build as normal, but after the "make install" step
add the following steps:
patch -p0 < bigloo-cycles.patch
( cd runtime && make lib && make lib_u )
make install
Manuel, please consider for inclusion.
Ciao
Sven
--- runtime/Read/reader.scm.orig 2017-08-26 06:28:39.000000000 +0200
+++ runtime/Read/reader.scm 2017-08-31 11:06:37.375174621 +0200
@@ -17,7 +17,6 @@
__rgc
__param
__object
- __hash
__thread)
(use __type
@@ -25,7 +24,6 @@
__param
__structure
__tvector
- __hash
__dsssl
__ucs2
__unicode
@@ -198,20 +196,10 @@
(cond
((procedure? obj)
(let* ((no (obj))
- (val (cond
- ((hashtable? cycles)
- (let ((val (hashtable-get cycles no)))
- (if (and (not val)
- (not (hashtable-contains? cycles no)))
- (read-error "no target for graph reference"
- no port)
- val)))
- (else
- (let ((cell (assq no cycles)))
- (if (not (pair? cell))
- (read-error "no target for graph reference"
- no port)
- (cdr cell)))))))
+ (val0 (vector-ref cycles no))
+ (val (if (not val0)
+ (read-error "no target for graph reference" no port)
+ val0)))
(if (eq? val obj)
(read-error "Illegal cyclic reference" no port)
val)))
@@ -472,7 +460,6 @@
posp cycles par-open bra-open par-poses bra-poses)
(define resolve #t)
- (define cycles-count 0)
;; newlines
((+ #\Newline)
@@ -744,24 +731,18 @@
(cond
((eof-object? the-object)
(read-error/loc pos "Illegal cyclic reference" no (the-port)))
- ((>=fx cycles-count 64)
- ;; convert to hashtable
- (set! cycles-count -1)
- (let ((h (create-hashtable :eqtest eq? :bucket-expansion 2.0)))
- (for-each (lambda (cell)
- (hashtable-put! h (car cell) (cdr cell)))
- cycles)
- (set! cycles h))))
- (cond
- ((hashtable? cycles)
- (unless (eq? (hashtable-put! cycles no the-object)
- the-object)
- (read-error "Illegal duplicate declaration" no (the-port))))
- (else
- (when (assq no cycles)
- (read-error "Illegal duplicate declaration" no (the-port)))
- (set! cycles (cons (cons no the-object) cycles))
- (set! cycles-count (+fx cycles-count 1))))
+ ((>=fx no (vector-length cycles))
+ ;; extend vector
+ (let* ((old-length (vector-length cycles))
+ (new-length (maxfx (+ no 1)
+ (*fx old-length 8)))
+ (new-cycles (make-vector new-length #f)))
+ ;(display "new read sharing limit: ") (write new-length) (newline)
+ (vector-copy! new-cycles 0 cycles)
+ (set! cycles new-cycles))))
+ (when (vector-ref cycles no)
+ (read-error "Illegal duplicate declaration" (list no (vector-ref cycles no)) (the-port)))
+ (vector-set! cycles no the-object)
(set! resolve rsvp)
(if rsvp
(unreference! the-object (the-port) cycles)
@@ -773,20 +754,12 @@
(cond
((not resolve)
(lambda () no))
- ((hashtable? cycles)
- (let ((val (hashtable-get cycles no)))
- (if (and (not val)
- (not (hashtable-contains? cycles no)))
- (read-error "no target for graph reference"
- no (the-port))
- val)))
(else
- (let ((cell (assq no cycles)))
- (if (pair? cell)
- (cdr cell)
- (read-error "no target for graph reference"
- no (the-port))))))))
-
+ (let ((val (vector-ref cycles no)))
+ (if (not val)
+ (read-error "no target for graph reference" no (the-port))
+ val))))))
+
;; special tokens
("#"
(read/rp *sharp-grammar* (the-port)))
@@ -836,7 +809,7 @@
;; expansion of the *BIGLOO-GRAMMAR* never checks if the
;; input port is not already closed. In consequence, we
;; have to explicitly test the closeness before reading.
- (read/rp (bigloo-regular-grammar) iport location '() 0 0 '() '())))
+ (read/rp (bigloo-regular-grammar) iport location (make-vector 32 #f) 0 0 '() '())))
;*---------------------------------------------------------------------*/
;* read/case ... */