The attached patch makes BDB more robust against interrupts in various
places. It achieves this mainly by wrapping critical paths in
WITHOUT-INTERRUPTS and by registering cursors on the Lisp level
(and cleaning up any dangling ones when the txn ends).

This is work in progress, although it passes the test suite.

  Leslie

-- 
http://www.linkedin.com/in/polzer
diff -rN -u old-elephant-1.0/src/db-bdb/bdb-collections.lisp new-elephant-1.0/src/db-bdb/bdb-collections.lisp
--- old-elephant-1.0/src/db-bdb/bdb-collections.lisp	2009-10-14 14:32:59.884710862 +0200
+++ new-elephant-1.0/src/db-bdb/bdb-collections.lisp	2009-10-14 14:32:59.918038222 +0200
@@ -282,18 +282,34 @@
   ((handle :accessor cursor-handle :initarg :handle))
   (:documentation "A cursor for traversing (primary) BDB-BTrees."))
 
+(defun register-transaction-cursor (sc txn cursor)
+  (symbol-macrolet ((cursors (controller-transaction-cursors sc)))
+    (when (assoc txn cursors :test #'eq) ; might be called outside of an explicit txn
+      (push cursor (cdr (assoc txn cursors :test #'eq))))))
+
 (defmethod make-cursor ((bt bdb-btree))
   "Make a cursor from a btree."
-  (let ((sc (get-con bt)))
-    (make-instance 'bdb-cursor 
-		   :btree bt
-		   :handle (db-cursor (controller-btrees sc)
-				      :transaction (my-current-transaction sc))
-		   :oid (oid bt))))
+  (let* ((sc (get-con bt))
+         (txn (my-current-transaction sc))
+         cursor)
+    (without-interrupts
+      (setf cursor (make-instance 'bdb-cursor 
+                                  :btree bt
+                                  :handle (db-cursor (controller-btrees sc)
+                                                     :transaction txn)
+                                  :oid (oid bt)))
+      (register-transaction-cursor sc txn cursor))
+    cursor))
 
 (defmethod cursor-close ((cursor bdb-cursor))
-  (db-cursor-close (cursor-handle cursor))
-  (setf (cursor-initialized-p cursor) nil))
+  (let* ((sc (get-con (cursor-btree cursor)))
+         (txn (my-current-transaction sc)))
+    (without-interrupts
+      (db-cursor-close (cursor-handle cursor))
+      (symbol-macrolet ((cursors (controller-transaction-cursors sc)))
+        (setf (cdr (assoc txn cursors :test #'eq)) (remove cursor (cdr (assoc txn cursors :test #'eq)) :test #'eq)))
+      (setf (cursor-initialized-p cursor) nil)))
+  t)
 
 (defmethod cursor-duplicate ((cursor bdb-cursor))
   (make-instance (type-of cursor)
@@ -493,12 +509,17 @@
 
 (defmethod make-cursor ((bt bdb-btree-index))
   "Make a secondary-cursor from a secondary index."
-  (let ((sc (get-con bt)))
-    (make-instance 'bdb-secondary-cursor 
-		   :btree bt
-		   :handle (db-cursor (controller-indices-assoc sc)
-				      :transaction (my-current-transaction sc))
-		   :oid (oid bt))))
+  (let* ((sc (get-con bt))
+         (txn (my-current-transaction sc) )
+        cursor)
+    (without-interrupts
+      (setf cursor (make-instance 'bdb-secondary-cursor 
+                                  :btree bt
+                                  :handle (db-cursor (controller-indices-assoc sc)
+                                                     :transaction txn)
+                                  :oid (oid bt)))
+      (register-transaction-cursor sc txn cursor))
+    cursor))
 
 (defmethod cursor-pcurrent ((cursor bdb-secondary-cursor))
   (when (cursor-initialized-p cursor)
@@ -812,12 +833,18 @@
 
 (defmethod make-cursor ((bt bdb-dup-btree))
   "Make a secondary-cursor from a secondary index."
-  (let ((sc (get-con bt)))
-    (make-instance 'bdb-dup-cursor
-		   :btree bt
-		   :handle (db-cursor (controller-dup-btrees sc)
-				      :transaction (my-current-transaction sc))
-		   :oid (oid bt))))
+  (let* ((sc (get-con bt))
+         (txn (my-current-transaction sc))
+         cursor)
+    (without-interrupts
+      (setf cursor (make-instance 'bdb-dup-cursor
+                                  :btree bt
+                                  :handle (db-cursor (controller-dup-btrees sc)
+                                                     :transaction txn)
+                                  :oid (oid bt)))
+      (register-transaction-cursor sc txn cursor))
+    cursor))
+
 
 (defmethod cursor-next-nodup ((cursor bdb-dup-cursor))
   (if (cursor-initialized-p cursor)
diff -rN -u old-elephant-1.0/src/db-bdb/bdb-controller.lisp new-elephant-1.0/src/db-bdb/bdb-controller.lisp
--- old-elephant-1.0/src/db-bdb/bdb-controller.lisp	2009-10-14 14:32:59.888043774 +0200
+++ new-elephant-1.0/src/db-bdb/bdb-controller.lisp	2009-10-14 14:32:59.918038222 +0200
@@ -37,6 +37,9 @@
    (oid-db :type (or null pointer-void) :accessor controller-oid-db)
    (oid-seq :type (or null pointer-void) :accessor controller-oid-seq)
    (cid-seq :type (or null pointer-void) :accessor controller-cid-seq)
+   (transaction-cursors :type list :accessor controller-transaction-cursors
+                        :initform nil
+                        :documentation "Alist of active transactions and their cursors")
    (deadlock-pid :accessor controller-deadlock-pid :initform nil)
    (deadlock-detect-thread :type (or null t)
                            :accessor controller-deadlock-detect-thread
diff -rN -u old-elephant-1.0/src/db-bdb/bdb-transactions.lisp new-elephant-1.0/src/db-bdb/bdb-transactions.lisp
--- old-elephant-1.0/src/db-bdb/bdb-transactions.lisp	2009-10-14 14:32:59.884710862 +0200
+++ new-elephant-1.0/src/db-bdb/bdb-transactions.lisp	2009-10-14 14:32:59.918038222 +0200
@@ -24,6 +24,20 @@
 (declaim #-elephant-without-optimize (optimize (speed 3))
 	 #+elephant-without-optimize (optimize (speed 1) (safety 3) (debug 3)))
 
+(defun ensure-cursors-closed (sc txn)
+  (awhen (cdr (assoc txn (controller-transaction-cursors sc)))
+    (warn "Transaction ~S for controller ~S has dangling cursors: ~S~%Closing them." txn sc it)
+    (mapc #'cursor-close it)))
+
+(defun register-transaction (sc txn)
+  (push (cons txn nil) (controller-transaction-cursors sc))
+  t)
+
+(defun unregister-transaction (sc txn)
+  (setf (controller-transaction-cursors sc)
+        (remove txn (controller-transaction-cursors sc) :key #'car :test #'eq))
+  t)
+
 (defvar *retry-wait* 0.1
   "Set this to the number of seconds to wait between transaction
 retries. May also be a function of three arguments (transaction
@@ -39,62 +53,86 @@
 				txn-nowait txn-sync (snapshot elephant::*default-mvcc*)
 				inhibit-rollback-fn)
   (declare (ignorable transaction))
-  (let ((env (if environment environment (controller-environment sc))))
+  (let ((env (or environment (controller-environment sc))))
     (loop 
        for count fixnum from 0 to retries
        for success = nil
        do
-       (let ((txn (db-transaction-begin env
-					:parent (if parent parent +NULL-VOID+)
-					:degree-2 degree-2
-					:read-uncommitted read-uncommitted
-					:txn-nosync txn-nosync
-					:txn-nowait txn-nowait
-					:txn-sync txn-sync
-					:snapshot snapshot)))
-	 (declare (type pointer-void txn))
-	 (let (result)
-	   (let ((*current-transaction* (without-interrupts
-                                          (make-transaction-record sc txn *current-transaction*)))
-		 (*store-controller* sc))
-	     (declare (special *current-transaction* *store-controller*))
-	     (catch 'transaction
-	       (unwind-protect
-		    (progn
-		      ;; Run body, inhibit rollback if necessary and resignal
-		      (handler-bind 
-			  ((condition (lambda (c)
-					(when (and inhibit-rollback-fn
-						   (funcall inhibit-rollback-fn c))
-					  ;; Commit if non-local exit is OK
-					  (db-transaction-commit txn 
-								 :txn-nosync txn-nosync
-								 :txn-sync txn-sync)
-					  (setq success :yes))
-					(signal c))))
-			;; Run the body fn
-			(setf result (multiple-value-list (funcall txn-fn)))
-			;; Commit on regular exit
-			(db-transaction-commit txn 
-					       :txn-nosync txn-nosync
-					       :txn-sync txn-sync)
-			(setq success :yes)))
-		 ;; If unhandled non-local exit or commit failure: abort
-		 (unless (eq success :yes)
-		   (without-interrupts
-                     (db-transaction-abort txn))
-                   #+thread-support(bt:thread-yield)
-                   (sleep (etypecase retry-wait
-                            (number retry-wait)
-                            (function (funcall retry-wait txn count retries))))))))
-	   ;; A positive success results in a normal return
-	   (when (eq success :yes)
-	     (return (values-list result)))))
-       finally (cerror "Retry transaction again?"
-		       'transaction-retry-count-exceeded
-		       :format-control "Transaction exceeded the ~A retries limit"
-		       :format-arguments (list retries)
-		       :count retries))))
+       (let (txn)
+         (declare (type (or null pointer-void) txn))
+         (unwind-protect
+             (progn
+               (without-interrupts
+                 (setf txn (db-transaction-begin env
+                                              :parent (if parent parent +NULL-VOID+)
+                                              :degree-2 degree-2
+                                              :read-uncommitted read-uncommitted
+                                              :txn-nosync txn-nosync
+                                              :txn-nowait txn-nowait
+                                              :txn-sync txn-sync
+                                              :snapshot snapshot))
+                 (assert (not (assoc txn (controller-transaction-cursors sc))))
+                 (register-transaction sc txn))
+               (let (result)
+                 (let ((*current-transaction* (make-transaction-record sc txn *current-transaction*))
+                       (*store-controller* sc))
+                   (declare (special *current-transaction* *store-controller*))
+                   (catch 'transaction
+                     (unwind-protect
+                          (progn
+                            ;; Run body, inhibit rollback if necessary and resignal
+                            (handler-bind 
+                                ((condition (lambda (c)
+                                              (without-interrupts
+                                                (when (and inhibit-rollback-fn
+                                                           (funcall inhibit-rollback-fn c))
+                                                  ;; Commit if non-local exit is OK
+                                                  (db-transaction-commit txn 
+                                                                         :txn-nosync txn-nosync
+                                                                         :txn-sync txn-sync)
+                                                  (unregister-transaction sc txn)
+                                                  (setf txn nil)
+                                                  (setq success :yes))
+                                                (signal c)))))
+                              ;; Run the body fn
+                              (setf result (multiple-value-list (funcall txn-fn)))
+                              ;; Commit on regular exit
+                              (ensure-cursors-closed sc txn)
+                              (without-interrupts
+                                (db-transaction-commit txn 
+                                                       :txn-nosync txn-nosync
+                                                       :txn-sync txn-sync)
+                                (unregister-transaction sc txn)
+                                (setf txn nil)
+                                (setq success :yes))))
+                       ;; If unhandled non-local exit or commit failure: abort
+                       (unless (eq success :yes)
+                         (ensure-cursors-closed sc txn)
+                         (without-interrupts
+                           (db-transaction-abort txn)
+                           (unregister-transaction sc txn)
+                           (setf txn nil))
+                         #+thread-support(bt:thread-yield)
+                         (sleep (etypecase retry-wait
+                                  (number retry-wait)
+                                  (function (funcall retry-wait txn count retries))))))))
+                 ;; A positive success results in a normal return
+                 (when (eq success :yes)
+                   (return (values-list result)))))
+         ;; ensure the transaction is closed when we unwind
+         (progn
+           (without-interrupts
+             (when txn
+               (format t "caught txn still running~%")
+               (ensure-cursors-closed sc txn)
+                 (db-transaction-abort txn)
+                 (unregister-transaction sc txn)
+                 (setf txn nil))))))
+         finally (cerror "Retry transaction again?"
+                         'transaction-retry-count-exceeded
+                         :format-control "Transaction exceeded the ~A retries limit"
+                         :format-arguments (list retries)
+                         :count retries))))
 		       
     
 (defmethod controller-start-transaction ((sc bdb-store-controller)
@@ -121,9 +159,13 @@
 (defmethod controller-commit-transaction ((sc bdb-store-controller) transaction 
 					  &key txn-nosync txn-sync &allow-other-keys)
   (assert (not *current-transaction*))
-  (db-transaction-commit transaction :txn-nosync txn-nosync :txn-sync txn-sync))
+  (ensure-cursors-closed sc transaction)
+  (db-transaction-commit transaction :txn-nosync txn-nosync :txn-sync txn-sync)
+  (unregister-transaction sc transaction))
 
 (defmethod controller-abort-transaction ((sc bdb-store-controller) transaction &key &allow-other-keys)
   (assert (not *current-transaction*))
-  (db-transaction-abort transaction))
+  (ensure-cursors-closed sc transaction)
+  (db-transaction-abort transaction)
+  (unregister-transaction sc transaction))
 
diff -rN -u old-elephant-1.0/src/db-bdb/berkeley-db.lisp new-elephant-1.0/src/db-bdb/berkeley-db.lisp
--- old-elephant-1.0/src/db-bdb/berkeley-db.lisp	2009-10-14 14:32:59.884710862 +0200
+++ new-elephant-1.0/src/db-bdb/berkeley-db.lisp	2009-10-14 14:32:59.918038222 +0200
@@ -961,20 +964,20 @@
    for value-length fixnum = (buffer-stream-length value-buffer-stream)
    do
    (multiple-value-bind (errno ret-key-size result-size)
-       (%db-cursor-get-key-buffered cursor 
-				    (buffer-stream-buffer key-buffer-stream)
-				    0 key-length
-				    (buffer-stream-buffer value-buffer-stream)
-				    0 value-length
-				    (flags :current current
-					   :first first
-					   :last last
-					   :next next
-					   :next-dup next-dup
-					   :next-nodup next-nodup
-					   :prev prev
-					   :prev-nodup prev-nodup
-					   :dirty-read (or dirty-read read-uncommitted)))
+       (without-interrupts (%db-cursor-get-key-buffered cursor 
+                             (buffer-stream-buffer key-buffer-stream)
+                             0 key-length
+                             (buffer-stream-buffer value-buffer-stream)
+                             0 value-length
+                             (flags :current current
+                                    :first first
+                                    :last last
+                                    :next next
+                                    :next-dup next-dup
+                                    :next-nodup next-nodup
+                                    :prev prev
+                                    :prev-nodup prev-nodup
+                                    :dirty-read (or dirty-read read-uncommitted))))
      (declare (type fixnum errno ret-key-size result-size))
      (cond 
        ((= errno 0)
@@ -1005,15 +1008,15 @@
    for value-length fixnum = (buffer-stream-length value-buffer-stream)
    do
    (multiple-value-bind (errno ret-key-size result-size)
-       (%db-cursor-get-key-buffered cursor 
-				    (buffer-stream-buffer key-buffer-stream)
-				    (buffer-stream-size key-buffer-stream)
-				    key-length
-				    (buffer-stream-buffer value-buffer-stream)
-				    0 value-length
-				    (flags :set set
-					   :set-range set-range
-					   :dirty-read (or dirty-read read-uncommitted)))
+       (without-interrupts (%db-cursor-get-key-buffered cursor 
+                             (buffer-stream-buffer key-buffer-stream)
+                             (buffer-stream-size key-buffer-stream)
+                             key-length
+                             (buffer-stream-buffer value-buffer-stream)
+                             0 value-length
+                             (flags :set set
+                                    :set-range set-range
+                                    :dirty-read (or dirty-read read-uncommitted))))
      (declare (type fixnum errno ret-key-size result-size))
      (cond 
        ((= errno 0)
@@ -1163,17 +1166,17 @@
    for value-length fixnum = (buffer-stream-length value-buffer-stream)
    do
    (multiple-value-bind (errno ret-key-size ret-pkey-size result-size)
-       (%db-cursor-pget-key-buffered cursor 
-				     (buffer-stream-buffer key-buffer-stream)
-				     (buffer-stream-size key-buffer-stream)
-				     key-length
-				     (buffer-stream-buffer pkey-buffer-stream)
-				     0 pkey-length
-				     (buffer-stream-buffer value-buffer-stream)
-				     0 value-length
-				     (flags :set set
-					    :set-range set-range
-					    :dirty-read dirty-read))
+       (without-interrupts (%db-cursor-pget-key-buffered cursor 
+                             (buffer-stream-buffer key-buffer-stream)
+                             (buffer-stream-size key-buffer-stream)
+                             key-length
+                             (buffer-stream-buffer pkey-buffer-stream)
+                             0 pkey-length
+                             (buffer-stream-buffer value-buffer-stream)
+                             0 value-length
+                             (flags :set set
+                                    :set-range set-range
+                                    :dirty-read dirty-read)))
      (declare (type fixnum errno ret-key-size ret-pkey-size result-size))
      (cond 
        ((= errno 0)
_______________________________________________
elephant-devel site list
elephant-devel@common-lisp.net
http://common-lisp.net/mailman/listinfo/elephant-devel

Reply via email to