cbaines pushed a commit to branch master
in repository data-service.

commit 77962f7c2c4cb1f6b78e5bac48e9471dee009136
Author: Christopher Baines <[email protected]>
AuthorDate: Wed Aug 7 16:51:57 2024 +0100

    Move inserting derivations in to the load-new-guix-revision module
    
    And start to more closely integrate it. This makes it possible to start 
making
    it faster by doing more in parallel.
---
 guix-data-service/jobs/load-new-guix-revision.scm | 596 +++++++++++++++++-----
 guix-data-service/model/channel-instance.scm      |  34 +-
 guix-data-service/model/derivation.scm            | 280 +---------
 guix-data-service/model/system-test.scm           |  24 +-
 4 files changed, 507 insertions(+), 427 deletions(-)

diff --git a/guix-data-service/jobs/load-new-guix-revision.scm 
b/guix-data-service/jobs/load-new-guix-revision.scm
index ebd067a..c9ec9e1 100644
--- a/guix-data-service/jobs/load-new-guix-revision.scm
+++ b/guix-data-service/jobs/load-new-guix-revision.scm
@@ -25,13 +25,18 @@
   #:use-module (ice-9 textual-ports)
   #:use-module (ice-9 hash-table)
   #:use-module (ice-9 suspendable-ports)
+  #:use-module (ice-9 binary-ports)
   #:use-module ((ice-9 ports internal) #:select (port-poll))
+  #:use-module (rnrs bytevectors)
   #:use-module (rnrs exceptions)
+  #:use-module (lzlib)
   #:use-module (json)
   #:use-module (squee)
+  #:use-module (gcrypt hash)
   #:use-module (fibers)
   #:use-module (fibers channels)
   #:use-module (guix monads)
+  #:use-module (guix base32)
   #:use-module (guix store)
   #:use-module (guix channels)
   #:use-module (guix inferior)
@@ -41,6 +46,7 @@
   #:use-module (guix progress)
   #:use-module (guix packages)
   #:use-module (guix derivations)
+  #:use-module (guix serialization)
   #:use-module (guix build utils)
   #:use-module ((guix build syscalls)
                 #:select (set-thread-name))
@@ -49,6 +55,7 @@
   #:use-module (guix-data-service utils)
   #:use-module (guix-data-service model utils)
   #:use-module (guix-data-service model build)
+  #:use-module (guix-data-service model system)
   #:use-module (guix-data-service model channel-instance)
   #:use-module (guix-data-service model channel-news)
   #:use-module (guix-data-service model package)
@@ -477,10 +484,7 @@
                           (package-derivation store package system))))
                  ;; You don't always get what you ask for, so check
                  (if (string=? system (derivation-system derivation))
-                     (let ((file-name
-                            (derivation-file-name derivation)))
-                       (add-temp-root store file-name)
-                       file-name)
+                     (derivation-file-name derivation)
                      (begin
                        (simple-format
                         (current-error-port)
@@ -907,6 +911,294 @@
     lint-checker-ids
     lint-warnings-data)))
 
+(define (update-derivation-ids-hash-table! conn
+                                           derivation-ids-hash-table
+                                           file-names)
+  (define file-names-count (vector-length file-names))
+
+  (simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n"
+                 file-names-count)
+  (let ((missing-file-names
+         (vector-fold
+          (lambda (_ result file-name)
+            (if (and file-name
+                     (hash-ref derivation-ids-hash-table
+                               file-name))
+                result
+                (cons file-name result)))
+          '()
+          file-names)))
+
+    (simple-format
+     #t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A 
not cached\n"
+     file-names-count (length missing-file-names))
+
+    (unless (null? missing-file-names)
+      (for-each
+       (lambda (chunk)
+         (for-each
+          (match-lambda
+            ((id file-name)
+             (hash-set! derivation-ids-hash-table
+                        file-name
+                        (string->number id))))
+          (exec-query conn (select-existing-derivations chunk))))
+       (chunk! missing-file-names 1000)))))
+
+(define (insert-missing-derivations postgresql-connection-pool
+                                    utility-thread-channel
+                                    derivation-ids-hash-table
+                                    derivations)
+
+  (define (ensure-input-derivations-exist input-derivation-file-names)
+    (unless (null? input-derivation-file-names)
+      (simple-format
+       #t "debug: ensure-input-derivations-exist: processing ~A derivations\n"
+       (length input-derivation-file-names))
+
+      (with-resource-from-pool postgresql-connection-pool conn
+        (update-derivation-ids-hash-table! conn
+                                           derivation-ids-hash-table
+                                           (list->vector
+                                            input-derivation-file-names)))
+      (simple-format
+       #t
+       "debug: ensure-input-derivations-exist: checking for missing input 
derivations\n")
+      (let ((missing-derivations-filenames
+             (remove (lambda (derivation-file-name)
+                       (hash-ref derivation-ids-hash-table
+                                 derivation-file-name))
+                     input-derivation-file-names)))
+
+        (unless (null? missing-derivations-filenames)
+          (simple-format
+           #f
+           "debug: ensure-input-derivations-exist: inserting missing input 
derivations\n")
+          ;; Ensure all the input derivations exist
+          (insert-missing-derivations
+           postgresql-connection-pool
+           utility-thread-channel
+           derivation-ids-hash-table
+           (call-with-worker-thread
+            utility-thread-channel
+            (lambda ()
+              (map read-derivation-from-file
+                   missing-derivations-filenames))))))))
+
+  (define (insert-into-derivations conn drvs)
+    (string-append
+     "INSERT INTO derivations "
+     "(file_name, builder, args, env_vars, system_id) VALUES "
+     (string-join
+      (map (match-lambda
+             (($ <derivation> outputs inputs sources
+                              system builder args env-vars file-name)
+              (simple-format
+               #f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')"
+               file-name
+               builder
+               (string-join (map quote-string args) ",")
+               (string-join (map (match-lambda
+                                   ((key . value)
+                                    (string-append
+                                     "['" key '"', $$"
+                                     value "$$ ]")))
+                                 env-vars)
+                            ",")
+               (system->system-id conn system))))
+           drvs)
+      ",")
+     " RETURNING id"
+     ";"))
+
+  (with-time-logging
+      (simple-format
+       #f "insert-missing-derivations: inserting ~A derivations"
+       (length derivations))
+    (let* ((chunks (chunk derivations 500))
+           (derivation-ids
+            (with-resource-from-pool postgresql-connection-pool conn
+              (append-map!
+               (lambda (chunk)
+                 (map (lambda (result)
+                        (string->number (car result)))
+                      (exec-query conn (insert-into-derivations conn chunk))))
+               chunks))))
+
+      (with-time-logging
+          "insert-missing-derivations: updating hash table"
+        (for-each (lambda (derivation derivation-id)
+                    (hash-set! derivation-ids-hash-table
+                               (derivation-file-name derivation)
+                               derivation-id))
+                  derivations
+                  derivation-ids))
+
+      (with-time-logging
+          "insert-missing-derivations: inserting sources"
+        (for-each
+         (lambda (derivation-id derivation)
+           (let ((sources (derivation-sources derivation)))
+             (unless (null? sources)
+               (let ((sources-ids
+                      (with-resource-from-pool postgresql-connection-pool conn
+                        (insert-derivation-sources conn
+                                                   derivation-id
+                                                   sources))))
+                 (par-map&
+                  (lambda (id source-file)
+                    (match
+                        (with-resource-from-pool postgresql-connection-pool 
conn
+                          (exec-query
+                           conn
+                           "
+SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
+                           (list (number->string id))))
+                      (()
+                       (let ((nar-bytevector
+                              (call-with-worker-thread
+                               utility-thread-channel
+                               (lambda ()
+                                 (call-with-values
+                                     (lambda ()
+                                       (open-bytevector-output-port))
+                                   (lambda (port get-bytevector)
+                                     (write-file source-file port)
+                                     (get-bytevector)))))))
+                         (letpar&
+                             ((compressed-nar-bytevector
+                               (call-with-worker-thread
+                                utility-thread-channel
+                                (lambda ()
+                                  (call-with-values
+                                      (lambda ()
+                                        (open-bytevector-output-port))
+                                    (lambda (port get-bytevector)
+                                      (call-with-lzip-output-port port
+                                        (lambda (port)
+                                          (put-bytevector port nar-bytevector))
+                                        #:level 9)
+                                      (get-bytevector))))))
+                              (hash
+                               (call-with-worker-thread
+                                utility-thread-channel
+                                (lambda ()
+                                  (bytevector->nix-base32-string
+                                   (sha256 nar-bytevector)))))
+                              (uncompressed-size (bytevector-length 
nar-bytevector)))
+
+                           (with-resource-from-pool postgresql-connection-pool 
conn
+                             (insert-derivation-source-file-nar
+                              conn
+                              id
+                              hash
+                              compressed-nar-bytevector
+                              uncompressed-size)))))
+                      (_ #f)))
+                  sources-ids
+                  sources)))))
+         derivation-ids
+         derivations))
+
+      (with-resource-from-pool postgresql-connection-pool conn
+        (with-time-logging
+            "insert-missing-derivations: inserting outputs"
+          (for-each (lambda (derivation-id derivation)
+                      (insert-derivation-outputs conn
+                                                 derivation-id
+                                                 (derivation-outputs 
derivation)))
+                    derivation-ids
+                    derivations)))
+
+      (with-time-logging
+          "insert-missing-derivations: ensure-input-derivations-exist"
+        (ensure-input-derivations-exist (deduplicate-strings
+                                         (map derivation-input-path
+                                              (append-map derivation-inputs
+                                                          derivations)))))
+
+      (with-resource-from-pool postgresql-connection-pool conn
+        (with-time-logging
+            (simple-format
+             #f "insert-missing-derivations: inserting inputs for ~A 
derivations"
+             (length derivations))
+          (insert-derivation-inputs conn
+                                    derivation-ids
+                                    derivations))))))
+
+(define (derivation-file-names->derivation-ids postgresql-connection-pool
+                                               utility-thread-channel
+                                               derivation-file-names)
+  (define derivations-count
+    (vector-length derivation-file-names))
+
+  (if (= 0 derivations-count)
+      #()
+      (let* ((derivation-ids-hash-table (make-hash-table
+                                         ;; Account for more derivations in
+                                         ;; the graph
+                                         (* 2 derivations-count))))
+        (simple-format
+         #t "debug: derivation-file-names->derivation-ids: processing ~A 
derivations\n"
+         derivations-count)
+
+        (with-resource-from-pool postgresql-connection-pool conn
+          (update-derivation-ids-hash-table! conn
+                                             derivation-ids-hash-table
+                                             derivation-file-names))
+
+        (let* ((missing-derivation-filenames
+                (deduplicate-strings
+                 (vector-fold
+                  (lambda (_ result derivation-file-name)
+                    (if (not derivation-file-name)
+                        result
+                        (if (hash-ref derivation-ids-hash-table
+                                      derivation-file-name)
+                            result
+                            (cons derivation-file-name result))))
+                  '()
+                  derivation-file-names)))
+               (missing-derivations-chunked-promises
+                (map
+                 (lambda (chunk)
+                   (fibers-delay
+                    (lambda ()
+                      (map read-derivation-from-file chunk))))
+                 (chunk! missing-derivation-filenames 1000))))
+
+          (for-each
+           (lambda (missing-derivation-filenames-chunk)
+             (let ((missing-derivations-chunk
+                    ;; Do the filter again, since processing the last chunk
+                    ;; might have inserted some of the derivations in this
+                    ;; chunk
+                    (remove! (lambda (derivation)
+                               (hash-ref derivation-ids-hash-table
+                                         (derivation-file-name
+                                          derivation)))
+                             (fibers-force
+                              missing-derivation-filenames-chunk))))
+
+               (unless (null? missing-derivations-chunk)
+                 (insert-missing-derivations postgresql-connection-pool
+                                             utility-thread-channel
+                                             derivation-ids-hash-table
+                                             missing-derivations-chunk))))
+           missing-derivations-chunked-promises))
+
+        (let ((all-ids
+               (vector-map
+                (lambda (_ derivation-file-name)
+                  (if derivation-file-name
+                      (or (hash-ref derivation-ids-hash-table
+                                    derivation-file-name)
+                          (error "missing derivation id"))
+                      #f))
+                derivation-file-names)))
+
+          all-ids))))
+
 (define guix-store-path
   (let ((store-path #f))
     (lambda (store)
@@ -1162,7 +1454,7 @@
                (cons inferior inferior-store)))
            parallelism
            #:min-size 0
-           #:idle-seconds 10
+           #:idle-seconds 30
            #:destructor (match-lambda
                           ((inferior . store)
                            (close-inferior inferior)
@@ -1399,6 +1691,7 @@
 
 (define* (extract-information-from db-conn guix-revision-id commit
                                    guix-source store-item
+                                   utility-thread-channel
                                    #:key skip-system-tests?
                                    extra-inferior-environment-variables
                                    parallelism)
@@ -1454,25 +1747,29 @@
      1
      #:min-size 0))
 
-  (define packages-data-promise
-    (fibers-delay
-     (lambda ()
-       (with-resource-from-pool inf-and-store-pool res
-         (match res
-           ((inferior . inferior-store)
-            (with-time-logging "getting all inferior package data"
-              (let ((packages
-                     pkg-to-replacement-hash-table
-                     (inferior-packages-plus-replacements inferior)))
-                (all-inferior-packages-data
-                 inferior
-                 packages
-                 pkg-to-replacement-hash-table)))))))))
+  (define derivation-file-names->derivation-ids/fiberized
+    (fiberize
+     (lambda (derivation-file-names)
+       (derivation-file-names->derivation-ids
+        postgresql-connection-pool
+        utility-thread-channel
+        derivation-file-names))))
 
   (define package-ids-promise
     (fibers-delay
      (lambda ()
-       (let ((packages-data (fibers-force packages-data-promise)))
+       (let ((packages-data
+              (with-resource-from-pool inf-and-store-pool res
+                (match res
+                  ((inferior . inferior-store)
+                   (with-time-logging "getting all inferior package data"
+                     (let ((packages
+                            pkg-to-replacement-hash-table
+                            (inferior-packages-plus-replacements inferior)))
+                       (all-inferior-packages-data
+                        inferior
+                        packages
+                        pkg-to-replacement-hash-table))))))))
          (with-resource-from-pool postgresql-connection-pool conn
            (insert-packages conn packages-data))))))
 
@@ -1534,94 +1831,99 @@
 
   (define (extract-and-store-package-derivations)
     (define packages-count
-      (vector-length
-       (assq-ref (fibers-force packages-data-promise)
-                 'names)))
+      (with-resource-from-pool inf-and-store-pool res
+        (match res
+          ((inferior . inferior-store)
+           (ensure-gds-inferior-packages-defined! inferior)
 
-    (define chunk-size 3000)
+           (inferior-eval '(vector-length gds-inferior-packages) inferior)))))
 
-    (fibers-for-each
-     (match-lambda
-       ((system . target)
-        (let loop ((wal-bytes (stat:size (stat "/var/guix/db/db.sqlite-wal"))))
-          (when (> wal-bytes (* 512 (expt 2 20)))
-            (simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n"
-                           wal-bytes)
+    (define chunk-size 5000)
 
-            (sleep 30)
-            (loop (stat:size (stat "/var/guix/db/db.sqlite-wal")))))
+    (define (process-system-and-target system target)
+      (let loop ((wal-bytes
+                  (catch #t
+                    (lambda ()
+                      (stat:size (stat "/var/guix/db/db.sqlite-wal")))
+                    (lambda _ 0))))
+        (when (> wal-bytes (* 512 (expt 2 20)))
+          (simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n"
+                         wal-bytes)
+
+          (sleep 30)
+          (loop (catch #t
+                  (lambda ()
+                    (stat:size (stat "/var/guix/db/db.sqlite-wal")))
+                  (lambda _ 0)))))
 
+      (with-time-logging
+          (simple-format #f "processing derivations for ~A" (cons system 
target))
         (let ((derivations-vector (make-vector packages-count)))
           (with-time-logging
               (simple-format #f "getting derivations for ~A" (cons system 
target))
             (let loop ((start-index 0))
-              (if (>= (+ start-index chunk-size) packages-count)
-                  (let* ((remaining-count
-                          (- packages-count start-index))
-                         (chunk
-                          (with-resource-from-pool inf-and-store-pool res
-                            (match res
-                              ((inferior . inferior-store)
-                               (ensure-gds-inferior-packages-defined! inferior)
-
-                               (inferior-package-derivations
-                                inferior-store
-                                inferior
-                                system
-                                target
-                                start-index
-                                remaining-count))))))
-                    (vector-copy! derivations-vector
-                                  start-index
-                                  chunk))
-                  (let ((chunk
-                         (with-resource-from-pool inf-and-store-pool res
-                           (match res
-                             ((inferior . inferior-store)
-                              (ensure-gds-inferior-packages-defined! inferior)
-
-                              (inferior-package-derivations
-                               inferior-store
-                               inferior
-                               system
-                               target
-                               start-index
-                               chunk-size))))))
-                    (vector-copy! derivations-vector
-                                  start-index
-                                  chunk)
-                    (loop (+ start-index chunk-size))))))
-
-          (let ((package-ids (fibers-force package-ids-promise)))
-            (with-resource-from-pool postgresql-connection-pool conn
-              (let* ((derivation-ids
+              (let* ((count
+                      (if (>= (+ start-index chunk-size) packages-count)
+                          (- packages-count start-index)
+                          chunk-size))
+                     (chunk
+                      (with-resource-from-pool inf-and-store-pool res
+                        (match res
+                          ((inferior . inferior-store)
+                           (ensure-gds-inferior-packages-defined! inferior)
+
+                           (inferior-package-derivations
+                            inferior-store
+                            inferior
+                            system
+                            target
+                            start-index
+                            count))))))
+                (vector-copy! derivations-vector
+                              start-index
+                              chunk)
+                (unless (>= (+ start-index chunk-size) packages-count)
+                  (loop (+ start-index chunk-size))))))
+
+          (let* ((derivation-ids
+                  (with-time-logging
+                      (simple-format #f "derivation-file-names->derivation-ids 
(~A ~A)"
+                                     system target)
+                    (derivation-file-names->derivation-ids/fiberized
+                     derivations-vector))))
+
+            (let* ((package-ids (fibers-force package-ids-promise))
+                   (package-derivation-ids
+                    (with-resource-from-pool postgresql-connection-pool conn
                       (with-time-logging
-                          (simple-format #f 
"derivation-file-names->derivation-ids (~A ~A)"
+                          (simple-format #f "insert-package-derivations (~A 
~A)"
                                          system target)
-                        (derivation-file-names->derivation-ids
-                         conn
-                         derivations-vector))))
-
-                (let ((package-derivation-ids
-                       (with-time-logging
-                           (simple-format #f "insert-package-derivations (~A 
~A)"
-                                          system target)
-                         (insert-package-derivations conn
-                                                     system
-                                                     (or target "")
-                                                     package-ids
-                                                     derivation-ids))))
-                  (chunk-for-each! (lambda (package-derivation-ids-chunk)
-                                     (insert-guix-revision-package-derivations
-                                      conn
-                                      guix-revision-id
-                                      package-derivation-ids-chunk))
-                                   2000
-                                   package-derivation-ids))))))))
-     (with-resource-from-pool inf-and-store-pool res
-       (match res
-         ((inferior . inferior-store)
-          (inferior-fetch-system-target-pairs inferior)))))
+                        (insert-package-derivations conn
+                                                    system
+                                                    (or target "")
+                                                    package-ids
+                                                    derivation-ids)))))
+              (chunk-for-each!
+               (lambda (package-derivation-ids-chunk)
+                 (with-resource-from-pool postgresql-connection-pool conn
+                   (insert-guix-revision-package-derivations
+                    conn
+                    guix-revision-id
+                    package-derivation-ids-chunk)))
+               2000
+               package-derivation-ids))))))
+
+    (let ((process-system-and-target/fiberized
+           (fiberize process-system-and-target
+                     #:parallelism parallelism)))
+      (par-map&
+       (match-lambda
+         ((system . target)
+          (process-system-and-target/fiberized system target)))
+       (with-resource-from-pool inf-and-store-pool res
+         (match res
+           ((inferior . inferior-store)
+            (inferior-fetch-system-target-pairs inferior))))))
 
     (with-resource-from-pool postgresql-connection-pool conn
       (with-time-logging
@@ -1635,7 +1937,7 @@
         (begin
           (simple-format #t "debug: skipping system tests\n")
           '())
-        (let ((data
+        (let ((data-with-derivation-file-names
                (with-resource-from-pool inf-and-store-pool res
                  (match res
                    ((inferior . inferior-store)
@@ -1645,22 +1947,41 @@
                        inferior-store
                        guix-source
                        commit)))))))
-          (when data
-            (with-resource-from-pool postgresql-connection-pool conn
-              (insert-system-tests-for-guix-revision conn
-                                                     guix-revision-id
-                                                     data))))))
-
-  (simple-format #t "debug: extract-information-from: ~A\n" store-path)
-  (parallel-via-fibers
-   (fibers-force package-ids-promise)
-   (extract-and-store-lint-checkers-and-warnings)
-   (extract-and-store-package-derivations)
-   (extract-and-store-system-tests)))
+          (when data-with-derivation-file-names
+            (let ((data-with-derivation-ids
+                   (map (match-lambda
+                          ((name description derivation-file-names-by-system 
location-data)
+                           (list name
+                                 description
+                                 (let ((systems
+                                        (map car 
derivation-file-names-by-system))
+                                       (derivation-ids
+                                        
(derivation-file-names->derivation-ids/fiberized
+                                         (list->vector
+                                          (map cdr 
derivation-file-names-by-system)))))
+                                   (map cons systems derivation-ids))
+                                 location-data)))
+                        data-with-derivation-file-names)))
+              (with-resource-from-pool postgresql-connection-pool conn
+                (insert-system-tests-for-guix-revision
+                 conn
+                 guix-revision-id
+                 data-with-derivation-ids)))))))
+
+  (with-time-logging
+      (simple-format #f "extract-information-from: ~A\n" store-path)
+    (parallel-via-fibers
+     (fibers-force package-ids-promise)
+     (extract-and-store-package-derivations)
+     (extract-and-store-system-tests)
+     (extract-and-store-lint-checkers-and-warnings)))
+
+  #t)
 
 (prevent-inlining-for-tests extract-information-from)
 
-(define (load-channel-instances git-repository-id commit
+(define (load-channel-instances utility-thread-channel
+                                git-repository-id commit
                                 channel-derivations-by-system)
   ;; Load the channel instances in a different transaction, so that this can
   ;; commit prior to the outer transaction
@@ -1685,19 +2006,35 @@
                (guix-revision-id
                 (or existing-guix-revision-id
                     (insert-guix-revision channel-instances-conn
-                                          git-repository-id commit))))
+                                          git-repository-id commit)))
+               (postgresql-connection-pool
+                (make-resource-pool
+                 (const channel-instances-conn)
+                 1
+                 #:min-size 0)))
+
           (unless existing-guix-revision-id
-            (insert-channel-instances channel-instances-conn
-                                      guix-revision-id
-                                      (filter-map
-                                       (match-lambda
-                                         ((system . derivations)
-                                          (and=>
-                                           (assoc-ref derivations
-                                                      'manifest-entry-item)
-                                           (lambda (drv)
-                                             (cons system drv)))))
-                                       channel-derivations-by-system))
+            (let* ((derivations-by-system
+                    (filter-map
+                     (match-lambda
+                       ((system . derivations)
+                        (and=>
+                         (assoc-ref derivations
+                                    'manifest-entry-item)
+                         (lambda (drv)
+                           (cons system drv)))))
+                     channel-derivations-by-system))
+                   (derivation-ids
+                    (derivation-file-names->derivation-ids
+                     postgresql-connection-pool
+                     utility-thread-channel
+                     (list->vector (map cdr derivations-by-system)))))
+
+              (insert-channel-instances channel-instances-conn
+                                        guix-revision-id
+                                        (map cons
+                                             (map car derivations-by-system)
+                                             (vector->list derivation-ids))))
             (simple-format
              (current-error-port)
              "guix-data-service: saved the channel instance derivations to the 
database\n"))
@@ -1709,6 +2046,13 @@
 (define* (load-new-guix-revision conn git-repository-id commit
                                  #:key skip-system-tests? parallelism
                                  extra-inferior-environment-variables)
+  (define utility-thread-channel
+    (make-worker-thread-channel
+     (const '())
+     #:parallelism parallelism))
+
+  (%worker-thread-default-timeout #f)
+
   (let* ((git-repository-fields
           (select-git-repository conn git-repository-id))
          (git-repository-url
@@ -1727,7 +2071,8 @@
            fetch-with-authentication?
            #:parallelism parallelism))
          (guix-revision-id
-          (load-channel-instances git-repository-id commit
+          (load-channel-instances utility-thread-channel
+                                  git-repository-id commit
                                   channel-derivations-by-system)))
     (let ((store-item
            (channel-derivations-by-system->guix-store-item
@@ -1737,6 +2082,7 @@
            (extract-information-from conn
                                      guix-revision-id
                                      commit guix-source store-item
+                                     utility-thread-channel
                                      #:skip-system-tests?
                                      skip-system-tests?
                                      #:extra-inferior-environment-variables
diff --git a/guix-data-service/model/channel-instance.scm 
b/guix-data-service/model/channel-instance.scm
index 84fc901..2cce2da 100644
--- a/guix-data-service/model/channel-instance.scm
+++ b/guix-data-service/model/channel-instance.scm
@@ -22,36 +22,28 @@
   #:use-module (json)
   #:use-module (guix utils)
   #:use-module (guix-data-service model utils)
-  #:use-module (guix-data-service model derivation)
   #:export (insert-channel-instances
             channel-instances-exist-for-guix-revision?
             select-channel-instances-for-guix-revision))
 
 (define (insert-channel-instances conn
                                   guix-revision-id
-                                  derivations-by-system)
-  (let ((derivation-ids
-         (derivation-file-names->derivation-ids
-          conn
-          (list->vector
-           (map cdr derivations-by-system)))))
-
-    (exec-query
-     conn
-     (string-append
-      "
+                                  derivation-ids-by-system)
+  (exec-query
+   conn
+   (string-append
+    "
 INSERT INTO channel_instances
   (guix_revision_id, system, derivation_id)
 VALUES "
-      (string-join
-       (map (lambda (system derivation-id)
-              (simple-format #f "(~A, '~A', ~A)"
-                             guix-revision-id
-                             system
-                             derivation-id))
-            (map car derivations-by-system)
-            (vector->list derivation-ids))
-       ", "))))
+    (string-join
+     (map (lambda (derivation-id-and-system)
+            (simple-format #f "(~A, '~A', ~A)"
+                           guix-revision-id
+                           (car derivation-id-and-system)
+                           (cdr derivation-id-and-system)))
+          derivation-ids-by-system)
+     ", ")))
   #t)
 
 (define (channel-instances-exist-for-guix-revision? conn commit-hash)
diff --git a/guix-data-service/model/derivation.scm 
b/guix-data-service/model/derivation.scm
index 1b82889..35b1a29 100644
--- a/guix-data-service/model/derivation.scm
+++ b/guix-data-service/model/derivation.scm
@@ -23,13 +23,10 @@
   #:use-module (ice-9 format)
   #:use-module (ice-9 binary-ports)
   #:use-module (rnrs bytevectors)
-  #:use-module (gcrypt hash)
   #:use-module (squee)
   #:use-module (json)
   #:use-module (guix base16)
   #:use-module (guix base32)
-  #:use-module (guix serialization)
-  #:use-module (lzlib)
   #:use-module (guix inferior)
   #:use-module (guix memoization)
   #:use-module (guix derivations)
@@ -58,6 +55,10 @@
             select-fixed-output-package-derivations-in-revision
             select-derivation-outputs-in-revision
             fix-derivation-output-details-hash-encoding
+            insert-derivation-sources
+            insert-derivation-source-file-nar
+            insert-derivation-outputs
+            insert-derivation-inputs
             derivation-output-details->derivation-output-details-ids
             derivation-output-details-ids->derivation-output-details-set-id
             select-derivations-by-revision-name-and-version
@@ -66,7 +67,6 @@
             select-existing-derivations
             select-derivations-by-id
             select-derivations-and-build-status
-            derivation-file-names->derivation-ids
             update-derivation-inputs-statistics
             vacuum-derivation-inputs-table
             update-derivation-outputs-statistics
@@ -1487,38 +1487,11 @@ INNER JOIN derivation_outputs
 
     sources-ids))
 
-(define (insert-derivation-source-file-nar conn id source-file)
-  (define missing?
-    (match (exec-query
-            conn
-            "SELECT 1 FROM derivation_source_file_nars WHERE 
derivation_source_file_id = $1"
-            (list (number->string id)))
-      (() #t)
-      (_ #f)))
-
-  (when missing?
-    (let* ((nar-bytevector   (call-with-values
-                                 (lambda ()
-                                   (open-bytevector-output-port))
-                               (lambda (port get-bytevector)
-                                 (write-file source-file port)
-                                 (get-bytevector))))
-           (data-string       (bytevector->base16-string
-                               (call-with-values
-                                   (lambda ()
-                                     (open-bytevector-output-port))
-                                 (lambda (port get-bytevector)
-                                   (call-with-lzip-output-port port
-                                     (lambda (port)
-                                       (put-bytevector port nar-bytevector))
-                                     #:level 9)
-                                   (get-bytevector)))))
-           (hash              (bytevector->nix-base32-string
-                               (sha256 nar-bytevector)))
-           (uncompressed-size (bytevector-length nar-bytevector)))
-      (exec-query
-       conn
-       "
+(define (insert-derivation-source-file-nar conn id
+                                           hash bytevector uncompressed-size)
+  (exec-query
+   conn
+   "
 INSERT INTO derivation_source_file_nars (
   derivation_source_file_id,
   compression,
@@ -1527,12 +1500,12 @@ INSERT INTO derivation_source_file_nars (
   uncompressed_size,
   data
 ) VALUES ($1, $2, $3, $4, $5, $6)"
-       (list (number->string id)
-             "lzip"
-             "sha256"
-             hash
-             (number->string uncompressed-size)
-             (string-append "\\x" data-string))))))
+   (list (number->string id)
+         "lzip"
+         "sha256"
+         hash
+         (number->string uncompressed-size)
+         (string-append "\\x" (bytevector->base16-string bytevector)))))
 
 (define* (backfill-derivation-source-file-nars conn #:key
                                                (batch-size 10000)
@@ -1564,130 +1537,6 @@ LIMIT $1"
        batch)
       (when loop? (loop (missing-batch))))))
 
-(define (insert-missing-derivations conn
-                                    derivation-ids-hash-table
-                                    derivations)
-  (define (ensure-input-derivations-exist input-derivation-file-names)
-    (unless (null? input-derivation-file-names)
-      (simple-format
-       #t "debug: ensure-input-derivations-exist: processing ~A derivations\n"
-       (length input-derivation-file-names))
-
-      (update-derivation-ids-hash-table! conn
-                                         derivation-ids-hash-table
-                                         (list->vector
-                                          input-derivation-file-names))
-      (simple-format
-       #t
-       "debug: ensure-input-derivations-exist: checking for missing input 
derivations\n")
-      (let ((missing-derivations-filenames
-             (filter (lambda (derivation-file-name)
-                       (not (hash-ref derivation-ids-hash-table
-                                      derivation-file-name)))
-                     input-derivation-file-names)))
-
-        (unless (null? missing-derivations-filenames)
-          (simple-format
-           #f
-           "debug: ensure-input-derivations-exist: inserting missing input 
derivations\n")
-          ;; Ensure all the input derivations exist
-          (insert-missing-derivations
-           conn
-           derivation-ids-hash-table
-           (map read-derivation-from-file
-                missing-derivations-filenames))))))
-
-  (define (insert-into-derivations dervs)
-    (string-append
-     "INSERT INTO derivations "
-     "(file_name, builder, args, env_vars, system_id) VALUES "
-     (string-join
-      (map (match-lambda
-             (($ <derivation> outputs inputs sources
-                              system builder args env-vars file-name)
-              (simple-format
-               #f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')"
-               file-name
-               builder
-               (string-join (map quote-string args) ",")
-               (string-join (map (match-lambda
-                                   ((key . value)
-                                    (string-append
-                                     "['" key '"', $$"
-                                     value "$$ ]")))
-                                 env-vars)
-                            ",")
-               (system->system-id conn system))))
-           dervs)
-      ",")
-     " RETURNING id"
-     ";"))
-
-  (with-time-logging
-      (simple-format
-       #f "insert-missing-derivations: inserting ~A derivations"
-       (length derivations))
-    (let ((derivation-ids
-           (append-map
-            (lambda (chunk)
-              (map (lambda (result)
-                     (string->number (car result)))
-                   (exec-query conn (insert-into-derivations chunk))))
-            (chunk derivations 500))))
-
-      (with-time-logging
-          "insert-missing-derivations: updating hash table"
-        (for-each (lambda (derivation derivation-id)
-                    (hash-set! derivation-ids-hash-table
-                               (derivation-file-name derivation)
-                               derivation-id))
-                  derivations
-                  derivation-ids))
-
-      (with-time-logging
-          "insert-missing-derivations: inserting outputs"
-        (for-each (lambda (derivation-id derivation)
-                    (insert-derivation-outputs conn
-                                               derivation-id
-                                               (derivation-outputs 
derivation)))
-                  derivation-ids
-                  derivations))
-
-      (with-time-logging
-          "insert-missing-derivations: inserting sources"
-        (for-each (lambda (derivation-id derivation)
-                    (let ((sources (derivation-sources derivation)))
-                      (unless (null? sources)
-                        (let ((sources-ids
-                               (insert-derivation-sources conn
-                                                          derivation-id
-                                                          sources)))
-                          (map (lambda (id source-file)
-                                 (insert-derivation-source-file-nar conn
-                                                                    id
-                                                                    
source-file))
-                               sources-ids
-                               sources)))))
-                  derivation-ids
-                  derivations))
-
-      (with-time-logging
-          "insert-missing-derivations: ensure-input-derivations-exist"
-        (ensure-input-derivations-exist (deduplicate-strings
-                                         (map derivation-input-path
-                                              (append-map derivation-inputs
-                                                          derivations)))))
-
-      (with-time-logging
-          (simple-format
-           #f "insert-missing-derivations: inserting inputs for ~A derivations"
-           (length derivations))
-        (insert-derivation-inputs conn
-                                  derivation-ids
-                                  derivations))
-
-      derivation-ids)))
-
 (define (select-derivations-by-id conn ids)
   (define query
     (string-append "SELECT id, file_name "
@@ -1772,40 +1621,6 @@ WHERE " criteria ";"))
    '()
    sorted-derivations))
 
-(define (update-derivation-ids-hash-table! conn
-                                           derivation-ids-hash-table
-                                           file-names)
-  (define file-names-count (vector-length file-names))
-
-  (simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n"
-                 file-names-count)
-  (let ((missing-file-names
-         (vector-fold
-          (lambda (_ result file-name)
-            (if (and file-name
-                     (hash-ref derivation-ids-hash-table
-                               file-name))
-                result
-                (cons file-name result)))
-          '()
-          file-names)))
-
-    (simple-format
-     #t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A 
not cached\n"
-     file-names-count (length missing-file-names))
-
-    (unless (null? missing-file-names)
-      (for-each
-       (lambda (chunk)
-         (for-each
-          (match-lambda
-            ((id file-name)
-             (hash-set! derivation-ids-hash-table
-                        file-name
-                        (string->number id))))
-          (exec-query conn (select-existing-derivations chunk))))
-       (chunk! missing-file-names 1000)))))
-
 (define (insert-source-files-missing-nars conn derivation-ids)
   (define (derivation-ids->next-related-derivation-ids! ids seen-ids)
     (delete-duplicates/sort!
@@ -1888,71 +1703,6 @@ INNER JOIN derivation_source_files
           next-related-derivation-ids
           seen-ids))))))
 
-(define (derivation-file-names->derivation-ids conn derivation-file-names)
-  (define derivations-count
-    (vector-length derivation-file-names))
-
-  (if (= 0 derivations-count)
-      #()
-      (let* ((derivation-ids-hash-table (make-hash-table
-                                         ;; Account for more derivations in
-                                         ;; the graph
-                                         (* 2 derivations-count))))
-        (simple-format
-         #t "debug: derivation-file-names->derivation-ids: processing ~A 
derivations\n"
-         derivations-count)
-
-        (update-derivation-ids-hash-table! conn
-                                           derivation-ids-hash-table
-                                           derivation-file-names)
-
-        (let ((missing-derivation-filenames
-               (deduplicate-strings
-                (vector-fold
-                 (lambda (_ result derivation-file-name)
-                   (if (not derivation-file-name)
-                       result
-                       (if (hash-ref derivation-ids-hash-table
-                                     derivation-file-name)
-                           result
-                           (cons derivation-file-name result))))
-                 '()
-                 derivation-file-names))))
-
-          (chunk-for-each!
-           (lambda (missing-derivation-filenames-chunk)
-             (let ((missing-derivations-chunk
-                    (with-time-logging
-                        (simple-format #f "reading ~A missing derivations"
-                                       (length 
missing-derivation-filenames-chunk))
-                      (map read-derivation-from-file
-                           ;; Do the filter again, since processing the last
-                           ;; chunk might have inserted some of the
-                           ;; derivations in this chunk
-                           (filter (lambda (derivation-file-name)
-                                     (not (hash-ref derivation-ids-hash-table
-                                                    derivation-file-name)))
-                                   missing-derivation-filenames-chunk)))))
-
-               (unless (null? missing-derivations-chunk)
-                 (insert-missing-derivations conn
-                                             derivation-ids-hash-table
-                                             missing-derivations-chunk))))
-           1000
-           missing-derivation-filenames)
-
-          (let ((all-ids
-                 (vector-map
-                  (lambda (_ derivation-file-name)
-                    (if derivation-file-name
-                        (or (hash-ref derivation-ids-hash-table
-                                      derivation-file-name)
-                            (error "missing derivation id"))
-                        #f))
-                  derivation-file-names)))
-
-            all-ids)))))
-
 (define (update-derivation-inputs-statistics conn)
   (let ((query
          "
diff --git a/guix-data-service/model/system-test.scm 
b/guix-data-service/model/system-test.scm
index fe2fb83..ab438b7 100644
--- a/guix-data-service/model/system-test.scm
+++ b/guix-data-service/model/system-test.scm
@@ -23,7 +23,6 @@
   #:use-module (guix utils)
   #:use-module (guix-data-service model utils)
   #:use-module (guix-data-service model location)
-  #:use-module (guix-data-service model derivation)
   #:export (insert-system-tests-for-guix-revision
 
             select-system-tests-for-guix-revision
@@ -39,7 +38,7 @@
              "system_tests"
              '(name description location_id)
              (map (match-lambda
-                    ((name description derivation-file-names-by-system 
location-data)
+                    ((name description derivation-ids-by-system location-data)
                      (list name
                            description
                            (location->location-id
@@ -48,20 +47,13 @@
                   system-test-data)))
            (data
             (append-map
-             (lambda (system-test-id derivation-file-names-by-system)
-               (let ((systems
-                      (map car derivation-file-names-by-system))
-                     (derivation-ids
-                      (derivation-file-names->derivation-ids
-                       conn
-                       (map cdr derivation-file-names-by-system))))
-                 (map (lambda (system derivation-id)
-                        (list guix-revision-id
-                              system-test-id
-                              derivation-id
-                              system))
-                      systems
-                      derivation-ids)))
+             (lambda (system-test-id derivation-ids-by-system)
+               (map (lambda (system-and-derivation-id)
+                      (list guix-revision-id
+                            system-test-id
+                            (cdr system-and-derivation-id)
+                            (car system-and-derivation-id)))
+                    derivation-ids-by-system))
              system-test-ids
              (map third system-test-data))))
 

Reply via email to