This aids in development, but I'm not sure whether it will stay
as-is or be moved into another interface.
---
 lib/PublicInbox/CodeSearchIdx.pm | 32 ++++++++++++++++++++++++++++++++
 script/public-inbox-cindex       |  2 +-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 2480dbd2..e795c2b3 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -1058,6 +1058,37 @@ sub _prep_ibx { # each_inbox callback
                push @{$self->{IBX}}, $ibx;
 }
 
+sub show_roots { # for diagnostics: print docs matching each 'G' term
+       my ($self) = @_;
+       local $self->{xdb}; # restore the caller's value on return
+       my $cur = $self->xdb->allterms_begin('G');
+       my $end = $self->{xdb}->allterms_end('G');
+       my $qrepo = $PublicInbox::Search::X{Query}->new('T'.'r');
+       my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
+       # boolean matching only (no ranking), results in ascending docid order
+       $enq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
+       $enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+       for (; $cur != $end; $cur++) {
+               my $G_oidhex = $cur->get_termname;
+               my $qry = $PublicInbox::Search::X{Query}->new(
+                       PublicInbox::Search::OP_FILTER(), $qrepo, $G_oidhex);
+               $enq->set_query($qry);
+               my ($off, $lim) = (0, 10000); # page through matches
+               say 'commit ',substr($G_oidhex, 1), ' appears in:';
+               while (1) {
+                       my $mset = $enq->get_mset($off, $lim);
+                       my $size = $mset->size or last; # no more matches
+                       for my $x ($mset->items) {
+                               # fetch the document once, list its 'P' terms
+                               for (xap_terms('P', $x->get_document)) {
+                                       say '- /', substr($_, 1);
+                               }
+                       }
+                       $off += $size;
+               }
+       }
+}
+
 sub cidx_run { # main entry point
        my ($self) = @_;
        my $restore_umask = prep_umask($self);
@@ -1150,6 +1181,7 @@ sub cidx_run { # main entry point
        PublicInbox::DS::event_loop($MY_SIG, $SIGSET) if shards_active();
        PublicInbox::DS->Reset;
        $self->lock_release(!!$NCHANGE);
+       show_roots($self) if $self->{-opt}->{'show-roots'} # for diagnostics
 }
 
 sub ipc_atfork_child { # @IDX_SHARDS
diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex
index 888c8b10..0526434c 100755
--- a/script/public-inbox-cindex
+++ b/script/public-inbox-cindex
@@ -29,7 +29,7 @@ GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
                indexlevel|index-level|L=s associate associate-max=i
                associate-date-range=s associate-prefixes=s@
                batch_size|batch-size=s max_size|max-size=s
-               include|I=s@ only=s@ all
+               include|I=s@ only=s@ all show-roots
                project-list=s exclude=s@
                sort-parallel=s sort-compress-program=s sort-buffer-size=s
                d=s update|u scan! prune dry-run|n C=s@ help|h))

Reply via email to