Retrieving Xapian document terms, data (and possibly values) and
transferring them to the Perl side would increase complexity and
I/O on both the Perl and C++ sides.  It would require more I/O
in C++ and transient memory use on the Perl side, where slow mset
iteration gives an opportunity to dictate memory release rate.

So let's ignore the document-related stuff here for now for
ease-of-development.  We can reconsider this change if dropping
Xapian Perl bindings entirely and relying on JAOT C++ ever
becomes a possibility.
---
 lib/PublicInbox/Search.pm    |  1 -
 lib/PublicInbox/XapHelper.pm |  5 -----
 lib/PublicInbox/xap_helper.h |  2 --
 lib/PublicInbox/xh_mset.h    | 24 ------------------------
 t/cindex.t                   | 17 +++++++++--------
 t/xap_helper.t               | 21 +++++----------------
 6 files changed, 14 insertions(+), 56 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 678c8c5d..0196dd45 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -89,7 +89,6 @@ our @XH_SPEC = (
        'r', # 1=relevance then column
        't', # collapse threads
        'A=s@', # prefixes
-       'D', # emit docdata
        'K=i', # timeout kill after i seconds
        'O=s', # eidx_key
        'T=i', # threadid
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 8c7732f5..27d98ba1 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -150,11 +150,6 @@ sub mset_iter ($$) {
        eval {
                my $buf = $it->get_docid;
                $buf .= "\0".$it->get_percent if $req->{p};
-               my $doc = ($req->{A} || $req->{D}) ? $it->get_document : undef;
-               for my $p (@{$req->{A}}) {
-                       $buf .= "\0".$p.$_ for xap_terms($p, $doc);
-               }
-               $buf .= "\0".$doc->get_data if $req->{D};
                say { $req->{0} } $buf;
        };
        $@ ? iter_retry_check($req) : 0;
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 0e6805b3..872f063d 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -142,7 +142,6 @@ struct req { // argv and pfxv point into global rbuf
        bool code_search;
        bool relevance; // sort by relevance before column
        bool emit_percent;
-       bool emit_docdata;
        bool asc; // ascending sort
 };
 
@@ -641,7 +640,6 @@ static void dispatch(struct req *req)
                        if (MY_ARG_MAX == req->pfxc)
                                ABORT("too many -A");
                        break;
-               case 'D': req->emit_docdata = true; break;
                case 'K':
                        req->timeout_sec = strtoul(optarg, &end, 10);
                        if (*end || req->timeout_sec == ULONG_MAX)
diff --git a/lib/PublicInbox/xh_mset.h b/lib/PublicInbox/xh_mset.h
index 4e97a284..3727a932 100644
--- a/lib/PublicInbox/xh_mset.h
+++ b/lib/PublicInbox/xh_mset.h
@@ -3,20 +3,6 @@
 // This file is only intended to be included by xap_helper.h
 // it implements pieces used by WWW, IMAP and lei
 
-static void emit_doc_term(FILE *fp, const char *pfx, Xapian::Document *doc)
-{
-       Xapian::TermIterator cur = doc->termlist_begin();
-       Xapian::TermIterator end = doc->termlist_end();
-       size_t pfx_len = strlen(pfx);
-
-       for (cur.skip_to(pfx); cur != end; cur++) {
-               std::string tn = *cur;
-               if (!starts_with(&tn, pfx, pfx_len)) break;
-               fputc(0, fp);
-               fwrite(tn.data(), tn.size(), 1, fp);
-       }
-}
-
 static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
                                Xapian::MSetIterator *i)
 {
@@ -24,16 +10,6 @@ static enum exc_iter mset_iter(const struct req *req, FILE *fp, off_t off,
                fprintf(fp, "%llu", (unsigned long long)(*(*i))); // get_docid
                if (req->emit_percent)
                        fprintf(fp, "%c%d", 0, i->get_percent());
-               if (req->pfxc || req->emit_docdata) {
-                       Xapian::Document doc = i->get_document();
-                       for (int p = 0; p < req->pfxc; p++)
-                               emit_doc_term(fp, req->pfxv[p], &doc);
-                       if (req->emit_docdata) {
-                               std::string d = doc.get_data();
-                               fputc(0, fp);
-                               fwrite(d.data(), d.size(), 1, fp);
-                       }
-               }
                fputc('\n', fp);
        } catch (const Xapian::DatabaseModifiedError & e) {
                req->srch->db->reopen();
diff --git a/t/cindex.t b/t/cindex.t
index e5f26ec3..acd74a5d 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -147,17 +147,18 @@ if ('multi-repo search') {
 
 my $test_xhc = sub {
        my ($xhc) = @_;
+       my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
        my $impl = $xhc->{impl};
        my ($r, @l);
-       $r = $xhc->mkreq([], qw(mset -D -c -g), $zp_git, @xh_args, 'NUL');
+       $r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL');
        chomp(@l = <$r>);
        is(shift(@l), 'mset.size=2', "got expected header $impl");
        my %docid2data;
        my @got = sort map {
-               my @f = split /\0/;
-               is scalar(@f), 2, 'got 2 entries';
-               $docid2data{$f[0]} = $f[1];
-               $f[1];
+               my ($docid, @extra) = split /\0/;
+               is scalar(@extra), 0, 'no extra fields';
+               $docid2data{$docid} =
+                       $csrch->xdb->get_document($docid)->get_data;
        } @l;
        is_deeply(\@got, $exp, "expected doc_data $impl");
 
@@ -166,7 +167,6 @@ my $test_xhc = sub {
        is(shift(@l), 'mset.size=0', "got miss in wrong dir $impl");
        is_deeply(\@l, [], "no extra lines $impl");
 
-       my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
        while (my ($did, $expect) = each %docid2data) {
                is_deeply($csrch->xdb->get_document($did)->get_data,
                        $expect, "docid=$did data matches");
@@ -179,14 +179,15 @@ SKIP: {
        require_mods('+SCM_RIGHTS', 1);
        require PublicInbox::XapClient;
        my $xhc = PublicInbox::XapClient::start_helper('-j0');
-       $test_xhc->($xhc);
+       my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
+       $test_xhc->($xhc, $csrch);
        skip 'PI_NO_CXX set', 1 if $ENV{PI_NO_CXX};
        $xhc->{impl} =~ /Cxx/ or
                skip 'C++ compiler or xapian development libs missing', 1;
        skip 'TEST_XH_CXX_ONLY set', 1 if $ENV{TEST_XH_CXX_ONLY};
        local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test
        $xhc = PublicInbox::XapClient::start_helper('-j0');
-       $test_xhc->($xhc);
+       $test_xhc->($xhc, $csrch);
 }
 
 if ('--update') {
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 0f474608..70c634ac 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -204,7 +204,7 @@ for my $n (@NO_CXX) {
        $err = do { local $/; <$err_r> };
        is $err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})";
 
-       $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+       $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
                                'dfn:lib/PublicInbox/Search.pm');
        chomp((my $hdr, @res) = readline($r));
        is $hdr, 'mset.size=1', "got expected header via mset ($xhc->{impl}";
@@ -212,15 +212,14 @@ for my $n (@NO_CXX) {
        @res = split /\0/, $res[0];
        {
                my $doc = $v2->search->xdb->get_document($res[0]);
+               ok $doc, 'valid document retrieved';
                my @q = PublicInbox::Search::xap_terms('Q', $doc);
                is_deeply \@q, [ $mid ], 'docid usable';
        }
        ok $res[1] > 0 && $res[1] <= 100, 'pct > 0 && <= 100';
-       is $res[2], 'XDFID'.$dfid, 'XDFID result matches';
-       is $res[3], 'Q'.$mid, 'Q (msgid) mset result matches';
-       is scalar(@res), 4, 'only 4 columns in result';
+       is scalar(@res), 2, 'only 2 columns in result';
 
-       $r = $xhc->mkreq([], qw(mset -p -A XDFID -A Q), @ibx_shard_args,
+       $r = $xhc->mkreq([], qw(mset -p), @ibx_shard_args,
                                'dt:19700101'.'000000..');
        chomp(($hdr, @res) = readline($r));
        is $hdr, 'mset.size=6',
@@ -231,17 +230,7 @@ for my $n (@NO_CXX) {
                my $doc = $v2->search->xdb->get_document($docid);
                ok $pct > 0 && $pct <= 100,
                        "pct > 0 && <= 100 #$docid ($xhc->{impl})";
-               my %terms;
-               for (@rest) {
-                       s/\A([A-Z]+)// or xbail 'no prefix=', \@rest;
-                       push @{$terms{$1}}, $_;
-               }
-               while (my ($pfx, $vals) = each %terms) {
-                       @$vals = sort @$vals;
-                       my @q = PublicInbox::Search::xap_terms($pfx, $doc);
-                       is_deeply $vals, \@q,
-                               "#$docid $pfx as expected ($xhc->{impl})";
-               }
+               is scalar(@rest), 0, 'no extra rows returned';
        }
        my $nr;
        for my $i (7, 8, 39, 40) {

Reply via email to