Konstantin Ryabitsev <[email protected]> wrote:
> On Thu, Mar 30, 2023 at 11:29:51AM +0000, Eric Wong wrote:
> > This implements the mbox.gz retrieval.  I didn't want to deal
> > with HTML nor figuring out how to expose more <form> elements,
> > yet; but I figure mbox.gz is the most important.
> > 
> > Now deployed on 80x24.org/lore:
> > 
> > MSGID=20230327080502.GA570847@ziqianlu-desk2
> > curl -d '' -sSf \
> >    https://80x24.org/lore/all/"$MSGID/?x=m&q=rt:2023-03-29.." | \
> >    zcat | grep -i ^Message-ID:
> 
> Eric:
> 
> Reviving this old thread for some clarification. I noticed that this only
> works for /all/, but not for individual inboxes. E.g.:
> 
>     $ curl -d '' -sSf \
>       https://lore.kernel.org/all/"$MSGID/?x=m&q=rt:2023-03-29.." \
>       | zgrep -i ^Message-ID:
>     Message-ID: <[email protected]>
> 
> but with /lkml/ I get a 404:
> 
>     $ curl -d '' -sSf \
>       https://lore.kernel.org/lkml/"$MSGID/?x=m&q=rt:2023-03-29.." \
>       | zgrep -i ^Message-ID:
>     curl: (22) The requested URL returned error: 404
> 
> Is that intentionally restricted to just extindex?

It's a bug, fix below and deployed to https://80x24.org/lore/

---------8<---------
Subject: [PATCH] www: use correct threadid for per-thread search

For individual public-inboxes relying on extindex for per-inbox
search, we must use the threadid from the extindex over.sqlite3
rather than the per-inbox over.sqlite3 file.

Reported-by: Konstantin Ryabitsev <[email protected]>
Link: https://public-inbox.org/meta/20230616-rudy-comedy-vision-2b9f92@meerkat/
---
 lib/PublicInbox/Mbox.pm | 10 +++++++---
 t/extindex-psgi.t       | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index e1abf7ec..bf61bb0e 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -225,15 +225,19 @@ sub mbox_all {
        return mbox_all_ids($ctx) if $q_string !~ /\S/;
        my $srch = $ctx->{ibx}->isrch or
                return PublicInbox::WWW::need($ctx, 'Search');
-       my $over = $ctx->{ibx}->over or
-               return PublicInbox::WWW::need($ctx, 'Overview');
 
        my $qopts = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC
 
        # {threadid} limits results to a given thread
        # {threads} collapses results from messages in the same thread,
        # allowing us to use ->expand_thread w/o duplicates in our own code
-       $qopts->{threadid} = $over->mid2tid($ctx->{mid}) if defined($ctx->{mid});
+       if (defined($ctx->{mid})) {
+               my $over = ($ctx->{ibx}->{isrch} ?
+                               $ctx->{ibx}->{isrch}->{es}->over :
+                               $ctx->{ibx}->over) or
+                       return PublicInbox::WWW::need($ctx, 'Overview');
+               $qopts->{threadid} = $over->mid2tid($ctx->{mid});
+       }
        $qopts->{threads} = 1 if $q->{t};
        $srch->query_approxidate($ctx->{ibx}->git, $q_string);
        my $mset = $srch->mset($q_string, $qopts);
diff --git a/t/extindex-psgi.t b/t/extindex-psgi.t
index 98dc2e48..f10ffbb6 100644
--- a/t/extindex-psgi.t
+++ b/t/extindex-psgi.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -21,7 +21,28 @@ mkdir "$home/.public-inbox" or BAIL_OUT $!;
 my $pi_config = "$home/.public-inbox/config";
 cp($cfg_path, $pi_config) or BAIL_OUT;
 my $env = { HOME => $home };
-run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT;
+my $m2t = create_inbox 'mid2tid', version => 2, indexlevel => 'basic', sub {
+       my ($im, $ibx) = @_;
+       for my $n (1..3) {
+               $im->add(PublicInbox::Eml->new(<<EOM)) or xbail 'add';
+Date: Fri, 02 Oct 1993 00:0$n:00 +0000
+Message-ID: <t\@$n>
+Subject: tid $n
+From: x\@example.com
+References: <a-mid\@b>
+
+$n
+EOM
+               $im->add(PublicInbox::Eml->new(<<EOM)) or xbail 'add';
+Date: Fri, 02 Oct 1993 00:0$n:00 +0000
+Message-ID: <ut\@$n>
+Subject: unrelated tid $n
+From: x\@example.com
+References: <b-mid\@b>
+
+EOM
+       }
+};
 {
        open my $cfgfh, '>>', $pi_config or BAIL_OUT;
        $cfgfh->autoflush(1);
@@ -32,8 +53,14 @@ run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT;
 [publicinbox]
        wwwlisting = all
        grokManifest = all
+[publicinbox "m2t"]
+       inboxdir = $m2t->{inboxdir}
+       address = $m2t->{-primary_address}
 EOM
+       close $cfgfh or xbail "close: $!";
 }
+
+run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT;
 my $www = PublicInbox::WWW->new(PublicInbox::Config->new($pi_config));
 my $client = sub {
        my ($cb) = @_;
@@ -83,6 +110,14 @@ my $client = sub {
                't2 manifest');
        is_deeply([ sort keys %{$m->{'/t1'}} ], [ '/t1' ],
                't2 manifest');
+
+       # ensure ibx->{isrch}->{es}->over is used instead of ibx->over:
+       $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000259..&x=m"));
+       is($res->code, 200, 'hit on mid2tid query');
+       $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000400..&x=m"));
+       is($res->code, 404, '404 on out-of-range mid2tid query');
+       $res = $cb->(POST("/m2t/t\@1/?q=s:unrelated&x=m"));
+       is($res->code, 404, '404 on cross-thread search');
 };
 test_psgi(sub { $www->call(@_) }, $client);
 %$env = (%$env, TMPDIR => $tmpdir, PI_CONFIG => $pi_config);

Reply via email to