Konstantin Ryabitsev <konstan...@linuxfoundation.org> wrote: > Hello: > > Following the discussion on the ksummit list [1], I wanted to give someone a > query > they could use to keep an eye on any new threads. Is there a xapian query that > can be used to effectively say "return just top-level messages and exclude any > follow-ups"? It's not quite as simple as "s:* AND NOT s:Re:" because we also > want to exclude threaded patches. Some kind of equivalent of "any messages > without an in-reply-to/references header"?
Not easily with the current Xapian schema. It can get kinda close but you don't get the thread root with: https://yhbt.net/lore/all/?q=rt:yesterday..&o=-1&t=1 The above isn't very useful IMHO, and also very expensive... SQLite can actually do it pretty quickly, but it's WWW-only (patch below): https://yhbt.net/lore/all/topics.html I don't know if it can work with the way lei is supposed to dump output for MUAs to consume... So maybe a custom TUI is the way forward, but that comes with all the problems with developing+maintaining a TUI I wrote about[1] previously... -------8<------ Subject: [PATCH] www: add topics.html endpoint This seems like an easy (but WWW-specific) way to get recent topics as suggested by Konstantin. Perhaps an Atom endpoint will also be useful. To do this with Xapian would require new columns and reindexing; and I'm not sure if the current lei handling of search results by dumping results to a format readable by common MUAs would work well with this. Suggested-by: Konstantin Ryabitsev <konstan...@linuxfoundation.org> Link: https://public-inbox.org/meta/20231107-skilled-cobra-of-swiftness-a6ff26@meerkat/ --- MANIFEST | 1 + lib/PublicInbox/WWW.pm | 9 ++++++ lib/PublicInbox/WwwStream.pm | 1 + lib/PublicInbox/WwwTopics.pm | 55 ++++++++++++++++++++++++++++++++++++ t/extindex-psgi.t | 6 ++++ t/plack.t | 9 ++++-- 6 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 lib/PublicInbox/WwwTopics.pm diff --git a/MANIFEST b/MANIFEST index 51dcffaf..e1c3dc97 100644 --- a/MANIFEST +++ b/MANIFEST @@ -371,6 +371,7 @@ lib/PublicInbox/WwwListing.pm lib/PublicInbox/WwwStatic.pm lib/PublicInbox/WwwStream.pm lib/PublicInbox/WwwText.pm +lib/PublicInbox/WwwTopics.pm lib/PublicInbox/XapClient.pm lib/PublicInbox/XapHelper.pm lib/PublicInbox/XapHelperCxx.pm diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index d2bd68ea..dcaf93cb 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -101,6 +101,8 @@ sub call { invalid_inbox($ctx, 
$1) || get_atom($ctx); } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) { invalid_inbox($ctx, $1) || get_new($ctx); + } elsif ($path_info =~ m!$INBOX_RE/topics\.html\z!o) { + invalid_inbox($ctx, $1) || get_topics($ctx); } elsif ($path_info =~ m!$INBOX_RE/description\z!o) { get_description($ctx, $1); } elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)? @@ -270,6 +272,13 @@ sub get_new { PublicInbox::Feed::new_html($ctx); } +# /$INBOX/topics.html -> HTML only +sub get_topics { + my ($ctx) = @_; + require PublicInbox::WwwTopics; + PublicInbox::WwwTopics::topics_html($ctx) || r404($ctx); +} + # /$INBOX/?r=$GIT_COMMIT -> HTML only sub get_index { my ($ctx) = @_; diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 4cbdda99..3a1d6edf 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -113,6 +113,7 @@ sub html_top ($) { qq(<a\nid=mirror) . qq(\nhref="${upfx}_/text/mirror/">mirror</a>$code / ). qq(<a\nhref="$atom">Atom feed</a>); + $links .= delete($ctx->{-html_more_links}) if $ctx->{-html_more_links}; if ($ibx->isrch) { my $q_val = delete($ctx->{-q_value_html}) // ''; $q_val = qq(\nvalue="$q_val") if $q_val ne ''; diff --git a/lib/PublicInbox/WwwTopics.pm b/lib/PublicInbox/WwwTopics.pm new file mode 100644 index 00000000..5605cfbe --- /dev/null +++ b/lib/PublicInbox/WwwTopics.pm @@ -0,0 +1,55 @@ +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +package PublicInbox::WwwTopics; +use v5.12; +use autodie qw(open); +use PublicInbox::Hval qw(ascii_html mid_href fmt_ts); +use PublicInbox::WwwStream; + +sub add_topic_line ($$$) { + my (undef, $prev, $nr) = @_; + my $s = ascii_html($prev->{subject}); + $s = '(no subject)' if $s eq ''; + $_[0] .= "\n".fmt_ts($prev->{ds}). 
+ qq{ <a\nhref="}.mid_href($prev->{mid}).qq{/#r">$s</a>}; + $_[0] .= " $nr+ messages" if $nr > 1; +} + +sub topics_html { # GET /$INBOX_NAME/topics.html + my ($ctx) = @_; + my $over = $ctx->{ibx}->over or + return $ctx->{www}->can('need')->($ctx,'Overview'); + + # XXX there is likely faster ways to do this. + # OTOH SQLite tends to be faster with multiple simple queries + # rather than more complex ones + my $msgs = $over->do_get(<<EOS, { limit => 10000 }); +SELECT num,ts,ds,tid,ddd FROM over WHERE tid IN +(SELECT DISTINCT(tid) FROM over WHERE tid > 0 ORDER BY tid DESC LIMIT 200) +AND +num > 0 +ORDER BY tid,ts ASC +EOS + # can't use SQL to filter references since our schema wasn't designed + # for it, but our SQL sorts by ascending time to favor top-level + # messages while our final result (post-references filter) favors + # recent messages + chomp($ctx->{-html_more_links} = <<EOM); +\n- recent:[<a href="./">subjects (threaded)</a>|topics] (all times UTC) +EOM + my $buf = '<pre>'; + my ($nr, $prev); + while (my $smsg = pop @$msgs) { + if ($prev && $smsg->{tid} != $prev->{tid}) { + add_topic_line($buf, $prev, $nr); + $nr = 0; + } + ++$nr; + $prev = $smsg; + } + add_topic_line($buf, $prev, $nr) if $prev; + $buf .= '</pre>'; + PublicInbox::WwwStream::html_oneshot($ctx, 200, $buf); +} + +1; diff --git a/t/extindex-psgi.t b/t/extindex-psgi.t index f71210a5..9e0c7dc3 100644 --- a/t/extindex-psgi.t +++ b/t/extindex-psgi.t @@ -118,6 +118,12 @@ my $client = sub { is($res->code, 404, '404 on out-of-range mid2tid query'); $res = $cb->(POST("/m2t/t\@1/?q=s:unrelated&x=m")); is($res->code, 404, '404 on cross-thread search'); + + + $res = $cb->(GET('/m2t/topics.html')); + is($res->code, 200, 'topics.html on basic v2'); + $res = $cb->(GET('/all/topics.html')); + is($res->code, 200, 'topics.html on extindex'); }; test_psgi(sub { $www->call(@_) }, $client); %$env = (%$env, TMPDIR => $tmpdir, PI_CONFIG => $pi_config); diff --git a/t/plack.t b/t/plack.t index 7f80f488..7ec35e7a 
100644 --- a/t/plack.t +++ b/t/plack.t @@ -204,9 +204,12 @@ my $c1 = sub { my $raw = PublicInbox::Eml->new(\$body); is($raw->body_raw, $eml->body_raw, 'ISO-2022-JP body unmodified'); - $res = $cb->(GET($pfx . '/b...@example.com/t.mbox.gz')); - is(501, $res->code, '501 when overview missing'); - like($res->content, qr!\bOverview\b!, 'overview omission noted'); + for my $u (qw(b...@example.com/t.mbox.gz topics.html)) { + $res = $cb->(GET("$pfx/$u")); + is(501, $res->code, "501 on /$u when overview missing"); + like($res->content, qr!\bOverview\b!, + "overview omission noted for /$u"); + } # legacy redirects for my $t (qw(m f)) { [1] https://public-inbox.org/meta/20230922203353.M780211@dcvr/