That was in the original post, but to restate the issue: I use debug
output to write rules.  With the addition of a few rules with maxhits in
the several hundreds, like __E_LIKE_LETTER, trying to read which subtests
a message hits looks like this:

Jun  7 09:52:37.553 [23945] dbg: check:
subtests=__ANY_TEXT_ATTACH,__ANY_TEXT_ATTACH_DOC,__BODY_TEXT_LINE,__BODY_TEXT_LINE,__BODY_TEXT_LINE,__BUGGED_IMG,__CBJ_GiveMeABreak2,__CLICK_HERE,__CT,__CTYPE_HAS_BOUNDARY,__CTYPE_MULTIPART_ALT,__CTYPE_MULTIPART_ANY,__DEAL,__DKIMWL_WL_BL,__DKIM_DEPENDABLE,__DKIM_EXISTS,__DOS_HAS_ANY_URI,__DOS_HAS_LIST_UNSUB,__DOS_RCVD_WED,__DOS_RELAYED_EXT,__DOS_SINGLE_EXT_RELAY,__END_FUTURE_EMAILS,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER
,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER
,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__E_LIKE_LETTER,__FROM_FMBLA_NEWDOM,__FSL_HAS_LIST_UNSUB,__HAS_ANY_URI,__HAS_DATE,__HAS_DKIM_SIGHD,__HAS_DOMAINKEY_SIG,__HAS_FROM,__HAS_HREF,__HAS_MESSAGE_ID,__HAS_MSGID,__HAS_RCVD,__HAS_SUBJECT,__HAS_TO,__HAS_URI,__HAVE_BOUNCE_RELAYS,__HTML_LINK_IMAGE,__HUSH_HUSH,__KAM_COUNT_URIS,__KAM_COUNT_URIS,__KAM_COUNT_URIS,__KAM_COUNT_URIS,__KAM_COUNT_URIS,__KAM_COUNT_URIS,__KAM_COUNT_URIS,__KAM_COUNT_URIS,__KAM_DROPBOX2,__KAM_FAKEDELIVER12,__KAM_FAKEDELIVER4,__KAM_FAKEDELIVER6,__KAM_FAKEDELIVER8,__KAM_FUN1,__KAM_F
UN2,__KAM_FUN3,__KAM_FUN4,__KAM_GENERICHEALTH3,__KAM_GOOGLE2_2,__KAM_HARP3,__KAM_HAS_1_URIS,__KAM_HAS_2_URIS,__KAM_HAS_3_URIS,__KAM_HAS_4_URIS,__KAM_HAS_5_URIS,__KAM_HUGEIMGSRC,__KAM_JURY3,__KAM_LOTSOFHASH,__KAM_LOTTO3,__KAM_MAILSPLOIT2,__KAM_MULTIPLE_FROM,__KAM_PAYPAL3B,__KAM_RPTR_PASSED,__KAM_SEO7,__KAM_TIME4,__KAM_UPS2,__KAM_URIBL_PCCC,__KAM_WU1,__KB_WAM_FROM_NAME_SINGLEWORD,__LAST_EXTERNAL_RELAY_NO_AUTH,__LAST_UNTRUSTED_RELAY_NO_AUTH,__LIST_PARTIAL,__LOCAL_PP_NONPPURL,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__L
OWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__LOWER_E,__MIME_HTML,__MIME_VERSION,__MISSING_REF,__MISSING_REPLY,__MSGID_OK_HOST,__NONEMPTY_BODY,__NOT_A_PERSON,__NOT_SPOOFED,__PDS_NEWDOMAIN,__RB_GT_200,__RCD_RDNS_MAIL,__RCD_RDNS_MAIL_MESSY,__RCVD_IN_RPBL,__RCVD_IN_SORBS,__RCVD_IN_ZEN,__RP_MATCHES_RCVD,__SANE_MSGID,__SUBJ_NOT_SHORT,__SUBSCRIPTION_INFO,__TAG_EXISTS_BODY,__TAG_EXISTS_CENTER,__TAG_EXISTS_HEAD,__TAG_EXISTS_HTML,__TAG_EXISTS_META,__TOCC_EXISTS,__TVD_MIME_ATT_TP,__USING_VERP1

With my patch for the debug output, it looks like this (note the 320
deduplicated __E_LIKE_LETTER hits, for example):
Jun  7 09:55:43.872 [24500] dbg: check:
subtests=__ANY_TEXT_ATTACH,__ANY_TEXT_ATTACH_DOC,__BODY_TEXT_LINE(3),__BUGGED_IMG,__CLICK_HERE,__CT,__CTYPE_HAS_BOUNDARY,__CTYPE_MULTIPART_ALT,__CTYPE_MULTIPART_ANY,__DEAL,__DKIMWL_WL_BL,__DKIM_DEPENDABLE,__DKIM_EXISTS,__DOS_HAS_ANY_URI,__DOS_HAS_LIST_UNSUB,__DOS_RCVD_WED,__DOS_RELAYED_EXT,__DOS_SINGLE_EXT_RELAY,__END_FUTURE_EMAILS,__E_LIKE_LETTER(320),__FROM_FMBLA_NEWDOM,__FSL_HAS_LIST_UNSUB,__HAS_ANY_URI,__HAS_DATE,__HAS_DKIM_SIGHD,__HAS_DOMAINKEY_SIG,__HAS_FROM,__HAS_HREF,__HAS_MESSAGE_ID,__HAS_MSGID,__HAS_RCVD,__HAS_SUBJECT,__HAS_TO,__HAS_URI,__HTML_LINK_IMAGE,__HUSH_HUSH,__KAM_COUNT_URIS(8),__KAM_DROPBOX2,__KAM_FAKEDELIVER12,__KAM_FAKEDELIVER4,__KAM_FAKEDELIVER6,__KAM_FAKEDELIVER8,__KAM_GENERICHEALTH3,__KAM_GOOGLE2_2,__KAM_HARP3,__KAM_HAS_1_URIS,__KAM_HAS_2_URIS,__KAM_HAS_3_URIS,__KAM_HAS_4_URIS,__KAM_HAS_5_URIS,__KAM_HUGEIMGSRC,__KAM_JURY3,__KAM_LOTSOFHASH,__KAM_LOTTO3,__KAM_MAILSPLOIT2,__KAM_MULTIPLE_FROM,__KAM_PAYPAL3B,__KAM_SEO7,__KAM_TIME4,__KAM_UPS2,__KAM_WU1,__KB_WAM_FROM_NAME_SINGLEWORD,__LAST_EXTERNAL_RELAY_NO_AUTH,__LAST_UNTRUSTED_RELAY_NO_AUTH,__LIST_PARTIAL,__LOCAL_PP_NONPPURL,__LOWER_E(230),__MIME_HTML,__MIME_VERSION,__MISSING_REF,__MISSING_REPLY,__MSGID_OK_HOST,__NONEMPTY_BODY,__NOT_A_PERSON,__NOT_SPOOFED,__PDS_NEWDOMAIN,__RB_GT_200,__RCD_RDNS_MAIL,__RCD_RDNS_MAIL_MESSY,__RCVD_IN_SORBS,__RP_MATCHES_RCVD,__SANE_MSGID,__SUBJ_NOT_SHORT,__SUBSCRIPTION_INFO,__TAG_EXISTS_BODY,__TAG_EXISTS_CENTER,__TAG_EXISTS_HEAD,__TAG_EXISTS_HTML,__TAG_EXISTS_META,__TOCC_EXISTS,__TVD_MIME_ATT_TP,__USING_VERP1
(Total Subtest Hits: 649 / Deduplicated Total Hits: 92)

Thanks for the improvement on other duplicates you committed.  That will
help too.

My change for debug output is committed now for 3.4 and trunk.
Committed revision 1860766.

Regards,
KAM
On 6/7/2019 2:50 AM, Henrik K wrote:
> What does "unreadable for rule analysis" mean?  Surely no one is actually
> manually reading such lines one rule at a time?  Computers can check and
> grep for you.. ;-)
>
> I think this needs a little bit more of thought what we really want to
> accomplish here and maybe do it in a bug along with the new templates and
> stuff if needed..
>
>
>
> On Thu, Jun 06, 2019 at 07:48:02AM -0400, Kevin A. McGrail wrote:
>> That is a frightening one liner.  Should we use it?
>>
>> As for the more output comment, if you have emails with 300 lower case e's, 
>> you
>> get 300 hits for the subtest.  It is unreadable for rule analysis.
>>
>> As for modifying the normal output, I have no idea if anyone out there is 
>> using
>> the public routine so better to be safe.
>>
>> I didn't find a tag for subtests either. That might be a good 4.0 addition.
>>
>> Regards, KAM
>>
>> On Thu, Jun 6, 2019, 01:30 Henrik K <[1]h...@hege.li> wrote:
>>
>>
>>     Well in theory you see _more_ debug output now when there are no
>>     duplicates,
>>     due to the stats string..  honestly atleast I wouldn't care about that.
>>     Feel free to vote.
>>
>>     As a silly morning exercise, here's a one-liner that compacts stuff :-P
>>
>>     my $foo = '__A,__B,__C,__C,__C,__CC,__D,__D,__E,__E';
>>     my $m; $foo =~ s/([^,]+)(?{$m=1})(?:,\1(?=,|$)(?{$m++}))+/"$1($m)"/eg;
>>
>>     __A,__B,__C(3),__CC,__D(2),__E(2)
>>
>>
>>     On Wed, Jun 05, 2019 at 08:25:00PM -0400, Kevin A. McGrail wrote:
>>     > Good point, Henrik & John.
>>     >
>>     > OK, I've left the output alone except for the calls from dbg so it
>>     > shouldn't break anything in the public interface.
>>     >
>>     > Thoughts on this version?
>>     >
>>     > Regards,
>>     > KAM
>>     >
>>     > On 6/4/2019 1:51 PM, John Hardin wrote:
>>     > > On Tue, 4 Jun 2019, Kevin A. McGrail wrote:
>>     > >
>>     > >> Yes, I was thinking about that and wanting to fix uritests so well
>>     > >> for the
>>     > >> template.   Thanks for the feedback.  I will take another pass at 
>> it.
>>     > >
>>     > > Just do the deduplication without modifying the output format.
>>     > >
>>     > > If we want to log the hit counts, then make another function that 
>> does
>>     > > what you did and use it for logging.
>>     > >
>>     > >
>>     > >> On Tue, Jun 4, 2019, 03:23 Henrik K <[2]h...@hege.li> wrote:
>>     > >>
>>     > >>>
>>     > >>> If you want to modify debug output, you have to modify only the 
>> dbg()
>>     > >>> output
>>     > >>> itself.  You can't modify internal functions that have specific
>>     output
>>     > >>> formats and start adding random strings to them.  Atleast these
>>     places
>>     > >>> depend on the comma delimited rules:
>>     > >>>
>>     > >>> ./masses/mass-check:    push @tests, split(/,/,
>>     > >>> $status->get_names_of_subtests_hit());
>>     > >>> ./t/rule_tests.t:    my %rules_hit = map { $_ => 1 }
>>     > >>> split(/,/,$msg->get_names_of_tests_hit()),
>>     > >>> split(/,/,$msg->get_names_of_subtests_hit());
>>     > >>> ./t.rules/run:  my $testsline =
>>     > >>> $status->get_names_of_tests_hit().",".$status->
>>     get_names_of_subtests_hit();
>>     > >>>
>>     > >>>
>>     > >>>
>>     > >>>
>>     > >>> On Tue, Jun 04, 2019 at 01:56:26AM -0400, Kevin A. McGrail wrote:
>>     > >>>> Morning All,
>>     > >>>>
>>     > >>>> After a few thoughts on limits, it appears that any duplicate
>>     subtest
>>     > >>>> hits are best combined for debug output.
>>     > >>>>
>>     > >>>> Any thoughts on the attached?  It looks like it will help me with
>>     rule
>>     > >>>> development while support rules with valid but large maxhits like
>>     > >>> __LOWER_E
>>     > >>>>
>>     > >>>> Regards,
>>     > >>>> KAM
>>     > >>>>
>>     > >>>> On 5/31/2019 10:30 AM, Bill Cole wrote:
>>     > >>>>> On 30 May 2019, at 20:35, Kevin A. McGrail wrote:
>>     > >>>>>
>>     > >>>>>> I was curious if anyone noticed the debug output for subtests 
>> has
>>     > >>> gotten
>>     > >>>>>> insane:
>>     > >>>>>
>>     > >>>>> It got a little discussion on users@ when I created those rules.
>>     > >>>>>
>>     > >>>>> [...]
>>     > >>>>>
>>     > >>>>>> [3]72_active.cf:    body            __LOWER_E       /e/
>>     > >>>>>> [4]72_active.cf:    tflags          __LOWER_E       multiple
>>     > >>>>>> maxhits=230
>>     > >>>>>>
>>     > >>>>>> [5]72_active.cf:    body            __E_LIKE_LETTER /<lcase_e>/
>>     > >>>>>> [6]72_active.cf:    tflags          __E_LIKE_LETTER multiple
>>     > >>>>>> maxhits=320
>>     > >>>>>>
>>     > >>>>>> Assuming those maxhits are correct,
>>     > >>>>>
>>     > >>>>> They are. In fact they were carefully tuned to catch the targeted
>>     > >>>>> extortion spam.
>>     > >>>>>
>>     > >>>>>> maybe we need something in the debug
>>     > >>>>>> output that says __E_LIKE_LETTER (number of hits if more than 
>> 1).
>>     > >>>>>
>>     > >>>>> That would be a useful enhancement even without my flagrant log
>>     > >>>>> vandalism.
>>     > >>>>>
>>     > >>>>
>>     > >>>> --
>>     > >>>> Kevin A. McGrail
>>     > >>>> Member, Apache Software Foundation
>>     > >>>> Chair Emeritus Apache SpamAssassin Project
>>     > >>>> [7]https://www.linkedin.com/in/kmcgrail - 703.798.0171
>>     > >>>>
>>     > >>>
>>     > >>>> Index: lib/Mail/SpamAssassin/PerMsgStatus.pm
>>     > >>>> 
>> ===================================================================
>>     > >>>> --- lib/Mail/SpamAssassin/PerMsgStatus.pm       (revision 1860582)
>>     > >>>> +++ lib/Mail/SpamAssassin/PerMsgStatus.pm       (working copy)
>>     > >>>> @@ -769,7 +769,38 @@
>>     > >>>>  sub get_names_of_subtests_hit {
>>     > >>>>    my ($self) = @_;
>>     > >>>>
>>     > >>>> -  return join(',', sort @{$self->{subtest_names_hit}});
>>     > >>>> +  #return join(',', sort @{$self->{subtest_names_hit}});
>>     > >>>> +
>>     > >>>> +  #This routine prints only one instance of a subrule hit with a
>>     > >>>> count
>>     > >>> of how many times it hit if greater than 1
>>     > >>>> +  my (%subtest_names_hit, $i, $key, @keys, @sorted, $string, 
>> $rule,
>>     > >>> $total_hits, $deduplicated_hits);
>>     > >>>> +
>>     > >>>> +  $total_hits = scalar(@{$self->{subtest_names_hit}});
>>     > >>>> +
>>     > >>>> +  for ($i=0; $i < $total_hits; $i++) {
>>     > >>>> +    $rule = ${$self->{subtest_names_hit}}[$i];
>>     > >>>> +    $subtest_names_hit{$rule}++;
>>     > >>>> +  }
>>     > >>>> +
>>     > >>>> +  foreach $key (keys %subtest_names_hit) {
>>     > >>>> +    push (@keys, $key);
>>     > >>>> +  }
>>     > >>>> +  @sorted = sort @keys;
>>     > >>>> +
>>     > >>>> +  $deduplicated_hits = scalar(@sorted);
>>     > >>>> +
>>     > >>>> +  for ($i=0; $i < $deduplicated_hits; $i++) {
>>     > >>>> +    $string .= $sorted[$i];
>>     > >>>> +    if ($subtest_names_hit{$sorted[$i]} > 1) {
>>     > >>>> +      $string .= "($subtest_names_hit{$sorted[$i]})"
>>     > >>>> +    }
>>     > >>>> +    $string .= ",";
>>     > >>>> +  }
>>     > >>>> +
>>     > >>>> +  $string =~ s/,$//;
>>     > >>>> +
>>     > >>>> +  $string .= " (Total Subtest Hits: $total_hits / Deduplicated
>>     Total
>>     > >>> Hits: $deduplicated_hits)";
>>     > >>>> +
>>     > >>>> +  return $string;
>>     > >>>>  }
>>     > >>>>
>>     > >>>>
>>     > >>> 
>> #####################################################################
>>     ######
>>     > >>>
>>     > >>>
>>     > >>>
>>     > >>
>>     > >
>>     >
>>     > --
>>     > Kevin A. McGrail
>>     > Member, Apache Software Foundation
>>     > Chair Emeritus Apache SpamAssassin Project
>>     > [8]https://www.linkedin.com/in/kmcgrail - 703.798.0171
>>     >
>>
>>     > Index: lib/Mail/SpamAssassin/PerMsgStatus.pm
>>     > ===================================================================
>>     > --- lib/Mail/SpamAssassin/PerMsgStatus.pm       (revision 1860582)
>>     > +++ lib/Mail/SpamAssassin/PerMsgStatus.pm       (working copy)
>>     > @@ -398,7 +398,7 @@
>>     >    dbg("check: is spam? score=".$self->{score}.
>>     >                          " required=".$self->{conf}->{required_score});
>>     >    dbg("check: tests=".$self->get_names_of_tests_hit());
>>     > -  dbg("check: subtests=".$self->get_names_of_subtests_hit());
>>     > +  dbg("check: subtests=".$self->get_names_of_subtests_hit("dbg"));
>>     >    $self->{is_spam} = $self->is_spam();
>>     > 
>>     >    $self->{main}->{resolver}->bgabort();
>>     > @@ -764,12 +764,52 @@
>>     >  normally-hidden rules, which score 0 and have names beginning with two
>>     >  underscores, used in meta rules.
>>     > 
>>     > +If a parameter of dbg is passed, the output will be more condensed and
>>     > +sub-tests with multiple hits reduced to one entry with the number of
>>     hits
>>     > +in parentheses. Some information is also added at the end regarding 
>> the
>>     > +multiple hits.
>>     > +
>>     >  =cut
>>     > 
>>     >  sub get_names_of_subtests_hit {
>>     > -  my ($self) = @_;
>>     > +  my ($self, $mode) = @_;
>>     > 
>>     > -  return join(',', sort @{$self->{subtest_names_hit}});
>>     > +  if (defined $mode && $mode eq 'dbg') {
>>     > +    #This routine prints only one instance of a subrule hit with a 
>> count
>>     of how many times it hit if greater than 1
>>     > +    my (%subtest_names_hit, $i, $key, @keys, @sorted, $string, $rule,
>>     $total_hits, $deduplicated_hits); 
>>     > + 
>>     > +    $total_hits = scalar(@{$self->{subtest_names_hit}});
>>     > + 
>>     > +    for ($i=0; $i < $total_hits; $i++) {
>>     > +      $rule = ${$self->{subtest_names_hit}}[$i];
>>     > +      $subtest_names_hit{$rule}++;
>>     > +    }
>>     > + 
>>     > +    foreach $key (keys %subtest_names_hit) {
>>     > +      push (@keys, $key);
>>     > +    }
>>     > +    @sorted = sort @keys;
>>     > + 
>>     > +    $deduplicated_hits = scalar(@sorted);
>>     > + 
>>     > +    for ($i=0; $i < $deduplicated_hits; $i++) {
>>     > +      $string .= $sorted[$i];
>>     > +      if ($subtest_names_hit{$sorted[$i]} > 1) {
>>     > +        $string .= "($subtest_names_hit{$sorted[$i]})"
>>     > +      }
>>     > +      $string .= ",";
>>     > +    }
>>     > + 
>>     > +    $string =~ s/,$//;
>>     > + 
>>     > +    $string .= " (Total Subtest Hits: $total_hits / Deduplicated Total
>>     Hits: $deduplicated_hits)";
>>     > + 
>>     > +    return $string;
>>     > +
>>     > +  } else {
>>     > +    #return the simpler string with duplicates and commas
>>     > +    return join(',', sort @{$self->{subtest_names_hit}});
>>     > +  }
>>     >  }
>>     > 
>>     >  
>> ########################################################################
>>     ###
>>
>>
>>
>> On Thu, Jun 6, 2019, 01:30 Henrik K <[9]h...@hege.li> wrote:
>>
>>
>>     Well in theory you see _more_ debug output now when there are no
>>     duplicates,
>>     due to the stats string..  honestly atleast I wouldn't care about that.
>>     Feel free to vote.
>>
>>     As a silly morning exercise, here's a one-liner that compacts stuff :-P
>>
>>     my $foo = '__A,__B,__C,__C,__C,__CC,__D,__D,__E,__E';
>>     my $m; $foo =~ s/([^,]+)(?{$m=1})(?:,\1(?=,|$)(?{$m++}))+/"$1($m)"/eg;
>>
>>     __A,__B,__C(3),__CC,__D(2),__E(2)
>>
>>
>>     On Wed, Jun 05, 2019 at 08:25:00PM -0400, Kevin A. McGrail wrote:
>>     > Good point, Henrik & John.
>>     >
>>     > OK, I've left the output alone except for the calls from dbg so it
>>     > shouldn't break anything in the public interface.
>>     >
>>     > Thoughts on this version?
>>     >
>>     > Regards,
>>     > KAM
>>     >
>>     > On 6/4/2019 1:51 PM, John Hardin wrote:
>>     > > On Tue, 4 Jun 2019, Kevin A. McGrail wrote:
>>     > >
>>     > >> Yes, I was thinking about that and wanting to fix uritests so well
>>     > >> for the
>>     > >> template.   Thanks for the feedback.  I will take another pass at 
>> it.
>>     > >
>>     > > Just do the deduplication without modifying the output format.
>>     > >
>>     > > If we want to log the hit counts, then make another function that 
>> does
>>     > > what you did and use it for logging.
>>     > >
>>     > >
>>     > >> On Tue, Jun 4, 2019, 03:23 Henrik K <[10]h...@hege.li> wrote:
>>     > >>
>>     > >>>
>>     > >>> If you want to modify debug output, you have to modify only the 
>> dbg()
>>     > >>> output
>>     > >>> itself.  You can't modify internal functions that have specific
>>     output
>>     > >>> formats and start adding random strings to them.  Atleast these
>>     places
>>     > >>> depend on the comma delimited rules:
>>     > >>>
>>     > >>> ./masses/mass-check:    push @tests, split(/,/,
>>     > >>> $status->get_names_of_subtests_hit());
>>     > >>> ./t/rule_tests.t:    my %rules_hit = map { $_ => 1 }
>>     > >>> split(/,/,$msg->get_names_of_tests_hit()),
>>     > >>> split(/,/,$msg->get_names_of_subtests_hit());
>>     > >>> ./t.rules/run:  my $testsline =
>>     > >>> $status->get_names_of_tests_hit().",".$status->
>>     get_names_of_subtests_hit();
>>     > >>>
>>     > >>>
>>     > >>>
>>     > >>>
>>     > >>> On Tue, Jun 04, 2019 at 01:56:26AM -0400, Kevin A. McGrail wrote:
>>     > >>>> Morning All,
>>     > >>>>
>>     > >>>> After a few thoughts on limits, it appears that any duplicate
>>     subtest
>>     > >>>> hits are best combined for debug output.
>>     > >>>>
>>     > >>>> Any thoughts on the attached?  It looks like it will help me with
>>     rule
>>     > >>>> development while support rules with valid but large maxhits like
>>     > >>> __LOWER_E
>>     > >>>>
>>     > >>>> Regards,
>>     > >>>> KAM
>>     > >>>>
>>     > >>>> On 5/31/2019 10:30 AM, Bill Cole wrote:
>>     > >>>>> On 30 May 2019, at 20:35, Kevin A. McGrail wrote:
>>     > >>>>>
>>     > >>>>>> I was curious if anyone noticed the debug output for subtests 
>> has
>>     > >>> gotten
>>     > >>>>>> insane:
>>     > >>>>>
>>     > >>>>> It got a little discussion on users@ when I created those rules.
>>     > >>>>>
>>     > >>>>> [...]
>>     > >>>>>
>>     > >>>>>> [11]72_active.cf:    body            __LOWER_E       /e/
>>     > >>>>>> [12]72_active.cf:    tflags          __LOWER_E       multiple
>>     > >>>>>> maxhits=230
>>     > >>>>>>
>>     > >>>>>> [13]72_active.cf:    body            __E_LIKE_LETTER /<lcase_e>/
>>     > >>>>>> [14]72_active.cf:    tflags          __E_LIKE_LETTER multiple
>>     > >>>>>> maxhits=320
>>     > >>>>>>
>>     > >>>>>> Assuming those maxhits are correct,
>>     > >>>>>
>>     > >>>>> They are. In fact they were carefully tuned to catch the targeted
>>     > >>>>> extortion spam.
>>     > >>>>>
>>     > >>>>>> maybe we need something in the debug
>>     > >>>>>> output that says __E_LIKE_LETTER (number of hits if more than 
>> 1).
>>     > >>>>>
>>     > >>>>> That would be a useful enhancement even without my flagrant log
>>     > >>>>> vandalism.
>>     > >>>>>
>>     > >>>>
>>     > >>>> --
>>     > >>>> Kevin A. McGrail
>>     > >>>> Member, Apache Software Foundation
>>     > >>>> Chair Emeritus Apache SpamAssassin Project
>>     > >>>> [15]https://www.linkedin.com/in/kmcgrail - 703.798.0171
>>     > >>>>
>>     > >>>
>>     > >>>> Index: lib/Mail/SpamAssassin/PerMsgStatus.pm
>>     > >>>> 
>> ===================================================================
>>     > >>>> --- lib/Mail/SpamAssassin/PerMsgStatus.pm       (revision 1860582)
>>     > >>>> +++ lib/Mail/SpamAssassin/PerMsgStatus.pm       (working copy)
>>     > >>>> @@ -769,7 +769,38 @@
>>     > >>>>  sub get_names_of_subtests_hit {
>>     > >>>>    my ($self) = @_;
>>     > >>>>
>>     > >>>> -  return join(',', sort @{$self->{subtest_names_hit}});
>>     > >>>> +  #return join(',', sort @{$self->{subtest_names_hit}});
>>     > >>>> +
>>     > >>>> +  #This routine prints only one instance of a subrule hit with a
>>     > >>>> count
>>     > >>> of how many times it hit if greater than 1
>>     > >>>> +  my (%subtest_names_hit, $i, $key, @keys, @sorted, $string, 
>> $rule,
>>     > >>> $total_hits, $deduplicated_hits);
>>     > >>>> +
>>     > >>>> +  $total_hits = scalar(@{$self->{subtest_names_hit}});
>>     > >>>> +
>>     > >>>> +  for ($i=0; $i < $total_hits; $i++) {
>>     > >>>> +    $rule = ${$self->{subtest_names_hit}}[$i];
>>     > >>>> +    $subtest_names_hit{$rule}++;
>>     > >>>> +  }
>>     > >>>> +
>>     > >>>> +  foreach $key (keys %subtest_names_hit) {
>>     > >>>> +    push (@keys, $key);
>>     > >>>> +  }
>>     > >>>> +  @sorted = sort @keys;
>>     > >>>> +
>>     > >>>> +  $deduplicated_hits = scalar(@sorted);
>>     > >>>> +
>>     > >>>> +  for ($i=0; $i < $deduplicated_hits; $i++) {
>>     > >>>> +    $string .= $sorted[$i];
>>     > >>>> +    if ($subtest_names_hit{$sorted[$i]} > 1) {
>>     > >>>> +      $string .= "($subtest_names_hit{$sorted[$i]})"
>>     > >>>> +    }
>>     > >>>> +    $string .= ",";
>>     > >>>> +  }
>>     > >>>> +
>>     > >>>> +  $string =~ s/,$//;
>>     > >>>> +
>>     > >>>> +  $string .= " (Total Subtest Hits: $total_hits / Deduplicated
>>     Total
>>     > >>> Hits: $deduplicated_hits)";
>>     > >>>> +
>>     > >>>> +  return $string;
>>     > >>>>  }
>>     > >>>>
>>     > >>>>
>>     > >>> 
>> #####################################################################
>>     ######
>>     > >>>
>>     > >>>
>>     > >>>
>>     > >>
>>     > >
>>     >
>>     > --
>>     > Kevin A. McGrail
>>     > Member, Apache Software Foundation
>>     > Chair Emeritus Apache SpamAssassin Project
>>     > [16]https://www.linkedin.com/in/kmcgrail - 703.798.0171
>>     >
>>
>>     > Index: lib/Mail/SpamAssassin/PerMsgStatus.pm
>>     > ===================================================================
>>     > --- lib/Mail/SpamAssassin/PerMsgStatus.pm       (revision 1860582)
>>     > +++ lib/Mail/SpamAssassin/PerMsgStatus.pm       (working copy)
>>     > @@ -398,7 +398,7 @@
>>     >    dbg("check: is spam? score=".$self->{score}.
>>     >                          " required=".$self->{conf}->{required_score});
>>     >    dbg("check: tests=".$self->get_names_of_tests_hit());
>>     > -  dbg("check: subtests=".$self->get_names_of_subtests_hit());
>>     > +  dbg("check: subtests=".$self->get_names_of_subtests_hit("dbg"));
>>     >    $self->{is_spam} = $self->is_spam();
>>     > 
>>     >    $self->{main}->{resolver}->bgabort();
>>     > @@ -764,12 +764,52 @@
>>     >  normally-hidden rules, which score 0 and have names beginning with two
>>     >  underscores, used in meta rules.
>>     > 
>>     > +If a parameter of dbg is passed, the output will be more condensed and
>>     > +sub-tests with multiple hits reduced to one entry with the number of
>>     hits
>>     > +in parentheses. Some information is also added at the end regarding 
>> the
>>     > +multiple hits.
>>     > +
>>     >  =cut
>>     > 
>>     >  sub get_names_of_subtests_hit {
>>     > -  my ($self) = @_;
>>     > +  my ($self, $mode) = @_;
>>     > 
>>     > -  return join(',', sort @{$self->{subtest_names_hit}});
>>     > +  if (defined $mode && $mode eq 'dbg') {
>>     > +    #This routine prints only one instance of a subrule hit with a 
>> count
>>     of how many times it hit if greater than 1
>>     > +    my (%subtest_names_hit, $i, $key, @keys, @sorted, $string, $rule,
>>     $total_hits, $deduplicated_hits); 
>>     > + 
>>     > +    $total_hits = scalar(@{$self->{subtest_names_hit}});
>>     > + 
>>     > +    for ($i=0; $i < $total_hits; $i++) {
>>     > +      $rule = ${$self->{subtest_names_hit}}[$i];
>>     > +      $subtest_names_hit{$rule}++;
>>     > +    }
>>     > + 
>>     > +    foreach $key (keys %subtest_names_hit) {
>>     > +      push (@keys, $key);
>>     > +    }
>>     > +    @sorted = sort @keys;
>>     > + 
>>     > +    $deduplicated_hits = scalar(@sorted);
>>     > + 
>>     > +    for ($i=0; $i < $deduplicated_hits; $i++) {
>>     > +      $string .= $sorted[$i];
>>     > +      if ($subtest_names_hit{$sorted[$i]} > 1) {
>>     > +        $string .= "($subtest_names_hit{$sorted[$i]})"
>>     > +      }
>>     > +      $string .= ",";
>>     > +    }
>>     > + 
>>     > +    $string =~ s/,$//;
>>     > + 
>>     > +    $string .= " (Total Subtest Hits: $total_hits / Deduplicated Total
>>     Hits: $deduplicated_hits)";
>>     > + 
>>     > +    return $string;
>>     > +
>>     > +  } else {
>>     > +    #return the simpler string with duplicates and commas
>>     > +    return join(',', sort @{$self->{subtest_names_hit}});
>>     > +  }
>>     >  }
>>     > 
>>     >  
>> ########################################################################
>>     ###
>>
>>
>>
>> References:
>>
>> [1] mailto:h...@hege.li
>> [2] mailto:h...@hege.li
>> [3] http://72_active.cf/
>> [4] http://72_active.cf/
>> [5] http://72_active.cf/
>> [6] http://72_active.cf/
>> [7] https://www.linkedin.com/in/kmcgrail
>> [8] https://www.linkedin.com/in/kmcgrail
>> [9] mailto:h...@hege.li
>> [10] mailto:h...@hege.li
>> [11] http://72_active.cf/
>> [12] http://72_active.cf/
>> [13] http://72_active.cf/
>> [14] http://72_active.cf/
>> [15] https://www.linkedin.com/in/kmcgrail
>> [16] https://www.linkedin.com/in/kmcgrail


-- 
Kevin A. McGrail
Member, Apache Software Foundation
Chair Emeritus Apache SpamAssassin Project
https://www.linkedin.com/in/kmcgrail - 703.798.0171


Reply via email to