stas 02/03/23 01:11:33 Modified: tmpl/custom/html page_body search src/search README SwishSpiderConfig.pl swish.conf Log: sync with the latest swish Submitted by: Bill Moseley <[EMAIL PROTECTED]> Reviewed by: stas Revision Changes Path 1.20 +7 -5 modperl-docs/tmpl/custom/html/page_body Index: page_body =================================================================== RCS file: /home/cvs/modperl-docs/tmpl/custom/html/page_body,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- page_body 22 Mar 2002 02:02:16 -0000 1.19 +++ page_body 23 Mar 2002 09:11:33 -0000 1.20 @@ -15,11 +15,13 @@ INCLUDE page_toc toc=doc.toc; # render the content - "<!-- SwishCommand index -->"; + # index_section is used to break up the doc into sections for indexing + FOREACH sec = doc.body; '<div class="index_section">'; + '<!-- SwishCommand index -->'; sec; - "<br><br>"; + "<br><br>\n"; IF loop.count == loop.size; INCLUDE navbar_local_bottom nav=doc.nav @@ -27,9 +29,9 @@ ELSE; INCLUDE top_link; END; - "<br><br>"; + "<br><br>\n"; + '<!-- SwishCommand noindex -->'; "</div>\n\n"; END; - "<!-- SwishCommand noindex -->"; %] -<!-- end content--> \ No newline at end of file +<!-- end content--> 1.11 +1 -1 modperl-docs/tmpl/custom/html/search Index: search =================================================================== RCS file: /home/cvs/modperl-docs/tmpl/custom/html/search,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- search 22 Mar 2002 19:22:41 -0000 1.10 +++ search 23 Mar 2002 09:11:33 -0000 1.11 @@ -18,7 +18,7 @@ <td class="menu-border" width="1"><br class="smallbr"></td> <td class="search" width="2" align="center"> <input type="submit" name="submit" value="Search" class="submit-but"> - <input type="hidden" name="section" value=""[% doc.dir.path_from_base %]""> + <input type="hidden" name="sbm" value=""[% doc.dir.path_from_base %]""> </td> <td class="menu-border" width="1"><br class="smallbr"></td> </tr> 1.7 +6 -5 modperl-docs/src/search/README Index: README =================================================================== RCS file: /home/cvs/modperl-docs/src/search/README,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- README 22 Mar 2002 19:22:41 -0000 1.6 +++ README 23 Mar 2002 09:11:33 -0000 1.7 @@ -123,21 +123,22 @@ =item * Since we want to be able to search any sub-section of the site, the -search form includes the hidden variable C<section>. For example: +search form includes the hidden variable C<sbm> (mnemonics: 'search by +meta'). For example: - <input type="checkbox" name="section" value="docs/1.0/guide" /> + <input type="checkbox" name="sbm" value="docs/1.0/guide" /> will search all the documents under I<docs/1.0/guide> directory. -the correct value for the C<section> variable are set in the template when +the correct value for the C<sbm> variable are set in the template when the site is created. The main search page I</search/swish.cgi>, has multiply checkboxes for -the for the C<section> variable so you can limit searches to only selected +the for the C<sbm> variable so you can limit searches to only selected sections. The C<$ENV{MODPERL_SITE}> mentioned earlier is matched against the -C<section> variable to extract only the wanted subsets of the hits: +C<sbm> variable to extract only the wanted subsets of the hits: $uri =~ m!$ENV{MODPERL_SITE}{/([^/]+)/.+$! 1.5 +36 -9 modperl-docs/src/search/SwishSpiderConfig.pl Index: SwishSpiderConfig.pl =================================================================== RCS file: /home/cvs/modperl-docs/src/search/SwishSpiderConfig.pl,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- SwishSpiderConfig.pl 22 Mar 2002 02:02:15 -0000 1.4 +++ SwishSpiderConfig.pl 23 Mar 2002 09:11:33 -0000 1.5 @@ -4,7 +4,8 @@ my $base_path = $ENV{MODPERL_SITE} || die "must set \$ENV{MODPERL_SITE}"; -die "Don't use trailing slash in MODPERL_SITE" if $base_path =~ m!/$!; +$base_path =~ s[/$][]; + @servers = ( @@ -52,18 +53,38 @@ my $tree = HTML::TreeBuilder->new; - $tree->parse( ${$params{content}} ); # Why not allow a scalar ref? + $tree->store_comments(1); + + $tree->parse( ${$params{content}} ); # Why not allow a scalar ref? $tree->eof; + + # Find the <head> section for use in all split pages my $head = $tree->look_down( '_tag', 'head' ); - for my $section ( $tree->look_down( '_tag', 'div', 'class', 'index_section' ) ) { - create_page( $head->clone, $section->clone, \%params ) - } + + # Now create a new "document" for each + create_page( $head->clone, $_->clone, \%params ) + for $tree->look_down( '_tag', 'div', 'class', 'index_section' ); + + + # Indexed the page in sections, just return + return 0 if $params{found}; + + # No sections found, so index the entire page (probably index.html) + + # Stip base_path + #my $url = $params{uri}->as_string; + #$url =~ s/^$base_path//; + + my $new_content = $tree->as_HTML(undef,"\t"); + output_content( $params{server}, $params{content}, + $params{uri}, $params{response} ); + $tree->delete; - return !$params{found}; # tell spider.pl to not index the page + return 0; # don't index } sub create_page { @@ -95,11 +116,14 @@ # Extract out part of the path to use for limiting searches to parts of the document tree. - if ( $uri =~ m!$base_path/([^/]+)/.+$! ) { - my $meta = HTML::Element->new('meta', name=> 'section', content => $1); + if ( $uri =~ m!$base_path/(.+)$! ) { + my $path = $1; + $path =~ s{[^/]$}{}; # remove file name, if one + my $meta = HTML::Element->new('meta', name=> 'section', content => $path); $head->push_content( $meta ); } - + + my $body = HTML::Element->new('body'); my $doc = HTML::Element->new('html'); @@ -107,6 +131,9 @@ $body->push_content( $section ); $doc->push_content( $head, $body ); + # If we want to stip the base_path + #my $url = $uri->as_string; + #$url =~ s/$base_path//; my $new_content = $doc->as_HTML(undef,"\t"); output_content( $params->{server}, \$new_content, 1.5 +1 -0 modperl-docs/src/search/swish.conf Index: swish.conf =================================================================== RCS file: /home/cvs/modperl-docs/src/search/swish.conf,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- swish.conf 3 Mar 2002 11:27:22 -0000 1.4 +++ swish.conf 23 Mar 2002 09:11:33 -0000 1.5 @@ -17,3 +17,4 @@ #BuzzWords in highlighting -- #How about counting highlighted terms individually in the highlight module #so every term is highlighted at least once, with a total of say five. +
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]