stas        02/03/23 01:11:33

  Modified:    tmpl/custom/html page_body search
               src/search README SwishSpiderConfig.pl swish.conf
  Log:
  sync with the latest swish
  Submitted by: Bill Moseley <[EMAIL PROTECTED]>
  Reviewed by:  stas
  
  Revision  Changes    Path
  1.20      +7 -5      modperl-docs/tmpl/custom/html/page_body
  
  Index: page_body
  ===================================================================
  RCS file: /home/cvs/modperl-docs/tmpl/custom/html/page_body,v
  retrieving revision 1.19
  retrieving revision 1.20
  diff -u -r1.19 -r1.20
  --- page_body 22 Mar 2002 02:02:16 -0000      1.19
  +++ page_body 23 Mar 2002 09:11:33 -0000      1.20
  @@ -15,11 +15,13 @@
       INCLUDE page_toc toc=doc.toc;
   
       # render the content
  -    "<!-- SwishCommand index -->";
  +    # index_section is used to break up the doc into sections for indexing
  +
       FOREACH sec = doc.body;
           '<div class="index_section">';
  +        '<!-- SwishCommand index -->';
           sec;
  -        "<br><br>";
  +        "<br><br>\n";
           IF loop.count == loop.size;
               INCLUDE navbar_local_bottom
                   nav=doc.nav
  @@ -27,9 +29,9 @@
           ELSE;
               INCLUDE top_link;
           END;
  -        "<br><br>";
  +        "<br><br>\n";
  +        '<!-- SwishCommand noindex -->';
           "</div>\n\n";
       END;
  -    "<!-- SwishCommand noindex -->";
   %]
  -<!-- end content-->
  \ No newline at end of file
  +<!-- end content-->
  
  
  
  1.11      +1 -1      modperl-docs/tmpl/custom/html/search
  
  Index: search
  ===================================================================
  RCS file: /home/cvs/modperl-docs/tmpl/custom/html/search,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- search    22 Mar 2002 19:22:41 -0000      1.10
  +++ search    23 Mar 2002 09:11:33 -0000      1.11
  @@ -18,7 +18,7 @@
           <td class="menu-border" width="1"><br class="smallbr"></td>
           <td class="search" width="2" align="center">
               <input type="submit" name="submit" value="Search" 
class="submit-but">
  -            <input type="hidden" name="section" value="&quot;[% 
doc.dir.path_from_base %]&quot;">
  +            <input type="hidden" name="sbm" value="&quot;[% 
doc.dir.path_from_base %]&quot;">
            </td>
           <td class="menu-border" width="1"><br class="smallbr"></td>
       </tr>
  
  
  
  1.7       +6 -5      modperl-docs/src/search/README
  
  Index: README
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/search/README,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- README    22 Mar 2002 19:22:41 -0000      1.6
  +++ README    23 Mar 2002 09:11:33 -0000      1.7
  @@ -123,21 +123,22 @@
   =item *
   
   Since we want to be able to search any sub-section of the site, the
  -search form includes the hidden variable C<section>. For example:
  +search form includes the hidden variable C<sbm> (mnemonic: 'search by
  +meta'). For example:
   
  -  <input type="checkbox" name="section" value="docs/1.0/guide" />
  +  <input type="checkbox" name="sbm" value="docs/1.0/guide" />
   
   will search all the documents under I<docs/1.0/guide> directory.
   
  -the correct value for the C<section> variable are set in the template when
  +The correct value for the C<sbm> variable is set in the template when
   the site is created. 
   
   The main search page I</search/swish.cgi>, has multiple checkboxes for
  -the for the C<section> variable so you can limit searches to only selected
  +the C<sbm> variable so you can limit searches to only selected
   sections.
   
   The C<$ENV{MODPERL_SITE}> mentioned earlier is matched against the
  -C<section> variable to extract only the wanted subsets of the hits:
  +C<sbm> variable to extract only the wanted subsets of the hits:
   
     $uri =~ m!$ENV{MODPERL_SITE}{/([^/]+)/.+$!
   
  
  
  
  1.5       +36 -9     modperl-docs/src/search/SwishSpiderConfig.pl
  
  Index: SwishSpiderConfig.pl
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/search/SwishSpiderConfig.pl,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- SwishSpiderConfig.pl      22 Mar 2002 02:02:15 -0000      1.4
  +++ SwishSpiderConfig.pl      23 Mar 2002 09:11:33 -0000      1.5
  @@ -4,7 +4,8 @@
   
   my $base_path = $ENV{MODPERL_SITE} || die "must set \$ENV{MODPERL_SITE}";
   
  -die "Don't use trailing slash in MODPERL_SITE" if $base_path =~ m!/$!;
  +$base_path =~ s[/$][];
  +
   
   
   @servers = (
  @@ -52,18 +53,38 @@
   
   
       my $tree = HTML::TreeBuilder->new;
  -    $tree->parse( ${$params{content}} );  # Why not allow a scalar ref?
  +    $tree->store_comments(1);
  +
  +        $tree->parse( ${$params{content}} );  # Why not allow a scalar ref?
       $tree->eof;
   
  +
  +    # Find the <head> section for use in all split pages
       my $head = $tree->look_down( '_tag', 'head' );
   
  -    for my $section ( $tree->look_down( '_tag', 'div', 'class', 
'index_section' ) ) {
  -        create_page( $head->clone, $section->clone, \%params )
  -    }
  +
  +    # Now create a new "document" for each index_section div
  +    create_page( $head->clone, $_->clone, \%params )
  +        for $tree->look_down( '_tag', 'div', 'class', 'index_section' );
  +
  +
  +    # Indexed the page in sections, just return
  +    return 0 if $params{found};
  +
  +    # No sections found, so index the entire page (probably index.html)
  +
  +    # Strip base_path
  +    #my $url = $params{uri}->as_string;
  +    #$url =~ s/^$base_path//;
  +
  +    my $new_content = $tree->as_HTML(undef,"\t");
  +    output_content( $params{server}, $params{content},
  +                    $params{uri}, $params{response} );
  +
   
       $tree->delete;
   
  -    return !$params{found};  # tell spider.pl to not index the page
  +    return 0; # don't index
   }
   
   sub create_page {
  @@ -95,11 +116,14 @@
   
       # Extract out part of the path to use for limiting searches to parts of 
the document tree.
   
  -    if ( $uri =~ m!$base_path/([^/]+)/.+$! ) {
  -        my $meta = HTML::Element->new('meta', name=> 'section', content => 
$1);
  +    if ( $uri =~ m!$base_path/(.+)$! ) {
  +        my $path = $1;
  +        $path =~ s{[^/]$}{};  # remove file name, if one
  +        my $meta = HTML::Element->new('meta', name=> 'section', content => 
$path);
           $head->push_content( $meta );
       }
  -        
  +
  +
   
       my $body = HTML::Element->new('body');
       my $doc  = HTML::Element->new('html');
  @@ -107,6 +131,9 @@
       $body->push_content( $section );
       $doc->push_content( $head, $body );
   
  +    # If we want to strip the base_path
  +    #my $url = $uri->as_string;
  +    #$url =~ s/$base_path//;
   
       my $new_content = $doc->as_HTML(undef,"\t");
       output_content( $params->{server}, \$new_content,
  
  
  
  1.5       +1 -0      modperl-docs/src/search/swish.conf
  
  Index: swish.conf
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/search/swish.conf,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- swish.conf        3 Mar 2002 11:27:22 -0000       1.4
  +++ swish.conf        23 Mar 2002 09:11:33 -0000      1.5
  @@ -17,3 +17,4 @@
   #BuzzWords in highlighting -- 
   #How about counting highlighted terms individually in the highlight module
   #so every term is highlighted at least once, with a total of say five.
  +
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to