stas        2002/08/14 02:29:47

  Modified:    src/docs/2.0/user/handlers handlers.pod
  Log:
  improving the snooping filter
  
  Revision  Changes    Path
  1.10      +263 -81   modperl-docs/src/docs/2.0/user/handlers/handlers.pod
  
  Index: handlers.pod
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/docs/2.0/user/handlers/handlers.pod,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- handlers.pod      13 Aug 2002 15:57:07 -0000      1.9
  +++ handlers.pod      14 Aug 2002 09:29:47 -0000      1.10
  @@ -944,15 +944,15 @@
     use Apache::Const -compile => qw(OK DECLINED);
     use APR::Const -compile => ':common';
     
  -  sub connection : FilterConnectionHandler { snoop(@_) }
  -  sub request    : FilterRequestHandler    { snoop(@_) }
  +  sub connection : FilterConnectionHandler { snoop("connection", @_) }
  +  sub request    : FilterRequestHandler    { snoop("request",    @_) }
     
     sub snoop {
  -      my($filter, $bb, $mode, $block, $readbytes) = @_;
  +      my $type = shift;
  +      my($filter, $bb, $mode, $block, $readbytes) = @_; # filter args
     
         # $mode, $block, $readbytes are passed only for input filters
  -      my $stream = defined $mode ? "input"   : "output";
  -      my $phase  = $filter->r    ? "request" : "connection";
  +      my $stream = defined $mode ? "input" : "output";
     
         # read the data and pass-through the bucket brigades unchanged
         my $ra_data = '';
  @@ -969,12 +969,15 @@
             return $rv unless $rv == APR::SUCCESS;
         }
     
  -      # send the sniffed info to stderr so not to interfere with normal
  +      # send the sniffed info to STDERR so not to interfere with normal
         # output
  -      warn "[$phase ($stream)]\n";
  +      my $direction = $stream eq 'output' ? ">>>" : "<<<";
  +      print STDERR "\n$direction $type $stream filter\n";
  +      my $c = 1;
         while (my($btype, $data) = splice @$ra_data, 0, 2) {
  -          $data = join "", map "        $_\n", split /\n/, $data;
  -          warn "    $btype:\n$data\n";
  +          print STDERR "    o bucket $c: $btype\n";
  +          print STDERR "[$data]\n";
  +          $c++;
         }
     
         return Apache::OK;
  @@ -996,12 +999,15 @@
   This package provides two filter handlers, one for connection and
   another for request filtering:
   
  -  sub connection : FilterConnectionHandler { snoop(@_) }
  -  sub request    : FilterRequestHandler    { snoop(@_) }
  +  sub connection : FilterConnectionHandler { snoop("connection", @_) }
  +  sub request    : FilterRequestHandler    { snoop("request",    @_) }
   
   Both handlers forward their arguments to the C<snoop()> function that
   does the real job. We needed to add these two subroutines in order to
  -assign the two different attributes.
  +assign the two different attributes. In addition, the functions pass
  +the filter type to C<snoop()> as the first argument, which gets shifted
  +off C<@_>; the rest of C<@_> holds the arguments that were
  +originally passed to the filter handler.
   
   It's easy to know whether a filter handler is running in the input or
   the output mode. The arguments C<$filter> and C<$bb> are always
  @@ -1058,40 +1064,49 @@
   work.
   
   First we can see the connection input filter at work, as it processes
  -the HTTP headers. We can see that each header is put into a separate
  -brigade with a single bucket:
  -
  -  [connection (input)]
  -      HEAP:
  -          POST /dump?foo=1&bar=2 HTTP/1.1
  -  
  -  [connection (input)]
  -      HEAP:
  -          TE: deflate,gzip;q=0.3
  -  
  -  [connection (input)]
  -      HEAP:
  -          Connection: TE, close
  -  
  -  [connection (input)]
  -      HEAP:
  -          Host: localhost:8008
  -  
  -  [connection (input)]
  -      HEAP:
  -          User-Agent: lwp-request/2.01
  -  
  -  [connection (input)]
  -      HEAP:
  -          Content-Length: 15
  -  
  -  [connection (input)]
  -      HEAP:
  -          Content-Type: application/x-www-form-urlencoded
  -  
  -  [connection (input)]
  -      HEAP:
  -          
  +the HTTP headers. We can see that for this request each header is put
  +into a separate brigade with a single bucket. The data is conveniently
  +enclosed by C<[]> so you can see the new line characters as well.
  +
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [POST /dump?foo=1&bar=2 HTTP/1.1
  +  ]
  +  
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [TE: deflate,gzip;q=0.3
  +  ]
  +  
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [Connection: TE, close
  +  ]
  +  
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [Host: localhost:8008
  +  ]
  +  
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [User-Agent: lwp-request/2.01
  +  ]
  +  
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [Content-Length: 14
  +  ]
  +  
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [Content-Type: application/x-www-form-urlencoded
  +  ]
  +  
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [
  +  ]
   
   Here the HTTP header has been terminated by a double new line. So far
   all the buckets were of the I<HEAP> type, meaning that they were
  @@ -1102,15 +1117,15 @@
   The following two entries are generated when
   C<MyApache::Dump::handler> reads the POSTed content:
   
  -  [connection (input)]
  -      HEAP:
  -          mod_perl rules
  -  
  -  [request (input)]
  -      HEAP:
  -          mod_perl rules
  -  
  -      EOS:
  +  <<< connection input filter
  +      o bucket 1: HEAP
  +  [mod_perl rules]
  +  
  +  <<< request input filter
  +      o bucket 1: HEAP
  +  [mod_perl rules]
  +      o bucket 2: EOS
  +  []
   
   as we saw earlier on the diagram, the connection input filter is run
   before the request input filter. Since our connection input filter was
  @@ -1124,12 +1139,13 @@
   response. However only the request output filter is filtering it at
   this point:
   
  -  [request (output)]
  -      TRANSIENT:
  -          args:
  -          foo=1&bar=2
  -          content:
  -          mod_perl rules
  +  >>> request output filter
  +      o bucket 1: TRANSIENT
  +  [args:
  +  foo=1&bar=2
  +  content:
  +  mod_perl rules
  +  ]
   
   This happens because Apache hasn't sent yet the response HTTP headers
   to the client. Apache postpones the header sending so it can calculate
  @@ -1142,35 +1158,38 @@
   connection output filters (notice that the request output filters
   don't see it):
   
  -  [connection (output)]
  -      HEAP:
  -          HTTP/1.1 200 OK
  -          Date: Tue, 13 Aug 2002 12:36:52 GMT
  -          Server: Apache/2.0.40-dev (Unix) mod_perl/1.99_05-dev 
  -          Perl/v5.8.0 mod_ssl/2.0.40-dev OpenSSL/0.9.6d DAV/2
  -          Content-Length: 43
  -          Connection: close
  -          Content-Type: text/plain; charset=ISO-8859-1
  -          
  +  >>> connection output filter
  +      o bucket 1: HEAP
  +  [HTTP/1.1 200 OK
  +  Date: Wed, 14 Aug 2002 07:31:53 GMT
  +  Server: Apache/2.0.41-dev (Unix) mod_perl/1.99_05-dev 
  +  Perl/v5.8.0 mod_ssl/2.0.41-dev OpenSSL/0.9.6d DAV/2
  +  Content-Length: 42
  +  Connection: close
  +  Content-Type: text/plain; charset=ISO-8859-1
  +  
  +  ]
   
   Now the response body in the bucket of type I<HEAP> is passed through
   the connection output filter, followed by the I<EOS> bucket to mark
   the end of the request:
   
  -  [connection (output)]
  -      HEAP:
  -          args:
  -          foo=1&bar=2
  -          content:
  -          mod_perl rules
  -  
  -      EOS:
  +  >>> connection output filter
  +      o bucket 1: HEAP
  +  [args:
  +  foo=1&bar=2
  +  content:
  +  mod_perl rules
  +  ]
  +      o bucket 2: EOS
  +  []
   
   Finally the output is flushed, to make sure that any buffered output
   is sent to the client:
   
  -  [connection (output)]
  -      FLUSH:
  +  >>> connection output filter
  +      o bucket 1: FLUSH
  +  []
   
   This module helps to understand that each filter handler can be called
   many time during each request and connection. It's called for each
  @@ -1182,6 +1201,169 @@
   input filter won't be called.
   
   
  +
  +=head2 Connection Input Filter
  +
  +Let's say that we want to test how our handlers behave when they are
  +requested as C<HEAD> requests, rather than C<GET>. We can alter the
  +request headers at the incoming connection level transparently to all
  +handlers. So here is the input filter handler that does that by
  +directly manipulating the bucket brigades:
  +
  +  file:MyApache/InputFilterGET2HEAD.pm
  +  -----------------------------------
  +  package MyApache::InputFilterGET2HEAD;
  +  
  +  use strict;
  +  use warnings;
  +  
  +  use base qw(Apache::Filter);
  +  
  +  use Apache::RequestRec ();
  +  use Apache::RequestIO ();
  +  use APR::Brigade ();
  +  use APR::Bucket ();
  +  
  +  use Apache::Const -compile => 'OK';
  +  use APR::Const -compile => ':common';
  +  
  +  sub handler : FilterConnectionHandler {
  +      my($filter, $bb, $mode, $block, $readbytes) = @_;
  +  
  +      my $c = $filter->c;
  +      my $ctx_bb = APR::Brigade->new($c->pool, $c->bucket_alloc);
  +      my $rv = $filter->next->get_brigade($ctx_bb, $mode, $block, $readbytes);
  +      return $rv unless $rv == APR::SUCCESS;
  +  
  +      while (!$ctx_bb->empty) {
  +          my $bucket = $ctx_bb->first;
  +  
  +          $bucket->remove;
  +  
  +          if ($bucket->is_eos) {
  +              $bb->insert_tail($bucket);
  +              last;
  +          }
  +  
  +          my $data;
  +          my $status = $bucket->read($data);
  +          return $status unless $status == APR::SUCCESS;
  +  
  +          if ($data and $data =~ s|^GET|HEAD|) {
  +              $bucket = APR::Bucket->new($data);
  +          }
  +  
  +          $bb->insert_tail($bucket);
  +      }
  +  
  +      Apache::OK;
  +  }
  +  
  +  1;
  +
  +The filter handler is called for each bucket brigade, which in turn
  +includes buckets with data. The gist of any filter handler is to
  +retrieve the bucket brigade sent from the previous filter, prepare a
  +new empty brigade, and move buckets from the former brigade to the
  +latter optionally modifying the buckets on the way, which may include
  +removing or adding new buckets. Of course if the filter doesn't want
  +to modify any of the buckets it may decide to pass through the
  +original brigade without doing any work.
  +
  +In our example the handler first removes the bucket at the top of the
  +brigade and looks at its type. If it sees an end of stream, that
  +removed bucket is linked to the tail of the bucket brigade that will
  +go to the next filter and it doesn't attempt to read any more
  +buckets. If this event doesn't happen the handler reads the data from
  +that bucket and if it finds that the data is of interest to us, it
  +modifies the data, creates a new bucket using the modified data and
  +links it to the tail of the outgoing brigade, while discarding the
  +original bucket. In our case the interesting data is anything that
  +matches the regex C</^GET/>. If the data is not interesting to the
  +handler, it simply links the unmodified bucket to the outgoing
  +brigade.
  +
  +The handler looks for data like:
  +
  +  GET /perl/test.pl HTTP/1.1
  +
  +and turns it into:
  +
  +  HEAD /perl/test.pl HTTP/1.1
  +
  +For example, consider the following response handler:
  +
  +  file:MyApache/RequestType.pm
  +  ---------------------------
  +  package MyApache::RequestType;
  +  
  +  use strict;
  +  use warnings;
  +  
  +  use Apache::Const -compile => 'OK';
  +  
  +  sub handler {
  +      my $r = shift;
  +      $r->content_type('text/plain');
  +      $r->print("the request type was " . $r->method);
  +      Apache::OK;
  +  }
  +  1;
  +
  +which returns to the client the request type it has issued. In the
  +case of the C<HEAD> request Apache will discard the response body, but
  +it will still set the correct C<Content-Length> header, which will
  +be 24 in case of the C<GET> request and 25 for C<HEAD>. Therefore if
  +this response handler is configured as:
  +
  +  Listen 8005
  +  <VirtualHost _default_:8005>
  +      <Location />
  +          SetHandler modperl
  +          PerlResponseHandler +MyApache::RequestType
  +      </Location>
  +  </VirtualHost>
  +
  +and a C<GET> request is issued to I</>:
  +
  +  panic% perl -MLWP::UserAgent -le \
  +  '$r = LWP::UserAgent->new()->get("http://localhost:8005/"); \
  +  print $r->headers->content_length . ": ".  $r->content'
  +  24: the request type was GET
  +
  +where the response's body is:
  +
  +  the request type was GET
  +
  +And the C<Content-Length> header is set to 24.
  +
  +However if we enable the C<MyApache::InputFilterGET2HEAD> input
  +connection filter:
  +
  +  Listen 8005
  +  <VirtualHost _default_:8005>
  +      PerlInputFilterHandler +MyApache::InputFilterGET2HEAD
  +  
  +      <Location />
  +          SetHandler modperl
  +          PerlResponseHandler +MyApache::RequestType
  +      </Location>
  +  </VirtualHost>
  +
  +And issue the same C<GET> request, we get only:
  +
  +  25: 
  +
  +which means that the body was discarded by Apache, because our filter
  +turned the C<GET> request into a C<HEAD> request and if Apache wasn't
  +discarding the body on C<HEAD>, the response would be:
  +
  +  the request type was HEAD
  +
  +that's why the content length is reported as 25 and not 24 as in the
  +real GET request.
  +
  +=head2 Request Input Filter
   
   =head2 Bucket Brigades and Stream-Oriented Request Output Filters
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to