On Mon, Mar 24, 2003 at 01:59:56PM -0500, Mike Simons wrote:
>   Net::HTTP does not play nicely with mod_gzip from apache.
> 
>   Net::HTTP sends 'TE:' headers, mod_gzip looks for 'Accept-encoding:'.
> 
> - Any chance 'Accept-encoding:' can be advertised and 'Content-Encoding:'
>   results can be decoded by Net::HTTP sometime soon?

  So, I have something that works between mod_gzip and Net::HTTP,
using the gzip transfer type.  The data is transparently decompressed
by the HTTP module and block by block decompression is supported.

  Patch attached ... in order for LWP to use this a minor patch is
needed to the http.pm module.

- Who does code review or where do patches go?

    Later,
      Mike Simons


  I'll try to clean it up somewhat tomorrow...

First Draft BUGS:
===
- The HTTP modules advertises support for deflate, but doesn't handle
  that yet... from what I can tell mod_gzip can not send deflate data.
  In order to get deflate working I need to teach mod_gzip to send
  deflate data...

- The documentation isn't updated.

- No attempt was made to support TE and Content-Encoded data at the same
  time.

- No test of this code with Compress::Zlib uninstalled to verify that
  it still works there was done.

- The decompression routine does block by block decompression, but
  in order to do this calls a private Compress::ZLib method
  (_removeGzipHeader) to strip off the gzip header, this is the exact 
  same function that the MemGunzip call makes to prepare the strip the 
  header...
    While it's unclean calling something else's private method
  it would be worse re-implementing the prune here, because it's size is
  dynamic.

- If compression is requested it is important that client code not pay
  attention to the content-length value... that is not the number of
  bytes to read, call the read method until it returns 0 bytes.
--- /usr/share/perl5/Net/HTTP/Methods.pm        Wed Dec  5 10:32:57 2001
+++ /home/msimons/cvs/jobtrack/Fetch/mikes/Net/HTTP/Methods.pm  Mon Mar 24 22:44:27 
2003
@@ -40,6 +40,7 @@ sub http_configure {
     my $peer_http_version = delete $cnf->{PeerHTTPVersion};
     $peer_http_version = "1.0" unless defined $peer_http_version;
     my $send_te = delete $cnf->{SendTE};
+    my $want_compression = delete $cnf->{WantCompression};
     my $max_line_length = delete $cnf->{MaxLineLength};
     $max_line_length = 4*1024 unless defined $max_line_length;
     my $max_header_lines = delete $cnf->{MaxHeaderLines};
@@ -54,6 +55,7 @@ sub http_configure {
     $self->host($host);
     $self->keep_alive($keep_alive);
     $self->send_te($send_te);
+    $self->want_compression($want_compression);
     $self->http_version($http_version);
     $self->peer_http_version($peer_http_version);
     $self->max_line_length($max_line_length);
@@ -69,7 +71,7 @@ sub http_default_port {
 }
 
 # set up property accessors
-for my $method (qw(host keep_alive send_te max_line_length max_header_lines 
peer_http_version)) {
+for my $method (qw(host keep_alive send_te want_compression max_line_length 
max_header_lines peer_http_version)) {
     my $prop_name = "http_" . $method;
     no strict 'refs';
     *$method = sub {
@@ -114,7 +116,8 @@ sub format_request {
 
     my @h;
     my @connection;
-    my %given = (host => 0, "content-length" => 0, "te" => 0);
+    my %given = (host => 0, "content-length" => 0, 
+                 "te" => 0, "accept-encoding" => 0);
     while (@_) {
        my($k, $v) = splice(@_, 0, 2);
        my $lc_k = lc($k);
@@ -158,6 +161,10 @@ sub format_request {
     push(@h2, "Connection: " . join(", ", @connection)) if @connection;
     push(@h2, "Host: ${*$self}{'http_host'}") unless $given{host};
 
+    if ($self->want_compression && !$given{'accept-encoding'} && zlib_ok()) {
+       push(@h2, "Accept-Encoding: gzip, deflate");
+    }
+
     return join($CRLF, "$method $uri HTTP/$ver", @h2, @h, "", $content);
 }
 
@@ -194,13 +201,56 @@ sub write_chunk_eof {
 }
 
 
+#TODO: if compression(gzip), need to strip header, but need enough data 
+#      bytes to do that...
+#      if type == deflate, don't need to strip header first.
+
+# if compression_on,
+#   while ((http_buf < n) && (sysread raw blocks))
+#   { decompress them into http_buf }
+# if http_buf
+#   return data from http_buf
+# else
+#   return data from sysread
 sub my_read {
-    die if @_ > 3;
+    die "wrong number of args to my_read" if @_ > 4;
     my $self = shift;
     my $len = $_[1];
+    my ($buffer, $result);
+
+    ${$_[2]} = 0;                                          # update read count
     for (${*$self}{'http_buf'}) {
+       if (exists ${*$self}{'inflator'}) {            # compression in effect
+           if (!exists ${*$self}{'inflator_loaded'}) {
+               ${$_[2]} += length;                        # update read count
+
+               if (&Compress::Zlib::_removeGzipHeader(\$_) !=
+                       &Compress::Zlib::Z_OK) {
+                   die "gzip header damaged\n";
+               }
+
+               ${*$self}{'inflator_loaded'} = 1;
+               $buffer = $_;
+               my ($out, $status) = ${*$self}{'inflator'}->inflate($buffer);
+               $_ = $out if defined($out);
+           }
+
+           while (($len > length) && (${*$self}{'http_bytes'}) &&
+                  ($result = $self->sysread($buffer, 4096))) {
+               ${$_[2]} += length $buffer;                # update read count
+               my ($out, $status) = ${*$self}{'inflator'}->inflate($buffer);
+               $_ .= $out if defined($out);
+
+               # TODO: how to throw errors?
+               die "decompression failed $status\n" 
+                   if ($status != &Compress::Zlib::Z_OK && 
+                       $status != &Compress::Zlib::Z_STREAM_END);
+           }
+       }
+
        if (length) {
            $_[0] = substr($_, 0, $len, "");
+# print "buffer length = ", length, ", result data length $len, stream read 
${$_[2]}\n";
            return length($_[0]);
        }
        else {
@@ -323,17 +373,22 @@ sub read_response_headers {
 
     # pick out headers that read_entity_body might need
     my @te;
+    my @content_encoding;
     my $content_length;
     for (my $i = 0; $i < @headers; $i += 2) {
        my $h = lc($headers[$i]);
        if ($h eq 'transfer-encoding') {
            push(@te, $headers[$i+1]);
        }
+       elsif ($h eq 'content-encoding') {
+           push(@content_encoding, $headers[$i+1]);
+       }
        elsif ($h eq 'content-length') {
            $content_length = $headers[$i+1];
        }
     }
     ${*$self}{'http_te'} = join(",", @te);
+    ${*$self}{'http_content_encoding'} = join(",", @content_encoding);
     ${*$self}{'http_content_length'} = $content_length;
     ${*$self}{'http_first_body'}++;
     delete ${*$self}{'http_trailers'};
@@ -355,6 +410,8 @@ sub read_entity_body {
        ${*$self}{'http_first_body'} = 0;
        delete ${*$self}{'http_chunked'};
        delete ${*$self}{'http_bytes'};
+       delete ${*$self}{'inflator'};
+       delete ${*$self}{'inflator_loaded'};
        my $method = shift(@{${*$self}{'http_request_method'}});
        my $status = ${*$self}{'http_status'};
        if ($method eq "HEAD" || $status =~ /^(?:1|[23]04)/) {
@@ -404,6 +461,15 @@ sub read_entity_body {
 
            # Read until EOF
        }
+
+       if (defined(my $content_encoding = ${*$self}{'http_content_encoding'})) {
+           my @ce = split ", ", lc($content_encoding);
+           if ($ce[0] eq "gzip" || $ce[0] eq "deflate") {
+               ${*$self}{'inflator'} = Compress::Zlib::inflateInit(
+                       "-WindowBits" => -&Compress::Zlib::MAX_WBITS) or 
+                   die "Can't make inflator";
+           }
+       }
     }
     else {
        $chunked = ${*$self}{'http_chunked'};
@@ -465,15 +531,17 @@ sub read_entity_body {
        return $n;
     }
     elsif (defined $bytes) {
-       unless ($bytes) {
+       unless ($bytes || length ${*$self}{'http_buf'}) {
            $$buf_ref = "";
            return 0;
        }
-       my $n = $bytes;
-       $n = $size if $size && $size < $n;
-       $n = my_read($self, $$buf_ref, $n);
+       my $from_stream = 0;
+       my $n = exists ${*$self}{'inflator'} ?
+           8*1024 : $bytes;                            # bytes left on stream
+       $n = $size if $size && $size < $n;            # limit to request bytes 
+       $n = my_read($self, $$buf_ref, $n, \$from_stream);    # bytes returned
        return undef unless defined $n;
-       ${*$self}{'http_bytes'} = $bytes - $n;
+       ${*$self}{'http_bytes'} = $bytes - $from_stream;
        return $n;
     }
     else {
--- /usr/share/perl5/LWP/Protocol/http.pm       Fri Dec 14 11:33:52 2001
+++ mikes/LWP/Protocol/http.pm  Mon Mar 24 22:47:40 2003
@@ -37,7 +37,8 @@ sub _new_socket
                                        Proto    => 'tcp',
                                        Timeout  => $timeout,
                                        KeepAlive => !!$conn_cache,
-                                       SendTE    => 1,
+                                       SendTE    => 0,
+                                       WantCompression => 1,
                                        $self->_extra_sock_opts($host, $port),
                                       );
 

Reply via email to