This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new f8ae02cc79e SOLR-17740: V2 API: fix raw file uploads missing 1st byte (#3322)
f8ae02cc79e is described below

commit f8ae02cc79ea25b5163249c9f4053b74ee02a286
Author: David Smiley <[email protected]>
AuthorDate: Fri Apr 18 07:52:12 2025 -0400

    SOLR-17740: V2 API: fix raw file uploads missing 1st byte (#3322)
    
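    SolrRequestParsers can consume one byte of the underlying stream (it read a
    byte to check whether a body exists and then pushed it back onto a wrapping
    stream), but V2 was ignoring the SolrQueryRequest content stream and so
    missed that first byte.  The fix is to detect an HTTP body in a more
    standards-compliant way instead of reading a byte to find out.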
    
    (cherry picked from commit 4aa321635d6fd71e9e0812c97639886ebc229735)
---
 solr/CHANGES.txt                                   |  6 +++--
 .../apache/solr/servlet/SolrRequestParsers.java    | 28 +++++++++-------------
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2162207a4f2..4e504b2fb90 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -27,7 +27,7 @@ New Features
 
 Improvements
 ---------------------
-* SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments" 
APIs, which can be used to fetch detailed information about 
+* SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments" 
APIs, which can be used to fetch detailed information about
   specific collections or cores.  Collection information can be fetched by a 
call to `GET /api/collections/collectionName`, and core
   information with a call to `GET /api/cores/coreName/segments`. (Jason 
Gerlowski)
 
@@ -81,6 +81,8 @@ Bug Fixes
 * SOLR-12831: Clean up shard metadata in ZooKeeper nodes after shard deletion 
is invoked. This makes sure Zookeeper
  nodes for leader election and terms are not left behind (Andy Vuong, Pierre 
Salagnac).
 
+* SOLR-17740: When the V2 API is receiving raw files, it could sometimes skip 
the first byte. (David Smiley)
+
 Dependency Upgrades
 ---------------------
 * SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)
@@ -89,7 +91,7 @@ Dependency Upgrades
 
 Other Changes
 ---------------------
-* SOLR-17579: Remove unused code and other refactorings in ReplicationHandler 
and tests.  Removed unused public 
+* SOLR-17579: Remove unused code and other refactorings in ReplicationHandler 
and tests.  Removed unused public
   LOCAL_ACTIVITY_DURING_REPLICATION variable. (Eric Pugh)
 
 * GITHUB#2869: SolrTestCase now supports @LogLevel annotations (as 
SolrTestCaseJ4 has).  Added LogLevelTestRule
diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java 
b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
index ee459c29231..83dcb65c9b2 100644
--- a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
+++ b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
@@ -22,7 +22,6 @@ import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.lang.invoke.MethodHandles;
 import java.net.URI;
 import java.nio.ByteBuffer;
@@ -39,8 +38,8 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import javax.servlet.MultipartConfigElement;
-import javax.servlet.ServletInputStream;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.Part;
 import org.apache.commons.io.input.CloseShieldInputStream;
@@ -600,26 +599,21 @@ public class SolrRequestParsers {
 
   /** The raw parser just uses the params directly */
   static class RawRequestParser implements SolrRequestParser {
+
+    // Methods that shouldn't have a body according to HTTP spec
+    private static Set<String> NO_BODY_METHODS = Set.of("GET", "HEAD", 
"DELETE");
+
     @Override
     public SolrParams parseParamsAndFillStreams(
         final HttpServletRequest req, ArrayList<ContentStream> streams) throws 
Exception {
-      // If we wrongly add a stream that actually has no content, then it can 
confuse
-      //  some of our code that sees a stream but has no content-type.
-      // If we wrongly don't add a stream, then obviously we'll miss data.
-      final ServletInputStream inputStream = req.getInputStream(); // don't 
close it
-      if (req.getContentLengthLong() >= 0
+      if (req.getContentLengthLong() > 0
           || req.getHeader("Transfer-Encoding") != null
-          || inputStream.available() > 0) {
-        streams.add(new HttpRequestContentStream(req, inputStream));
-      } else if (!req.getMethod().equals("GET")) { // GET shouldn't have data
-        // We're not 100% sure there is no data, so check by reading a byte 
(and put back).
-        PushbackInputStream pbInputStream = new 
PushbackInputStream(inputStream);
-        int b = pbInputStream.read();
-        if (b != -1) {
-          pbInputStream.unread(b); // put back
-          streams.add(new HttpRequestContentStream(req, pbInputStream));
-        }
+          || !NO_BODY_METHODS.contains(req.getMethod())) {
+        // If Content-Length > 0 OR Transfer-Encoding exists OR
+        // it's a method that can have a body (POST/PUT/PATCH etc)
+        streams.add(new HttpRequestContentStream(req, req.getInputStream()));
       }
+
       return parseQueryString(req.getQueryString());
     }
   }

Reply via email to