This is an automated email from the ASF dual-hosted git repository. ndipiazza pushed a commit to branch TIKA-4679-http2-server-support in repository https://gitbox.apache.org/repos/asf/tika.git
commit 6ced8d48b4bb2f5ab40f026021e11d8868066044 Author: Nicholas DiPiazza <[email protected]> AuthorDate: Wed Mar 4 10:29:55 2026 -0600 TIKA-4679: Add e2e test module for HTTP/2 tika-server - Add tika-e2e-tests/tika-server module with TikaServerHttp2Test - Test starts the real fat-jar and verifies HTTP/2 (h2c) responses via Java HttpClient configured with Version.HTTP_2 - Wire module into tika-e2e-tests/pom.xml modules list - Module is skipped by default; enable with -Pe2e profile Co-authored-by: Copilot <[email protected]> --- tika-e2e-tests/pom.xml | 1 + tika-e2e-tests/tika-server/pom.xml | 111 ++++++++++++ .../tika/server/e2e/TikaServerHttp2Test.java | 193 +++++++++++++++++++++ tika-parent/pom.xml | 5 + 4 files changed, 310 insertions(+) diff --git a/tika-e2e-tests/pom.xml b/tika-e2e-tests/pom.xml index e87d0149c5..818c438780 100644 --- a/tika-e2e-tests/pom.xml +++ b/tika-e2e-tests/pom.xml @@ -59,6 +59,7 @@ <modules> <module>tika-grpc</module> + <module>tika-server</module> </modules> <dependencyManagement> diff --git a/tika-e2e-tests/tika-server/pom.xml b/tika-e2e-tests/tika-server/pom.xml new file mode 100644 index 0000000000..0cb54144b3 --- /dev/null +++ b/tika-e2e-tests/tika-server/pom.xml @@ -0,0 +1,111 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.tika</groupId> + <artifactId>tika-e2e-tests</artifactId> + <version>${revision}</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>tika-e2e-tests-server</artifactId> + <packaging>jar</packaging> + <name>Apache Tika E2E Tests: REST Server</name> + <description>End-to-end tests for tika-server-standard, including HTTP/2 support verification</description> + + <properties> + <!-- Path to the tika-server-standard fat-jar built in the same reactor --> + <tika.server.jar>${project.basedir}/../../tika-server/tika-server-standard/target/tika-server-standard-${revision}.jar</tika.server.jar> + </properties> + + <dependencies> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-api</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-engine</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-core</artifactId> + </dependency> + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-slf4j2-impl</artifactId> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <!-- Skip by default; run with -Pe2e --> + <skipTests>true</skipTests> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.rat</groupId> + 
<artifactId>apache-rat-plugin</artifactId> + <configuration> + <inputExcludes> + <inputExclude>**/README*.md</inputExclude> + <inputExclude>src/test/resources/**</inputExclude> + </inputExcludes> + </configuration> + </plugin> + </plugins> + </build> + + <profiles> + <profile> + <id>e2e</id> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <skipTests>false</skipTests> + <systemPropertyVariables> + <tika.server.jar>${tika.server.jar}</tika.server.jar> + </systemPropertyVariables> + </configuration> + </plugin> + </plugins> + </build> + </profile> + </profiles> +</project> diff --git a/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java b/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java new file mode 100644 index 0000000000..d54877dfb6 --- /dev/null +++ b/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.server.e2e; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.ServerSocket; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * End-to-end test verifying that tika-server-standard supports HTTP/2 (h2c cleartext). + * + * Starts the real fat-jar, sends a request using Java's HttpClient configured for HTTP/2, + * and asserts the response was served over HTTP/2. This validates the runtime classpath + * has the Jetty http2-server jar and CXF negotiates h2c correctly. + * + * Run with: mvn test -pl tika-e2e-tests/tika-server -Pe2e + * + * Inspired by Lawrence Moorehead's original contribution (elemdisc/tika PR#1, TIKA-4679). 
+ */
+@Tag("E2ETest")
+public class TikaServerHttp2Test {
+
+    private static final Logger log = LoggerFactory.getLogger(TikaServerHttp2Test.class);
+    private static final long SERVER_STARTUP_TIMEOUT_MS = 60_000;
+    /** Grace period for a polite shutdown before the server process is killed. */
+    private static final long SERVER_SHUTDOWN_TIMEOUT_MS = 10_000;
+    /** Upper bound for one request, so a wedged server fails the test instead of hanging it. */
+    private static final Duration REQUEST_TIMEOUT = Duration.ofSeconds(30);
+    private static final String STATUS_PATH = "/status";
+    /** Location of the fat-jar module relative to the repository root. */
+    private static final String SERVER_MODULE_PATH = "tika-server/tika-server-standard";
+
+    private Process serverProcess;
+    private int port;
+    private String endPoint;
+
+    /**
+     * Launches the tika-server-standard fat-jar on a free local port and blocks until it
+     * answers HTTP requests.
+     *
+     * The jar is taken from the {@code tika.server.jar} system property; when that property
+     * is not set, the conventional build output location inside the source tree is used.
+     *
+     * @throws IllegalStateException if the jar cannot be found or the server does not come up
+     */
+    @BeforeEach
+    void startServer() throws Exception {
+        port = findFreePort();
+        endPoint = "http://localhost:" + port;
+
+        String jarPath = System.getProperty("tika.server.jar");
+        if (jarPath == null) {
+            jarPath = locateDefaultServerJar();
+        }
+        // Fail fast with a useful message instead of polling a server that can never start.
+        if (!Paths.get(jarPath).toFile().isFile()) {
+            throw new IllegalStateException("tika-server jar not found: " + jarPath
+                    + ". Build tika-server-standard or pass -Dtika.server.jar=/path/to/tika-server-standard.jar");
+        }
+
+        log.info("Starting tika-server-standard from: {}", jarPath);
+        ProcessBuilder pb = new ProcessBuilder(
+                javaExecutable(), "-jar", jarPath,
+                "-p", String.valueOf(port),
+                "-h", "localhost"
+        );
+        pb.redirectErrorStream(true);
+        Process process = pb.start();
+        serverProcess = process;
+
+        // Drain output in background so the process doesn't block on a full pipe
+        Thread drainThread = new Thread(() -> {
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(process.getInputStream(), UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    log.debug("tika-server: {}", line);
+                }
+            } catch (Exception e) {
+                log.debug("Server output stream closed", e);
+            }
+        });
+        drainThread.setDaemon(true);
+        drainThread.start();
+
+        awaitServerStartup();
+    }
+
+    /**
+     * Stops the server started by {@link #startServer()}. A normal termination is requested
+     * first; if the process has not exited after {@link #SERVER_SHUTDOWN_TIMEOUT_MS} it is
+     * killed so that a stuck server cannot hang the build.
+     */
+    @AfterEach
+    void stopServer() throws Exception {
+        if (serverProcess == null || !serverProcess.isAlive()) {
+            return;
+        }
+        serverProcess.destroy();
+        boolean exited = serverProcess.waitFor(SERVER_SHUTDOWN_TIMEOUT_MS,
+                java.util.concurrent.TimeUnit.MILLISECONDS);
+        if (!exited) {
+            log.warn("tika-server did not exit within {} ms; killing it", SERVER_SHUTDOWN_TIMEOUT_MS);
+            serverProcess.destroyForcibly().waitFor();
+        }
+    }
+
+    /** Verifies that a GET on the status endpoint is answered with 200 over HTTP/2. */
+    @Test
+    void testH2cStatusEndpoint() throws Exception {
+        HttpClient httpClient = newHttp2Client();
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(endPoint + STATUS_PATH))
+                .timeout(REQUEST_TIMEOUT)
+                .header("Accept", "application/json")
+                .GET()
+                .build();
+
+        HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString(UTF_8));
+
+        assertEquals(200, response.statusCode(), "Expected 200 from /status");
+        assertEquals(HttpClient.Version.HTTP_2, response.version(),
+                "Expected HTTP/2 protocol; server may be missing http2-server on classpath");
+        log.info("HTTP/2 h2c verified: {} {}", response.statusCode(), response.version());
+    }
+
+    /** Verifies that a PUT of a small text document to /tika is answered with 200 over HTTP/2. */
+    @Test
+    void testH2cParseEndpoint() throws Exception {
+        HttpClient httpClient = newHttp2Client();
+
+        // Send a small plain-text document for parsing
+        byte[] body = "Hello, HTTP/2 world!".getBytes(UTF_8);
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(endPoint + "/tika"))
+                .timeout(REQUEST_TIMEOUT)
+                .header("Content-Type", "text/plain")
+                .PUT(HttpRequest.BodyPublishers.ofByteArray(body))
+                .build();
+
+        HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString(UTF_8));
+
+        assertEquals(200, response.statusCode(), "Expected 200 from /tika");
+        assertEquals(HttpClient.Version.HTTP_2, response.version(),
+                "Expected HTTP/2 protocol on /tika endpoint");
+        log.info("HTTP/2 parse endpoint verified: {} bytes returned over {}", response.body().length(), response.version());
+    }
+
+    /** Builds a client that prefers HTTP/2 for every request. */
+    private static HttpClient newHttp2Client() {
+        return HttpClient.newBuilder()
+                .version(HttpClient.Version.HTTP_2)
+                .build();
+    }
+
+    /**
+     * Polls the server root once per second until it answers 200.
+     *
+     * @throws IllegalStateException if the server process exits, or does not answer within
+     *         {@link #SERVER_STARTUP_TIMEOUT_MS}
+     * @throws InterruptedException if the waiting thread is interrupted
+     */
+    private void awaitServerStartup() throws Exception {
+        // Use HTTP/1.1 for the health-check poll so we don't depend on HTTP/2 during startup
+        HttpClient pollClient = HttpClient.newBuilder()
+                .version(HttpClient.Version.HTTP_1_1)
+                .connectTimeout(Duration.ofSeconds(2))
+                .build();
+        HttpRequest pollRequest = HttpRequest.newBuilder()
+                .uri(URI.create(endPoint + "/"))
+                // connectTimeout alone does not bound a request on an accepted but silent connection
+                .timeout(Duration.ofSeconds(5))
+                .GET()
+                .build();
+
+        Instant deadline = Instant.now().plusMillis(SERVER_STARTUP_TIMEOUT_MS);
+        while (Instant.now().isBefore(deadline)) {
+            // A dead child can never become ready; report it now rather than after the timeout.
+            if (!serverProcess.isAlive()) {
+                throw new IllegalStateException(
+                        "tika-server exited during startup with code " + serverProcess.exitValue());
+            }
+            try {
+                HttpResponse<Void> resp = pollClient.send(pollRequest, HttpResponse.BodyHandlers.discarding());
+                if (resp.statusCode() == 200) {
+                    log.info("tika-server ready on port {}", port);
+                    return;
+                }
+            } catch (InterruptedException e) {
+                // Interruption means "stop waiting"; it must not be treated as "server not ready yet".
+                throw e;
+            } catch (Exception e) {
+                log.debug("Waiting for server on port {} ...", port);
+            }
+            Thread.sleep(1000);
+        }
+        throw new IllegalStateException("tika-server did not start within " + SERVER_STARTUP_TIMEOUT_MS + " ms");
+    }
+
+    /**
+     * Computes the conventional fat-jar location by walking up from the working directory to
+     * the first ancestor that contains {@code tika-server/tika-server-standard}.
+     *
+     * Searching for just {@code tika-server} is not enough: this test module is itself a
+     * directory named {@code tika-server}, so its parent {@code tika-e2e-tests} would be
+     * mistaken for the repository root.
+     */
+    private static String locateDefaultServerJar() {
+        Path candidate = Paths.get("").toAbsolutePath();
+        while (candidate != null && !candidate.resolve(SERVER_MODULE_PATH).toFile().isDirectory()) {
+            candidate = candidate.getParent();
+        }
+        if (candidate == null) {
+            throw new IllegalStateException("Cannot locate tika root. Pass -Dtika.server.jar=/path/to/tika-server-standard.jar");
+        }
+        String jarName = "tika-server-standard-"
+                + System.getProperty("tika.version", "4.0.0-SNAPSHOT") + ".jar";
+        return candidate.resolve(SERVER_MODULE_PATH)
+                .resolve("target")
+                .resolve(jarName)
+                .toString();
+    }
+
+    /** Returns the launcher of the JVM running this test, so the server uses the same Java version. */
+    private static String javaExecutable() {
+        return Paths.get(System.getProperty("java.home"), "bin", "java").toString();
+    }
+
+    /**
+     * Asks the OS for an unused port. The probe socket is closed before the server binds, so
+     * another process could in principle take the port in between.
+     */
+    private static int findFreePort() throws Exception {
+        try (ServerSocket s = new ServerSocket(0)) {
+            return s.getLocalPort();
+        }
+    }
+}
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index a5d730dfc0..f66896ea4d 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -586,6 +586,11 @@
         <artifactId>http2-common</artifactId>
         <version>${jetty.http2.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty.http2</groupId>
+        <artifactId>http2-server</artifactId>
+        <version>${jetty.http2.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.jsoup</groupId>
         <artifactId>jsoup</artifactId>
