This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4625
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 7751b86fc92f864d1067feee45b57b9d533f4543
Author: tallison <[email protected]>
AuthorDate: Fri Jan 16 07:07:38 2026 -0500

    TIKA-4582: Add AsciiDoc documentation module
    
    - Create docs module with asciidoctor-maven-plugin (builds only during 
apache-release)
    - Add documentation structure:
      - using-tika/: java-api, cli, server, grpc (with getting started guide)
      - pipes/: scalable document processing
      - configuration/: JSON and XML options
      - migration-to-4x/: migration guides, metadata changes, design notes, 
serialization
      - advanced/: robustness, spooling
      - maintainers/: release guides for Tika, Docker, Helm, gRPC
    - Migrate content from Confluence wiki:
      - Release processes for all components
      - Design notes and serialization architecture for 4.x
      - Roadmap with version support matrix
      - Robustness guide (updated for 4.x with Pipes as recommended approach)
    - Add security page linking to security model and CVEs
    - Add FAQ stub
    - Add tika.stable.version property for versioned links (formats, API docs)
    - Add external links: website, JIRA, CI, snapshots, Confluence (with 
deprecation note)
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-Authored-By: Claude Opus 4.5 <[email protected]>
---
 docs/pom.xml                                       |  92 +++++++
 docs/src/assembly/docs.xml                         |  37 +++
 docs/src/main/asciidoc/advanced/index.adoc         |  31 +++
 docs/src/main/asciidoc/advanced/robustness.adoc    | 137 +++++++++++
 .../{ => src/main/asciidoc/advanced}/spooling.adoc |   0
 docs/src/main/asciidoc/configuration/index.adoc    |  34 +++
 docs/src/main/asciidoc/faq.adoc                    |  28 +++
 docs/src/main/asciidoc/index.adoc                  |  72 ++++++
 docs/src/main/asciidoc/maintainers/index.adoc      |  29 +++
 .../maintainers/release-guides/docker.adoc         | 133 ++++++++++
 .../asciidoc/maintainers/release-guides/grpc.adoc  |  32 +++
 .../asciidoc/maintainers/release-guides/helm.adoc  | 138 +++++++++++
 .../asciidoc/maintainers/release-guides/index.adoc |  32 +++
 .../asciidoc/maintainers/release-guides/tika.adoc  | 271 +++++++++++++++++++++
 .../asciidoc/migration-to-4x/design-notes-4x.adoc  | 127 ++++++++++
 docs/src/main/asciidoc/migration-to-4x/index.adoc  |  32 +++
 .../migration-to-4x/metadata-changes-4x.adoc       | 121 +++++++++
 .../asciidoc/migration-to-4x/migrating-to-4x.adoc  |  46 ++++
 .../asciidoc/migration-to-4x/serialization-4x.adoc | 101 ++++++++
 docs/src/main/asciidoc/pipes/index.adoc            |  37 +++
 docs/src/main/asciidoc/roadmap.adoc                |  96 ++++++++
 docs/src/main/asciidoc/security.adoc               |  34 +++
 docs/src/main/asciidoc/using-tika/cli/index.adoc   |  39 +++
 docs/src/main/asciidoc/using-tika/grpc/index.adoc  |  32 +++
 docs/src/main/asciidoc/using-tika/index.adoc       |  65 +++++
 .../using-tika/java-api/getting-started.adoc       | 130 ++++++++++
 .../main/asciidoc/using-tika/java-api/index.adoc   |  38 +++
 .../src/main/asciidoc/using-tika/server/index.adoc |  42 ++++
 pom.xml                                            |   3 +
 29 files changed, 2009 insertions(+)

diff --git a/docs/pom.xml b/docs/pom.xml
new file mode 100644
index 0000000000..4f979c6f97
--- /dev/null
+++ b/docs/pom.xml
@@ -0,0 +1,92 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika</artifactId>
+        <version>4.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>tika-docs</artifactId>
+    <packaging>pom</packaging>
+    <name>Apache Tika Documentation</name>
+
+    <properties>
+        <!-- Update this when a new stable version is released -->
+        <tika.stable.version>3.2.3</tika.stable.version>
+    </properties>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.asciidoctor</groupId>
+                <artifactId>asciidoctor-maven-plugin</artifactId>
+                <version>3.2.0</version>
+                <executions>
+                    <execution>
+                        <id>output-html</id>
+                        <phase>generate-resources</phase>
+                        <goals>
+                            <goal>process-asciidoc</goal>
+                        </goals>
+                        <configuration>
+                            <doctype>article</doctype>
+                            <attributes>
+                                <source-highlighter>coderay</source-highlighter>
+                                <toc />
+                                <linkcss>false</linkcss>
+                                <icons>font</icons>
+                                <tika-stable-version>${tika.stable.version}</tika-stable-version>
+                            </attributes>
+                        </configuration>
+                    </execution>
+                </executions>
+                <configuration>
+                    <sourceDirectory>src/main/asciidoc</sourceDirectory>
+                    <preserveDirectories>true</preserveDirectories>
+                </configuration>
+            </plugin>
+
+            <!-- Maven Assembly plugin to create tar.gz -->
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>make-docs-archive</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                        <configuration>
+                            <descriptors>
+                                <descriptor>src/assembly/docs.xml</descriptor>
+                            </descriptors>
+                            <finalName>${project.artifactId}-${project.version}</finalName>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
diff --git a/docs/src/assembly/docs.xml b/docs/src/assembly/docs.xml
new file mode 100644
index 0000000000..5a4b5c5746
--- /dev/null
+++ b/docs/src/assembly/docs.xml
@@ -0,0 +1,37 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3
+                              https://maven.apache.org/xsd/assembly-1.1.3.xsd">
+    <id>docs</id>
+    <formats>
+        <format>tar.gz</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>${project.build.directory}/generated-docs</directory>
+            <outputDirectory>/</outputDirectory>
+            <includes>
+                <include>**/*</include>
+            </includes>
+        </fileSet>
+    </fileSets>
+</assembly>
diff --git a/docs/src/main/asciidoc/advanced/index.adoc 
b/docs/src/main/asciidoc/advanced/index.adoc
new file mode 100644
index 0000000000..64b0624241
--- /dev/null
+++ b/docs/src/main/asciidoc/advanced/index.adoc
@@ -0,0 +1,31 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Advanced Topics
+
+This section covers advanced usage and internals of Apache Tika.
+
+== Topics
+
+* link:robustness.html[Robustness] - Process isolation and fault tolerance 
when parsing untrusted content
+* link:spooling.html[TikaInputStream and Spooling] - Understanding how 
TikaInputStream handles buffering, caching, and spooling to disk
+
+// Add links to specific topics as they are created
+// * link:custom-parsers.html[Writing Custom Parsers]
+// * link:custom-detectors.html[Writing Custom Detectors]
+// * link:configuration.html[Advanced Configuration]
+// * link:performance.html[Performance Tuning]
diff --git a/docs/src/main/asciidoc/advanced/robustness.adoc 
b/docs/src/main/asciidoc/advanced/robustness.adoc
new file mode 100644
index 0000000000..757f009dee
--- /dev/null
+++ b/docs/src/main/asciidoc/advanced/robustness.adoc
@@ -0,0 +1,137 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= The Robustness of Apache Tika
+
+Running parsers on untrusted data carries inherent risks. In rare cases, Tika 
can
+encounter infinite loops or allocate unexpected amounts of memory 
(OutOfMemoryErrors).
+When processing documents at scale, you must implement protective measures.
+
+IMPORTANT: Avoid running Tika in the same process as critical infrastructure 
like
+indexers or search systems.
+
+== Process Isolation
+
+The primary defense against parser failures is process isolation. By running 
parsers
+in separate processes, you protect your main application from:
+
+* OutOfMemoryErrors
+* Infinite loops
+* Native code crashes
+* Resource exhaustion
+
+=== Tika 4.x
+
+**In Tika 4.x, link:../pipes/index.html[Tika Pipes] is the recommended 
approach for
+robust document processing.** It provides:
+
+* Automatic process isolation
+* Fault tolerance and recovery
+* Scalable parallel processing
+* Unified architecture for all deployment scenarios
+
+Pipes can be used in multiple ways:
+
+* **Programmatically** - Via `PipesForkParser` in the `tika-pipes-fork-parser` module
+  (see link:../using-tika/java-api/getting-started.html[Java API Getting Started])
+* **Via tika-server** - REST endpoints for pipes-based processing
+* **Via tika-grpc** - gRPC interface with pipes backend
+
+In Tika 4.x, the approach to robustness has been simplified. Previous versions 
offered
+four different forking mechanisms:
+
+[cols="1,2,1"]
+|===
+|Mechanism |Description |Status in 4.x
+
+|ForkParser
+|Spawned child processes for individual parse operations
+|Deprecated
+
+|tika-batch
+|Desktop/VM-scale batch processing
+|Deprecated
+
+|tika-server (forked mode)
+|REST server with forked parsing processes
+|Available, but Pipes recommended
+
+|tika-pipes
+|Scalable, fault-tolerant pipeline processing
+|*Recommended approach*
+|===
+
+=== Tika 3.x and Earlier
+
+If you are using Tika 3.x or earlier, you have several options for process 
isolation:
+
+ForkParser::
+Spawns child processes to protect against out-of-memory errors and infinite 
loops.
+Suitable for programmatic use in Java applications.
+
+tika-batch::
+For desktop/VM-scale processing (not cloud-scale):
++
+[source,bash]
+----
+java -jar tika-app.jar -i <input_dir> -o <output_dir>
+----
+
+tika-server::
+In version 2.x and later, parsing defaults to forked processes. Clients must 
handle
+tika-server restarts gracefully.
+
+tika-pipes::
+Available through programmatic use, tika-app `-a` option, or tika-server's 
`/async`
+and `/pipes` endpoints.
+
+== Security Testing and Prevention
+
+The Apache Tika team implements several measures to identify and prevent 
vulnerabilities:
+
+* **Regression testing** against ~2 million files from Common Crawl before 
releases
+* **Code reviews** of dependencies to identify vulnerability patterns
+* **Fuzzing modules** for automated vulnerability discovery
+* **Collaboration** with security researchers
+* **Maintained forks** of parsers with critical fixes (released independently 
when needed)
+* **Public documentation** of vulnerabilities on the link:../security.html[security page]
+
+== MockParser for Testing
+
+Tika provides a `MockParser` tool for testing your system's robustness. You can
+configure it to simulate various failure modes:
+
+* Infinite loops
+* OutOfMemoryErrors
+* Excessive runtime
+* Large output generation
+
+This allows you to verify that your integration handles parser failures 
gracefully.
+
+== Recommendations
+
+1. **Use Tika Pipes** (4.x) for production workloads with untrusted content
+2. **Isolate Tika** from critical systems - never run in the same JVM as your 
indexer
+3. **Set timeouts** for all parsing operations
+4. **Monitor memory usage** and set appropriate limits
+5. **Plan for failures** - your system should handle parser crashes gracefully
+6. **Stay updated** - apply security updates promptly
+
+== Further Reading
+
+* link:../pipes/index.html[Tika Pipes] - Recommended approach for robust 
processing
+* link:../security.html[Security] - Known vulnerabilities and security model
diff --git a/docs/spooling.adoc b/docs/src/main/asciidoc/advanced/spooling.adoc
similarity index 100%
rename from docs/spooling.adoc
rename to docs/src/main/asciidoc/advanced/spooling.adoc
diff --git a/docs/src/main/asciidoc/configuration/index.adoc 
b/docs/src/main/asciidoc/configuration/index.adoc
new file mode 100644
index 0000000000..a040a1e63b
--- /dev/null
+++ b/docs/src/main/asciidoc/configuration/index.adoc
@@ -0,0 +1,34 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Configuration
+
+This section covers configuring Apache Tika.
+
+== Overview
+
+Tika can be configured via JSON or XML configuration files. Configuration 
controls
+parsers, detectors, content handlers, and other components.
+
+== Topics
+
+// Add links to specific topics as they are created
+// * link:json-config.html[JSON Configuration]
+// * link:xml-config.html[XML Configuration]
+// * link:parsers.html[Configuring Parsers]
+// * link:detectors.html[Configuring Detectors]
+// * link:mime-types.html[MIME Type Configuration]
diff --git a/docs/src/main/asciidoc/faq.adoc b/docs/src/main/asciidoc/faq.adoc
new file mode 100644
index 0000000000..168c9a9547
--- /dev/null
+++ b/docs/src/main/asciidoc/faq.adoc
@@ -0,0 +1,28 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= FAQ and Troubleshooting
+
+This page covers frequently asked questions and common issues when using 
Apache Tika.
+
+== Frequently Asked Questions
+
+// TODO: Add FAQs
+
+== Troubleshooting
+
+// TODO: Add common issues and solutions
diff --git a/docs/src/main/asciidoc/index.adoc 
b/docs/src/main/asciidoc/index.adoc
new file mode 100644
index 0000000000..cd8b88cc80
--- /dev/null
+++ b/docs/src/main/asciidoc/index.adoc
@@ -0,0 +1,72 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Apache Tika Documentation
+
+WARNING: This reference guide was generated with the assistance of AI and 
requires
+human review before it can be fully trusted. This documentation serves as an 
example
+and a starting point, but more work remains. Contributions and corrections are 
welcome.
+
+== Overview
+
+Apache Tika is a content detection and extraction framework written in Java.
+
+== Using Tika
+
+* link:using-tika/index.html[Getting Started] - Choose your integration method
+* link:pipes/index.html[Pipes] - Scalable, fault-tolerant document processing
+
+== Configuration
+
+* link:configuration/index.html[Configuration] - JSON and XML configuration 
options
+
+== Migration
+
+* link:migration-to-4x/index.html[Migrating to 4.x] - Guides and background 
for upgrading to Tika 4.x
+
+== Advanced
+
+* link:advanced/index.html[Advanced Topics] - Custom parsers, performance 
tuning, internals
+
+== FAQ
+
+* link:faq.html[FAQ and Troubleshooting] - Common questions and issues
+
+== Security
+
+* link:security.html[Security] - Security considerations and reporting 
vulnerabilities
+
+== Roadmap
+
+* link:roadmap.html[Roadmap] - Planned features and improvements for upcoming 
releases
+
+== For Maintainers
+
+* link:maintainers/index.html[Maintainer Documentation] - Release guides and 
project maintenance
+
+== Links
+
+* https://tika.apache.org/[Apache Tika Website] - Official project website
+* https://tika.apache.org/{tika-stable-version}/formats.html[Supported Formats] - File formats Tika can parse
+* https://tika.apache.org/{tika-stable-version}/api/[API Documentation] - Javadoc
+* https://issues.apache.org/jira/projects/TIKA[JIRA] - Issue tracker
+* https://repository.apache.org/content/repositories/snapshots/org/apache/tika/[Maven Snapshots] - SNAPSHOT builds in Apache's Maven repository
+* https://ci-builds.apache.org/job/Tika/[CI Builds] - Continuous integration builds
+* https://cwiki.apache.org/confluence/display/TIKA/[Confluence Wiki] - Legacy wiki documentation
++
+NOTE: As of Tika 4.x, we are migrating content from Confluence to these 
AsciiDoc pages.
+The Confluence wiki will eventually be retired.
diff --git a/docs/src/main/asciidoc/maintainers/index.adoc 
b/docs/src/main/asciidoc/maintainers/index.adoc
new file mode 100644
index 0000000000..e0ce8ed6f6
--- /dev/null
+++ b/docs/src/main/asciidoc/maintainers/index.adoc
@@ -0,0 +1,29 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= For Maintainers
+
+This section contains documentation for Apache Tika project maintainers and 
committers.
+
+== Topics
+
+* link:release-guides/index.html[Release Guides] - How to release Apache Tika
+
+// Add links to specific topics as they are created
+// * link:voting.html[Voting Procedures]
+// * link:ci.html[Continuous Integration]
+// * link:website.html[Website Maintenance]
diff --git a/docs/src/main/asciidoc/maintainers/release-guides/docker.adoc 
b/docs/src/main/asciidoc/maintainers/release-guides/docker.adoc
new file mode 100644
index 0000000000..a8f2f8cbc7
--- /dev/null
+++ b/docs/src/main/asciidoc/maintainers/release-guides/docker.adoc
@@ -0,0 +1,133 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Releasing Tika Docker Images
+
+This guide covers the process for releasing Apache Tika Docker images.
+
+== Prerequisites
+
+=== DockerHub Access
+
+You need permissions on the `apache/tika` repository on DockerHub. To obtain 
access,
+create an INFRA JIRA ticket with the "Docker" label.
+
+=== Repository Access
+
+Clone the tika-docker repository:
+
+[source,bash]
+----
+git clone https://github.com/apache/tika-docker
+cd tika-docker
+----
+
+== Image Types
+
+The tika-docker repository produces two types of images:
+
+Minimal::
+Apache Tika with base dependencies (Java only)
+
+Full::
+Apache Tika plus Tesseract OCR and GDAL
+
+== Helper Tools
+
+docker-tool.sh::
+Automates building, testing, and publishing Docker images
+
+republish-images.sh::
+Legacy script for batch republishing images
+
+NOTE: The repository also contains Docker Compose files for advanced scenarios
+(Vision, Grobid, OCR, NER), but these are not used for official releases.
+
+== Release Process
+
+=== Step 1: Update README
+
+Update the "Available Tags" section in `README.md` to include the new version.
+
+=== Step 2: Update Version
+
+Increment the TAG version in the `.env` file.
+
+=== Step 3: Update Changelog
+
+Update `CHANGES.md` with release information and date.
+
+=== Step 4: Test Locally
+
+Test the release locally before publishing:
+
+[source,bash]
+----
+./docker-tool.sh build <docker-version> <tika-version>
+./docker-tool.sh test <docker-version>
+----
+
+=== Step 5: Commit Changes
+
+Commit all changes:
+
+[source,bash]
+----
+git add README.md .env CHANGES.md
+git commit -m "Prepare for Docker release <docker-version>"
+git push
+----
+
+=== Step 6: Build and Publish
+
+Build and publish the images using the docker-tool script.
+
+Example for version 3.1.0.0 based on Tika 3.1.0:
+
+[source,bash]
+----
+# Build the images
+./docker-tool.sh build 3.1.0.0 3.1.0
+
+# Test the images
+./docker-tool.sh test 3.1.0.0
+
+# Publish to DockerHub
+./docker-tool.sh publish 3.1.0.0 3.1.0
+----
+
+NOTE: Multi-architecture building takes time. The publish step automatically
+updates the `-latest` tag on DockerHub.
+
+=== Step 7: Tag the Release
+
+Create and push a git tag for the release:
+
+[source,bash]
+----
+git tag -a 3.1.0.0 -m "New release for 3.1.0.0"
+git push --tags
+----
+
+== Post-Release
+
+After publishing the Docker images:
+
+* Verify the images are available on DockerHub at 
https://hub.docker.com/r/apache/tika
+* Test pulling and running the new images
+* Update the main Tika website if needed
+* Proceed to release the link:helm.html[Helm charts] if applicable
diff --git a/docs/src/main/asciidoc/maintainers/release-guides/grpc.adoc 
b/docs/src/main/asciidoc/maintainers/release-guides/grpc.adoc
new file mode 100644
index 0000000000..0576d23bb8
--- /dev/null
+++ b/docs/src/main/asciidoc/maintainers/release-guides/grpc.adoc
@@ -0,0 +1,32 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Releasing Tika gRPC
+
+This guide covers the process for releasing Apache Tika gRPC components.
+
+== Prerequisites
+
+// TODO: Add prerequisites
+
+== Release Process
+
+// TODO: Add release steps
+
+== Post-Release
+
+// TODO: Add post-release steps
diff --git a/docs/src/main/asciidoc/maintainers/release-guides/helm.adoc 
b/docs/src/main/asciidoc/maintainers/release-guides/helm.adoc
new file mode 100644
index 0000000000..aa80120c6f
--- /dev/null
+++ b/docs/src/main/asciidoc/maintainers/release-guides/helm.adoc
@@ -0,0 +1,138 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Releasing Tika Helm Charts
+
+This guide covers the process for releasing Apache Tika Helm charts.
+
+== Prerequisites
+
+=== Apache JFrog Artifactory Access
+
+You need permissions to release the Apache Tika Helm chart to the Apache Infra
+Artifactory instance. Access is controlled by ASF Infra and can be requested
+via a JIRA ticket with the "Artifactory" label.
+
+=== Repository Access
+
+Clone the tika-helm repository:
+
+[source,bash]
+----
+git clone https://github.com/apache/tika-helm
+cd tika-helm
+----
+
+Apache Tika committers should have existing access to this repository.
+
+=== Install Helm and Plugins
+
+Install Helm and the Artifactory plugin:
+
+[source,bash]
+----
+# Install Helm (macOS)
+brew install helm
+
+# Install the Artifactory push plugin
+helm plugin install https://github.com/belitre/helm-push-artifactory-plugin --version 1.0.2
+----
+
+== Docker Image Types
+
+The Helm chart deploys one of two upstream Docker image types:
+
+Minimal::
+Contains Apache Tika and base dependencies (Java only)
+
+Full::
+Includes Tika, dependencies, Tesseract OCR, GDAL, etc.
+
+The Helm Chart uses the *Full* image by default, though either can be specified
+during Kubernetes deployment.
+
+== Versioning
+
+tika-helm Charts follow the https://semver.org/spec/v2.0.0.html[Semantic 
Versioning 2.0.0]
+specification, regardless of upstream container image versioning.
+
+== Release Process
+
+=== Step 1: Update Chart Configuration
+
+For each new upstream tika-docker FULL release, update the following files:
+
+Chart.yaml::
+* Line 22: Update `version` (chart version)
+* Line 23: Update `appVersion` (must match upstream tika-docker FULL release 
tag)
+
+values.yaml::
+* Line 26: Update the default image tag
+
+=== Step 2: Commit and Tag
+
+Commit the changes and create a release tag:
+
+[source,bash]
+----
+export RELEASE_VERSION=v3.2.2
+
+git add -A
+git commit -m "Release tika-helm $RELEASE_VERSION"
+git push origin main
+
+git tag -a $RELEASE_VERSION -m "Release tika-helm $RELEASE_VERSION"
+git push --tags
+----
+
+=== Step 3: Create GitHub Release
+
+. Navigate to the pushed tag on GitHub
+. Click the three-dot menu
+. Select "Create release"
+. Add release notes and publish
+
+=== Step 4: Publish to Apache JFrog Artifactory
+
+Add the Tika Helm repository and push the chart:
+
+[source,bash]
+----
+# Add the Tika Helm repository
+helm repo add tika https://apache.jfrog.io/artifactory/tika
+
+# Set your credentials
+export HELM_REPO_USERNAME="your-apache-id"
+export HELM_REPO_PASSWORD="your-password"
+
+# Push the chart to Artifactory
+helm push-artifactory . https://apache.jfrog.io/artifactory/tika
+----
+
+== Post-Release
+
+After publishing the Helm chart:
+
+* Verify the chart is available at https://apache.jfrog.io/artifactory/tika
+* Test installing the chart in a Kubernetes cluster
+* Update any documentation referencing the chart version
+
+== Questions
+
+For questions about the Helm release process, contact:
+
+* dev@tika.apache.org mailing list
diff --git a/docs/src/main/asciidoc/maintainers/release-guides/index.adoc 
b/docs/src/main/asciidoc/maintainers/release-guides/index.adoc
new file mode 100644
index 0000000000..1e8d464628
--- /dev/null
+++ b/docs/src/main/asciidoc/maintainers/release-guides/index.adoc
@@ -0,0 +1,32 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Release Guides
+
+This section contains guides for releasing Apache Tika components.
+
+== Overview
+
+Apache Tika follows the standard Apache release process. This section provides
+step-by-step guides for releasing the various Tika components.
+
+== Topics
+
+* link:tika.html[Releasing Apache Tika] - Main Tika project release process
+* link:docker.html[Releasing Tika Docker Images] - Docker image release process
+* link:helm.html[Releasing Tika Helm Charts] - Helm chart release process
+* link:grpc.html[Releasing Tika gRPC] - gRPC component release process
diff --git a/docs/src/main/asciidoc/maintainers/release-guides/tika.adoc 
b/docs/src/main/asciidoc/maintainers/release-guides/tika.adoc
new file mode 100644
index 0000000000..a967c80421
--- /dev/null
+++ b/docs/src/main/asciidoc/maintainers/release-guides/tika.adoc
@@ -0,0 +1,271 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Releasing Apache Tika
+
+This guide covers the process for releasing the main Apache Tika project.
+
+== Prerequisites
+
+Before starting the release process, ensure you have:
+
+* Commit access to the Apache Tika repository
+* A valid GPG key published to a public keyserver
+* Maven credentials configured in `~/.m2/settings.xml`
+* Access to Apache's Nexus repository manager
+
+== Pre-Release Checks
+
+Before starting the release, run vulnerability and dependency audits:
+
+[source,bash]
+----
+# Identify vulnerable dependencies
+mvn ossindex:audit -Dossindex.fail=true
+
+# Check for outdated plugins
+mvn versions:display-plugin-updates
+
+# Check for outdated dependencies
+mvn versions:display-dependency-updates
+
+# Run full regression tests
+mvn -Prelease-profile clean verify
+----
+
+== Release Process
+
+=== Step 1: Clone the Repository
+
+Clone the repository if you haven't already:
+
+[source,bash]
+----
+git clone https://github.com/apache/tika.git
+cd tika
+----
+
+=== Step 2: Update Documentation
+
+Update `CHANGES.txt` with the release date:
+
+[source]
+----
+Release X.Y.Z - MM/dd/yyyy
+----
+
+Add any changelog entries as needed.
+
+=== Step 3: JIRA Management
+
+. Create versions X.Y.Z, X.(Y+1), and X.(Y+2) in JIRA if they don't exist
+. Reassign any unresolved X.Y.Z issues to X.(Y+1) via bulk change
+
+=== Step 4: Verify License Headers
+
+Run the Apache RAT plugin to verify all files have proper license headers:
+
+[source,bash]
+----
+mvn apache-rat:check
+----
+
+=== Step 5: Commit Changes
+
+Commit the CHANGES.txt updates:
+
+[source,bash]
+----
+git add CHANGES.txt
+git commit -m "Prepare for X.Y.Z release"
+git push
+----
+
+=== Step 6: Set Maven Memory
+
+Configure Maven memory settings:
+
+[source,bash]
+----
+export MAVEN_OPTS="-Xms128m -Xmx256m"
+----
+
+=== Step 7: Prepare the Release
+
+Execute the Maven release prepare goal:
+
+[source,bash]
+----
+mvn release:prepare
+----
+
+This will prompt you to confirm:
+
+* The release version (X.Y.Z)
+* The SCM tag name
+* The next development version
+
+=== Step 8: Perform the Release
+
+Execute the Maven release perform goal:
+
+[source,bash]
+----
+mvn release:perform
+----
+
+This step requires valid Maven credentials in `~/.m2/settings.xml`; configure them before running the command:
+
+[source,xml]
+----
+<servers>
+  <server>
+    <id>apache.releases.https</id>
+    <username>your-apache-id</username>
+    <password>your-password</password>
+  </server>
+</servers>
+----
+
+=== Step 9: Verify Staging Repository
+
+. Access Apache's Nexus at https://repository.apache.org
+. Log in with your Apache credentials
+. Navigate to "Staging Repositories"
+. Find the org.apache.tika staging repository
+. Verify it contains all expected artifacts
+. Click "Close" with an appropriate message
+
+=== Step 10: Upload Distribution Artifacts
+
+Upload artifacts to `dist.apache.org`:
+
+[source,bash]
+----
+svn co https://dist.apache.org/repos/dist/dev/tika tika-dist-dev
+cd tika-dist-dev
+----
+
+Upload the following files with their signatures (.asc) and checksums 
(.sha512):
+
+* `tika-X.Y.Z-src.zip`
+* `tika-app-X.Y.Z.jar`
+* `tika-server-standard-X.Y.Z.jar`
+
+Also:
+
+* Rename `CHANGES.txt` to `CHANGES-X.Y.Z.txt`
+* Ensure the `KEYS` file contains the public key of every release signer
+
+=== Step 11: Call the Vote
+
+Send a vote request to the dev@tika.apache.org mailing list:
+
+[source]
+----
+Subject: [VOTE] Release Apache Tika X.Y.Z
+
+Hi all,
+
+I have created a candidate build for Apache Tika X.Y.Z.
+
+The release candidate artifacts can be found at:
+https://dist.apache.org/repos/dist/dev/tika/
+
+The staging repository is:
+https://repository.apache.org/content/repositories/orgapachetika-XXXX
+
+The Git tag is:
+https://github.com/apache/tika/tree/X.Y.Z
+
+Please vote:
+[ ] +1 Release this package
+[ ] +0 No opinion
+[ ] -1 Do not release (please provide reason)
+
+This vote will remain open for at least 72 hours.
+----
+
+=== Step 12: Release the Artifacts
+
+Upon successful vote (at least 3 binding +1 votes from PMC members, and more +1 than -1 votes):
+
+. Release the Nexus staging repository (click "Release" button)
+. Move artifacts from dev to release distribution:
+
+[source,bash]
+----
+svn mv https://dist.apache.org/repos/dist/dev/tika/X.Y.Z \
+       https://dist.apache.org/repos/dist/release/tika/X.Y.Z \
+       -m "Release Apache Tika X.Y.Z"
+----
+
+== Post-Release
+
+=== Update Unreleased Modules
+
+Update any modules that weren't part of the release to the next SNAPSHOT 
version.
+
+=== Update Website
+
+Refresh the website documentation to reflect the new release:
+
+* Update download links
+* Update version numbers in documentation
+* Add release notes
+
+=== Release Docker Images and Helm Charts
+
+Follow the separate guides for releasing:
+
+* link:docker.html[Docker images]
+* link:helm.html[Helm charts]
+
+=== Send Announcements
+
+Send release announcements to:
+
+* announce@apache.org
+* user@tika.apache.org
+* dev@tika.apache.org
+
+[source]
+----
+Subject: [ANNOUNCE] Apache Tika X.Y.Z Released
+
+The Apache Tika team is pleased to announce the release of Apache Tika X.Y.Z.
+
+Apache Tika is a toolkit for detecting and extracting metadata and text
+from various types of files.
+
+This release includes:
+[List major changes/features]
+
+For a complete list of changes, see:
+https://tika.apache.org/X.Y.Z/changes.html
+
+Download:
+https://tika.apache.org/download.html
+
+Thanks to everyone who contributed to this release!
+
+The Apache Tika Team
+----
+
+=== Register the Release
+
+Register the release at https://reporter.apache.org
diff --git a/docs/src/main/asciidoc/migration-to-4x/design-notes-4x.adoc 
b/docs/src/main/asciidoc/migration-to-4x/design-notes-4x.adoc
new file mode 100644
index 0000000000..006c4775f9
--- /dev/null
+++ b/docs/src/main/asciidoc/migration-to-4x/design-notes-4x.adoc
@@ -0,0 +1,127 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Design Notes for Tika 4.x
+
+This document captures the design decisions and architectural changes in 
Apache Tika 4.x.
+
+== Metadata Keys
+
+The design addresses security concerns by implementing namespaced metadata 
keys. This prevents
+user-controlled data from potentially overwriting existing metadata values in 
the Metadata object.
+
+See link:migrating-to-4x.html[Migrating to Tika 4.x] for details on specific
+metadata key changes.
+
+== Fat Jars and Maven Shade Strategy
+
+Tika 4.x moves away from fat jar/shaded artifacts. The `tika-app` and 
`tika-server` now use
+separate `lib` and `plugins` directories alongside the jar file, enabling 
standard `java -jar`
+execution.
+
+== Plugins and PF4J Framework
+
+=== Plugin Packaging
+
+PF4J plugins are packaged exclusively as zips (not jars) to align with the 
move away from fat
+jars. Custom code addresses race conditions during the unzipping process 
across threads and
+processes.
+
+=== Classloader Management
+
+The team disabled PF4J's default classpath loading to avoid complexity in unit 
tests. A
+configured plugins directory is now required.
+
+This strict boundary prevents issues when components are loaded separately. 
For example, JSON
+strings replace `JsonNode` objects to avoid problems with independent Jackson 
loading in plugins.
+
+IMPORTANT: We aim to keep Tika dependencies in the plugins to a minimum.
+
+== Serialization Architecture
+
+=== Design Principles
+
+* Maximize Jackson usage while minimizing custom serialization code
+* Exclude Jackson from `tika-core` and `tika-parsers-standard-modules` 
dependencies
+* Enable runtime configuration updates via Jackson's `readerForUpdating`
+
+=== Security Model
+
+Configuration files at initialization are treated as trusted sources. Runtime
+serialization/deserialization uses an allowlist of permitted packages via
+`PolymorphicObjectMapperFactory`.
+
+Custom components can add patterns to 
`META-INF/tika-serialization-allowlist.txt`.
+
+=== Implementation Challenges
+
+* Converted code to true Java beans with matching getters/setters
+* Used `ObjectMapper.DefaultTyping.OBJECT_AND_NON_CONCRETE` for polymorphic 
typing
+* Replaced generic collections (`List`, `Set`) with concrete types 
(`ArrayList`, `HashSet`)
+* Converted `Path` fields to `String` due to Jackson constraints
+* Avoided Java records to enable `readerForUpdating` functionality
+
+== Annotations System
+
+The `@TikaComponent` annotation handles:
+
+* Automatic service file generation at build time
+* Creation of `META-INF/tika/*.idx` mapping files
+* Kebab-case conversion of class names to friendly identifiers (e.g., 
`PDFParser` → `pdf-parser`)
+* Manual name overrides via `name` attribute
+* Optional `spi=false` setting for non-service-file registration
+
+== Migration Strategy
+
+The plan is to stabilize 4.x structures before backporting capabilities to 3.x 
and deprecating
+`TikaConfig` and `tika-config.xml`.
+
+A converter tool for transforming `tika-config.xml` to `tika-config.json` is 
planned, with
+support focused on components in `tika-parsers-standard-modules`.
+
+== Development Tips
+
+=== Common Issues
+
+* Plugin directories and `@TikaComponent` annotations becoming out of sync 
across modules
+* IntelliJ conflicts with command-line builds
+* Checkstyle running before Spotless, causing preventable failures
+
+=== Recommended Build Commands
+
+For faster builds during development:
+
+[source,bash]
+----
+mvn clean install -am -pl :tika-app -Pfast
+----
+
+To apply formatting and build:
+
+[source,bash]
+----
+mvn clean spotless:apply install
+----
+
+== Outstanding Tasks
+
+* Implement flexible component loading without `@TikaComponent` requirements
+* Enable friendly name usage throughout the codebase
+* Resolve gRPC issues
+* Fix mutool renderer byte-passing in open containers
+* Simplify and strengthen serialization code
+* Consider relocating `TikaConfig` and `ForkParser` to legacy module
diff --git a/docs/src/main/asciidoc/migration-to-4x/index.adoc 
b/docs/src/main/asciidoc/migration-to-4x/index.adoc
new file mode 100644
index 0000000000..c388e022cc
--- /dev/null
+++ b/docs/src/main/asciidoc/migration-to-4x/index.adoc
@@ -0,0 +1,32 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Migrating to Tika 4.x
+
+This section provides guides and background documentation for migrating to 
Apache Tika 4.x.
+
+See the link:../roadmap.html[Roadmap] for version timelines and support 
schedules.
+
+== Migration Guides
+
+* link:migrating-to-4x.html[Migration Guide] - Step-by-step guide for 
upgrading from Tika 3.x to 4.x
+* link:metadata-changes-4x.html[Metadata Changes] - Detailed metadata key 
changes and migration examples
+
+== Background Documentation
+
+* link:design-notes-4x.html[Design Notes] - Architectural decisions and design 
rationale
+* link:serialization-4x.html[Serialization] - JSON serialization design and 
implementation details
diff --git a/docs/src/main/asciidoc/migration-to-4x/metadata-changes-4x.adoc 
b/docs/src/main/asciidoc/migration-to-4x/metadata-changes-4x.adoc
new file mode 100644
index 0000000000..e129d33008
--- /dev/null
+++ b/docs/src/main/asciidoc/migration-to-4x/metadata-changes-4x.adoc
@@ -0,0 +1,121 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Metadata Changes in Tika 4.x
+
+This document details the metadata key changes in Apache Tika 4.x.
+
+== Overview
+
+Tika 4.x prefixes all "user generated" metadata keys to prevent overwrites and 
improve
+namespace clarity. This is a security-focused change that prevents 
user-controlled data
+from potentially overwriting existing metadata values in the Metadata object.
+
+== Metadata Key Changes
+
+[cols="2,2,3"]
+|===
+|Category |Change |Details
+
+|HTML custom metadata
+|Prefixed with `html:`
+|Custom metadata from HTML documents now uses the `html:` prefix
+
+|MAPI metadata
+|Prefix changed to `mapi:`
+|Microsoft MAPI properties now use the `mapi:` prefix
+
+|Resource name
+|Renamed
+|`resourceName` changed to `X-TIKA:resourceName`
+
+|Unrecognized image metadata
+|Prefixed with `img:`
+|Unrecognized image metadata keys now use the `img:` prefix
+
+|Office metadata
+|Prefix changed
+|Changed from `meta` prefix to `office` prefix
+|===
+
+== Migration Steps
+
+When upgrading to Tika 4.x, you will need to update any code that references 
metadata keys
+directly:
+
+=== HTML Metadata
+
+[source,java]
+----
+// Before (3.x)
+String value = metadata.get("custom-key");
+
+// After (4.x)
+String value = metadata.get("html:custom-key");
+----
+
+=== MAPI Metadata
+
+[source,java]
+----
+// Before (3.x)
+String value = metadata.get("mapi:some-property");
+
+// After (4.x) - prefix remains mapi: but verify specific keys
+String value = metadata.get("mapi:some-property");
+----
+
+=== Resource Name
+
+[source,java]
+----
+// Before (3.x)
+String name = metadata.get("resourceName");
+
+// After (4.x)
+String name = metadata.get("X-TIKA:resourceName");
+----
+
+=== Image Metadata
+
+[source,java]
+----
+// Before (3.x)
+String value = metadata.get("unknown-image-key");
+
+// After (4.x)
+String value = metadata.get("img:unknown-image-key");
+----
+
+=== Office Metadata
+
+[source,java]
+----
+// Before (3.x)
+String value = metadata.get("meta:some-property");
+
+// After (4.x)
+String value = metadata.get("office:some-property");
+----
+
+== Rationale
+
+The namespacing of metadata keys provides several benefits:
+
+* *Security*: Prevents user-controlled content from overwriting internal 
metadata
+* *Clarity*: Makes it clear which parser or source generated a metadata key
+* *Consistency*: Provides a uniform approach to metadata naming across all 
parsers
diff --git a/docs/src/main/asciidoc/migration-to-4x/migrating-to-4x.adoc 
b/docs/src/main/asciidoc/migration-to-4x/migrating-to-4x.adoc
new file mode 100644
index 0000000000..4c0b2f0f1d
--- /dev/null
+++ b/docs/src/main/asciidoc/migration-to-4x/migrating-to-4x.adoc
@@ -0,0 +1,46 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Migrating to Tika 4.x
+
+This guide covers the changes required when upgrading from Apache Tika 3.x to 
4.x.
+
+See the link:../roadmap.html[Roadmap] for version timelines and support 
schedules.
+
+== Requirements
+
+* Java 17 or later (upgraded from Java 11 in 3.x)
+
+== Metadata Key Changes
+
+Tika 4.x prefixes all "user generated" metadata keys to prevent overwrites and 
improve
+namespace clarity.
+
+See link:metadata-changes-4x.html[Metadata Changes in 4.x] for complete 
details, including
+a full table of changes and code migration examples.
+
+== API Changes
+
+// TODO: Document API changes
+
+== Configuration Changes
+
+// TODO: Document configuration changes
+
+== Deprecations and Removals
+
+// TODO: Document deprecated and removed features
diff --git a/docs/src/main/asciidoc/migration-to-4x/serialization-4x.adoc 
b/docs/src/main/asciidoc/migration-to-4x/serialization-4x.adoc
new file mode 100644
index 0000000000..e11bdc4959
--- /dev/null
+++ b/docs/src/main/asciidoc/migration-to-4x/serialization-4x.adoc
@@ -0,0 +1,101 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Serialization in Tika 4.x
+
+This document describes the JSON serialization design and implementation 
details for Apache Tika 4.x.
+
+== High-Level Goals
+
+=== Jackson Framework Integration
+
+Use Jackson as much as possible with as few custom serializers and as few 
annotations as possible.
+Jackson dependencies are kept out of core modules to maintain flexibility.
+
+=== Friendly Naming Conventions
+
+Implementation uses friendly names like `pdf-parser` rather than full class 
names. These friendly
+names are applied to configured items rather than configuration class names.
+
+=== Custom Class Support
+
+The design permits users to add custom classes through Jackson's polymorphic 
handling:
+
+* `org.apache.tika` patterns are allowed by default
+* Users can define additional inclusion patterns for security
+
+=== Configuration Consistency
+
+The approach seeks to make initialization and runtime configuration look 
exactly the same and use
+the same underlying code where possible. However, security constraints may 
require differences in
+which fields are modifiable at runtime.
+
+=== Configuration Objects Over Annotations
+
+The design prefers config objects over field annotations to support multithreading. Parsers
+retrieve settings from `ParseContext` at runtime.
+
+=== Cross-System Configuration Flow
+
+Configuration must pass seamlessly from:
+
+. User clients
+. Through tika-server REST APIs
+. Into tika-pipes infrastructure
+
+== Initialization Structure
+
+=== Tier 1 Objects
+
+ID Objects::
+Fetchers, emitters - components with unique identifiers
+
+Composite Objects::
+Parsers, detectors - components that aggregate other components
+
+Single Objects::
+Pipes, gRPC, server configurations
+
+=== Tier 2 Objects
+
+Components that can be read via friendly names using `@TikaComponent` 
annotations in an
+`other-config` section.
+
+== Runtime Patterns
+
+=== Backwards Compatibility
+
+The design maintains backwards compatibility by allowing `ParseContext` 
additions where the
+interface serves as the key.
+
+=== Partial Configuration Updates
+
+Users can specify only updates to the initialization configuration through 
partial JSON objects,
+rather than requiring complete configuration documents.
+
+=== Self-Configuring Components in Pipes
+
+In the pipes infrastructure, objects should configure themselves to avoid 
classloading
+dependencies on components like `PDFParser`.
+
+== Security Considerations
+
+* Configuration files at initialization are treated as trusted sources
+* Runtime serialization/deserialization uses an allowlist of permitted packages
+* Custom components can register patterns in 
`META-INF/tika-serialization-allowlist.txt`
+
+See link:design-notes-4x.html[Design Notes for 4.x] for additional 
architectural context.
diff --git a/docs/src/main/asciidoc/pipes/index.adoc 
b/docs/src/main/asciidoc/pipes/index.adoc
new file mode 100644
index 0000000000..e7b49ebc3c
--- /dev/null
+++ b/docs/src/main/asciidoc/pipes/index.adoc
@@ -0,0 +1,37 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika Pipes
+
+This section covers Tika Pipes for scalable, fault-tolerant document 
processing.
+
+== Overview
+
+Tika Pipes provides a framework for processing large volumes of documents with:
+
+* **Fetchers** - Retrieve documents from various sources (filesystem, S3, 
HTTP, etc.)
+* **Emitters** - Send parsed results to various destinations (filesystem, 
OpenSearch, Solr, etc.)
+* **Pipelines** - Configure processing workflows
+
+== Topics
+
+// Add links to specific topics as they are created
+// * link:getting-started.html[Getting Started]
+// * link:fetchers.html[Fetchers]
+// * link:emitters.html[Emitters]
+// * link:configuration.html[Configuration]
+// * link:async.html[Async Processing]
diff --git a/docs/src/main/asciidoc/roadmap.adoc 
b/docs/src/main/asciidoc/roadmap.adoc
new file mode 100644
index 0000000000..d5d7e263c3
--- /dev/null
+++ b/docs/src/main/asciidoc/roadmap.adoc
@@ -0,0 +1,96 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Apache Tika Roadmap
+
+This page outlines the planned features and improvements for Apache Tika 
releases.
+
+NOTE: All dates are in Open Source Standard Time which does not always neatly 
align
+with traditional calendars.
+
+== Release Timeline
+
+[cols="1,3"]
+|===
+|Date |Milestone
+
+|October 2024
+|Release 3.0.0
+
+|October 2024
+|Move main branch to 4.x (Java 17) after 3.0.0 release
+
+|April 2025
+|End support for 2.x (and Java 8)
+
+|January 2026
+|Release 4.0.0
+
+|June 2026
+|End support for 3.x (and Java 11)
+|===
+
+== Version Support Matrix
+
+[cols="1,1,1,2,2"]
+|===
+|Version |Java |Jakarta/javax |Availability |Planned EOL
+
+|2.x
+|8
+|javax
+|Now
+|April 2025
+
+|3.x
+|11
+|jakarta
+|October 2024
+|June 2026 or 6 months after 4.0.0 release
+
+|4.x
+|17
+|jakarta
+|January 2026
+|TBD
+
+|5.x
+|21
+|jakarta
+|TBD
+|TBD
+
+|6.x
+|25
+|jakarta
+|TBD
+|TBD
+|===
+
+== Metadata Changes in 4.x
+
+Tika 4.x implements namespaced metadata keys to prevent overwrites and improve 
namespace clarity.
+
+See link:migration-to-4x/metadata-changes-4x.html[Metadata Changes in 4.x] for complete details and
+migration examples.
+
+== Long-term Goals
+
+// Add long-term goals as they are defined
+// * Improved streaming support
+// * Enhanced language detection
+// * Better support for modern document formats
diff --git a/docs/src/main/asciidoc/security.adoc 
b/docs/src/main/asciidoc/security.adoc
new file mode 100644
index 0000000000..ddc09b7215
--- /dev/null
+++ b/docs/src/main/asciidoc/security.adoc
@@ -0,0 +1,34 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Security
+
+This page covers security considerations when using Apache Tika.
+
+== Security Model
+
+Apache Tika's security model describes the trust boundaries and assumptions 
that govern
+how Tika processes content. Understanding this model is essential for 
deploying Tika securely.
+
+* https://tika.apache.org/security-model.html[Apache Tika Security Model]
+
+== Known Vulnerabilities
+
+For information about known security vulnerabilities (CVEs) in Apache Tika and 
their
+remediation, please see:
+
+* https://tika.apache.org/security.html[Apache Tika Security Vulnerabilities]
diff --git a/docs/src/main/asciidoc/using-tika/cli/index.adoc 
b/docs/src/main/asciidoc/using-tika/cli/index.adoc
new file mode 100644
index 0000000000..56105528d7
--- /dev/null
+++ b/docs/src/main/asciidoc/using-tika/cli/index.adoc
@@ -0,0 +1,39 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika Command Line Interface
+
+This section covers using Apache Tika from the command line via `tika-app`.
+
+== Overview
+
+The Tika application (`tika-app.jar`) provides a command-line interface for
+parsing documents, detecting content types, and extracting metadata.
+
+== Basic Usage
+
+[source,bash]
+----
+java -jar tika-app.jar [options] <file>
+----
+
+== Topics
+
+// Add links to specific topics as they are created
+// * link:installation.html[Installation]
+// * link:options.html[Command Line Options]
+// * link:batch.html[Batch Processing]
diff --git a/docs/src/main/asciidoc/using-tika/grpc/index.adoc 
b/docs/src/main/asciidoc/using-tika/grpc/index.adoc
new file mode 100644
index 0000000000..2f1eb24adb
--- /dev/null
+++ b/docs/src/main/asciidoc/using-tika/grpc/index.adoc
@@ -0,0 +1,32 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika gRPC
+
+This section covers using Apache Tika via gRPC.
+
+== Overview
+
+Tika gRPC provides a high-performance gRPC interface for parsing documents.
+This is useful for microservices architectures and polyglot environments.
+
+== Topics
+
+// Add links to specific topics as they are created
+// * link:getting-started.html[Getting Started]
+// * link:api.html[gRPC API]
+// * link:clients.html[Client Libraries]
diff --git a/docs/src/main/asciidoc/using-tika/index.adoc b/docs/src/main/asciidoc/using-tika/index.adoc
new file mode 100644
index 0000000000..04e214c42f
--- /dev/null
+++ b/docs/src/main/asciidoc/using-tika/index.adoc
@@ -0,0 +1,65 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Getting Started with Apache Tika
+
+Apache Tika can be used in several ways depending on your needs. Choose the approach
+that best fits your use case.
+
+== Choose Your Integration Method
+
+link:java-api/index.html[Java API]::
+Use Tika directly in your Java application. Best for tight integration and full control
+over parsing behavior.
+
+link:cli/index.html[Command Line (tika-app)]::
+Run Tika from the command line. Best for quick extraction, scripting, and one-off tasks.
+
+link:server/index.html[Server (REST API)]::
+Run Tika as a standalone server with a REST API. Best for language-agnostic integration
+and microservice architectures.
+
+link:grpc/index.html[gRPC]::
+Use Tika via gRPC protocol. Best for high-performance, cross-language communication.
+
+== Which Should I Use?
+
+[cols="1,3"]
+|===
+|Use Case |Recommended Approach
+
+|Java application needing content extraction
+|Java API
+
+|Shell scripts or batch processing
+|Command Line
+
+|Non-Java application (Python, Node.js, etc.)
+|Server (REST) or gRPC
+
+|High-throughput processing pipeline
+|Server or gRPC with link:../pipes/index.html[Pipes]
+
+|Quick one-time extraction
+|Command Line
+|===
+
+== Scalable Processing
+
+For processing large volumes of documents, see link:../pipes/index.html[Tika Pipes],
+which provides fault-tolerant, scalable document processing and works with all of the
+above integration methods.
diff --git a/docs/src/main/asciidoc/using-tika/java-api/getting-started.adoc b/docs/src/main/asciidoc/using-tika/java-api/getting-started.adoc
new file mode 100644
index 0000000000..a03ca92a80
--- /dev/null
+++ b/docs/src/main/asciidoc/using-tika/java-api/getting-started.adoc
@@ -0,0 +1,130 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Getting Started with the Java API
+
+== Before You Start
+
+Before embedding Tika directly in your Java application, consider whether a
+client-server architecture would better suit your needs.
+
+=== Recommended: Use tika-server or tika-grpc
+
+For most use cases, we recommend running Tika as a separate service rather than
+embedding it directly:
+
+* **link:../server/index.html[tika-server]** - REST API, language-agnostic
+* **link:../grpc/index.html[tika-grpc]** - High-performance gRPC protocol
+
+**Why?**
+
+* **Process isolation** - Parser crashes don't affect your application
+* **Easier deployment** - Use official Docker images
+* **Language flexibility** - Call from any language, not just Java
+* **Simpler upgrades** - Update Tika independently of your application
+
+Docker images are available at https://hub.docker.com/r/apache/tika[Docker Hub].
+
+=== When to Use the Java API
+
+The Java API is appropriate when you:
+
+* Need tight integration with Tika internals
+* Cannot use a network service
+* Have specific customization requirements
+
+== Using PipesForkParser (Recommended)
+
+If you must use Tika as a library, use `PipesForkParser` from the
+`tika-pipes-fork-parser` module. It provides process isolation to protect your
+application from parser crashes, memory leaks, and infinite loops.
+
+=== Maven Dependency
+
+[source,xml]
+----
+<dependency>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-pipes-fork-parser</artifactId>
+    <version>${tika.version}</version>
+</dependency>
+----
+
+=== Basic Example
+
+[source,java]
+----
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.pipes.fork.PipesForkParser;
+import org.apache.tika.pipes.fork.PipesForkResult;
+
+try (PipesForkParser parser = new PipesForkParser();
+     TikaInputStream tis = TikaInputStream.get(filePath)) {
+
+    PipesForkResult result = parser.parse(tis);
+
+    if (result.isSuccess()) {
+        String content = result.getContent();
+        // process content...
+    } else {
+        // handle failure
+    }
+}
+----
+
+=== Key Features
+
+* **Process isolation** - Parsing runs in a separate JVM
+* **Automatic restart** - If the forked process crashes, it restarts automatically
+* **Configurable timeouts** - Prevent infinite loops
+* **Thread-safe** - Reuse across multiple threads
+
+=== Complete Examples
+
+See
+https://github.com/apache/tika/blob/main/tika-example/src/main/java/org/apache/tika/example/PipesForkParserExample.java[PipesForkParserExample.java]
+in the `tika-example` module for comprehensive examples including:
+
+* Basic parsing
+* Handling embedded documents
+* Custom configuration
+* Error handling
+* Batch processing
+
+== Without Pipes: Understanding the Risks
+
+If you choose not to use `PipesForkParser` and instead use Tika's parsers directly
+(e.g., `AutoDetectParser`), you are responsible for handling the risks of parsing
+untrusted content.
+
+WARNING: Running parsers directly on untrusted data can cause OutOfMemoryErrors,
+infinite loops, and crashes that will affect your entire application.
+
+Before proceeding without process isolation, read:
+
+* link:../../advanced/robustness.html[The Robustness of Apache Tika] - Understanding parser risks and mitigations
+* https://tika.apache.org/security-model.html[Apache Tika Security Model] - Trust boundaries and assumptions
+
+If you still need to use parsers directly, your application is responsible for
+implementing its own process isolation so that you can:
+
+* Set parse timeouts (Tika cannot enforce timeouts without process isolation)
+* Configure memory limits (requires separate JVM)
+* Kill runaway processes
+* Recover from crashes
+
+Never run Tika in the same JVM as critical infrastructure.
diff --git a/docs/src/main/asciidoc/using-tika/java-api/index.adoc b/docs/src/main/asciidoc/using-tika/java-api/index.adoc
new file mode 100644
index 0000000000..60e88050e4
--- /dev/null
+++ b/docs/src/main/asciidoc/using-tika/java-api/index.adoc
@@ -0,0 +1,38 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Using Tika as a Library (Java API)
+
+This section covers using Apache Tika programmatically in your Java applications.
+
+== Overview
+
+Tika can be embedded directly into your Java applications as a library. This gives you
+full control over parsing, detection, and configuration.
+
+However, for most use cases we recommend using link:../server/index.html[tika-server]
+or link:../grpc/index.html[tika-grpc] instead. See
+link:getting-started.html[Getting Started] for guidance on choosing the right approach.
+
+== Topics
+
+* link:getting-started.html[Getting Started] - Recommendations and PipesForkParser usage
+
+// Add links to specific topics as they are created
+// * link:parsing.html[Parsing Documents]
+// * link:detection.html[Content Detection]
+// * link:configuration.html[Configuration]
diff --git a/docs/src/main/asciidoc/using-tika/server/index.adoc b/docs/src/main/asciidoc/using-tika/server/index.adoc
new file mode 100644
index 0000000000..accfc02700
--- /dev/null
+++ b/docs/src/main/asciidoc/using-tika/server/index.adoc
@@ -0,0 +1,42 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Tika Server
+
+This section covers running Apache Tika as a REST server via `tika-server`.
+
+== Overview
+
+Tika Server provides a RESTful HTTP interface for parsing documents and extracting
+content. It can be deployed as a standalone service or in a containerized environment.
+
+== Basic Usage
+
+[source,bash]
+----
+java -jar tika-server-standard.jar
+----
+
+The server starts on port 9998 by default.
+
+== Topics
+
+// Add links to specific topics as they are created
+// * link:installation.html[Installation]
+// * link:endpoints.html[REST Endpoints]
+// * link:configuration.html[Configuration]
+// * link:docker.html[Docker Deployment]
diff --git a/pom.xml b/pom.xml
index 9e451d1006..417aab5f35 100644
--- a/pom.xml
+++ b/pom.xml
@@ -62,6 +62,9 @@
   <profiles>
     <profile>
       <id>apache-release</id>
+      <modules>
+        <module>docs</module>
+      </modules>
       <properties>
         <username>${user.name}</username>
       </properties>

Reply via email to