This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git
The following commit(s) were added to refs/heads/master by this push:
new d3013609 Move the User's Guide to the Javadoc Overview
d3013609 is described below
commit d3013609d0fdac93e7fd45e44dc192192011cfc9
Author: Gary Gregory <[email protected]>
AuthorDate: Wed Dec 3 07:49:20 2025 -0500
Move the User's Guide to the Javadoc Overview
It should always match the release that way.
---
.../org/apache/commons/text/doc-files/leaf.svg | 45 ++++
.../org/apache/commons/text/doc-files/logo.png | Bin 0 -> 9454 bytes
src/main/javadoc/overview.html | 262 ++++++++++++++++++
src/site/site.xml | 2 +-
src/site/xdoc/userguide.xml | 293 ---------------------
5 files changed, 308 insertions(+), 294 deletions(-)
diff --git a/src/main/java/org/apache/commons/text/doc-files/leaf.svg
b/src/main/java/org/apache/commons/text/doc-files/leaf.svg
new file mode 100644
index 00000000..71de588c
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/doc-files/leaf.svg
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<svg id="Layer_1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 650 1000">
+ <defs>
+ <style>
+ .cls-1 {
+ fill: #7c297d;
+ }
+
+ .cls-2 {
+ fill: #f79a23;
+ }
+
+ .cls-3 {
+ fill: #dd552c;
+ }
+
+ .cls-4 {
+ fill: #d22128;
+ }
+ </style>
+ </defs>
+ <path class="cls-3"
d="M276.7092915,398.1515795c25.5279479-63.1242453,54.0110775-126.1246793,84.5729347-181.9636035-45.6212286-33.8852148-89.4276433-106.9534674-107.4259055-139.3268803-6.4642564,7.3439687-10.6608099,15.8022396-12.563595,22.6835448-16.9556402,61.2214602,43.4023987,135.1498759-5.21311,108.1394499-40.5058645-22.5076023-131.7157397-71.797557-166.5067324-22.8073561,38.9647388,50.0654049,140.8777805,176.0271745,207.1364082,213.2748452Z"/>
+ <path class="cls-2"
d="M361.2822261,216.187976c29.6137228-54.1055651,61.1725873-101.4927347,93.8913687-135.6320886,0,0-32.6340684,47.2372927-79.2457879,141.6662634,28.2289905,7.7740502,108.6249208,23.7261667,220.6090393-5.2000772,2.7531737-20.350678-10.9279818-42.734469-79.1056856-50.2283145-44.5101845-4.8872906,53.4246026-106.2822795-17.5225659-154.2363748-2.2905102-1.5509002-4.5419221-2.9193416-6.7477192-4.1444224-2.3784814-.8536468-4.8905488-1.6356133-7.5720422-2.3328667-82.8591248-
[...]
+ <path class="cls-4"
d="M210.0661969,580.0239535c18.7052902-56.0344158,41.2063761-118.989235,66.6430946-181.872374-66.2586277-37.2476707-168.1716694-163.2094403-207.1364082-213.2748452-6.9562436,9.787614-11.7099483,23.4394457-13.2934304,42.1088958-8.4973692,100.2806866,94.9567981,174.521889,74.3324318,188.0824913-27.2808561,17.9396147-81.5786546-43.0928703-102.978471-4.3138485,31.0180043,39.8477093,94.2008971,111.8472744,182.4327833,169.2696807Z"/>
+ <path class="cls-3"
d="M496.7155649,363.7515701c-52.3819806-18.6824828,54.7376547-68.786986,89.5221309-121.9411586,4.4506926-6.7965921,9.0512622-15.5806824,10.2991504-24.7883379-111.9841185,28.9262439-192.3800488,12.9741274-220.6090393,5.2000772-24.1301828,48.8924551-51.9942555,110.5048986-81.0540854,184.7917156,30.2653616,12.9415455,153.8421334,60.7457639,328.3900879,60.9933866,29.3465509-76.4372249-76.8347245-86.5311091-126.5482445-104.255683Z"/>
+ <path class="cls-4"
d="M230.3060964,590.1113213c30.7801562,9.5921223,132.7681363,38.244678,241.0835287,33.9308295,14.5510932-39.3980786-39.8509675-43.2427472-44.282111-74.84071-3.430878-24.4494858,143.1682907,20.5461697,190.3371613-68.3569045,2.3849978-4.4963073,4.2454264-8.7384756,5.819134-12.8372833-174.5479545-.2476227-298.1247263-48.0518411-328.3900879-60.9933866-21.2369067,54.2880239-43.0830957,115.2162467-64.5676252,183.0974549Z"/>
+ <path class="cls-1"
d="M230.3060964,590.1113213c-13.8310324,43.6923779-27.4763477,90.3692613-40.7209052,139.6983144-4.6983154,17.4899839-9.3412414,35.3057873-13.9190036,53.5125738,102.8057868,33.9373459,197.4726056.0781966,200.6819264-41.8873386.0260655-.3323358-.0358401-.5799585-.016291-.8992615,2.4469035-44.4482789-64.1733837-19.8098179-62.5964179-46.5335229,1.5834822-26.9191966,116.3077416-.1563933,151.7862131-57.872037,2.7205918-4.4246271,4.4930492-8.3540087,5.8680069-12.0878987-10
[...]
+ <path class="cls-1"
d="M27.6334136,410.7542728c-1.4987691,2.7173336-2.8509195,5.8582323-4.0043201,9.6116715-19.9238547,64.7533422,120.9604422,151.7405984,101.7924885,170.7032859-17.2782014,17.0859679-39.7955782-21.9602257-67.5619052-5.8321668-3.0431529,1.7724574-6.1319206,4.0075783-9.3021431,7.2592556-31.4024712,32.1714049-.4919873,124.8539837,88.6033203,174.3263973-20.7905342,69.8100589-41.489839,147.8047004-61.7525458,229.3703222,7.3504851-2.573973,16.1476081-5.1544625,18.3371143-12.
[...]
+</svg>
\ No newline at end of file
diff --git a/src/main/java/org/apache/commons/text/doc-files/logo.png
b/src/main/java/org/apache/commons/text/doc-files/logo.png
new file mode 100644
index 00000000..0c32a3e4
Binary files /dev/null and
b/src/main/java/org/apache/commons/text/doc-files/logo.png differ
diff --git a/src/main/javadoc/overview.html b/src/main/javadoc/overview.html
new file mode 100644
index 00000000..1acbbbba
--- /dev/null
+++ b/src/main/javadoc/overview.html
@@ -0,0 +1,262 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<html>
+<head>
+<title>Apache Commons Text Overview</title>
+</head>
+<body>
+ <a href="https://commons.apache.org/text"> <img
src="org/apache/commons/text/doc-files/logo.png" alt="Apache Commons Text">
+ </a>
+ <section id="Introducing">
+ <h1>
+ <img src="org/apache/commons/text/doc-files/leaf.svg" style="height:
1em; padding-right: 0.25em" alt="leaf">Introducing Apache Commons Text
+ </h1>
+ <p>Apache Commons Text is a set of utility functions and reusable
components for processing and manipulating text in a Java environment.</p>
+ </section>
+ <section>
+ <h1><img src="org/apache/commons/text/doc-files/leaf.svg" style="height:
1em; padding-right: 0.25em" alt="leaf">Table of Contents</h1>
+ <ol>
+ <li><a href="#Description">Description</a></li>
+ <li><a href="#text">Using the <code>text</code> package
+ </a></li>
+ <li><a href="#text.diff">Using the <code>text.diff</code> package
+ </a></li>
+ <li><a href="#text.lookup">Using the <code>text.lookup</code> package
+ </a></li>
+ <li><a href="#text.similarity">Using the <code>text.similarity</code>
package
+ </a></li>
+ <li><a href="#text.translate">Using the <code>text.translate</code>
package
+ </a></li>
+ <li><a href="#requirements">Requirements
+ </a></li>
+ </ol>
+ </section>
+ <section id="Description">
+ <h2><img src="org/apache/commons/text/doc-files/leaf.svg" style="height:
1em; padding-right: 0.25em" alt="leaf">Description</h2>
+ <p>The Commons Text library provides additions to the standard JDK's text
handling. Our goal is to provide a consistent set of tools for processing text
+ generally from computing distances between Strings to being able to
efficiently do String escaping of various types.</p>
+ </section>
+ <section id="text">
+ <h2><img src="org/apache/commons/text/doc-files/leaf.svg" style="height:
1em; padding-right: 0.25em" alt="leaf">Using the org.apache.commons.text
package</h2>
+ <p>
+ Originally the text package was added in Commons Lang 2.2. However, its
new home is here. It provides, amongst other classes, a replacement for
+ <code>StringBuffer</code>
+ named
+ <code> StrBuilder</code>
+ , a class for substituting variables within a String named
+ <code>StrSubstitutor</code>
+ and a replacement for StringTokenizer named
+ <code>StrTokenizer</code>
+ . While somewhat ungainly, the
+ <code> Str </code>
+ prefix has been used to ensure we don't clash with any current or future
standard Java classes.
+ </p>
+ <p>Beyond the text utilities ported over from Commons Lang, we have also
included various string similarity and distance functions. Lastly, there are
+ also utilities for addressing differences between bodies of text for the
sake of viewing these differences.</p>
+ <section>
+ <h3>Class StringEscapeUtils</h3>
+ <p>
+ From Lang 3.5, we have moved into Text StringEscapeUtils and
StrTokenizer. It provides ways in which to generate pieces of text, such as
might be used
+ for default passwords. StringEscapeUtils contains methods to escape
and unescape Java, JavaScript, HTML and XML. It is worth noting that the package
+ <code>org.apache.commons.text.translate</code>
+ holds the functionality underpinning the StringEscapeUtils with
mappings and translations between such mappings for the sake of doing String
escaping.
+ StrTokenizer is an improved alternative to java.util.StringTokenizer.
+ </p>
+ </section>
+ <section>
+ <h3>Class StringSubstitutor</h3>
+ <p>The simplest example is to use this class to replace Java System
properties. For example:</p>
+ <pre>
+ StringSubstitutor.replaceSystemProperties(
+ "You are running with java.version = ${java.version} and os.name =
${os.name}.");
+ </pre>
+ <p>
+ For details see <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">StringSubstitutor</a>.
+ </p>
+ <p>
+ Use a <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/io/StringSubstitutorReader.html">StringSubstitutorReader</a>
+ to avoid reading a whole file into memory as a
+ <code>String</code>
+ to perform string substitution, for example, when a Servlet filters a
file to a client.
+ </p>
+ <p>To build a default full-featured substitutor, use:</p>
+ <ul>
+ <li>Commons Text >= 1.8: <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">org.apache.commons.text.StringSubstitutor.createInterpolator()</a></li>
+ <li>Commons Text < 1.8: <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">new
+
StringSubstitutor(StringLookupFactory.INSTANCE.interpolatorStringLookup())</a></li>
+ </ul>
+ <p>
+ The available substitutions are defined in <a
+
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html">org.apache.commons.text.lookup.StringLookupFactory</a>.
+ </p>
+ </section>
+ <section>
+ <h3>Similarity and Distance</h3>
+ <p>
+ The
+ <code>org.apache.commons.text.similarity</code>
+ package contains various different mechanisms of calculating
"similarity scores" as well as "edit distances" between Strings. Note, the
difference
+ between a "similarity score" and a "distance function" is that a
distance function meets the following qualifications:
+ </p>
+ <ul>
+ <li><code>d(x,y) >= 0</code>, non-negativity or separation
axiom</li>
+ <li><code>d(x,y) == 0</code>, if and only if, <code>x == y</code></li>
+ <li><code>d(x,y) == d(y,x)</code>, symmetry, and</li>
+ <li><code>d(x,z) <= d(x,y) + d(y,z)</code>, the triangle
inequality</li>
+ </ul>
+ <p>whereas a "similarity score" need not satisfy all such properties.
Though, it is fairly easy to "normalize" a similarity score to manufacture an
+ "edit distance."</p>
+ <p>The list of "edit distances" that we currently support follows:</p>
+ <ul>
+ <li>Cosine Distance,</li>
+ <li>Hamming Distance,</li>
+ <li>Jaccard Distance,</li>
+ <li>Jaro Winkler Distance,</li>
+ <li>Levenshtein Distance, and</li>
+ <li>Longest Common Subsequence Distance.</li>
+ </ul>
+ <p>and the list of "similarity scores" that we support follows:</p>
+ <ul>
+ <li>Cosine Similarity,</li>
+ <li>Fuzzy Score Similarity,</li>
+ <li>Jaccard Similarity,</li>
+ <li>Jaro-Winkler Similarity, and</li>
+ <li>Longest Common Subsequence Similarity.</li>
+ </ul>
+ </section>
+ <section>
+ <h3>Text diff'ing</h3>
+ <p>
+ The
+ <code>org.apache.commons.text.diff</code>
+ package contains code for doing diff between strings. The initial
implementation of the Myers algorithm was adapted from the commons-collections
+ sequence package.
+ </p>
+ </section>
+ </section>
+ <section id="text.diff">
+ <h2>Package org.apache.commons.text.diff</h2>
+ <!--
+ CommandVisitor
+ DeleteCommand
+ EditCommand
+ EditScript
+ InsertCommand
+ KeepCommand
+ ReplacementsFinder
+ ReplacementsHandler
+ StringsComparator
+ -->
+ <p>Provides algorithms for diff between strings.</p>
+ <p>The initial implementation of the Myers algorithm was adapted from the
commons-collections sequence package.</p>
+ </section>
+ <section id="text.lookup">
+ <h2><img src="org/apache/commons/text/doc-files/leaf.svg" style="height:
1em; padding-right: 0.25em" alt="leaf">Using the org.apache.commons.text.lookup
package</h2>
+ <p>
+ Provides algorithms for looking up strings used by a <a
+
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">StringSubstitutor</a>.
Standard lookups are
+ defined in <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html">StringLookupFactory</a>
+ and the associated <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/DefaultStringLookup.html">DefaultStringLookup</a>
+ enum.
+ </p>
+ <p>
+ The example below demonstrates use of the default lookups for
+ <code>StringSubstitutor</code>
+ in order to construct a complex string.
+ </p>
+ <p>
+ <strong>NOTE:</strong> The list of lookups available by default changed
in version 1.10.0. See the documentation for <a
+
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html">StringLookupFactory</a>
for
+ details and instructions on how to reproduce the previous behavior.
+ </p>
+ <pre>
+final StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
+final String text = interpolator.replace(
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#base64DecoderStringLookup()">Base64
Decoder</a>: ${base64Decoder:SGVsbG9Xb3JsZCE=}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#base64EncoderStringLookup()">Base64
Encoder</a>: ${base64Encoder:HelloWorld!}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#constantStringLookup()">Java
Constant</a>: ${const:java.awt.event.KeyEvent.VK_ESCAPE}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#dateStringLookup()">Date</a>:
${date:yyyy-MM-dd}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#environmentVariableStringLookup()">Environment
Variable</a>: ${env:USERNAME}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#fileStringLookup(java.nio.file.Path...)">File
Content</a>: ${file:UTF-8:src/test/resources/document.properties}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#javaPlatformStringLookup()">Java</a>:
${java:version}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#localHostStringLookup()">Local
host</a>: ${localhost:canonical-name}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#loopbackAddressStringLookup()">Loopback
address</a>: ${loopbackAddress:canonical-name}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#propertiesStringLookup(java.nio.file.Path...)">Properties
File</a>: ${properties:src/test/resources/document.properties::mykey}\n"
+
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#resourceBundleStringLookup(java.lang.String)">Resource
Bundle</a>:
${resourceBundle:org.apache.commons.text.example.testResourceBundleLookup:mykey}\n"
+
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#systemPropertyStringLookup()">System
Property</a>: ${sys:user.dir}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#urlDecoderStringLookup()">URL
Decoder</a>: ${urlDecoder:Hello%20World%21}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#urlEncoderStringLookup()">URL
Encoder</a>: ${urlEncoder:Hello World!}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#xmlDecoderStringLookup()">XML
Decoder</a>: ${xmlDecoder:&lt;element&gt;}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#xmlEncoderStringLookup()">XML
Encoder</a>: ${xmlEncoder:<element>}\n" +
+ "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#xmlStringLookup()">XML
XPath</a>:
${xml:src/test/resources/document.xml:/root/path/to/node}\n");
+ </pre>
+ </section>
+ <section id="text.similarity">
+ <h2><img src="org/apache/commons/text/doc-files/leaf.svg" style="height:
1em; padding-right: 0.25em" alt="leaf">Using the
org.apache.commons.text.similarity package</h2>
+ <!--
+ Enum
+ EnumUtils
+ ValuedEnum
+ -->
+ <p>Provides algorithms for string similarity.</p>
+ <p>The algorithms that implement the EditDistance interface follow the
same simple principle: the more similar (closer) strings are, the lower is the
+ distance. For example, the words house and hose are closer than house
and trousers.</p>
+ <p>The following algorithms are available at the moment:</p>
+ <ul>
+ <li><code>CosineDistance</code></li>
+ <li><code>CosineSimilarity</code></li>
+ <li><code>FuzzyScore</code></li>
+ <li><code>HammingDistance</code></li>
+ <li><code>JaroWinklerDistance</code></li>
+ <li><code>JaroWinklerSimilarity</code></li>
+ <li><code>LevenshteinDistance</code></li>
+ <li><code>LongestCommonSubsequenceDistance</code></li>
+ </ul>
+ <p>
+ The
+ <code>CosineDistance</code>
+ utilises a
+ <code>RegexTokenizer</code>
+ regular expression tokenizer (\w+). And the
+ <code> LevenshteinDistance</code>
+ 's behavior can be changed to take into consideration a maximum
threshold.
+ </p>
+ </section>
+ <section id="text.translate">
+ <h2><img src="org/apache/commons/text/doc-files/leaf.svg" style="height:
1em; padding-right: 0.25em" alt="leaf">Using the
org.apache.commons.text.translate package</h2>
+ <!--
+ ExceptionUtils
+ Nestable
+ NestableDelegate
+ NestableError
+ NestableException
+ NestableRuntimeException
+ -->
+ <p>An API for creating text translation routines from a set of smaller
building blocks. Initially created to make it possible for the user to customize
+ the rules in the StringEscapeUtils class.</p>
+ <p>These classes are immutable, and therefore thread-safe.</p>
+ </section>
+ <section id="requirements">
+ <h2>
+ <img src="org/apache/commons/text/doc-files/leaf.svg" style="height: 1em;
padding-right: 0.25em" alt="leaf">Requirements
+ </h2>
+ <ul>
+ <li>Java 8 or above.</li>
+ <li>If using OSGi, R7 or above.</li>
+ </ul>
+ </section>
+</body>
\ No newline at end of file
diff --git a/src/site/site.xml b/src/site/site.xml
index 484a229c..b58c30f6 100644
--- a/src/site/site.xml
+++ b/src/site/site.xml
@@ -26,7 +26,7 @@
<menu name="Commons Text">
<item name="Overview" href="/index.html"/>
<item name="Download" href="/download_text.cgi"/>
- <item name="User Guide" href="/userguide.html" />
+ <item name="User Guide" href="/apidocs/index.html" />
<item name="Release History" href="/changes.html"/>
<item name="Javadoc" href="/apidocs/index.html"/>
<item name="Javadoc Archive"
href="https://javadoc.io/doc/org.apache.commons/commons-text"/>
diff --git a/src/site/xdoc/userguide.xml b/src/site/xdoc/userguide.xml
deleted file mode 100644
index 569256be..00000000
--- a/src/site/xdoc/userguide.xml
+++ /dev/null
@@ -1,293 +0,0 @@
-<?xml version="1.0"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one or more
-contributor license agreements. See the NOTICE file distributed with
-this work for additional information regarding copyright ownership.
-The ASF licenses this file to You under the Apache License, Version 2.0
-(the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
- https://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
--->
-
-<document>
-
- <properties>
- <title>Commons Text - User guide</title>
- <author email="[email protected]">Apache Commons Team</author>
- </properties>
-
- <body>
-
- <section name='User Guide for Commons "Text"'>
- <div align="center">
- <h1>The Commons <em>Text</em> Package
- </h1>
- <h2>Users Guide</h2>
- <br/>
- <a href="#Description">[Description]</a>
- <a href="#text">[text]</a>
- <a href="#text.diff">[text.diff]</a>
- <a href="#text.lookup">[text.lookup]</a>
- <a href="#text.similarity">[text.similarity]</a>
- <a href="#text.translate">[text.translate]</a>
- <br/>
- <br/>
- </div>
- </section>
-
- <section name="Description">
- <p>The Commons Text library provides additions to the standard JDK's
- text handling. Our goal is to provide a consistent set of tools for
- processing text generally from computing distances between Strings
- to being able to efficiently do String escaping of various types.
- </p>
- </section>
-
- <section name="Package org.apache.commons.text">
-
- <p>Originally the text package was added in Commons Lang 2.2. However,
its
- new home is here. It provides, amongst other
- classes, a replacement for <code>StringBuffer</code> named <code>
- StrBuilder</code>, a class for substituting variables within a String
- named <code>StrSubstitutor</code> and a replacement for StringTokenizer
- named <code>StrTokenizer</code>. While somewhat ungainly, the <code>
- Str
- </code> prefix has been used to ensure we don't clash with any current
- or future standard Java classes.
- </p>
-
- <p>Beyond the text utilities ported over from Commons Lang, we have also
included various
- string similarity and distance functions. Lastly, there are also
utilities for
- addressing differences between bodies of text for the sake of viewing
these
- differences.
- </p>
-
- <subsection name="Class StringEscapeUtils">
- <p>From Lang 3.5, we have moved into Text StringEscapeUtils and
StrTokenizer.
- It provides ways in which to generate pieces of text, such as might
- be used for default passwords. StringEscapeUtils contains methods to
- escape and unescape Java, JavaScript, HTML and XML. It is worth
noting that
- the package <code>org.apache.commons.text.translate</code> holds the
- functionality underpinning the StringEscapeUtils with mappings and
translations
- between such mappings for the sake of doing String escaping.
StrTokenizer is
- an improved alternative to java.util.StringTokenizer.
- </p>
- </subsection>
-
- <subsection name="Class StringSubstitutor">
- <p>
- The simplest example is to use this class to replace Java System
properties. For example:
- <pre>
- StringSubstitutor.replaceSystemProperties(
- "You are running with java.version = ${java.version} and os.name =
${os.name}.");
- </pre>
- </p>
- <p>
- For details see <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">StringSubstitutor</a>.
- </p>
- <p>
- Use a <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/io/StringSubstitutorReader.html">StringSubstitutorReader</a>
- to avoid reading a whole file into memory as a <code>String</code> to
perform string substitution, for example, when a Servlet filters a file to a
client.
- </p>
- <p>
- To build a default full-featured substitutor, use:
- </p>
- <ul>
- <li>Commons Text >= 1.8:
- <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">org.apache.commons.text.StringSubstitutor.createInterpolator()</a></li>
- <li>Commons Text < 1.8:
- <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">new
StringSubstitutor(StringLookupFactory.INSTANCE.interpolatorStringLookup())</a></li>
- </ul>
- <p>
- The available substitutions are defined in
- <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html">org.apache.commons.text.lookup.StringLookupFactory</a>.
- </p>
- </subsection>
-
- <subsection name="Similarity and Distance">
- <p>The <code>org.apache.commons.text.similarity</code> packages
contains various different mechanisms of
- calculating "similarity scores" as well as "edit distances between
Strings. Note,
- the difference between a "similarity score" and a "distance
function" is that
- a distance functions meets the following qualifications:
- <ul>
- <li><code>d(x,y) >= 0</code>, non-negativity or separation axiom
- </li>
- <li><code>d(x,y) == 0</code>, if and only if,
- <code>x == y</code>
- </li>
- <li><code>d(x,y) == d(y,x)</code>, symmetry, and
- </li>
- <li><code>d(x,z) <= d(x,y) + d(y,z)</code>, the triangle
inequality
- </li>
- </ul>
- whereas a "similarity score" need not satisfy all such properties.
Though, it
- is fairly easy to "normalize" a similarity score to manufacture an
"edit distance."
- </p>
- <p>
- The list of "edit distances" that we currently support follow:
- <ul>
- <li>Cosine Distance,</li>
- <li>Hamming Distance,</li>
- <li>Jaccard Distance,</li>
- <li>Jaro Winkler Distance,</li>
- <li>Levenshtein Distance,</li>
- <li>Longest Commons Subsequence Distance,</li>
- </ul>
- and the list of "similarity scores" that we support follows:
- <ul>
- <li>Cosine Similarity,</li>
- <li>Fuzzy Score Similarity,</li>
- <li>Jaccard Similarity,</li>
- <li>Jaro-Winkler Similarity, and</li>
- <li>Longest Common Subsequence Similarity.</li>
- </ul>
- </p>
- </subsection>
-
- <subsection
- name="Text diff'ing">
- <p>The <code>org.apache.commons.text.diff</code> package contains code
for
- doing diff between strings. The initial implementation of the Myers
algorithm was adapted from the
- commons-collections sequence package.
- </p>
- </subsection>
-
-
- </section>
-
- <section name="Package org.apache.commons.text.diff">
- <!--
- CommandVisitor
- DeleteCommand
- EditCommand
- EditScript
- InsertCommand
- KeepCommand
- ReplacementsFinder
- ReplacementsHandler
- StringsComparator
- -->
- <p>Provides algorithms for diff between strings.</p>
- <p>The initial implementation of the Myers algorithm was adapted from the
- commons-collections sequence package.
- </p>
- </section>
-
- <section name="Package org.apache.commons.text.lookup">
- <p>Provides algorithms for looking up strings used by a
- <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html">StringSubstitutor</a>.
- Standard lookups are defined in
- <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html">StringLookupFactory</a>
- and the associated
- <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/DefaultStringLookup.html">DefaultStringLookup</a>
- enum.
- </p>
- <p>
- The example below demonstrates use of the default lookups for
<code>StringSubstitutor</code> in order to
- construct a complex string.
- </p>
- <p><strong>NOTE:</strong> The list of lookups available by default
changed in version 1.10.0. See the documentation for
- <a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html">StringLookupFactory</a>
- for details and instructions on how to reproduce the previous behavior.
- </p>
- <source>
-final StringSubstitutor interpolator = StringSubstitutor.createInterpolator();
-final String text = interpolator.replace(
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#base64DecoderStringLookup()">Base64
Decoder</a>: ${base64Decoder:SGVsbG9Xb3JsZCE=}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#base64EncoderStringLookup()">Base64
Encoder</a>: ${base64Encoder:HelloWorld!}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#constantStringLookup()">Java
Constant</a>: ${const:java.awt.event.KeyEvent.VK_ESCAPE}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#dateStringLookup()">Date</a>:
${date:yyyy-MM-dd}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#environmentVariableStringLookup()">Environment
Variable</a>: ${env:USERNAME}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#fileStringLookup(java.nio.file.Path...)()">File
Content</a>: ${file:UTF-8:src/test/resources/document.properties}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#javaPlatformStringLookup()">Java</a>:
${java:version}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#localHostStringLookup()">Local
host</a>: ${localhost:canonical-name}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#localHostStringLookup()">Loopback
address</a>: ${loopbackAddress:canonical-name}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#propertiesStringLookup(java.nio.file.Path...)()">Properties
File</a>: ${properties:src/test/resources/document.properties::mykey}\n"
+
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#resourceBundleStringLookup(java.lang.String)()">Resource
Bundle</a>:
${resourceBundle:org.apache.commons.text.example.testResourceBundleLookup:mykey}\n"
+
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#systemPropertyStringLookup()">System
Property</a>: ${sys:user.dir}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#urlDecoderStringLookup()">URL
Decoder</a>: ${urlDecoder:Hello%20World%21}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#urlEncoderStringLookup()">URL
Encoder</a>: ${urlEncoder:Hello World!}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#xmlDecoderStringLookup()">XML
Decoder</a>: ${xmlDecoder:&lt;element&gt;}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#xmlEncoderStringLookup()">XML
Encoder</a>: ${xmlEncoder:<element>}\n" +
- "<a
href="https://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/lookup/StringLookupFactory.html#xmlStringLookup()">XML
XPath</a>:
${xml:src/test/resources/document.xml:/root/path/to/node}\n"
-);
- </source>
- </section>
-
- <section name="Package org.apache.commons.text.similarity">
- <!--
- Enum
- EnumUtils
- ValuedEnum
- -->
- <p>Provides algorithms for string similarity.</p>
-
- <p>The algorithms that implement the EditDistance interface follow the
- same
- simple principle: the more similar (closer) strings are, the lower is
the
- distance.
- For example, the words house and hose are closer than house and
- trousers.
- </p>
-
- <p>The following algorithms are available at the moment:</p>
-
- <ul>
- <li>
- <code>CosineDistance</code>
- </li>
- <li>
- <code>CosineSimilarity</code>
- </li>
- <li>
- <code>FuzzyScore</code>
- </li>
- <li>
- <code>HammingDistance</code>
- </li>
- <li>
- <code>JaroWinklerDistance</code>
- </li>
- <li>
- <code>JaroWinklerSimilarity</code>
- </li>
- <li>
- <code>LevenshteinDistance</code>
- </li>
- <li>
- <code>LongestCommonSubsequenceDistance</code>
- </li>
- </ul>
-
- <p>The <code>CosineDistance</code> utilises a
- <code>RegexTokenizer</code>
- regular expression tokenizer (\w+). And the <code>
- LevenshteinDistance</code>'s
- behavior can be changed to take into consideration a maximum
- throughput.
- </p>
- </section>
-
- <section name="Package org.apache.commons.text.translate.*">
- <!--
- ExceptionUtils
- Nestable
- NestableDelegate
- NestableError
- NestableException
- NestableRuntimeException
- -->
- <p>An API for creating text translation routines from a set of smaller
- building blocks. Initially created to make it possible for the user to
- customize the rules in the StringEscapeUtils class.
- </p>
- <p>These classes are immutable, and therefore thread-safe.</p>
- </section>
-
- </body>
-</document>