This is an automated email from the ASF dual-hosted git repository.

elharo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/xerces-j.git


The following commit(s) were added to refs/heads/main by this push:
     new dcacb2034 [XERCESJ-1781] Javadoc fixes in org.apache.xerces.impl (#41)
dcacb2034 is described below

commit dcacb2034e50cbd3f21f0547f907b65cf5264af8
Author: Samael <[email protected]>
AuthorDate: Wed Nov 5 18:01:46 2025 +0000

    [XERCESJ-1781] Javadoc fixes in org.apache.xerces.impl (#41)
    
    * [XERCESJ-1781] Javadoc fixes in org.apache.xerces.impl
---
 .../impl/xpath/regex/ParserForXMLSchema.java       |  10 +-
 src/org/apache/xerces/impl/xpath/regex/REUtil.java |  13 +
 .../apache/xerces/impl/xpath/regex/RangeToken.java |   2 +-
 .../xerces/impl/xpath/regex/RegexParser.java       |  16 +
 .../xerces/impl/xpath/regex/RegularExpression.java | 610 ++++++++++-----------
 5 files changed, 332 insertions(+), 319 deletions(-)

diff --git a/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java 
b/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
index fca38bb07..1ed862986 100644
--- a/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
+++ b/src/org/apache/xerces/impl/xpath/regex/ParserForXMLSchema.java
@@ -152,6 +152,7 @@ class ParserForXMLSchema extends RegexParser {
     /**
      * Parses a character-class-expression, not a character-class-escape.
      *
+     * <pre>
      * c-c-expression   ::= '[' c-group ']'
      * c-group          ::= positive-c-group | negative-c-group | 
c-c-subtraction
      * positive-c-group ::= (c-range | c-c-escape)+
@@ -162,9 +163,12 @@ class ParserForXMLSchema extends RegexParser {
      * single-range     ::= multi-c-escape | category-c-escape | 
block-c-escape | &lt;any XML char&gt;
      * cc-normal-c      ::= &lt;any character except [, ], \&gt;
      * from-to-range    ::= cc-normal-c '-' cc-normal-c
+     * </pre>
      *
-     * @param useNrage Ignored.
-     * @return This returns no NrageToken.
+     * @param useNrange ignored
+     * @return returns no NRANGE token
+     * @throws ParseException if regex does not conform to the syntax
+     * @see Token#NRANGE
      */
     protected RangeToken parseCharacterClass(boolean useNrange) throws 
ParseException {
         this.setContext(S_INBRACKETS);
@@ -184,7 +188,7 @@ class ParserForXMLSchema extends RegexParser {
         }
         int type;
         boolean firstloop = true;
-        while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for 
this loop.
+        while ((type = this.read()) != T_EOF) { // Don't use 'continue' for 
this loop.
                
                wasDecoded = false;
             // single-range | from-to-range | subtraction
diff --git a/src/org/apache/xerces/impl/xpath/regex/REUtil.java 
b/src/org/apache/xerces/impl/xpath/regex/REUtil.java
index 2c547323c..f8a594412 100644
--- a/src/org/apache/xerces/impl/xpath/regex/REUtil.java
+++ b/src/org/apache/xerces/impl/xpath/regex/REUtil.java
@@ -95,6 +95,13 @@ public final class REUtil {
         return ret;
     }
 
+    /**
+     * Parses the regular expression options.
+     *
+     * @param opts a string of regular expression options consisting of "i" 
"m" "s" "u" "w" "," "X" or null
+     * @return an int equivalent to the regular expression options
+     * @throws ParseException if regex does not conform to the syntax
+     */
     static final int parseOptions(String opts) throws ParseException {
         if (opts == null)  return 0;
         int options = 0;
@@ -107,6 +114,12 @@ public final class REUtil {
         return options;
     }
 
+    /**
+     * Creates a string representation of regular expression options from an 
int.
+     *
+     * @param options an int value representation of regular expression options
+     * @return a string representation of regular expression options
+     */
     static final String createOptionString(int options) {
         StringBuffer sb = new StringBuffer(9);
         if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) 
!= 0)
diff --git a/src/org/apache/xerces/impl/xpath/regex/RangeToken.java 
b/src/org/apache/xerces/impl/xpath/regex/RangeToken.java
index 060eb6b31..5dc8cf9f7 100644
--- a/src/org/apache/xerces/impl/xpath/regex/RangeToken.java
+++ b/src/org/apache/xerces/impl/xpath/regex/RangeToken.java
@@ -317,7 +317,7 @@ final class RangeToken extends Token implements 
java.io.Serializable {
     }
 
     /**
-     * @param tok Ignore whether it is NRANGE or not.
+     * @param token ignore whether it is NRANGE or not
      */
     protected void intersectRanges(Token token) {
         RangeToken tok = (RangeToken)token;
diff --git a/src/org/apache/xerces/impl/xpath/regex/RegexParser.java 
b/src/org/apache/xerces/impl/xpath/regex/RegexParser.java
index 2da0e14c4..234221932 100644
--- a/src/org/apache/xerces/impl/xpath/regex/RegexParser.java
+++ b/src/org/apache/xerces/impl/xpath/regex/RegexParser.java
@@ -84,10 +84,21 @@ class RegexParser {
     public RegexParser() {
         this.setLocale(Locale.getDefault());
     }
+
+    /**
+     * Creates a new RegexParser with the desired locale.
+     *
+     * @param locale the desired locale or null
+     */
     public RegexParser(Locale locale) {
         this.setLocale(locale);
     }
 
+    /**
+     * Sets the locale for the regex parser.
+     *
+     * @param locale the desired locale or null
+     */
     public void setLocale(Locale locale) {
         try {
             if (locale != null) {
@@ -836,6 +847,11 @@ class RegexParser {
         return tok;
     }
 
+    /**
+     * @param c char data
+     * @return a {@link RangeToken}
+     * @throws ParseException if regex does not conform to the syntax
+     */
     protected RangeToken processBacksolidus_pP(int c) throws ParseException {
 
         this.next();
diff --git a/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java 
b/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java
index 85e54ee97..78ceb0032 100644
--- a/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java
+++ b/src/org/apache/xerces/impl/xpath/regex/RegularExpression.java
@@ -27,223 +27,212 @@ import org.apache.xerces.util.IntStack;
  * A regular expression matching engine using Non-deterministic Finite 
Automaton (NFA).
  * This engine does not conform to the POSIX regular expression.
  *
- * <hr width="50%">
  * <h3>How to use</h3>
  *
  * <dl>
  *   <dt>A. Standard way
  *   <dd>
  * <pre>
- * RegularExpression re = new RegularExpression(<var>regex</var>);
+ * {@code
+ * RegularExpression re = new RegularExpression(regex);
  * if (re.matches(text)) { ... }
+ * }
  * </pre>
  *
  *   <dt>B. Capturing groups
  *   <dd>
  * <pre>
- * RegularExpression re = new RegularExpression(<var>regex</var>);
+ * {@code
+ * RegularExpression re = new RegularExpression(regex);
  * Match match = new Match();
  * if (re.matches(text, match)) {
  *     ... // You can refer captured texts with methods of the 
<code>Match</code> class.
  * }
+ * }
  * </pre>
  *
  * </dl>
  *
  * <h4>Case-insensitive matching</h4>
  * <pre>
+ * {@code
  * RegularExpression re = new RegularExpression(<var>regex</var>, "i");
  * if (re.matches(text) >= 0) { ...}
+ * }
  * </pre>
  *
  * <h4>Options</h4>
- * <p>You can specify options to <a href="#RegularExpression(java.lang.String, 
java.lang.String)"><code>RegularExpression(</code><var>regex</var><code>, 
</code><var>options</var><code>)</code></a>
- *    or <a href="#setPattern(java.lang.String, 
java.lang.String)"><code>setPattern(</code><var>regex</var><code>, 
</code><var>options</var><code>)</code></a>.
- *    This <var>options</var> parameter consists of the following characters.
- * </p>
- * <dl>
- *   <dt><a name="I_OPTION"><code>"i"</code></a>
- *   <dd>This option indicates case-insensitive matching.
- *   <dt><a name="M_OPTION"><code>"m"</code></a>
- *   <dd class="REGEX"><kbd>^</kbd> and <kbd>$</kbd> consider the EOL 
characters within the text.
- *   <dt><a name="S_OPTION"><code>"s"</code></a>
- *   <dd class="REGEX"><kbd>.</kbd> matches any one character.
- *   <dt><a name="U_OPTION"><code>"u"</code></a>
- *   <dd class="REGEX">Redefines <Kbd>\d \D \w \W \s \S \b \B \&lt; \></kbd> 
as becoming to Unicode.
- *   <dt><a name="W_OPTION"><code>"w"</code></a>
- *   <dd class="REGEX">By this option, <kbd>\b \B \&lt; \></kbd> are processed 
with the method of
- *      'Unicode Regular Expression Guidelines' Revision 4.
- *      When "w" and "u" are specified at the same time,
- *      <kbd>\b \B \&lt; \></kbd> are processed for the "w" option.
- *   <dt><a name="COMMA_OPTION"><code>","</code></a>
- *   <dd>The parser treats a comma in a character class as a range separator.
- *      <kbd class="REGEX">[a,b]</kbd> matches <kbd>a</kbd> or <kbd>,</kbd> or 
<kbd>b</kbd> without this option.
- *      <kbd class="REGEX">[a,b]</kbd> matches <kbd>a</kbd> or <kbd>b</kbd> 
with this option.
- *
- *   <dt><a name="X_OPTION"><code>"X"</code></a>
- *   <dd class="REGEX">
- *       By this option, the engine confoms to <a 
href="http://www.w3.org/TR/2000/WD-xmlschema-2-20000407/#regexs">XML Schema: 
Regular Expression</a>.
- *       The <code>match()</code> method does not do subsring matching
- *       but entire string matching.
+ * <p>You can specify options to {@link #RegularExpression(String, String)} or 
{@link #setPattern(String, String)}.</p>
+ * <p>This <code>options</code> parameter consists of the following 
characters:</p>
+ * <ul>
+ *   <li><code>i</code> : This option indicates case-insensitive matching.</li>
+ *   <li><code>m</code> : <code>^</code> and <code>$</code> consider the EOL 
characters within the text.</li>
+ *   <li><code>s</code> : <code>.</code> matches any one character.</li>
+ *   <li><code>u</code> : Redefines <code>\d \D \w \W \s \S \b \B \&lt; 
\></code> as being Unicode.</li>
+ *   <li><code>w</code> : With this option, <code>\b \B \&lt; \></code> are 
processed with the method of 'Unicode Regular Expression Guidelines' Revision 
4. When "w" and "u" are specified at the same time, <code>\b \B \&lt; \></code> 
are processed for the "w" option.</li>
+ *   <li><code>,</code> : The parser treats a comma in a character class as a 
range separator.
+ *   <ul>
+ *       <li><code>[a,b]</code> matches <code>a</code> or <code>,</code> or 
<code>b</code> without this option.</li>
+ *       <li><code>[a,b]</code> matches <code>a</code> or <code>b</code> with 
this option.</li>
+ *   </ul>
+ *   </li>
+ *   <li><code>X</code> : With this option, the engine conforms to <a 
href="https://www.w3.org/TR/2000/WD-xmlschema-2-20000407/#regexs">XML Schema: 
Regular Expression</a>. The <code>match()</code> method does not do substring 
matching but entire string matching.</li>
+ * </ul>
  *
- * </dl>
- * 
- * <hr width="50%">
  * <h3>Syntax</h3>
- * <table border="1" bgcolor="#ddeeff">
- *   <tr>
- *    <td>
- *     <h4>Differences from the Perl 5 regular expression</h4>
- *     <ul>
- *      <li>There is 6-digit hexadecimal character representation  
(<kbd>\u005cv</kbd><var>HHHHHH</var>.)
- *      <li>Supports subtraction, union, and intersection operations for 
character classes.
- *      <li>Not supported: <kbd>\</kbd><var>ooo</var> (Octal character 
representations),
- *          <Kbd>\G</kbd>, <kbd>\C</kbd>, <kbd>\l</kbd><var>c</var>,
- *          <kbd>\u005c u</kbd><var>c</var>, <kbd>\L</kbd>, <kbd>\U</kbd>,
- *          <kbd>\E</kbd>, <kbd>\Q</kbd>, 
<kbd>\N{</kbd><var>name</var><kbd>}</kbd>,
- *          <Kbd>(?{<kbd><var>code</var><kbd>})</kbd>, 
<Kbd>(??{<kbd><var>code</var><kbd>})</kbd>
- *     </ul>
- *    </td>
- *   </tr>
- * </table>
  *
- * <p>Meta characters are `<KBD>. * + ? { [ ( ) | \ ^ $</KBD>'.</p>
+ * <h4>Differences from Perl 5 regular expression</h4>
+ * <ul>
+ *  <li>There is a 6-digit hexadecimal character representation 
(<code>\vHHHHHH</code>).
+ *  <li>Supports subtraction, union, and intersection operations for character 
classes.
+ *  <li>Not supported:
+ *  <ul>
+ *    <li><code>\ooo</code> (Octal character representations)</li>
+ *    <li><code>\G</code>, <code>\C</code>, <code>\lc</code></li>
+ *    <li><code>\u005cuc</code>, <code>\L</code>, <code>\U</code></li>
+ *    <li><code>\E</code>, <code>\Q</code>, <code>\N{name}</code></li>
+ *    <li><code>(?{code})</code>, <code>(??{code})</code></li>
+ *  </ul>
+ * </ul>
+ *
+ * <p>Meta characters are <code>. * + ? { [ ( ) | \ ^ $</code>.</p>
  * <ul>
  *   <li>Character
  *     <dl>
- *       <dt class="REGEX"><kbd>.</kbd> (A period)
+ *       <dt class="REGEX"><code>.</code> (A period)
  *       <dd>Matches any one character except the following characters.
- *       <dd>LINE FEED (U+000A), CARRIAGE RETURN (U+000D),
- *           PARAGRAPH SEPARATOR (U+2029), LINE SEPARATOR (U+2028)
+ *       <dd>LINE FEED (U+000A), CARRIAGE RETURN (U+000D), PARAGRAPH SEPARATOR 
(U+2029), LINE SEPARATOR (U+2028)
  *       <dd>This expression matches one code point in Unicode. It can match a 
pair of surrogates.
  *       <dd>When <a href="#S_OPTION">the "s" option</a> is specified,
  *           it matches any character including the above four characters.
  *
- *       <dt class="REGEX"><Kbd>\e \f \n \r \t</kbd>
+ *       <dt class="REGEX"><code>\e \f \n \r \t</code>
  *       <dd>Matches ESCAPE (U+001B), FORM FEED (U+000C), LINE FEED (U+000A),
  *           CARRIAGE RETURN (U+000D), HORIZONTAL TABULATION (U+0009)
  *
- *       <dt class="REGEX"><kbd>\c</kbd><var>C</var>
+ *       <dt class="REGEX"><code>\cC</code>
  *       <dd>Matches a control character.
- *           The <var>C</var> must be one of '<kbd>@</kbd>', 
'<kbd>A</kbd>'-'<kbd>Z</kbd>',
- *           '<kbd>[</kbd>', '<kbd>\u005c</kbd>', '<kbd>]</kbd>', 
'<kbd>^</kbd>', '<kbd>_</kbd>'.
- *           It matches a control character of which the character code is 
less than
- *           the character code of the <var>C</var> by 0x0040.
- *       <dd class="REGEX">For example, a <kbd>\cJ</kbd> matches a LINE FEED 
(U+000A),
- *           and a <kbd>\c[</kbd> matches an ESCAPE (U+001B).
+ *           The <var>C</var> must be one of '<code>@</code>', 
'<code>A</code>'-'<code>Z</code>',
+ *           '<code>[</code>', '<code>\</code>', '<code>]</code>', 
'<code>^</code>', '<code>_</code>'.
+ *           It matches a control character of which the character code is 
less than the character code of
+ *           the <var>C</var> by 0x0040.
+ *       <dd class="REGEX">For example, a <code>\cJ</code> matches a LINE FEED 
(U+000A),
+ *           and a <code>\c[</code> matches an ESCAPE (U+001B).
  *
  *       <dt class="REGEX">a non-meta character
  *       <dd>Matches the character.
  *
- *       <dt class="REGEX"><KBD>\</KBD> + a meta character
+ *       <dt class="REGEX"><code>\</code> + a meta character
  *       <dd>Matches the meta character.
  *
- *       <dt class="REGEX"><kbd>\u005cx</kbd><var>HH</var> 
<kbd>\u005cx{</kbd><var>HHHH</var><kbd>}</kbd>
- *       <dd>Matches a character of which code point is <var>HH</var> 
(Hexadecimal) in Unicode.
- *           You can write just 2 digits for <kbd>\u005cx</kbd><var>HH</var>, 
and
- *           variable length digits for 
<kbd>\u005cx{</kbd><var>HHHH</var><kbd>}</kbd>.
+ *       <dt class="REGEX"><code>\xHH</code> <code>\x{HHHH}</code>
+ *       <dd>Matches a character of which code point is <var>HH</var> 
(Hexadecimal) in Unicode. You can write
+ *           just 2 digits for <code>\xHH</code>, and variable length digits 
for <code>\x{HHHH}</code>.
  *
- *       <!--
- *       <dt class="REGEX"><kbd>\u005c u</kbd><var>HHHH</var>
- *       <dd>Matches a character of which code point is <var>HHHH</var> 
(Hexadecimal) in Unicode.
- *       -->
- *
- *       <dt class="REGEX"><kbd>\u005cv</kbd><var>HHHHHH</var>
+ *       <dt><code>\vHHHHHH</code>
  *       <dd>Matches a character of which code point is <var>HHHHHH</var> 
(Hexadecimal) in Unicode.
  *
- *       <dt class="REGEX"><kbd>\g</kbd>
+ *       <dt class="REGEX"><code>\g</code>
  *       <dd>Matches a grapheme.
- *       <dd class="REGEX">It is equivalent to 
<kbd>(?[\p{ASSIGNED}]-[\p{M}\p{C}])?(?:\p{M}|[\x{094D}\x{09CD}\x{0A4D}\x{0ACD}\x{0B3D}\x{0BCD}\x{0C4D}\x{0CCD}\x{0D4D}\x{0E3A}\x{0F84}]\p{L}|[\x{1160}-\x{11A7}]|[\x{11A8}-\x{11FF}]|[\x{FF9E}\x{FF9F}])*</kbd>
+ *       <dd class="REGEX">It is equivalent to 
<code>(?[\p{ASSIGNED}]-[\p{M}\p{C}])?(?:\p{M}|[\x{094D}\x{09CD}\x{0A4D}\x{0ACD}\x{0B3D}\x{0BCD}\x{0C4D}\x{0CCD}\x{0D4D}\x{0E3A}\x{0F84}]\p{L}|[\x{1160}-\x{11A7}]|[\x{11A8}-\x{11FF}]|[\x{FF9E}\x{FF9F}])*</code>
  *
- *       <dt class="REGEX"><kbd>\X</kbd>
- *       <dd class="REGEX">Matches a combining character sequence.
- *       It is equivalent to <kbd>(?:\PM\pM*)</kbd>
+ *       <dt class="REGEX"><code>\X</code>
+ *       <dd class="REGEX">Matches a combining character sequence. It is 
equivalent to <code>(?:\PM\pM*)</code>
  *     </dl>
  *   </li>
  *
  *   <li>Character class
  *     <dl>
-+ *       <dt 
class="REGEX"><kbd>[</kbd><var>R<sub>1</sub></var><var>R<sub>2</sub></var><var>...</var><var>R<sub>n</sub></var><kbd>]</kbd>
 (without <a href="#COMMA_OPTION">"," option</a>)
-+ *       <dt 
class="REGEX"><kbd>[</kbd><var>R<sub>1</sub></var><kbd>,</kbd><var>R<sub>2</sub></var><kbd>,</kbd><var>...</var><kbd>,</kbd><var>R<sub>n</sub></var><kbd>]</kbd>
 (with <a href="#COMMA_OPTION">"," option</a>)
++ *      <dt 
class="REGEX">[<var>R<sub>1</sub></var><var>R<sub>2</sub></var><var>...</var><var>R<sub>n</sub></var>]
 (without a {@link #SPECIAL_COMMA} option)</dt>
++ *      <dt 
class="REGEX">[<var>R<sub>1</sub></var>,<var>R<sub>2</sub></var>,<var>...</var>,<var>R<sub>n</sub></var>]
 (with a {@link #SPECIAL_COMMA} option)</dt>
  *       <dd>Positive character class.  It matches a character in ranges.
  *       <dd><var>R<sub>n</sub></var>:
  *       <ul>
- *         <li class="REGEX">A character (including <Kbd>\e \f \n \r \t</kbd> 
<kbd>\u005cx</kbd><var>HH</var> <kbd>\u005cx{</kbd><var>HHHH</var><kbd>}</kbd> 
<!--kbd>\u005c u</kbd><var>HHHH</var--> <kbd>\u005cv</kbd><var>HHHHHH</var>)
- *             <p>This range matches the character.
- *         <li 
class="REGEX"><var>C<sub>1</sub></var><kbd>-</kbd><var>C<sub>2</sub></var>
- *             <p>This range matches a character which has a code point that 
is >= <var>C<sub>1</sub></var>'s code point and &lt;= 
<var>C<sub>2</sub></var>'s code point.
-+ *         <li class="REGEX">A POSIX character class: <Kbd>[:alpha:] 
[:alnum:] [:ascii:] [:cntrl:] [:digit:] [:graph:] [:lower:] [:print:] [:punct:] 
[:space:] [:upper:] [:xdigit:]</kbd>,
-+ *             and negative POSIX character classes in Perl like 
<kbd>[:^alpha:]</kbd>
- *             <p>...
- *         <li class="REGEX"><kbd>\d \D \s \S \w \W 
\p{</kbd><var>name</var><kbd>} \P{</kbd><var>name</var><kbd>}</kbd>
- *             <p>These expressions specifies the same ranges as the following 
expressions.
+ *         <li class="REGEX">A character (including <code>\e \f \n \r \t \xHH 
\x{HHHH} \vHHHHHH</code>)
+ *             <p>This range matches the character.</p>
+ *         </li>
+ *         <li class="REGEX"><var>C<sub>1</sub></var>-<var>C<sub>2</sub></var>
+ *             <p>This range matches a character which has a code point that 
is >= <var>C1</var>'s code point and &lt;= <var>C2</var>'s code point.</p>
+ *         </li>
++ *        <li class="REGEX">A POSIX character class: <code>[:alpha:] 
[:alnum:] [:ascii:] [:cntrl:] [:digit:] [:graph:] [:lower:] [:print:] [:punct:] 
[:space:] [:upper:] [:xdigit:]</code>,
++ *             and negative POSIX character classes in Perl like 
<code>[:^alpha:]</code></li>
+ *         <li class="REGEX"><code>\d \D \s \S \w \W \p{name} \P{name}</code>
+ *             <p>These expressions specify the same ranges as the following 
expressions.</p>
+ *         </li>
  *       </ul>
- *       <p class="REGEX">Enumerated ranges are merged (union operation).
- *          <kbd>[a-ec-z]</kbd> is equivalent to <kbd>[a-z]</kbd>
+ *       <p>Enumerated ranges are merged (union operation). 
<code>[a-ec-z]</code> is equivalent to <code>[a-z]</code></p>
+ *       </dd>
  *
- *       <dt 
class="REGEX"><kbd>[^</kbd><var>R<sub>1</sub></var><var>R<sub>2</sub></var><var>...</var><var>R<sub>n</sub></var><kbd>]</kbd>
 (without a <a href="#COMMA_OPTION">"," option</a>)
- *       <dt 
class="REGEX"><kbd>[^</kbd><var>R<sub>1</sub></var><kbd>,</kbd><var>R<sub>2</sub></var><kbd>,</kbd><var>...</var><kbd>,</kbd><var>R<sub>n</sub></var><kbd>]</kbd>
 (with a <a href="#COMMA_OPTION">"," option</a>)
- *       <dd>Negative character class.  It matches a character not in ranges.
+ *       <dt 
class="REGEX">[^<var>R<sub>1</sub></var><var>R<sub>2</sub></var><var>...</var><var>R<sub>n</sub></var>]
 (without a {@link #SPECIAL_COMMA} option)</dt>
+ *       <dt 
class="REGEX">[^<var>R<sub>1</sub></var>,<var>R<sub>2</sub></var>,<var>...</var>,<var>R<sub>n</sub></var>]
 (with a {@link #SPECIAL_COMMA} option)</dt>
+ *       <dd>Negative character class. It matches a character not in 
ranges.</dd>
  *
- *       <dt 
class="REGEX"><kbd>(?[</kbd><var>ranges</var><kbd>]</kbd><var>op</var><kbd>[</kbd><var>ranges</var><kbd>]</kbd><var>op</var><kbd>[</kbd><var>ranges</var><kbd>]</kbd>
 ... <Kbd>)</kbd>
- *       (<var>op</var> is <kbd>-</kbd> or <kbd>+</kbd> or <kbd>&</kbd>.)
+ *       <dt class="REGEX"><code>(?[ranges]op[ranges]op[ranges] ... )</code>
+ *       (where <var>op</var> is <code>-</code>, <code>+</code> or 
<code>&amp;</code>.)
+ *       </dt>
  *       <dd>Subtraction or union or intersection for character classes.
- *       <dd class="REGEX">For exmaple, <kbd>(?[A-Z]-[CF])</kbd> is equivalent 
to <kbd>[A-BD-EG-Z]</kbd>, and <kbd>(?[0x00-0x7f]-[K]&[\p{Lu}])</kbd> is 
equivalent to <kbd>[A-JL-Z]</kbd>.
- *       <dd>The result of this operations is a <u>positive character class</u>
+ *       <p>For example, <code>(?[A-Z]-[CF])</code> is equivalent to 
<code>[A-BD-EG-Z]</code>, and <code>(?[0x00-0x7f]-[K]&amp;[\p{Lu}])</code> is 
equivalent to <code>[A-JL-Z]</code>.</p>
+ *       <p>The result of this operation is a <u>positive character class</u>
  *           even if an expression includes any negative character classes.
- *           You have to take care on this in case-insensitive matching.
- *           For instance, <kbd>(?[^b])</kbd> is equivalent to 
<kbd>[\x00-ac-\x{10ffff}]</kbd>,
- *           which is equivalent to <kbd>[^b]</kbd> in case-sensitive matching.
- *           But, in case-insensitive matching, <kbd>(?[^b])</kbd> matches any 
character because
- *           it includes '<kbd>B</kbd>' and '<kbd>B</kbd>' matches 
'<kbd>b</kbd>'
- *           though <kbd>[^b]</kbd> is processed as <kbd>[^Bb]</kbd>.
+ *           You have to take care of this in case-insensitive matching.
+ *           For instance, <code>(?[^b])</code> is equivalent to 
<code>[\x00-ac-\x{10ffff}]</code>,
+ *           which is equivalent to <code>[^b]</code> in case-sensitive 
matching.
+ *           But, in case-insensitive matching, <code>(?[^b])</code> matches 
any character because
+ *           it includes '<code>B</code>' and '<code>B</code>' matches 
'<code>b</code>'
+ *           though <code>[^b]</code> is processed as <code>[^Bb]</code>.</p>
+ *       </dd>
  *
- *       <dt 
class="REGEX"><kbd>[</kbd><var>R<sub>1</sub>R<sub>2</sub>...</var><kbd>-[</kbd><var>R<sub>n</sub>R<sub>n+1</sub>...</var><kbd>]]</kbd>
 (with an <a href="#X_OPTION">"X" option</a>)</dt>
+ *       <dt 
class="REGEX">[<var>R<sub>1</sub></var><var>R<sub>2</sub></var><var>...</var>-[<var>R<sub>n</sub></var><var>R<sub>n+1</sub>...</var>]]
 (with an <code>X</code> option; {@link #XMLSCHEMA_MODE})</dt>
  *       <dd>Character class subtraction for the XML Schema.
- *           You can use this syntax when you specify an <a 
href="#X_OPTION">"X" option</a>.
- *           
- *       <dt class="REGEX"><kbd>\d</kbd>
- *       <dd class="REGEX">Equivalent to <kbd>[0-9]</kbd>.
- *       <dd>When <a href="#U_OPTION">a "u" option</a> is set, it is 
equivalent to
- *           <span class="REGEX"><kbd>\p{Nd}</kbd></span>.
+ *           You can use this syntax when you specify an <code>X</code> option 
({@link #XMLSCHEMA_MODE}).
+ *       </dd>
+ *
+ *       <dt class="REGEX"><code>\d</code></dt>
+ *       <dd class="REGEX">Equivalent to <code>[0-9]</code>.
+ *       <p>When a <code>u</code> ({@link #USE_UNICODE_CATEGORY}) option is 
set, it is equivalent to
+ *           <code>\p{Nd}</code>.</p>
+ *       </dd>
  *
- *       <dt class="REGEX"><kbd>\D</kbd>
- *       <dd class="REGEX">Equivalent to <kbd>[^0-9]</kbd>
- *       <dd>When <a href="#U_OPTION">a "u" option</a> is set, it is 
equivalent to
- *           <span class="REGEX"><kbd>\P{Nd}</kbd></span>.
+ *       <dt class="REGEX"><code>\D</code></dt>
+ *       <dd class="REGEX">Equivalent to <code>[^0-9]</code>
+ *       <p>When a <code>u</code> ({@link #USE_UNICODE_CATEGORY}) option is 
set, it is equivalent to
+ *           <code>\P{Nd}</code>.</p>
+ *       </dd>
  *
- *       <dt class="REGEX"><kbd>\s</kbd>
- *       <dd class="REGEX">Equivalent to <kbd>[ \f\n\r\t]</kbd>
- *       <dd>When <a href="#U_OPTION">a "u" option</a> is set, it is 
equivalent to
- *           <span class="REGEX"><kbd>[ \f\n\r\t\p{Z}]</kbd></span>.
+ *       <dt class="REGEX"><code>\s</code></dt>
+ *       <dd class="REGEX">Equivalent to <code>[ \f\n\r\t]</code>
+ *       <dd>When a <code>u</code> ({@link #USE_UNICODE_CATEGORY}) option is 
set, it is equivalent to
+ *           <code>[ \f\n\r\t\p{Z}]</code>.
  *
- *       <dt class="REGEX"><kbd>\S</kbd>
- *       <dd class="REGEX">Equivalent to <kbd>[^ \f\n\r\t]</kbd>
- *       <dd>When <a href="#U_OPTION">a "u" option</a> is set, it is 
equivalent to
- *           <span class="REGEX"><kbd>[^ \f\n\r\t\p{Z}]</kbd></span>.
+ *       <dt class="REGEX"><code>\S</code></dt>
+ *       <dd class="REGEX">Equivalent to <code>[^ \f\n\r\t]</code>
+ *       <p>When a <code>u</code> ({@link #USE_UNICODE_CATEGORY}) option is 
set, it is equivalent to
+ *           <code>[^ \f\n\r\t\p{Z}]</code>.</p>
+ *       </dd>
  *
- *       <dt class="REGEX"><kbd>\w</kbd>
- *       <dd class="REGEX">Equivalent to <kbd>[a-zA-Z0-9_]</kbd>
- *       <dd>When <a href="#U_OPTION">a "u" option</a> is set, it is 
equivalent to
- *           <span class="REGEX"><kbd>[\p{Lu}\p{Ll}\p{Lo}\p{Nd}_]</kbd></span>.
+ *       <dt class="REGEX"><code>\w</code></dt>
+ *       <dd class="REGEX">Equivalent to <code>[a-zA-Z0-9_]</code>
+ *       <p>When a <code>u</code> ({@link #USE_UNICODE_CATEGORY}) option is 
set, it is equivalent to
+ *           <code>[\p{Lu}\p{Ll}\p{Lo}\p{Nd}_]</code>.</p>
+ *       </dd>
  *
- *       <dt class="REGEX"><kbd>\W</kbd>
- *       <dd class="REGEX">Equivalent to <kbd>[^a-zA-Z0-9_]</kbd>
- *       <dd>When <a href="#U_OPTION">a "u" option</a> is set, it is 
equivalent to
- *           <span 
class="REGEX"><kbd>[^\p{Lu}\p{Ll}\p{Lo}\p{Nd}_]</kbd></span>.
+ *       <dt class="REGEX"><code>\W</code></dt>
+ *       <dd class="REGEX">Equivalent to <code>[^a-zA-Z0-9_]</code>
+ *       <p>When a <code>u</code> ({@link #USE_UNICODE_CATEGORY}) option is 
set, it is equivalent to
+ *           <code>[^\p{Lu}\p{Ll}\p{Lo}\p{Nd}_]</code>.</p>
+ *       </dd>
  *
- *       <dt class="REGEX"><kbd>\p{</kbd><var>name</var><kbd>}</kbd>
- *       <dd>Matches one character in the specified General Category (the 
second field in <a 
href="ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt"><kbd>UnicodeData.txt</kbd></a>)
 or the specified <a 
href="ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt">Block</a>.
- *       The following names are available:
+ *       <dt class="REGEX"><code>\p{name}</code></dt>
+ *       <dd>Matches one character in the specified General Category (the 
second field in <a 
href="ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt">UnicodeData.txt</a>)
 or the specified <a 
href="ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt">Block</a>.</dd>
+ *       <dd>The following names are available:
  *       <dl>
- *         <dt>Unicode General Categories:
- *         <dd><kbd>
- *       L, M, N, Z, C, P, S, Lu, Ll, Lt, Lm, Lo, Mn, Me, Mc, Nd, Nl, No, Zs, 
Zl, Zp,
- *       Cc, Cf, Cn, Co, Cs, Pd, Ps, Pe, Pc, Po, Sm, Sc, Sk, So,
- *         </kbd>
- *         <dd>(Currently the Cn category includes U+10000-U+10FFFF characters)
- *         <dt>Unicode Blocks:
- *         <dd><kbd>
+ *         <dt>Unicode General Categories:</dt>
+ *         <dd><code>L, M, N, Z, C, P, S, Lu, Ll, Lt, Lm, Lo, Mn, Me, Mc, Nd, 
Nl, No, Zs, Zl, Zp, Cc, Cf, Cn,
+ *         Co, Cs, Pd, Ps, Pe, Pc, Po, Sm, Sc, Sk, So</code>
+ *         </dd>
+ *         <dd>(Currently the Cn category includes U+10000-U+10FFFF 
characters)</dd>
+ *         <dt>Unicode Blocks:</dt>
+ *         <dd>
  *       Basic Latin, Latin-1 Supplement, Latin Extended-A, Latin Extended-B,
  *       IPA Extensions, Spacing Modifier Letters, Combining Diacritical 
Marks, Greek,
  *       Cyrillic, Armenian, Hebrew, Arabic, Devanagari, Bengali, Gurmukhi, 
Gujarati,
@@ -261,175 +250,137 @@ import org.apache.xerces.util.IntStack;
  *       Arabic Presentation Forms-A, Combining Half Marks, CJK Compatibility 
Forms,
  *       Small Form Variants, Arabic Presentation Forms-B, Specials,
  *       Halfwidth and Fullwidth Forms
- *         </kbd>
- *         <dt>Others:
- *         <dd><kbd>ALL</kbd> (Equivalent to 
<kbd>[\u005cu0000-\u005cv10FFFF]</kbd>)
- *         <dd><kbd>ASSGINED</kbd> (<kbd>\p{ASSIGNED}</kbd> is equivalent to 
<kbd>\P{Cn}</kbd>)
- *         <dd><kbd>UNASSGINED</kbd>
- *             (<kbd>\p{UNASSIGNED}</kbd> is equivalent to <kbd>\p{Cn}</kbd>)
+ *         </dd>
+ *         <dt>Others:</dt>
+ *         <dd><code>ALL</code> (Equivalent to 
<code>[\u005cu0000-\v10FFFF]</code>)</dd>
+ *         <dd><code>ASSIGNED</code> (<code>\p{ASSIGNED}</code> is equivalent 
to <code>\P{Cn}</code>)</dd>
+ *         <dd><code>UNASSIGNED</code> (<code>\p{UNASSIGNED}</code> is 
equivalent to <code>\p{Cn}</code>)</dd>
  *       </dl>
  *
- *       <dt class="REGEX"><kbd>\P{</kbd><var>name</var><kbd>}</kbd>
- *       <dd>Matches one character not in the specified General Category or 
the specified Block.
+ *       <dt class="REGEX"><code>\P{name}</code></dt>
+ *       <dd>Matches one character not in the specified General Category or 
the specified Block.</dd>
  *     </dl>
  *   </li>
  *
  *   <li>Selection and Quantifier
- *     <dl>
- *       <dt class="REGEX"><VAR>X</VAR><kbd>|</kbd><VAR>Y</VAR>
- *       <dd>...
- *
- *       <dt class="REGEX"><VAR>X</VAR><kbd>*</KBD>
- *       <dd>Matches 0 or more <var>X</var>.
- *
- *       <dt class="REGEX"><VAR>X</VAR><kbd>+</KBD>
- *       <dd>Matches 1 or more <var>X</var>.
- *
- *       <dt class="REGEX"><VAR>X</VAR><kbd>?</KBD>
- *       <dd>Matches 0 or 1 <var>X</var>.
- *
- *       <dt 
class="REGEX"><var>X</var><kbd>{</kbd><var>number</var><kbd>}</kbd>
- *       <dd>Matches <var>number</var> times.
- *
- *       <dt class="REGEX"><var>X</var><kbd>{</kbd><var>min</var><kbd>,}</kbd>
- *       <dd>...
- *
- *       <dt 
class="REGEX"><var>X</var><kbd>{</kbd><var>min</var><kbd>,</kbd><var>max</var><kbd>}</kbd>
- *       <dd>...
- *
- *       <dt class="REGEX"><VAR>X</VAR><kbd>*?</kbd>
- *       <dt class="REGEX"><VAR>X</VAR><kbd>+?</kbd>
- *       <dt class="REGEX"><VAR>X</VAR><kbd>??</kbd>
- *       <dt class="REGEX"><var>X</var><kbd>{</kbd><var>min</var><kbd>,}?</kbd>
- *       <dt 
class="REGEX"><var>X</var><kbd>{</kbd><var>min</var><kbd>,</kbd><var>max</var><kbd>}?</kbd>
- *       <dd>Non-greedy matching.
- *     </dl>
+ *     <ul>
+ *       <li><code>X | Y</code> matches either X or Y</li>
+ *       <li><code>X*</code> matches 0 or more of X</li>
+ *       <li><code>X+</code> matches 1 or more of X</li>
+ *       <li><code>X?</code> matches 0 or one of X</li>
+ *       <li><code>X{number}</code> matches exactly <i>number</i> of X</li>
+ *       <li><code>X{min,}</code> matches <i>min</i> or more of X</li>
+ *       <li><code>X{min,max}</code> matches between <i>min</i> and <i>max</i> 
of X</li>
+ *       <li>Non-greedy equivalent of above
+ *         <ul>
+ *           <li><code>X*?</code> non-greedy</li>
+ *           <li><code>X+?</code> non-greedy</li>
+ *           <li><code>X??</code> non-greedy</li>
+ *           <li><code>X{min,}?</code></li>
+ *           <li><code>X{min,max}?</code></li>
+ *         </ul>
+ *       </li>
+ *     </ul>
  *   </li>
  *
  *   <li>Grouping, Capturing, and Back-reference
- *     <dl>
- *       <dt class="REGEX"><KBD>(?:</kbd><VAR>X</VAR><kbd>)</KBD>
- *       <dd>Grouping. "<KBD>foo+</KBD>" matches "<KBD>foo</KBD>" or 
"<KBD>foooo</KBD>".
- *       If you want it matches "<KBD>foofoo</KBD>" or "<KBD>foofoofoo</KBD>",
- *       you have to write "<KBD>(?:foo)+</KBD>".
- *
- *       <dt class="REGEX"><KBD>(</kbd><VAR>X</VAR><kbd>)</KBD>
- *       <dd>Grouping with capturing.
- * It make a group and applications can know
- * where in target text a group matched with methods of a <code>Match</code> 
instance
- * after <code><a href="#matches(java.lang.String, 
org.apache.xerces.utils.regex.Match)">matches(String,Match)</a></code>.
- * The 0th group means whole of this regular expression.
- * The <VAR>N</VAR>th gorup is the inside of the <VAR>N</VAR>th left 
parenthesis.
- * 
- *   <p>For instance, a regular expression is
- *   "<FONT color=blue><KBD> *([^&lt;:]*) +&lt;([^&gt;]*)&gt; *</KBD></FONT>"
- *   and target text is
- *   "<FONT color=red><KBD>From: TAMURA Kent 
&lt;[email protected]&gt;</KBD></FONT>":
- *   <ul>
- *     <li><code>Match.getCapturedText(0)</code>:
- *     "<FONT color=red><KBD> TAMURA Kent 
&lt;[email protected]&gt;</KBD></FONT>"
- *     <li><code>Match.getCapturedText(1)</code>: "<FONT color=red><KBD>TAMURA 
Kent</KBD></FONT>"
- *     <li><code>Match.getCapturedText(2)</code>: "<FONT 
color=red><KBD>[email protected]</KBD></FONT>"
- *   </ul>
- *
- *       <dt class="REGEX"><kbd>\1 \2 \3 \4 \5 \6 \7 \8 \9</kbd>
- *       <dd>
- *
- *       <dt class="REGEX"><kbd>(?></kbd><var>X</var><kbd>)</kbd>
- *       <dd>Independent expression group. ................
- *
- *       <dt 
class="REGEX"><kbd>(?</kbd><var>options</var><kbd>:</kbd><var>X</var><kbd>)</kbd>
- *       <dt 
class="REGEX"><kbd>(?</kbd><var>options</var><kbd>-</kbd><var>options2</var><kbd>:</kbd><var>X</var><kbd>)</kbd>
- *       <dd>............................
- *       <dd>The <var>options</var> or the <var>options2</var> consists of 'i' 
'm' 's' 'w'.
- *           Note that it can not contain 'u'.
- *
- *       <dt class="REGEX"><kbd>(?</kbd><var>options</var><kbd>)</kbd>
- *       <dt 
class="REGEX"><kbd>(?</kbd><var>options</var><kbd>-</kbd><var>options2</var><kbd>)</kbd>
- *       <dd>......
- *       <dd>These expressions must be at the beginning of a group.
- *     </dl>
+ *     <ul>
+ *       <li><code>(?:X)</code> Grouping. <code>foo+</code> matches 
<code>foo</code> or <code>foooo</code>.
+ *       <p>If you want it to match <code>foofoo</code> or 
<code>foofoofoo</code>, you have to write <code>(?:foo)+</code>.</p>
+ *       </li>
+ *       <li><code>(X)</code> Grouping with capturing.
+ *       <p>It makes a group, and applications can find out where in the target 
text the group matched by using the methods of a <code>Match</code> instance after 
calling {@link #matches(String, Match)}.</p>
+ *       <p>The 0th group means whole of this regular expression.</p>
+ *       <p>The <i>N</i>th group is the inside of the <i>N</i>th left 
parenthesis.</p>
+ *       <p>For instance, with a regular expression of <code> *([^&lt;:]*) 
+&lt;([^&gt;]*)&gt; *</code> and target text of</p>
+ *       <pre>From: TAMURA Kent &lt;[email protected]&gt;</pre>
+ *       <p>The result is as follows:</p>
+ *       <ul>
+ *         <li><code>Match.getCapturedText(0)</code> : "<code> TAMURA Kent 
&lt;[email protected]&gt;</code>"</li>
+ *         <li><code>Match.getCapturedText(1)</code> : "<code>TAMURA 
Kent</code>"</li>
+ *         <li><code>Match.getCapturedText(2)</code> : 
"<code>[email protected]</code>"</li>
+ *       </ul>
+ *       </li>
+ *       <li><code>\1 \2 \3 \4 \5 \6 \7 \8 \9</code></li>
+ *       <li><code>(?>X)</code> Independent expression group. 
................</li>
+ *       <li><code>(?options:X)</code> or <code>(?options-options2:X)</code> 
The <i>options</i> or the <i>options2</i>
+ *       consists of 'i' 'm' 's' 'w'. Note that it can not contain 'u'.</li>
+ *       <li><code>(?options)</code> or <code>(?options-options2)</code> These 
expressions must be at the beginning of a group.</li>
+ *     </ul>
  *   </li>
  *
  *   <li>Anchor
  *     <dl>
- *       <dt class="REGEX"><kbd>\A</kbd>
- *       <dd>Matches the beginnig of the text.
+ *       <dt class="REGEX"><code>\A</code>
+ *       <dd>Matches the beginning of the text.
  *
- *       <dt class="REGEX"><kbd>\Z</kbd>
+ *       <dt class="REGEX"><code>\Z</code>
  *       <dd>Matches the end of the text, or before an EOL character at the 
end of the text,
  *           or CARRIAGE RETURN + LINE FEED at the end of the text.
  *
- *       <dt class="REGEX"><kbd>\z</kbd>
+ *       <dt class="REGEX"><code>\z</code>
  *       <dd>Matches the end of the text.
  *
- *       <dt class="REGEX"><kbd>^</kbd>
- *       <dd>Matches the beginning of the text.  It is equivalent to <span 
class="REGEX"><Kbd>\A</kbd></span>.
- *       <dd>When <a href="#M_OPTION">a "m" option</a> is set,
+ *       <dt class="REGEX"><code>^</code>
+ *       <dd>Matches the beginning of the text.  It is equivalent to 
<code>\A</code>.
+ *       <dd>When the <code>m</code> ({@link #MULTIPLE_LINES}) option is set,
  *           it matches the beginning of the text, or after one of EOL 
characters (
  *           LINE FEED (U+000A), CARRIAGE RETURN (U+000D), LINE SEPARATOR 
(U+2028),
  *           PARAGRAPH SEPARATOR (U+2029).)
  *
- *       <dt class="REGEX"><kbd>$</kbd>
+ *       <dt class="REGEX"><code>$</code>
  *       <dd>Matches the end of the text, or before an EOL character at the 
end of the text,
  *           or CARRIAGE RETURN + LINE FEED at the end of the text.
- *       <dd>When <a href="#M_OPTION">a "m" option</a> is set,
+ *       <dd>When the <code>m</code> ({@link #MULTIPLE_LINES}) option is set,
  *           it matches the end of the text, or before an EOL character.
  *
- *       <dt class="REGEX"><kbd>\b</kbd>
- *       <dd>Matches word boundary.
- *           (See <a href="#W_OPTION">a "w" option</a>)
+ *       <dt class="REGEX"><code>\b</code>
+ *       <dd>Matches word boundary. (See {@link #UNICODE_WORD_BOUNDARY})
  *
- *       <dt class="REGEX"><kbd>\B</kbd>
- *       <dd>Matches non word boundary.
- *           (See <a href="#W_OPTION">a "w" option</a>)
+ *       <dt class="REGEX"><code>\B</code>
+ *       <dd>Matches non word boundary. (See {@link #UNICODE_WORD_BOUNDARY})
  *
- *       <dt class="REGEX"><kbd>\&lt;</kbd>
- *       <dd>Matches the beginning of a word.
- *           (See <a href="#W_OPTION">a "w" option</a>)
+ *       <dt class="REGEX"><code>\&lt;</code>
+ *       <dd>Matches the beginning of a word. (See {@link 
#UNICODE_WORD_BOUNDARY})
  *
- *       <dt class="REGEX"><kbd>\&gt;</kbd>
- *       <dd>Matches the end of a word.
- *           (See <a href="#W_OPTION">a "w" option</a>)
+ *       <dt class="REGEX"><code>\&gt;</code>
+ *       <dd>Matches the end of a word. (See {@link #UNICODE_WORD_BOUNDARY})
  *     </dl>
  *   </li>
  *   <li>Lookahead and lookbehind
  *     <dl>
- *       <dt class="REGEX"><kbd>(?=</kbd><var>X</var><kbd>)</kbd>
+ *       <dt class="REGEX"><code>(?=X)</code>
  *       <dd>Lookahead.
  *
- *       <dt class="REGEX"><kbd>(?!</kbd><var>X</var><kbd>)</kbd>
+ *       <dt class="REGEX"><code>(?!X)</code>
  *       <dd>Negative lookahead.
  *
- *       <dt class="REGEX"><kbd>(?&lt;=</kbd><var>X</var><kbd>)</kbd>
+ *       <dt class="REGEX"><code>(?&lt;=X)</code>
  *       <dd>Lookbehind.
  *       <dd>(Note for text capturing......)
  *
- *       <dt class="REGEX"><kbd>(?&lt;!</kbd><var>X</var><kbd>)</kbd>
+ *       <dt class="REGEX"><code>(?&lt;!X)</code>
  *       <dd>Negative lookbehind.
  *     </dl>
  *   </li>
  *
  *   <li>Misc.
  *     <dl>
- *       <dt 
class="REGEX"><kbd>(?(</Kbd><var>condition</var><Kbd>)</kbd><var>yes-pattern</var><kbd>|</kbd><var>no-pattern</var><kbd>)</kbd>,
- *       <dt 
class="REGEX"><kbd>(?(</kbd><var>condition</var><kbd>)</kbd><var>yes-pattern</var><kbd>)</kbd>
+ *       <dt class="REGEX"><code>(?(condition)yes-pattern|no-pattern)</code>,
+ *       <dt class="REGEX"><code>(?(condition)yes-pattern)</code>
  *       <dd>......
- *       <dt class="REGEX"><kbd>(?#</kbd><var>comment</var><kbd>)</kbd>
- *       <dd>Comment.  A comment string consists of characters except 
'<kbd>)</kbd>'.
+ *       <dt class="REGEX"><code>(?#comment)</code> Comment
+ *       <dd>A comment string consists of characters except '<code>)</code>'.
  *           You can not write comments in character classes and before 
quantifiers.
  *     </dl>
  *   </li>
  * </ul>
  *
- *
- * <hr width="50%">
- * <h3>BNF for the regular expression</h3>
+ * <h3>BNF grammar for the regular expression</h3>
  * <pre>
  * regex ::= ('(?' options ')')? term ('|' term)*
  * term ::= factor+
- * factor ::= anchors | atom (('*' | '+' | '?' | minmax ) '?'? )?
- *            | '(?#' [^)]* ')'
+ * factor ::= anchors | atom (('*' | '+' | '?' | minmax ) '?'? )? | '(?#' 
[^)]* ')'
  * minmax ::= '{' ([0-9]+ | [0-9]+ ',' | ',' [0-9]+ | [0-9]+ ',' [0-9]+) '}'
  * atom ::= char | '.' | char-class | '(' regex ')' | '(?:' regex ')' | '\' 
[0-9]
  *          | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block | '\X'
@@ -437,12 +388,10 @@ import org.apache.xerces.util.IntStack;
  *          | '(?' ('(' [0-9] ')' | '(' anchors ')' | looks) term ('|' term)? 
')'
  * options ::= [imsw]* ('-' [imsw]+)?
  * anchors ::= '^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\&lt;' | '\>'
- * looks ::= '(?=' regex ')'  | '(?!' regex ')'
- *           | '(?&lt;=' regex ')' | '(?&lt;!' regex ')'
+ * looks ::= '(?=' regex ')'  | '(?!' regex ')' | '(?&lt;=' regex ')' | 
'(?&lt;!' regex ')'
  * char ::= '\\' | '\' [efnrtv] | '\c' [@-_] | code-point | character-1
  * category-block ::= '\' [pP] category-symbol-1
- *                    | ('\p{' | '\P{') (category-symbol | block-name
- *                                       | other-properties) '}'
+ *                    | ('\p{' | '\P{') (category-symbol | block-name | 
other-properties) '}'
  * category-symbol-1 ::= 'L' | 'M' | 'N' | 'Z' | 'C' | 'P' | 'S'
  * category-symbol ::= category-symbol-1 | 'Lu' | 'Ll' | 'Lt' | 'Lm' | Lo'
  *                     | 'Mn' | 'Me' | 'Mc' | 'Nd' | 'Nl' | 'No'
@@ -453,36 +402,24 @@ import org.apache.xerces.util.IntStack;
  * other-properties ::= 'ALL' | 'ASSIGNED' | 'UNASSIGNED'
  * character-1 ::= (any character except meta-characters)
  *
- * char-class ::= '[' ranges ']'
- *                | '(?[' ranges ']' ([-+&] '[' ranges ']')? ')'
+ * char-class ::= '[' ranges ']' | '(?[' ranges ']' ([-+&] '[' ranges ']')? ')'
  * ranges ::= '^'? (range <a href="#COMMA_OPTION">','?</a>)+
  * range ::= '\d' | '\w' | '\s' | '\D' | '\W' | '\S' | category-block
  *           | range-char | range-char '-' range-char
  * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | code-point | 
character-2
  * code-point ::= '\x' hex-char hex-char
  *                | '\x{' hex-char+ '}'
- * <!--               | '\u005c u' hex-char hex-char hex-char hex-char
- * -->               | '\v' hex-char hex-char hex-char hex-char hex-char 
hex-char
+ *                | '\v' hex-char hex-char hex-char hex-char hex-char hex-char
  * hex-char ::= [0-9a-fA-F]
  * character-2 ::= (any character except \[]-,)
  * </pre>
  *
- * <hr width="50%">
- * <h3>TODO</h3>
- * <ul>
- *   <li><a href="http://www.unicode.org/unicode/reports/tr18/";>Unicode 
Regular Expression Guidelines</a>
- *     <ul>
- *       <li>2.4 Canonical Equivalents
- *       <li>Level 3
- *     </ul>
- *   <li>Parsing performance
- * </ul>
- *
- * <hr width="50%">
+ * <h3>Reference</h3>
+ * <a href="http://www.unicode.org/unicode/reports/tr18/">Unicode Regular 
Expression Guidelines</a>
  * 
  * @xerces.internal
  *
- * @author TAMURA Kent &lt;[email protected]&gt;
+ * @author TAMURA Kent <a 
href="mailto:[email protected]";>[email protected]</a>
  * @version $Id$
  */
 public class RegularExpression implements java.io.Serializable {
@@ -683,8 +620,8 @@ public class RegularExpression implements 
java.io.Serializable {
     /**
      * Checks whether the <var>target</var> text <strong>contains</strong> 
this pattern or not.
      *
-     * @param match A Match instance for storing matching result.
-     * @return Offset of the start position in <VAR>target</VAR>; or -1 if not 
match.
+     * @param match A Match instance for storing matching result
+     * @return true if <var>target</var> contains this pattern; false otherwise
      */
     public boolean matches(char[]  target, Match match) {
         return this.matches(target, 0,  target .length , match);
@@ -695,10 +632,10 @@ public class RegularExpression implements 
java.io.Serializable {
      * Checks whether the <var>target</var> text <strong>contains</strong> 
this pattern
      * in specified range or not.
      *
-     * @param start Start offset of the range.
-     * @param end  End offset +1 of the range.
-     * @param match A Match instance for storing matching result.
-     * @return Offset of the start position in <VAR>target</VAR>; or -1 if not 
match.
+     * @param start Start offset of the range
+     * @param end  End offset +1 of the range
+     * @param match A Match instance for storing matching result
+     * @return true if <var>target</var> contains this pattern in the specified range; false otherwise
      */
     public boolean matches(char[] target, int start, int end, Match match) {
 
@@ -869,8 +806,8 @@ public class RegularExpression implements 
java.io.Serializable {
     /**
      * Checks whether the <var>target</var> text <strong>contains</strong> 
this pattern or not.
      *
-     * @param match A Match instance for storing matching result.
-     * @return Offset of the start position in <VAR>target</VAR>; or -1 if not 
match.
+     * @param match A Match instance for storing matching result
+     * @return true if <var>target</var> contains this pattern; false otherwise
      */
     public boolean matches(String  target, Match match) {
         return this.matches(target, 0,  target .length() , match);
@@ -880,10 +817,10 @@ public class RegularExpression implements 
java.io.Serializable {
      * Checks whether the <var>target</var> text <strong>contains</strong> 
this pattern
      * in specified range or not.
      *
-     * @param start Start offset of the range.
-     * @param end  End offset +1 of the range.
-     * @param match A Match instance for storing matching result.
-     * @return Offset of the start position in <VAR>target</VAR>; or -1 if not 
match.
+     * @param start Start offset of the range
+     * @param end  End offset +1 of the range
+     * @param match A Match instance for storing matching result
+     * @return true if <var>target</var> contains this pattern in the specified range; false otherwise
      */
     public boolean matches(String  target, int start, int end, Match match) {
 
@@ -1557,8 +1494,8 @@ public class RegularExpression implements 
java.io.Serializable {
     /**
      * Checks whether the <var>target</var> text <strong>contains</strong> 
this pattern or not.
      *
-     * @param match A Match instance for storing matching result.
-     * @return Offset of the start position in <VAR>target</VAR>; or -1 if not 
match.
+     * @param match A Match instance for storing matching result
+     * @return true if <var>target</var> contains this pattern; false otherwise
      */
     public boolean matches(CharacterIterator  target, Match match) {
         int start = target.getBeginIndex();
@@ -2155,10 +2092,10 @@ public class RegularExpression implements 
java.io.Serializable {
         }
     }
 
-    /**
+    /*
      * An option.
-     * If you specify this option, <span 
class="REGEX"><kbd>(</kbd><var>X</var><kbd>)</kbd></span>
-     * captures matched text, and <span 
class="REGEX"><kbd>(:?</kbd><var>X</var><kbd>)</kbd></span>
+     * If you specify this option, <span><code>(X)</code></span>
+     * captures matched text, and <span><code>(?:X)</code></span>
      * does not capture.
      *
      * @see #RegularExpression(java.lang.String,int)
@@ -2187,25 +2124,25 @@ public class RegularExpression implements 
java.io.Serializable {
     static final int EXTENDED_COMMENT = 1<<4;
 
     /**
-     * This option redefines <span class="REGEX"><kbd>\d \D \w \W \s 
\S</kbd></span>.
+     * This option redefines <span><code>\d \D \w \W \s \S</code></span>.
      *
-     * @see #RegularExpression(java.lang.String,int)
-     * @see #setPattern(java.lang.String,int)
+     * @see #RegularExpression(String,String)
+     * @see #setPattern(String,int,Locale)
      * @see #UNICODE_WORD_BOUNDARY
      */
     static final int USE_UNICODE_CATEGORY = 1<<5; // "u"
 
     /**
      * An option.
-     * This enables to process locale-independent word boundary for <span 
class="REGEX"><kbd>\b \B \&lt; \></kbd></span>.
+     * This enables to process locale-independent word boundary for 
<span><code>\b \B \&lt; \></code></span>.
      * <p>By default, the engine considers a position between a word character
-     * (<span class="REGEX"><Kbd>\w</kbd></span>) and a non word character
+     * (<span><code>\w</code></span>) and a non word character
      * is a word boundary.
      * <p>By this option, the engine checks word boundaries with the method of
      * 'Unicode Regular Expression Guidelines' Revision 4.
      *
-     * @see #RegularExpression(java.lang.String,int)
-     * @see #setPattern(java.lang.String,int)
+     * @see #RegularExpression(String,String)
+     * @see #setPattern(String,int,Locale)
      */
     static final int UNICODE_WORD_BOUNDARY = 1<<6; // "w"
 
@@ -2234,8 +2171,8 @@ public class RegularExpression implements 
java.io.Serializable {
     /**
      * Creates a new RegularExpression instance.
      *
-     * @param regex A regular expression
-     * @exception org.apache.xerces.utils.regex.ParseException 
<VAR>regex</VAR> is not conforming to the syntax.
+     * @param regex a regular expression
+     * @throws ParseException if regex is not conforming to the syntax
      */
     public RegularExpression(String regex) throws ParseException {
         this(regex, null);
@@ -2244,9 +2181,9 @@ public class RegularExpression implements 
java.io.Serializable {
     /**
      * Creates a new RegularExpression instance with options.
      *
-     * @param regex A regular expression
-     * @param options A String consisted of "i" "m" "s" "u" "w" "," "X"
-     * @exception org.apache.xerces.utils.regex.ParseException 
<VAR>regex</VAR> is not conforming to the syntax.
+     * @param regex a regular expression
+     * @param options a string of options consisting of "i" "m" "s" "u" "w" "," 
"X" or null
+     * @throws ParseException if regex is not conforming to the syntax
      */
     public RegularExpression(String regex, String options) throws 
ParseException {
         this.setPattern(regex, options);
@@ -2255,9 +2192,11 @@ public class RegularExpression implements 
java.io.Serializable {
     /**
      * Creates a new RegularExpression instance with options.
      *
-     * @param regex A regular expression
-     * @param options A String consisted of "i" "m" "s" "u" "w" "," "X"
-     * @exception org.apache.xerces.utils.regex.ParseException 
<VAR>regex</VAR> is not conforming to the syntax.
+     * @param regex a regular expression pattern
+     * @param options a string of options consisting of "i" "m" "s" "u" "w" "," 
"X" or null
+     * @param locale value of the desired locale or null
+     * @throws ParseException if regex is not conforming to the syntax
+     * @see Locale
      */
     public RegularExpression(String regex, String options, Locale locale) 
throws ParseException {
         this.setPattern(regex, options, locale);
@@ -2272,16 +2211,36 @@ public class RegularExpression implements 
java.io.Serializable {
     }
 
     /**
+     * Set a new regular expression pattern with the default Locale.
      *
+     * @param newPattern a new regular expression pattern
+     * @throws ParseException if regex is not conforming to the syntax
      */
     public void setPattern(String newPattern) throws ParseException {
         this.setPattern(newPattern, Locale.getDefault());
     }
-    
+
+    /**
+     * Set a new regular expression pattern with the provided Locale.
+     *
+     * @param newPattern a new regular expression pattern
+     * @param locale value of the desired locale or null
+     * @throws ParseException if regex is not conforming to the syntax
+     * @see Locale
+     */
     public void setPattern(String newPattern, Locale locale) throws 
ParseException {
         this.setPattern(newPattern, this.options, locale);
     }
 
+    /**
+     * Set a new regular expression pattern with the provided options and 
Locale.
+     *
+     * @param newPattern a new regular expression pattern
+     * @param options an int value representation of regular expression options
+     * @param locale value of the desired locale or null
+     * @throws ParseException if regex is not conforming to the syntax
+     * @see Locale
+     */
     private void setPattern(String newPattern, int options, Locale locale) 
throws ParseException {
         this.regex = newPattern;
         this.options = options;
@@ -2294,36 +2253,55 @@ public class RegularExpression implements 
java.io.Serializable {
         this.operations = null;
         this.context = null;
     }
+
     /**
+     * Set a new regular expression pattern with the provided options and the 
default locale.
      *
+     * @param newPattern a new regular expression pattern
+     * @param options a string of options consisting of "i" "m" "s" "u" "w" "," 
"X" or null
+     * @throws ParseException if regex is not conforming to the syntax
+     * @see Locale#getDefault()
      */
     public void setPattern(String newPattern, String options) throws 
ParseException {
         this.setPattern(newPattern, options, Locale.getDefault());
     }
-    
+
+    /**
+     * Set a new regular expression pattern with the provided options and 
Locale.
+     *
+     * @param newPattern a new regular expression pattern
+     * @param options a string of options consisting of "i" "m" "s" "u" "w" "," 
"X" or null
+     * @param locale value of the desired locale or null
+     * @throws ParseException if regex is not conforming to the syntax
+     * @see Locale
+     */
     public void setPattern(String newPattern, String options, Locale locale) 
throws ParseException {
         this.setPattern(newPattern, REUtil.parseOptions(options), locale);
     }
 
     /**
+     * Returns the regular expression pattern.
      *
+     * @return the regular expression pattern
      */
     public String getPattern() {
         return this.regex;
     }
 
     /**
-     * Represents this instence in String.
+     * Represents this instance in String.
      */
     public String toString() {
         return this.tokentree.toString(this.options);
     }
 
     /**
-     * Returns a option string.
-     * The order of letters in it may be different from a string specified
-     * in a constructor or <code>setPattern()</code>.
+     * Returns a string representation of the regular expression's options.
      *
+     * <p>The order of letters in it may be different from a string specified
+     * in a constructor or <code>setPattern()</code>.</p>
+     *
+     * @return a string representation of the regular expression's options
      * @see #RegularExpression(java.lang.String,java.lang.String)
      * @see #setPattern(java.lang.String,java.lang.String)
      */
@@ -2332,7 +2310,9 @@ public class RegularExpression implements 
java.io.Serializable {
     }
 
     /**
-     *  Return true if patterns are the same and the options are equivalent.
+     * Return true if patterns are the same and the options are equivalent.
+     *
+     * @return true if patterns are the same and the options are equivalent
      */
     public boolean equals(Object obj) {
         if (obj == null)  return false;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to