Hi All,
Below is a piece of code that I wrote for several of my projects and that I
would like to contribute to ORO if it seems useful. It's a pretty simple yet
powerful class. Also attached is a file called STA.java, which shows
example usage of MethodSubstitution (and of the other types of substitution).
Greets,
Laurens
-----------------------------------
import java.lang.reflect.*;
import org.apache.oro.text.regex.*;
/**
 * Implements the /e modifier of perl regular expressions that makes
 * Perl interpret the expression before processing it. This class is
 * intended for use with {@link Util#substitute Util.substitute}.
 * <p>
 * In perl you can do something like:
 * <pre><b>s/A(.*?)B(.*?)C/&amp;MyMethod($1, $2)/ge</b></pre>
 * When the match is made this will call the sub "MyMethod", pass it
 * the two matched variables, and finally the match is substituted
 * by whatever the sub returns.
 * <p>
 * MethodSubstitution works in a similar way. You can do something like:
 * <pre><b>
 * MethodSubstitution sub = new MethodSubstitution(this, "myMethod");
 * substitute("A(.*?)B(.*?)C", sub);
 * </b></pre>
 * (See also {@link Util#substitute Util.substitute}.)
 * Here the object "this" must have a public method "myMethod". The
 * signature of the method must be:
 * <pre><b>public String myMethod(MatchResult match)</b></pre>
 * So when the match "A(.*?)B(.*?)C" is made the method will
 * be called, and the parenthesized subgroups of the match will
 * be available through the MatchResult parameter. The match will
 * be replaced by the String that the method returns.
 * <p>
 * You can also construct a MethodSubstitution like this:
 * <pre><b>
 * MethodSubstitution sub =
 * new MethodSubstitution(MyClass.class, "myMethod");
 * </b></pre>
 * In this case there must be a public static method "myMethod" present
 * in the class "MyClass". For example:
 * <pre><b>public static String myMethod(MatchResult match)</b></pre>
 * <p>
 * @author <a href="mailto:[EMAIL PROTECTED]">Laurens Pit</a>
 * @version @version@
 * @see Util
 * @see Util#substitute
 * @see Substitution
 * @see StringSubstitution
 * @see Perl5Substitution
 */
public class MethodSubstitution implements Substitution
{
    /* parameter signature every callback method must have */
    private static final Class[] paramTypes = new Class[]
        {MatchResult.class};

    /* the callback target: either an instance or a Class (static callback) */
    private final Object obj;
    /* the resolved callback method */
    private final Method method;

    /**
     * Creates a MethodSubstitution where a Java method will be responsible
     * for returning the String to replace a match.
     * <p>
     * If obj is a class the signature of the method must look like:
     * <pre> public static String methodName(MatchResult match) </pre>
     * <p>
     * If obj is not a class the signature of the method must look like:
     * <pre> public String methodName(MatchResult match) </pre>
     * <p>
     * @param obj the class or object having the public method that will
     * return the substitution
     * @param methodName the name of the method that will return the
     * substitution
     * @exception NoSuchMethodException if a matching method is not found,
     * if the name is "&lt;init&gt;" or "&lt;clinit&gt;", or if obj is a class
     * and the matching method is not static
     * @exception SecurityException if access to the information is denied
     * @exception NullPointerException if obj is null
     */
    public MethodSubstitution(Object obj, String methodName)
        throws NoSuchMethodException, SecurityException
    {
        if (obj == null) {
            throw new NullPointerException("obj must not be null");
        }

        boolean classTarget = obj instanceof Class;
        Class owner = classTarget ? (Class) obj : obj.getClass();
        Method found = owner.getMethod(methodName, paramTypes);

        // An instance method looked up on a Class target can never be invoked
        // (the Class object is not an instance of the declaring class), so
        // reject it here instead of failing on every single match later.
        if (classTarget && !Modifier.isStatic(found.getModifiers())) {
            throw new NoSuchMethodException(owner.getName() + "." + methodName
                + "(MatchResult) must be static when a Class is passed as target");
        }

        this.obj = obj;
        this.method = found;
    }

    /**
     * Appends the substitution to a buffer containing the original input
     * with substitutions applied for the pattern matches found so far.
     * See
     * {@link Substitution#appendSubstitution
     * Substitution.appendSubstitution()}
     * for more details regarding the expected behavior of this method.
     * <p>
     * The replacement text is whatever the callback method returns; a
     * <code>null</code> return value is treated as an empty replacement.
     * If the callback cannot be invoked or throws, the failure is reported
     * on <code>System.err</code> and nothing is appended for this match.
     * <p>
     * @param appendBuffer The buffer containing the new string resulting
     * from performing substitutions on the original input.
     * @param match The current match causing a substitution to be made.
     * @param substitutionCount The number of substitutions that have been
     * performed so far by Util.substitute.
     * @param originalInput The original input upon which the substitutions
     * are being performed. This is a read-only parameter and is not
     * modified.
     * @param matcher The PatternMatcher used to find the current match.
     * @param pattern The Pattern used to find the current match.
     */
    public void appendSubstitution(StringBuffer appendBuffer,
                                   MatchResult match,
                                   int substitutionCount,
                                   PatternMatcherInput originalInput,
                                   PatternMatcher matcher, Pattern pattern)
    {
        try {
            Object replacement = method.invoke(obj, new Object[] {match});
            // StringBuffer.append(Object) would insert the text "null" here.
            if (replacement != null) {
                appendBuffer.append(replacement);
            }
        } catch (InvocationTargetException e) {
            // The callback itself threw: report the underlying exception.
            System.err.println("MethodSubstitution: " + method + " threw an exception");
            e.getTargetException().printStackTrace();
        } catch (IllegalAccessException e) {
            System.err.println("MethodSubstitution: cannot access " + method);
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            System.err.println("MethodSubstitution: cannot invoke " + method);
            e.printStackTrace();
        }
    }
}
import java.util.*;
import org.apache.oro.text.regex.*;
/**
* Structured Text Area.
*
* No attempt has yet been made to optimize this code for performance.
*
* This class is based on
* the GPLed UseModWiki 0.91 (C) 2000-2001 Clifford Adams
* <[EMAIL PROTECTED]>
* ...which was based on
* the GPLed AtisWiki 0.3 (C) 1998 Markus Denker
* <[EMAIL PROTECTED]>
* ...which was based on
* the LGPLed CVWiki CVS-patches (C) 1997 Peter Merel
* and The Original WikiWikiWeb (C) Ward Cunningham
* <[EMAIL PROTECTED]> (code reused with permission)
*
* @author Laurens Pit <[EMAIL PROTECTED]>
*/
public final class STA
{
    // Separator that brackets the index of a stored fragment (see storeRaw).
    private static final String gFS = "\u00b3"; // The FS character is a superscript "3"
    private static final String gQDelim = "(?:\"\")?"; // Optional quote delimiter (not in output)
    private static final String gUrlProtocols = "http|https|ftp|afs|news|nntp|mid|cid|mailto|wais|prospero|telnet|gopher";
    private static final String gUrlPattern = "((?:(?:" + gUrlProtocols + "):[^\\]\\s\"<>" + gFS + "]+)" + gQDelim + ")";
    private static final String gMailPattern = "([a-zA-Z_0-9\\.]+@[a-zA-Z_0-9]+\\.[a-zA-Z_0-9\\.]+[a-zA-Z_0-9])";
    private static final String gImageExtensions = "(gif|jpg|png|bmp|jpeg)";
    private static final String gDocExtensions = "(doc|gz|jpg|mov|mpeg|mpg|pdf|ppt|txt|xls|zip|gif|jpg|png|bmp|jpeg)";
    private static final String gDocPattern = "([a-zA-Z_0-9\\/\\xc0-\\xff\\.]+\\." + gDocExtensions + ")";
    private static final String gISBNPattern = "ISBN:?([0-9- xX]{10,})";
    private static final int gIndentLimit = 10; // Maximum depth of nested lists

    // TODO: create cached compiled patterns and substitutions

    // The text being converted; rewritten in place by each substitution step.
    private String input;
    // Finished HTML fragments that later substitutions must not touch.
    private Vector rawData;
    // URLs seen in "[url]" form, in order of first appearance (created lazily).
    private Vector brackets;

    private STA(String input) {
        this.input = input;
        this.rawData = new Vector();
    }

    /**
     * Converts structured (wiki-style) text to HTML.
     *
     * @param input the structured text
     * @return the resulting HTML
     */
    public static String convert(String input) {
        STA sta = new STA(input);
        return sta.convert();
    }

    /**
     * Runs all substitution steps over the input, in order. If one of the
     * MethodSubstitutions cannot be created, the stack trace is printed and
     * the text as converted up to that point is returned.
     */
    private String convert() {
        try {
            // Remove separators (paranoia)
            input = StringUtils.replace(input, gFS, "");

            // Encode HTML. "&" must be encoded first so that the entities
            // produced for "<" and ">" are not encoded a second time.
            input = StringUtils.replace(input, "&", "&amp;");
            input = StringUtils.replace(input, "<", "&lt;");
            input = StringUtils.replace(input, ">", "&gt;");

            // Allow character references
            substitute("&amp;([#a-zA-Z0-9]+);", new Perl5Substitution("&$1;"));

            // Join lines with backslash at end
            substitute("\\\\ *\\r?\\n", new StringSubstitution(" "));

            // Everything between <code> tags should be taken literally, i.e.
            // make sure no other substitutions are performed on the data between
            // the <code> tags. We must use a MethodSubstitution here !
            substitute("&lt;code&gt;((.|\n)*?)&lt;/code&gt;", new MethodSubstitution(this, "storeCode"));

            // Allow some common HTML tags (they are matched in encoded form)
            substitute("&lt;pre&gt;((.|\n)*?)&lt;/pre&gt;", new Perl5Substitution("<pre>$1</pre>"));
            substitute("&lt;b&gt;(.*?)&lt;/b&gt;", new Perl5Substitution("<b>$1</b>"));
            substitute("&lt;i&gt;(.*?)&lt;/i&gt;", new Perl5Substitution("<i>$1</i>"));
            substitute("&lt;tt&gt;(.*?)&lt;/tt&gt;", new Perl5Substitution("<tt>$1</tt>"));

            // Allow HTML anchor links
            // Again, we must use a MethodSubstitution here in order to make sure the
            // url encapsulated in the "href" tag isn't matched later on.
            substitute("&lt;A(\\s[^<>]+?)&gt;(.*?)&lt;/A&gt;", new MethodSubstitution(this, "storeHref"));

            // Link patterns
            substitute("\\[" + gUrlPattern + "\\s+([^\\]]+?)\\]", new MethodSubstitution(this, "storeBacketUrl"));
            substitute("\\[" + gUrlPattern + "\\]", new MethodSubstitution(this, "storeBacketUrl"));
            substitute(gUrlPattern, new MethodSubstitution(this, "storeUrl"));
            substitute(gMailPattern, new MethodSubstitution(this, "storeMail"));
            substitute(gISBNPattern, new MethodSubstitution(this, "storeISBN"));
            // substitute(gDocPattern, new MethodSubstitution(this, "storeDoc"));

            // Four or more dashes are turned into a horizontal rule.
            substitute("----+", new StringSubstitution("<hr noshade=\"noshade\" size=\"1\">"));

            // Headers, like = h1 =, == h2 ==, === h3 ===, etc.
            substitute("(\\=+)[ \\t]+([^\\n]+)[ \\t]+\\=+", new MethodSubstitution(this, "storeHeader"));

            // Line-oriented markup
            input = linesToHtml(input);

            // Restore saved text and nested saved text. Two passes, because a
            // stored fragment (e.g. a header) may itself contain the
            // placeholder of a fragment that was stored earlier (e.g. a url).
            substitute(gFS + "(\\d+)" + gFS, new MethodSubstitution(this, "getRaw"));
            substitute(gFS + "(\\d+)" + gFS, new MethodSubstitution(this, "getRaw"));
        } catch (NoSuchMethodException nsmex) {
            nsmex.printStackTrace();
        } catch (SecurityException sex) {
            sex.printStackTrace();
        }
        return input;
    }

    /**
     * Applies the line-oriented markup, one line at a time: lines starting
     * with a space become list items ("* ", "N. ", ": ") or preformatted
     * text, and the enclosing ul/ol/dl/pre tags are opened and closed as the
     * nesting depth changes.
     */
    private String linesToHtml(String input) {
        Stack tags = new Stack();
        StringBuffer html = new StringBuffer(input.length() + 100);
        int start = 0;
        int end;

        // replace tabs by 8 spaces
        input = substitute(input, "\\t", new StringSubstitution("        "));
        // make sure the last line is terminated so the loop below sees it
        input = input + "\n";

        while ((end = input.indexOf("\n", start)) >= 0) {
            String line = input.substring(start, end);
            String code = null;
            int depth = 0;
            int len = line.length();

            if ((len > 0) && (line.charAt(0) == ' ')) {
                // i becomes the index of the first non-space character
                int i = 1;
                while ((i < len) && (line.charAt(i) == ' ')) {
                    i++;
                }
                // '\n' stands in for "past the end of the line"
                char c = (i == len) ? '\n' : line.charAt(i);
                char nextc = (i >= (len - 1)) ? '\n' : line.charAt(i + 1);
                char nextnextc = (i >= (len - 2)) ? '\n' : line.charAt(i + 2);

                if (c == '*' && nextc == ' ') {
                    line = "<li>" + line.substring(i + 2) + "</li>";
                    code = "ul";
                    depth = (i + 1) / 2;
                } else if (c >= '0' && c <= '9' && nextc == '.' && nextnextc == ' ') {
                    line = "<li>" + line.substring(i + 3) + "</li>";
                    code = "ol";
                    depth = (i + 1) / 2;
                } else if (c == ':' && nextc == ' ') {
                    line = "<dt><dd>" + line.substring(i + 2) + "</dd></dt>";
                    code = "dl";
                    depth = (i + 1) / 2;
                } else {
                    code = "pre";
                    depth = 1;
                }
            }

            // close tags as needed
            while (tags.size() > depth) {
                html.append("</").append(tags.pop()).append(">\n");
            }

            if (depth > 0) {
                if (depth > gIndentLimit) {
                    depth = gIndentLimit;
                }
                // same depth but a different kind of block: swap the tag
                if (!tags.isEmpty()) {
                    String oldCode = (String) tags.pop();
                    if (!oldCode.equals(code)) {
                        html.append("</").append(oldCode).append("><").append(code).append(">\n");
                    }
                    tags.push(code);
                }
                // open tags as needed
                while (tags.size() < depth) {
                    tags.push(code);
                    html.append("<").append(code).append(">\n");
                }
            }

            // blank lines
            line = substitute(line, "^\\s*$", new StringSubstitution("<p></p>"));
            // line-oriented alternatives to make text strong and emphasized
            line = substitute(line, "('*)'''(.*?)'''", new Perl5Substitution("$1<strong>$2</strong>"));
            line = substitute(line, "''(.*?)''", new Perl5Substitution("<em>$1</em>"));

            html.append(line).append("\n");
            start = end + 1;
        }

        while (!tags.isEmpty()) {
            html.append("</").append(tags.pop()).append(">\n");
        }
        return html.toString();
    }

    /**
     * Compiles an expression with the case insensitive mask.
     *
     * @return the compiled pattern, or null (after reporting the problem on
     * System.err) if the expression is malformed
     */
    private Pattern compile(String expression) {
        PatternCompiler compiler = new Perl5Compiler();
        try {
            return compiler.compile(expression, Perl5Compiler.CASE_INSENSITIVE_MASK);
        } catch (MalformedPatternException e) {
            System.err.println("Bad pattern.");
            System.err.println(e.getMessage());
            return null;
        }
    }

    /**
     * Substitute all with case insensitive mask, on the text being converted.
     */
    private void substitute(String expression, Substitution sub) {
        input = substitute(input, expression, sub);
    }

    /**
     * Substitute all with case insensitive mask.
     *
     * @return the text with every match replaced; the text is returned
     * unchanged if the expression is malformed
     */
    private String substitute(String input, String expression, Substitution sub) {
        Pattern pattern = compile(expression);
        if (pattern == null) {
            // A bad pattern must not wipe out the text being converted.
            return input;
        }
        PatternMatcher matcher = new Perl5Matcher();
        return Util.substitute(matcher, pattern, sub, input, Util.SUBSTITUTE_ALL);
    }

    /**
     * Tells whether the expression (case insensitive) matches anywhere in
     * the input. A malformed expression counts as "no match".
     */
    private boolean contains(String input, String expression) {
        Pattern pattern = compile(expression);
        if (pattern == null) {
            return false;
        }
        PatternMatcher matcher = new Perl5Matcher();
        return matcher.contains(input, pattern);
    }

    /**
     * Stores a finished fragment and returns the placeholder (its 1-based
     * index between two separators) that takes its place in the text.
     */
    private String storeRaw(String text) {
        rawData.addElement(text);
        return gFS + rawData.size() + gFS;
    }

    /** Callback: replaces a placeholder by the fragment stored under its index. */
    public String getRaw(MatchResult match) {
        return (String) rawData.elementAt(Integer.parseInt(match.group(1)) - 1);
    }

    /** Callback: stores the contents of a code section as a "code" pre block. */
    public String storeCode(MatchResult match) {
        return storeRaw("<pre class=\"code\">" + match.group(1) + "</pre>");
    }

    /** Callback: stores an anchor; group 1 is its attributes, group 2 its text. */
    public String storeHref(MatchResult match) {
        return storeRaw("<a" + match.group(1) + ">" + match.group(2) + "</a>");
    }

    /**
     * Callback: stores a bracketed url. "[url text]" links with the given
     * text; "[url]" links with a number in brackets, where the same url
     * always gets the same number (urls are numbered from 1 in order of
     * first appearance).
     */
    public String storeBacketUrl(MatchResult match) {
        String url = match.group(1);
        String text;
        // groups() counts group 0 too, so 3 means the "[url text]" form
        if (match.groups() == 3) {
            text = match.group(2);
        } else {
            if (brackets == null) {
                brackets = new Vector();
            }
            int index = brackets.indexOf(url);
            if (index < 0) {
                brackets.addElement(url);
                index = brackets.size() - 1;
            }
            text = "[" + (index + 1) + "]";
        }
        return storeRaw("<a href=\"" + url + "\">" + text + "</a>");
    }

    /**
     * Callback: stores a plain url as a link, or as an image when it points
     * to an image file. A closing quote delimiter is dropped; otherwise
     * trailing punctuation is kept out of the link and left in the text.
     */
    public String storeUrl(MatchResult match) {
        String url = match.group(1);
        String junk = "";
        if (url.endsWith("\"\"")) {
            url = url.substring(0, url.length() - 2);
        } else {
            String temp = substitute(url, "([^a-zA-Z0-9\\/\\xc0-\\xff]+)$", new StringSubstitution(""));
            junk = url.substring(temp.length(), url.length());
            url = temp;
        }
        // do we have an image?
        if (contains(url, "^(http:|https:|ftp:).+\\." + gImageExtensions + "$")) {
            url = "<img src=\"" + url + "\">";
        } else {
            url = "<a href=\"" + url + "\">" + url + "</a>";
        }
        return storeRaw(url) + junk;
    }

    /** Callback: stores a mail address as a mailto link. */
    public String storeMail(MatchResult match) {
        return storeRaw("<a href=\"mailto:" + match.group(1) + "\">" + match.group(1) + "</a>");
    }

    /**
     * Callback: stores an ISBN as a link. The match is left untouched when
     * the number, without spaces and dashes, is not 10 characters long.
     */
    public String storeISBN(MatchResult match) {
        String raw = match.group(1);
        String num = StringUtils.replace(StringUtils.replace(raw, " ", ""), "-", "");
        if (num.length() != 10) {
            return match.group(0);
        }
        // The pattern also swallows the spaces that follow the number; keep
        // them so the link does not run into the next word.
        int end = raw.length();
        while ((end > 0) && (raw.charAt(end - 1) == ' ')) {
            end--;
        }
        String spaces = raw.substring(end);
        return storeRaw("<a href=\"http://www.amazon.com/exec/obidos/ISBN=" + num + "\">ISBN " + raw.trim() + "</a>" + spaces);
    }

    /**
     * Callback: stores a header; the number of leading "=" gives the level,
     * with a maximum of 5.
     */
    public String storeHeader(MatchResult match) {
        int depth = match.group(1).length();
        if (depth > 5) {
            depth = 5;
        }
        return storeRaw("<h" + depth + ">" + match.group(2) + "</h" + depth + ">");
    }

    // helper method to debug MethodSubstitutions
    private void dumpMatches(MatchResult match) {
        for (int i = 1; i < match.groups(); i++) {
            System.out.println(i + " -- " + match.group(i));
        }
    }

    // unit test
    // NOTE(review): the leading spaces of the nested list samples look
    // collapsed to a single space; confirm against the original file.
    public static void main(String[] args) {
        String input =
            "=== Introduction ===\n" +
            "Hello world & werelden\n" +
            " * item a\n" +
            " * item b\n" +
            " * item a.1\n" +
            " * item a.2\n" +
            " * item c\n" +
            "\n" +
            " preformatted text\n" +
            " x = 0;\n" +
            "\n" +
            " 1. item 1\n" +
            " 1. item 1.1\n" +
            " 1. item 1.2\n" +
            " 2. item 2\n" +
            " 3. item 3\n" +
            "==== Markup of Text ====\n" +
            " '''bold''' text and ''italic'' text and\\\n" +
            "more '''bold text'''.\n" +
            "and <b>even</b> more <b>bold text!</b>\n" +
            "--------\n" +
            "and <i><b>even</b></i> more <i>italic text!</i>\n" +
            "==== Links ====\n" +
            "http://www.google.com respect naar hen\n" +
            "http://www.google.com, respect naar hen\n" +
            "http://www.google.com\"\" respect naar hen\n" +
            "http://jakarta.apache.org/oro/images/logo.gif\n" +
            "Bogus: mailto://jakarta.apache.org/foobar.gif\n" +
            "stuur mail naar mailto:[EMAIL PROTECTED] of naar <b>[EMAIL PROTECTED]\n" +
            "en hier een</b> pure HTML link: <a href=\"http://www.google.com\">Respect naar Hen!</a> zo dan.\n" +
            "here is an example of a bracketed url: [http://jakarta.apache.org], and here is an " +
            "example of another version: [http://jakarta.apache.org Apache Jakarta].\n" +
            "example of a link to a newsgroup: news:alt.delete.this.newsgroup\n" +
            "books ========= Books ==== and more books coming:\n" +
            "<pre>\n" +
            "een boek: ISBN 0123456789111 enzo. ofzo.\n" +
            "een echt boek: ISBN 123456789X enzo. ofzo.\n" +
            "een echt boek: ISBN 123-45 67x 89z enzo. ofzo.\n" +
            "</pre>\n" +
            "\n" +
            "<code>\n" +
            " public static void main(String[] args) {\n" +
            " System.out.println(\"Hello http://www.apache.org Apache\");\n" +
            " }\n" +
            "</code>\n" +
            "\n" +
            "Copyright © OpenWiki" +
            "\n" +
            "<i>It was a deadly mistake. Rumors about an ancient secret society that was still alive and kicking snooped into the radical magazines. Now offices have been bombed. Even George Bush can't guess how far into the pinnacles of power this conspiracy of evil has penetrated.</i> -- ISBN:0-440-53981-1 ofzo"
            ;
        System.out.println("\n");
        System.out.println(STA.convert(input));
        System.out.println("\n");
    }
}