Update of /var/cvs/src/org/mmbase/util/transformers
In directory james.mmbase.org:/tmp/cvs-serv3853
Modified Files:
Tag: MMBase-1_8
ChunkedTransformer.java LinkFinder.java RegexpReplacer.java
RegexpReplacerFactory.java ResourceBundleReplacerFactory.java
Log Message:
Ported from HEAD, to fix MMB-1568
See also: http://cvs.mmbase.org/viewcvs/src/org/mmbase/util/transformers
See also: http://www.mmbase.org/jira/browse/MMB-1568
Index: ChunkedTransformer.java
===================================================================
RCS file: /var/cvs/src/org/mmbase/util/transformers/ChunkedTransformer.java,v
retrieving revision 1.4.2.2
retrieving revision 1.4.2.3
diff -u -b -r1.4.2.2 -r1.4.2.3
--- ChunkedTransformer.java 18 Mar 2008 15:09:41 -0000 1.4.2.2
+++ ChunkedTransformer.java 24 Jul 2008 16:16:08 -0000 1.4.2.3
@@ -11,10 +11,6 @@
import java.util.*;
import java.io.*;
-import java.util.regex.*;
-import org.mmbase.util.ResourceWatcher;
-import org.mmbase.util.xml.UtilReader;
-import org.mmbase.util.Entry;
import org.mmbase.util.logging.*;
@@ -28,7 +24,6 @@
*
* @author Michiel Meeuwissen
* @since MMBase-1.8
- * @version $Id
*/
public abstract class ChunkedTransformer extends ConfigurableReaderTransformer
implements CharTransformer {
@@ -59,29 +54,29 @@
*/
public final static int ENTIRE = 5;
+ // about 3 bits used now.
/**
- * If this is added to the config-int, then only the first match should be used.
+ * If this is added to the config-int, then only the first match of any one pattern should be used.
*/
- public final static int REPLACE_FIRST = 100;
+ public final static int ONLY_USE_FIRST_MATCHING_PATTERN = 1 << 5;
+
/**
- * If this is added to the config-int, then only the first match should be used.
+ * If this is added to the config-int, then only the first match of all patterns should be used.
*/
- public final static int REPLACE_FIRST_ALL = 200;
+ public final static int ONLY_REPLACE_FIRST_MATCH = 1 << 6;
- protected boolean replaceFirst = false;
- protected boolean replaceFirstAll = false;
+ protected boolean onlyFirstPattern = false;
+ protected boolean onlyFirstMatch = false;
public void configure(int i) {
- if (i >= REPLACE_FIRST_ALL) {
- replaceFirstAll = true;
- i -= REPLACE_FIRST_ALL;
- }
- if (i >= REPLACE_FIRST) {
- replaceFirst = true;
- i -= REPLACE_FIRST;
- }
+ onlyFirstMatch = ((i & ONLY_REPLACE_FIRST_MATCH) > 0);
+ onlyFirstPattern = ((i & ONLY_USE_FIRST_MATCHING_PATTERN) > 0);
+ // set corresponding bits to 0, they will not be needed any more.
+ i &= ~ONLY_USE_FIRST_MATCHING_PATTERN;
+ i &= ~ONLY_REPLACE_FIRST_MATCH;
+
super.configure(i);
}
@@ -95,16 +90,12 @@
protected class Status {
int replaced = 0;
- Set used = null;
- {
- if (replaceFirstAll) used = new HashSet();
+ final Set used = onlyFirstMatch ? new HashSet() : null;
}
- }
-
protected Status newStatus() {
return new Status();
- }
+ }
/**
* Implement this. Return true if a replacement done.
*/
@@ -162,7 +153,7 @@
* Whether still to do replacing, given status.
*/
protected boolean replace(Status status) {
- return !replaceFirst || status.replaced == 0;
+ return ! onlyFirstMatch || status.replaced == 0;
}
public Writer transformXmlTextWords(Reader r, Writer w) {
@@ -200,8 +191,6 @@
// write last word
if (replace(status)) {
if (translating) replaceWord(word, w, status);
- } else {
- w.write(word.toString());
}
if (log.isDebugEnabled()) {
log.debug("Finished replacing. Replaced " + status.replaced +
" words");
@@ -229,6 +218,7 @@
if (c == '<') { // don't do it in existing tags and attributes
translating = false;
replace(xmltext.toString(), w, status);
+ xmltext.setLength(0);
w.write(c);
} else if (c == '>') {
translating = true;
@@ -243,8 +233,6 @@
// write last word
if (replace(status)) {
if (translating) replace(xmltext.toString(), w, status);
- } else {
- w.write(xmltext.toString());
}
log.debug("Finished replacing. Replaced " + status.replaced + "
words");
} catch (java.io.IOException e) {
@@ -311,14 +299,16 @@
StringWriter sw = new StringWriter();
Status status = newStatus();
try {
- while (true) {
- int c = r.read();
- if (c == -1) break;
- sw.write(c);
+ BufferedReader br = new BufferedReader(r);
+ char[] buf = new char[200];
+ int n = br.read(buf, 0, buf.length);
+ while (n > 0) {
+ sw.write(buf, 0, n);
+ n = br.read(buf, 0, buf.length);
}
replace(sw.toString(), w, status);
} catch (java.io.IOException e) {
- log.error(e.toString());
+ log.error(e.getMessage(), e);
}
return w;
@@ -366,6 +356,42 @@
return Collections.unmodifiableMap(h);
}
+ public static void main(String [] argv) {
+ CharTransformer trans = new ChunkedTransformer(XMLTEXT) {
+ protected boolean replace(String string, Writer w, Status
status) throws IOException {
+ w.write(string);
+ return false;
+ }
+ protected String base() {
+ return "test";
+ }
+ };
+ CharTransformer trans2 = new BufferedReaderTransformer() {
+ protected boolean transform(PrintWriter bw, String line,Status
status) {
+ bw.println(line);
+ return true;
+ }
+
+ protected Status createNewStatus() {
+ return null;
+ }
+ };
+ long startTime = System.currentTimeMillis();
+ if (argv.length > 0) {
+ if("buf1".equals(argv[0])) {
+ trans.transform(new BufferedReader(new
InputStreamReader(System.in)), new BufferedWriter(new
OutputStreamWriter(System.out)));
+ } else if ("buf2".equals(argv[0])) {
+ trans2.transform(new InputStreamReader(System.in), new
BufferedWriter(new OutputStreamWriter(System.out)));
+ } else {
+ System.err.println("Don't understand '" + argv[0] + "'");
+ }
+ } else {
+ trans.transform(new InputStreamReader(System.in), new
OutputStreamWriter(System.out));
+ }
+ long duration = System.currentTimeMillis() - startTime;
+ System.err.println("Converstion took " + duration + " ms");
+
+ }
}
Index: LinkFinder.java
===================================================================
RCS file: /var/cvs/src/org/mmbase/util/transformers/LinkFinder.java,v
retrieving revision 1.9
retrieving revision 1.9.2.1
diff -u -b -r1.9 -r1.9.2.1
--- LinkFinder.java 3 Apr 2006 14:12:36 -0000 1.9
+++ LinkFinder.java 24 Jul 2008 16:16:08 -0000 1.9.2.1
@@ -33,6 +33,7 @@
public LinkFinder() {
super(XMLTEXT_WORDS);
+ onlyFirstPattern = true;
}
@@ -46,7 +47,6 @@
protected void readDefaultPatterns(Collection patterns) {
-
patterns.add(new Entry(Pattern.compile("[EMAIL PROTECTED]"), "<a
href=\"mailto:$0\">$0</a>"));
patterns.add(new Entry(Pattern.compile("http://.+"), "<a
href=\"$0\">$0</a>"));
patterns.add(new Entry(Pattern.compile("https://.+"), "<a
href=\"$0\">$0</a>"));
Index: RegexpReplacer.java
===================================================================
RCS file: /var/cvs/src/org/mmbase/util/transformers/RegexpReplacer.java,v
retrieving revision 1.14.2.2
retrieving revision 1.14.2.3
diff -u -b -r1.14.2.2 -r1.14.2.3
--- RegexpReplacer.java 18 Mar 2008 16:31:30 -0000 1.14.2.2
+++ RegexpReplacer.java 24 Jul 2008 16:16:08 -0000 1.14.2.3
@@ -12,6 +12,7 @@
import java.util.*;
import java.io.*;
import java.util.regex.*;
+
import org.mmbase.util.ResourceWatcher;
import org.mmbase.util.xml.UtilReader;
import org.mmbase.util.Entry;
@@ -101,7 +102,7 @@
patterns.clear();
- Collection regs = (Collection)
utilReader.getProperties().get("regexps");
+ Collection regs = (Collection) utilReader.getMaps().get("regexps");
if (regs != null) {
addPatterns(regs, patterns);
} else {
@@ -163,15 +164,49 @@
}
}
+ private class Chunk {
+ String string;
+ boolean replaced = false;
+ Chunk(String s) {
+ string = s;
+ }
+ Chunk(String s, boolean r) {
+ string = s; replaced = r;
+ }
+ public String toString() { return "'" + string + "'" + (replaced ? "."
: ""); }
+
+ }
+
protected boolean replace(String string, Writer w, Status status) throws
IOException {
- Iterator i = getPatterns().iterator();
- boolean r = false;
- while (i.hasNext()) {
- Entry entry = (Entry) i.next();
+ boolean r = false; // result value
+
+ List chunks;
+ if (onlyFirstMatch) {
+ // linked list while we're going to do a lot of changing:
+ chunks = new LinkedList();
+ chunks.add(new Chunk(string));
+ } else {
+ // will not make any changes
+ chunks = new ArrayList();
+ chunks.add(new Chunk(string));
+ }
+
+ Iterator j = getPatterns().iterator();
+ while (j.hasNext()) {
+ Map.Entry entry = (Map.Entry) j.next();
Pattern p = (Pattern) entry.getKey();
- if (replaceFirstAll && status.used.contains(p)) continue;
- Matcher m = p.matcher(string);
+
+
+ if (onlyFirstMatch && status.used.contains(p)) continue;
+
+ ListIterator i = chunks.listIterator();
+ while (i.hasNext()) {
+ Chunk chunk = (Chunk) i.next();
+ if (onlyFirstPattern && chunk.replaced) {
+ continue;
+ }
+ Matcher m = p.matcher(chunk.string);
String replacement = (String) entry.getValue();
boolean result = false;
if (to == ChunkedTransformer.XMLTEXT_WORDS || to ==
ChunkedTransformer.WORDS) {
@@ -185,20 +220,43 @@
do {
status.replaced++;
m.appendReplacement(sb, replacement);
- if (replaceFirst || replaceFirstAll ||
+ if (onlyFirstMatch || onlyFirstPattern ||
to == ChunkedTransformer.XMLTEXT_WORDS ||
to == ChunkedTransformer.WORDS) break;
result = m.find();
} while (result);
+
+ if (onlyFirstPattern) {
+ // make a new chunk.
+ i.remove();
+ int s = m.start();
+ if (s > 0) {
+ i.add(new Chunk(sb.toString().substring(0, s)));
+ sb.delete(0, s);
+ }
+ i.add(new Chunk(sb.toString(), true));
+ sb.setLength(0);
+ m.appendTail(sb);
+ i.add(new Chunk(sb.toString()));
+ i.previous();
+ } else {
m.appendTail(sb);
- if (replaceFirstAll) status.used.add(p);
- string = sb.toString();
- if (replaceFirst ||
+ i.set(new Chunk(sb.toString()));
+ }
+ if (onlyFirstMatch ||
to == ChunkedTransformer.XMLTEXT_WORDS ||
- to == ChunkedTransformer.WORDS) break;
+ to == ChunkedTransformer.WORDS) {
+ // next pattern
+ break;
+ }
+ }
}
}
- w.write(string);
+ Iterator k = chunks.iterator();
+ while (k.hasNext()) {
+ Chunk s = (Chunk) k.next();
+ w.write(s.string);
+ }
return r;
}
@@ -210,6 +268,22 @@
return getEncoding() + " " + getPatterns();
}
+ public static void main(String[] arg) {
+ StringBuffer b = new StringBuffer();
+ Pattern p = Pattern.compile(arg[0]);
+ String input = arg[1];
+ Matcher m = p.matcher(input);
+ while (m.find()) {
+ b.append("'");
+ m.appendReplacement(b, m.group().toUpperCase());
+ b.append("'");
+ System.out.println("s: " + m.start() + " e: " + m.end() + "g: " +
m.group());
+ }
+ b.append("X");
+ m.appendTail(b);
+ System.out.println("buf : " + b);
+ }
+
}
Index: RegexpReplacerFactory.java
===================================================================
RCS file: /var/cvs/src/org/mmbase/util/transformers/RegexpReplacerFactory.java,v
retrieving revision 1.11.2.1
retrieving revision 1.11.2.2
diff -u -b -r1.11.2.1 -r1.11.2.2
--- RegexpReplacerFactory.java 18 Mar 2008 15:09:41 -0000 1.11.2.1
+++ RegexpReplacerFactory.java 24 Jul 2008 16:16:08 -0000 1.11.2.2
@@ -10,7 +10,9 @@
package org.mmbase.util.transformers;
import java.util.*;
+import java.util.regex.Pattern;
+import org.mmbase.util.Entry;
import org.mmbase.util.logging.*;
import org.mmbase.util.functions.*;
@@ -21,18 +23,19 @@
*
* @author Michiel Meeuwissen
* @since MMBase-1.8
- * @version $Id
+ * @version $Id: RegexpReplacerFactory.java,v 1.11.2.2 2008/07/24 16:16:08 michiel Exp $
*/
-public class RegexpReplacerFactory implements ParameterizedTransformerFactory {
+public class RegexpReplacerFactory implements ParameterizedTransformerFactory{
private static final Logger log =
Logging.getLoggerInstance(RegexpReplacerFactory.class);
+ protected static final Parameter PATTERNS =
+ new Parameter("patterns", Collection.class, Collections.EMPTY_LIST);
+ protected static final Parameter MODE = new Parameter("mode",
String.class, "WORDS");
+ protected static final Parameter FIRST_MATCH = new
Parameter("onlyFirstMatch", String.class);
+ protected static final Parameter FIRST_PATTERN = new
Parameter("onlyFirstPattern", String.class);
- protected static final Parameter[] PARAMS = new Parameter[] {
- new Parameter("patterns", Collection.class, Collections.EMPTY_LIST),
- new Parameter("mode", String.class),
- new Parameter("replacefirst", String.class)
- };
+ protected static final Parameter[] PARAMS = new Parameter[] { PATTERNS,
MODE, FIRST_MATCH, FIRST_PATTERN };
public Parameters createParameters() {
return new Parameters(PARAMS);
@@ -49,28 +52,38 @@
RegexpReplacer trans = new RegexpReplacer() {
private Collection patterns = new ArrayList();
{
- addPatterns((Collection) parameters.get("patterns"),
patterns);
+ addPatterns((Collection)parameters.get(PATTERNS),
patterns);
}
public Collection getPatterns() {
return patterns;
}
};
- String mode = (String) parameters.get("mode");
- if (mode == null) mode = "WORDS";
- Config c = (Config)trans.transformers().get("REGEXPS_" +
mode.toUpperCase());
- if (c == null) c = (Config)trans.transformers().get(mode);
+ String mode = (String) parameters.get(MODE);
+ Config c = (Config) trans.transformers().get("REGEXPS_" +
mode.toUpperCase());
+ if (c == null) c = (Config) trans.transformers().get(mode);
if (c == null) throw new IllegalArgumentException("" + mode + " cannot
be found in " + trans.transformers());
- String firstParam = (String) parameters.get("replacefirst");
- boolean replaceFirst = "true".equals(firstParam);
- boolean replaceFirstAll = "all".equals(firstParam);
- trans.configure(c.config +
- (replaceFirst ? ChunkedTransformer.REPLACE_FIRST : 0) +
- (replaceFirstAll ?
ChunkedTransformer.REPLACE_FIRST_ALL : 0)
- );
+ boolean firstMatch = "true".equals(parameters.get(FIRST_MATCH));
+ boolean firstPattern = "true".equals(parameters.get(FIRST_PATTERN));
+ int i = c.config +
+ (firstMatch ? ChunkedTransformer.ONLY_REPLACE_FIRST_MATCH : 0) +
+ (firstPattern ? ChunkedTransformer.ONLY_USE_FIRST_MATCHING_PATTERN
: 0);
+ trans.configure(i);
+
return trans;
}
+ public static void main(String[] argv) {
+ RegexpReplacerFactory fact = new RegexpReplacerFactory();
+ Parameters pars = fact.createParameters();
+ pars.set("mode", "ENTIRE");
+ List patterns = new ArrayList();
+ patterns.add(new Entry("\\s+", " "));
+ pars.set("patterns", patterns);
+ CharTransformer reg = (CharTransformer) fact.createTransformer(pars);
+ System.out.println(reg.transform(argv[0]));
+
+ }
Index: ResourceBundleReplacerFactory.java
===================================================================
RCS file: /var/cvs/src/org/mmbase/util/transformers/ResourceBundleReplacerFactory.java,v
retrieving revision 1.4
retrieving revision 1.4.2.1
diff -u -b -r1.4 -r1.4.2.1
--- ResourceBundleReplacerFactory.java 25 Jan 2006 18:03:47 -0000 1.4
+++ ResourceBundleReplacerFactory.java 24 Jul 2008 16:16:08 -0000 1.4.2.1
@@ -24,7 +24,7 @@
*
* @author Michiel Meeuwissen
* @since MMBase-1.8
- * @version $Id: ResourceBundleReplacerFactory.java,v 1.4 2006/01/25 18:03:47 michiel Exp $
+ * @version $Id: ResourceBundleReplacerFactory.java,v 1.4.2.1 2008/07/24 16:16:08 michiel Exp $
*/
public class ResourceBundleReplacerFactory implements
ParameterizedTransformerFactory {
@@ -65,14 +65,14 @@
this.name = name;
}
protected boolean replace(final String word, final Writer w, final Status
status) throws IOException {
- if (replaceFirstAll && status.used.contains(word)) {
+ if (onlyFirstPattern && status.used.contains(word)) {
w.write(word);
return false;
}
try {
w.write("" + bundle.getObject(word));
status.replaced++;
- if (replaceFirstAll) status.used.add(word);
+ if (onlyFirstMatch) status.used.add(word);
return true;
} catch (MissingResourceException mre) {
w.write(word);
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs