http://git-wip-us.apache.org/repos/asf/groovy/blob/d638ca43/src/main/groovy/groovy/ui/GroovyMain.java
----------------------------------------------------------------------
diff --git a/src/main/groovy/groovy/ui/GroovyMain.java 
b/src/main/groovy/groovy/ui/GroovyMain.java
new file mode 100644
index 0000000..a4d5986
--- /dev/null
+++ b/src/main/groovy/groovy/ui/GroovyMain.java
@@ -0,0 +1,597 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.ui;
+
+import groovy.lang.Binding;
+import groovy.lang.GroovyCodeSource;
+import groovy.lang.GroovyRuntimeException;
+import groovy.lang.GroovyShell;
+import groovy.lang.GroovySystem;
+import groovy.lang.MissingMethodException;
+import groovy.lang.Script;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.codehaus.groovy.control.CompilationFailedException;
+import org.codehaus.groovy.control.CompilerConfiguration;
+import org.codehaus.groovy.control.customizers.ImportCustomizer;
+import org.codehaus.groovy.runtime.InvokerHelper;
+import org.codehaus.groovy.runtime.InvokerInvocationException;
+import org.codehaus.groovy.runtime.ResourceGroovyMethods;
+import org.codehaus.groovy.runtime.StackTraceUtils;
+import org.codehaus.groovy.runtime.StringGroovyMethods;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+import java.util.regex.Pattern;
+
+import static org.apache.commons.cli.Option.builder;
+
+/**
+ * A Command line to execute groovy.
+ */
+public class GroovyMain {
+
+    // arguments to the script
+    private List args;
+
+    // is this a file on disk
+    private boolean isScriptFile;
+
+    // filename or content of script
+    private String script;
+
+    // process args as input files
+    private boolean processFiles;
+
+    // edit input files in place
+    private boolean editFiles;
+
+    // automatically output the result of each script
+    private boolean autoOutput;
+
+    // automatically split each line using the splitpattern
+    private boolean autoSplit;
+
+    // The pattern used to split the current line
+    private String splitPattern = " ";
+
+    // process sockets
+    private boolean processSockets;
+
+    // port to listen on when processing sockets
+    private int port;
+
+    // backup input files with extension
+    private String backupExtension;
+
+    // do you want full stack traces in script exceptions?
+    private boolean debug = false;
+
+    // Compiler configuration, used to set the encodings of the scripts/classes
+    private CompilerConfiguration conf = new 
CompilerConfiguration(System.getProperties());
+
+    /**
+     * Command-line entry point. Delegates to
+     * {@link #processArgs(String[], PrintStream)} using standard output
+     * as the stream for usage and error messages.
+     *
+     * @param args all command line args.
+     */
+    public static void main(String[] args) {
+        final PrintStream stdout = System.out;
+        processArgs(args, stdout);
+    }
+
+    // package-level visibility for testing purposes (just usage/errors at 
this stage)
+    // TODO: should we have an 'err' printstream too for ParseException?
+    static void processArgs(String[] args, final PrintStream out) {
+        Options options = buildOptions();
+
+        try {
+            CommandLine cmd = parseCommandLine(options, args);
+
+            if (cmd.hasOption('h')) {
+                printHelp(out, options);
+            } else if (cmd.hasOption('v')) {
+                String version = GroovySystem.getVersion();
+                out.println("Groovy Version: " + version + " JVM: " + 
System.getProperty("java.version") +
+                        " Vendor: " + System.getProperty("java.vm.vendor")  + 
" OS: " + System.getProperty("os.name"));
+            } else {
+                // If we fail, then exit with an error so scripting frameworks 
can catch it
+                // TODO: pass printstream(s) down through process
+                if (!process(cmd)) {
+                    System.exit(1);
+                }
+            }
+        } catch (ParseException pe) {
+            out.println("error: " + pe.getMessage());
+            printHelp(out, options);
+        } catch (IOException ioe) {
+            out.println("error: " + ioe.getMessage());
+        }
+    }
+
+    private static void printHelp(PrintStream out, Options options) {
+        HelpFormatter formatter = new HelpFormatter();
+        PrintWriter pw = new PrintWriter(out);
+
+        formatter.printHelp(
+                pw,
+                80,
+                "groovy [options] [filename] [args]",
+                "The Groovy command line processor.\nOptions:",
+                options,
+                2,
+                4,
+                null, // footer
+                false);
+
+        pw.flush();
+    }
+
+    /**
+     * Parses the raw arguments against the supplied option definitions.
+     * The parser is told to stop at the first non-option token (third
+     * argument {@code true}), leaving the remaining tokens as plain arguments.
+     *
+     * @param options the option definitions.
+     * @param args    the command line args.
+     * @return parsed command line.
+     * @throws ParseException if there was a problem.
+     */
+    private static CommandLine parseCommandLine(Options options, String[] args) throws ParseException {
+        return new DefaultParser().parse(options, args, true);
+    }
+
+    /**
+     * Build the set of command line options accepted by {@code groovy}.
+     * Each option is created with the statically imported
+     * {@code Option.builder(...)} and added to a fresh {@link Options}.
+     *
+     * @return the option definitions used for parsing and for the help text.
+     */
+    private static Options buildOptions() {
+        return new Options()
+                
.addOption(builder("classpath").hasArg().argName("path").desc("Specify where to 
find the class files - must be first argument").build())
+                
.addOption(builder("cp").longOpt("classpath").hasArg().argName("path").desc("Aliases
 for '-classpath'").build())
+                .addOption(builder("D").longOpt("define").desc("Define a 
system 
property").numberOfArgs(2).valueSeparator().argName("name=value").build())
+                .addOption(
+                        builder().longOpt("disableopt")
+                                .desc("Disables one or all optimization 
elements; " +
+                                        "optlist can be a comma separated list 
with the elements: " +
+                                        "all (disables all optimizations), " +
+                                        "int (disable any int based 
optimizations)")
+                                .hasArg().argName("optlist").build())
+                .addOption(builder("h").hasArg(false).desc("Usage 
information").longOpt("help").build())
+                .addOption(builder("d").hasArg(false).desc("Debug mode will 
print out full stack traces").longOpt("debug").build())
+                .addOption(builder("v").hasArg(false).desc("Display the Groovy 
and JVM versions").longOpt("version").build())
+                
.addOption(builder("c").argName("charset").hasArg().desc("Specify the encoding 
of the files").longOpt("encoding").build())
+                
.addOption(builder("e").argName("script").hasArg().desc("Specify a command line 
script").build())
+                
.addOption(builder("i").argName("extension").optionalArg(true).desc("Modify 
files in place; create backup if extension is given (e.g. \'.bak\')").build())
+                .addOption(builder("n").hasArg(false).desc("Process files line 
by line using implicit 'line' variable").build())
+                .addOption(builder("p").hasArg(false).desc("Process files line 
by line and print result (see also -n)").build())
+                .addOption(builder("pa").hasArg(false).desc("Generate metadata 
for reflection on method parameter names (jdk8+ 
only)").longOpt("parameters").build())
+                
.addOption(builder("l").argName("port").optionalArg(true).desc("Listen on a 
port and process inbound lines (default: 1960)").build())
+                
.addOption(builder("a").argName("splitPattern").optionalArg(true).desc("Split 
lines using splitPattern (default '\\s') using implicit 'split' 
variable").longOpt("autosplit").build())
+                .addOption(builder().longOpt("indy").desc("Enables compilation 
using invokedynamic").build())
+                .addOption(builder().longOpt("configscript").hasArg().desc("A 
script for tweaking the configuration options").build())
+                
.addOption(builder("b").longOpt("basescript").hasArg().argName("class").desc("Base
 class name for scripts (must derive from Script)").build());
+    }
+
+    /**
+     * Process the users request: copy the parsed options onto a new
+     * {@code GroovyMain} instance and its compiler configuration, then run it.
+     *
+     * @param line the parsed command line.
+     * @return the result of {@link #run()}: {@code true} on success, {@code false} if the script failed.
+     * @throws ParseException if invalid options are chosen (no script and no {@code -e},
+     *                        a {@code .java} script file, or a non-numeric {@code -l} port)
+     * @throws IOException    if processing the configuration scripts fails
+     */
+    private static boolean process(CommandLine line) throws ParseException, IOException {
+        List args = line.getArgList();
+
+        if (line.hasOption('D')) {
+            Properties optionProperties = line.getOptionProperties("D");
+            // stringPropertyNames() gives typed names without an unchecked Enumeration cast
+            for (String name : optionProperties.stringPropertyNames()) {
+                System.setProperty(name, optionProperties.getProperty(name));
+            }
+        }
+
+        GroovyMain main = new GroovyMain();
+
+        // add the ability to parse scripts with a specified encoding
+        main.conf.setSourceEncoding(line.getOptionValue('c', main.conf.getSourceEncoding()));
+
+        main.isScriptFile = !line.hasOption('e');
+        main.debug = line.hasOption('d');
+        main.conf.setDebug(main.debug);
+        main.conf.setParameters(line.hasOption("pa"));
+        main.processFiles = line.hasOption('p') || line.hasOption('n');
+        main.autoOutput = line.hasOption('p');
+        main.editFiles = line.hasOption('i');
+        if (main.editFiles) {
+            main.backupExtension = line.getOptionValue('i');
+        }
+        main.autoSplit = line.hasOption('a');
+        String sp = line.getOptionValue('a');
+        if (sp != null) {
+            main.splitPattern = sp;
+        }
+
+        if (main.isScriptFile) {
+            if (args.isEmpty()) {
+                throw new ParseException("neither -e or filename provided");
+            }
+
+            main.script = (String) args.remove(0);
+            if (main.script.endsWith(".java")) {
+                // no "error: " prefix here: processArgs already adds one when reporting
+                throw new ParseException("cannot compile file with .java extension: " + main.script);
+            }
+        } else {
+            main.script = line.getOptionValue('e');
+        }
+
+        main.processSockets = line.hasOption('l');
+        if (main.processSockets) {
+            String p = line.getOptionValue('l', "1960"); // default port to listen to
+            try {
+                main.port = Integer.parseInt(p);
+            } catch (NumberFormatException nfe) {
+                // report a bad port like any other option error instead of an uncaught exception
+                ParseException invalidPort = new ParseException("invalid port for -l: " + p);
+                invalidPort.initCause(nfe);
+                throw invalidPort;
+            }
+        }
+
+        // we use "," as default, because then split will create
+        // an empty array if no option is set
+        String disabled = line.getOptionValue("disableopt", ",");
+        String[] deopts = disabled.split(",");
+        for (String deopt_i : deopts) {
+            main.conf.getOptimizationOptions().put(deopt_i, false);
+        }
+
+        if (line.hasOption("indy")) {
+            CompilerConfiguration.DEFAULT.getOptimizationOptions().put("indy", true);
+            main.conf.getOptimizationOptions().put("indy", true);
+        }
+
+        if (line.hasOption("basescript")) {
+            main.conf.setScriptBaseClass(line.getOptionValue("basescript"));
+        }
+
+        // config scripts may come from the command line and/or a system property
+        String configScripts = System.getProperty("groovy.starter.configscripts", null);
+        if (line.hasOption("configscript") || (configScripts != null && !configScripts.isEmpty())) {
+            List<String> scripts = new ArrayList<String>();
+            if (line.hasOption("configscript")) {
+                scripts.add(line.getOptionValue("configscript"));
+            }
+            if (configScripts != null) {
+                scripts.addAll(StringGroovyMethods.tokenize((CharSequence) configScripts, ','));
+            }
+            processConfigScripts(scripts, main.conf);
+        }
+
+        main.args = args;
+        return main.run();
+    }
+
+    /**
+     * Evaluates each configuration script file in order. The scripts see the
+     * given compiler configuration as the binding variable {@code configuration}
+     * and get the static members of {@code CompilerCustomizationBuilder} imported.
+     *
+     * @param scripts paths of the configuration script files; nothing happens if empty.
+     * @param conf    the compiler configuration exposed to the scripts.
+     * @throws IOException if evaluating a script file fails with an I/O error.
+     */
+    public static void processConfigScripts(List<String> scripts, CompilerConfiguration conf) throws IOException {
+        if (!scripts.isEmpty()) {
+            Binding scriptVariables = new Binding();
+            scriptVariables.setVariable("configuration", conf);
+
+            CompilerConfiguration shellConfig = new CompilerConfiguration();
+            ImportCustomizer builderImports = new ImportCustomizer();
+            builderImports.addStaticStars("org.codehaus.groovy.control.customizers.builder.CompilerCustomizationBuilder");
+            shellConfig.addCompilationCustomizers(builderImports);
+
+            GroovyShell evaluator = new GroovyShell(scriptVariables, shellConfig);
+            for (String path : scripts) {
+                evaluator.evaluate(new File(path));
+            }
+        }
+    }
+
+
+
+    /**
+     * Run the script in the mode selected on the command line
+     * (socket server, line-by-line file processing, or a single run).
+     *
+     * @return {@code true} if processing finished without throwing,
+     *         {@code false} if compilation or execution failed (the failure
+     *         is reported on {@code System.err}).
+     */
+    private boolean run() {
+        try {
+            if (processSockets) {
+                processSockets();
+            } else if (processFiles) {
+                processFiles();
+            } else {
+                processOnce();
+            }
+            return true;
+        } catch (CompilationFailedException e) {
+            System.err.println(e);
+            return false;
+        } catch (Throwable e) {
+            Throwable failure = e;
+            // unwrap the invoker wrapper, but keep it if it carries no cause
+            // so we never report or sanitize a null throwable
+            if (e instanceof InvokerInvocationException && e.getCause() != null) {
+                failure = e.getCause();
+            }
+            System.err.println("Caught: " + failure);
+            if (!debug) {
+                StackTraceUtils.deepSanitize(failure);
+            }
+            failure.printStackTrace();
+            return false;
+        }
+    }
+
+    /**
+     * Process Sockets.
+     */
+    private void processSockets() throws CompilationFailedException, 
IOException, URISyntaxException {
+        GroovyShell groovy = new GroovyShell(conf);
+        new GroovySocketServer(groovy, getScriptSource(isScriptFile, script), 
autoOutput, port);
+    }
+
+    /**
+     * Get the text of the Groovy script at the given location.
+     * If the location is a file path and it does not exist as given,
+     * then {@link GroovyMain#huntForTheScriptFile(String)} is called to try
+     * with some Groovy extensions appended.
+     *
+     * This method is not used to process scripts and is retained for backward
+     * compatibility.  If you want to modify how GroovyMain processes scripts
+     * then use {@link GroovyMain#getScriptSource(boolean, String)}.
+     *
+     * @param uriOrFilename a URI (anything matching the URI pattern) or a file name
+     * @return the text content at the location
+     * @throws IOException if the file cannot be read
+     * @throws GroovyRuntimeException if the text cannot be fetched from the URL
+     * @deprecated use {@link GroovyMain#getScriptSource(boolean, String)}
+     */
+    @Deprecated
+    public String getText(String uriOrFilename) throws IOException {
+        if (URI_PATTERN.matcher(uriOrFilename).matches()) {
+            try {
+                return ResourceGroovyMethods.getText(new URL(uriOrFilename));
+            } catch (Exception e) {
+                // include the offending location; the message used to end right after "URL: "
+                throw new GroovyRuntimeException("Unable to get script from URL: " + uriOrFilename, e);
+            }
+        }
+        return ResourceGroovyMethods.getText(huntForTheScriptFile(uriOrFilename));
+    }
+
+    /**
+     * Get a new GroovyCodeSource for a script which may be given as a location
+     * (isScript is true) or as text (isScript is false).
+     *
+     * @param isScriptFile indicates whether the script parameter is a 
location or content
+     * @param script the location or context of the script
+     * @return a new GroovyCodeSource for the given script
+     * @throws IOException
+     * @throws URISyntaxException
+     * @since 2.3.0
+     */
+    protected GroovyCodeSource getScriptSource(boolean isScriptFile, String 
script) throws IOException, URISyntaxException {
+        //check the script is currently valid before starting a server against 
the script
+        if (isScriptFile) {
+            // search for the file and if it exists don't try to use URIs ...
+            File scriptFile = huntForTheScriptFile(script);
+            if (!scriptFile.exists() && URI_PATTERN.matcher(script).matches()) 
{
+                return new GroovyCodeSource(new URI(script));
+            }
+            return new GroovyCodeSource( scriptFile );
+        }
+        return new GroovyCodeSource(script, "script_from_command_line", 
GroovyShell.DEFAULT_CODE_BASE);
+    }
+
+    // RFC2396
+    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
+    // match URIs but not Windows filenames, e.g.: http://cnn.com but not 
C:\xxx\file.ext
+    private static final Pattern URI_PATTERN = 
Pattern.compile("\\p{Alpha}[-+.\\p{Alnum}]*:[^\\\\]*");
+
+    /**
+     * Locate a script file, trying the name as given (trimmed) and then the
+     * name with each standard Groovy extension appended.
+     *
+     * Tries in this order:
+     * - actual supplied name
+     * - name.groovy
+     * - name.gvy
+     * - name.gy
+     * - name.gsh
+     *
+     * @param input the script file name, with or without extension
+     * @return the first candidate that exists, or a File for the supplied
+     *         name if none of the candidates exists
+     * @since 2.3.0
+     */
+    public static File searchForGroovyScriptFile(String input) {
+        final String baseName = input.trim();
+        final File asGiven = new File(baseName);
+        if (asGiven.exists()) {
+            return asGiven;
+        }
+        // TODO: Shouldn't these extensions be kept elsewhere?  What about CompilerConfiguration?
+        // This method probably shouldn't be in GroovyMain either.
+        for (String extension : new String[]{".groovy", ".gvy", ".gy", ".gsh"}) {
+            final File candidate = new File(baseName + extension);
+            if (candidate.exists()) {
+                return candidate;
+            }
+        }
+        // nothing found: point back to the originally specified filename
+        return new File(baseName);
+    }
+
+    /**
+     * Instance-level hook that delegates to the static
+     * {@link GroovyMain#searchForGroovyScriptFile(String)}.
+     *
+     * @param input the script file name, with or without extension
+     * @return the file chosen by the search
+     * @see GroovyMain#searchForGroovyScriptFile(String)
+     */
+    public File huntForTheScriptFile(String input) {
+        final File found = searchForGroovyScriptFile(input);
+        return found;
+    }
+
+    // GROOVY-6771
+    // Makes the shell's class loader the context class loader of the calling
+    // thread, performing the change inside a privileged action.
+    private static void setupContextClassLoader(GroovyShell shell) {
+        final Thread callingThread = Thread.currentThread();
+        final ClassLoader shellLoader = shell.getClassLoader();
+
+        AccessController.doPrivileged(new PrivilegedAction<Object>() {
+            public Object run() {
+                callingThread.setContextClassLoader(shellLoader);
+                return null;
+            }
+        });
+    }
+
+    /**
+     * Process the input files: the script is parsed once and then applied to
+     * each file named in the arguments, or to standard input when there are
+     * no arguments.
+     */
+    private void processFiles() throws CompilationFailedException, IOException, URISyntaxException {
+        GroovyShell groovy = new GroovyShell(conf);
+        setupContextClassLoader(groovy);
+
+        Script compiled = groovy.parse(getScriptSource(isScriptFile, script));
+
+        if (!args.isEmpty()) {
+            for (Object arg : args) {
+                String filename = (String) arg;
+                //TODO: These are the arguments for -p and -i.  Why are we searching using Groovy script extensions?
+                // Where is this documented?
+                processFile(compiled, huntForTheScriptFile(filename));
+            }
+            return;
+        }
+
+        // no file arguments: filter standard input to standard output
+        BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
+        PrintWriter stdout = new PrintWriter(System.out);
+        try {
+            processReader(compiled, stdin, stdout);
+        } finally {
+            stdin.close();
+            stdout.close();
+        }
+    }
+
+    /**
+     * Process a single input file.
+     * <p>
+     * Without in-place editing the file is read and the output goes to
+     * {@code System.out}. With in-place editing the file is first renamed to a
+     * backup (a temporary file when no backup extension was given), then the
+     * backup is read and the result is written to the original file name.
+     *
+     * @param s    the script to execute.
+     * @param file the input file.
+     * @throws FileNotFoundException if the file does not exist.
+     * @throws IOException if the file cannot be renamed to its backup, or on read/write errors.
+     */
+    private void processFile(Script s, File file) throws IOException {
+        if (!file.exists())
+            throw new FileNotFoundException(file.getName());
+
+        if (!editFiles) {
+            BufferedReader reader = new BufferedReader(new FileReader(file));
+            try {
+                PrintWriter writer = new PrintWriter(System.out);
+                try {
+                    processReader(s, reader, writer);
+                } finally {
+                    // flush even when the script throws, so output already
+                    // produced is not left behind in the writer's buffer
+                    writer.flush();
+                }
+            } finally {
+                reader.close();
+            }
+        } else {
+            File backup;
+            if (backupExtension == null) {
+                backup = File.createTempFile("groovy_", ".tmp");
+                backup.deleteOnExit();
+            } else {
+                backup = new File(file.getPath() + backupExtension);
+            }
+            // clear any previous backup so the rename below can take its place
+            backup.delete();
+            if (!file.renameTo(backup))
+                throw new IOException("unable to rename " + file + " to " + backup);
+
+            BufferedReader reader = new BufferedReader(new FileReader(backup));
+            try {
+                PrintWriter writer = new PrintWriter(new FileWriter(file));
+                try {
+                    processReader(s, reader, writer);
+                } finally {
+                    writer.close();
+                }
+            } finally {
+                reader.close();
+            }
+        }
+    }
+
+    /**
+     * Process a script against a single input file.
+     *
+     * @param s      script to execute.
+     * @param reader input file.
+     * @param pw     output sink.
+     */
+    private void processReader(Script s, BufferedReader reader, PrintWriter 
pw) throws IOException {
+        String line;
+        String lineCountName = "count";
+        s.setProperty(lineCountName, BigInteger.ZERO);
+        String autoSplitName = "split";
+        s.setProperty("out", pw);
+
+        try {
+            InvokerHelper.invokeMethod(s, "begin", null);
+        } catch (MissingMethodException mme) {
+            // ignore the missing method exception
+            // as it means no begin() method is present
+        }
+
+        while ((line = reader.readLine()) != null) {
+            s.setProperty("line", line);
+            s.setProperty(lineCountName, 
((BigInteger)s.getProperty(lineCountName)).add(BigInteger.ONE));
+
+            if(autoSplit) {
+                s.setProperty(autoSplitName, line.split(splitPattern));
+            }
+
+            Object o = s.run();
+
+            if (autoOutput && o != null) {
+                pw.println(o);
+            }
+        }
+
+        try {
+            InvokerHelper.invokeMethod(s, "end", null);
+        } catch (MissingMethodException mme) {
+            // ignore the missing method exception
+            // as it means no end() method is present
+        }
+    }
+
+    /**
+     * Process the standard, single script with args.
+     */
+    private void processOnce() throws CompilationFailedException, IOException, 
URISyntaxException {
+        GroovyShell groovy = new GroovyShell(conf);
+        setupContextClassLoader(groovy);
+        groovy.run(getScriptSource(isScriptFile, script), args);
+    }
+}

http://git-wip-us.apache.org/repos/asf/groovy/blob/d638ca43/src/main/groovy/groovy/ui/GroovySocketServer.java
----------------------------------------------------------------------
diff --git a/src/main/groovy/groovy/ui/GroovySocketServer.java 
b/src/main/groovy/groovy/ui/GroovySocketServer.java
new file mode 100644
index 0000000..b0d27c5
--- /dev/null
+++ b/src/main/groovy/groovy/ui/GroovySocketServer.java
@@ -0,0 +1,226 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.ui;
+
+import groovy.lang.GroovyCodeSource;
+import groovy.lang.GroovyRuntimeException;
+import groovy.lang.GroovyShell;
+import groovy.lang.Script;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+import java.net.InetAddress;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.regex.Pattern;
+
+/**
+ * Simple server that executes supplied script against a socket.
+ * <p>
+ * Typically this is used from the groovy command line agent but it can be 
+ * invoked programmatically. To run this program from the command line please
+ * refer to the command line documentation at
+ * <a href="http://docs.groovy-lang.org/docs/latest/html/documentation/#_running_groovy_from_the_commandline">
+ * Running Groovy from the commandline</a>.
+ * <p>
+ * Here is an example of how to use this class to open a listening socket on 
the server, 
+ * listen for incoming data, and then echo the data back to the client in 
reverse order: 
+ * <pre>
+ * new GroovySocketServer(
+ *         new GroovyShell(),      // evaluator
+ *         false,                  // is not a file
+ *         "println line.reverse()",         // script to evaluate
+ *         true,                   // return result to client
+ *         1960)                   //port
+ * </pre>
+ * There are several variables in the script binding:
+ * <ul>
+ * <li>line - The data from the socket</li> 
+ * <li>out - The output PrintWriter, should you need it for some reason.</li> 
+ * <li>socket - The socket, should you need it for some reason.</li> 
+ * </ul>
+ * 
+ * @author Jeremy Rayner
+ */
+public class GroovySocketServer implements Runnable {
+    private URL url;
+    private final GroovyShell groovy;
+    private final GroovyCodeSource source;
+    private final boolean autoOutput;
+    private static int counter;
+
+    /**
+     * Creates the socket server and starts it on a new Thread by delegating to
+     * the {@code GroovyCodeSource} based constructor. There is no need to call
+     * run or spawn a new thread yourself.
+     * @param groovy
+     *       The GroovyShell object that evaluates the incoming text. If you need additional classes in the
+     *       classloader then configure that through this object.
+     * @param isScriptFile
+     *       Whether scriptFilenameOrText is the location of a script (true) or the script text itself (false).
+     * @param scriptFilenameOrText
+     *       This will be a groovy script or a file location depending on the argument isScriptFile.
+     *       A location may also be a URI.
+     * @param autoOutput
+     *       whether output should be automatically echoed back to the client
+     * @param port
+     *       the port to listen on
+     *
+     */
+    public GroovySocketServer(GroovyShell groovy, boolean isScriptFile, String 
scriptFilenameOrText, boolean autoOutput, int port) {
+        this(groovy, getCodeSource(isScriptFile, scriptFilenameOrText), 
autoOutput, port);
+    }
+
+    /**
+     * Builds the code source for the server script.
+     * <p>
+     * For a location, an existing file (as given, or with a standard Groovy
+     * extension appended) wins; only when no such file exists is a value that
+     * looks like a URI loaded as one. Checking the file first keeps paths such
+     * as {@code C:\scripts\echo.groovy}, which also match the URI pattern,
+     * from being misread as URIs.
+     *
+     * @param scriptFile whether scriptFilenameOrText is a location (true) or script text (false)
+     * @param scriptFilenameOrText the script location or the script text
+     * @return the code source for the script
+     * @throws GroovyRuntimeException if the script cannot be loaded from the location
+     */
+    private static GroovyCodeSource getCodeSource(boolean scriptFile, String scriptFilenameOrText) {
+        if (scriptFile) {
+            try {
+                // fully qualified: java.io.File is not among this file's imports
+                java.io.File candidate = GroovyMain.searchForGroovyScriptFile(scriptFilenameOrText);
+                if (!candidate.exists() && URI_PATTERN.matcher(scriptFilenameOrText).matches()) {
+                    return new GroovyCodeSource(new URI(scriptFilenameOrText));
+                }
+                return new GroovyCodeSource(candidate);
+            } catch (IOException e) {
+                throw new GroovyRuntimeException("Unable to get script from: " + scriptFilenameOrText, e);
+            } catch (URISyntaxException e) {
+                throw new GroovyRuntimeException("Unable to get script from URI: " + scriptFilenameOrText, e);
+            }
+        } else {
+            // We could jump through some hoops to have GroovyShell make our script name, but that seems unwarranted.
+            // If we *did* jump through that hoop then we should probably change the run loop to not recompile
+            // the script on every iteration since the script text can't change (the reason for the recompilation).
+            return new GroovyCodeSource(scriptFilenameOrText, generateScriptName(), GroovyShell.DEFAULT_CODE_BASE);
+        }
+    }
+
+    private static synchronized String generateScriptName() {
+        return "ServerSocketScript" + (++counter) + ".groovy";
+    }
+
+
+    // RFC2396
+    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
+    private static final Pattern URI_PATTERN = 
Pattern.compile("\\p{Alpha}[-+.\\p{Alnum}]*:.*");
+
+    /**
+    * This creates and starts the socket server on a new Thread. There is no need to call run or spawn
+    * a new thread yourself.
+    * <p>
+    * The "listening" message is printed here, before the server thread has
+    * opened its socket. If the local host address cannot be resolved the
+    * exception is only printed, {@code url} stays null, and the thread is
+    * still started.
+    * @param groovy
+    *       The GroovyShell object that evaluates the incoming text. If you need additional classes in the
+    *       classloader then configure that through this object.
+    * @param source
+    *       GroovyCodeSource for the Groovy script
+    * @param autoOutput
+    *       whether output should be automatically echoed back to the client
+    * @param port
+    *       the port to listen on
+    * @since 2.3.0
+    */ 
+    public GroovySocketServer(GroovyShell groovy, GroovyCodeSource source, 
boolean autoOutput, int port) {
+        this.groovy = groovy;
+        this.source = source;
+        this.autoOutput = autoOutput;
+        try {
+            // the URL is used by run() as the holder of the port to bind
+            url = new URL("http", InetAddress.getLocalHost().getHostAddress(), 
port, "/");
+            System.out.println("groovy is listening on port " + port);
+        } catch (IOException e) { 
+            e.printStackTrace();
+        }
+        // NOTE(review): starting a thread from the constructor publishes 'this' before construction completes
+        new Thread(this).start();
+    }
+
+    /**
+    * Runs this server. There is typically no need to call this method, as the object's constructor
+    * creates a new thread and runs this object automatically.
+    * <p>
+    * Opens a server socket on the configured port and, for every accepted
+    * connection, parses the script afresh and hands the socket to a new
+    * {@code GroovyClientConnection}. Any exception ends the loop; the server
+    * socket is closed on the way out.
+    */
+    public void run() {
+        if (url == null) {
+            // the constructor could not build the URL (and has already printed why)
+            System.err.println("groovy socket server not started: no listening address available");
+            return;
+        }
+        ServerSocket serverSocket = null;
+        try {
+            serverSocket = new ServerSocket(url.getPort());
+            while (true) {
+                // Create one script per socket connection.
+                // This is purposefully not caching the Script
+                // so that the script source file can be changed on the fly,
+                // as each connection is made to the server.
+                //FIXME: Groovy has other mechanisms specifically for watching to see if source code changes.
+                // We should probably be using that here.
+                // See also the comment about the fact we recompile a script that can't change.
+                Script script = groovy.parse(source);
+                new GroovyClientConnection(script, autoOutput, serverSocket.accept());
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            // release the listening port once the accept loop has ended
+            if (serverSocket != null) {
+                try {
+                    serverSocket.close();
+                } catch (IOException ignored) {
+                    // nothing useful can be done if closing fails
+                }
+            }
+        }
+    }
+    
+    /**
+     * Serves one client socket on its own thread: each line read from the
+     * socket is exposed to the script as {@code line} and the script is run.
+     * The script also sees {@code out}, {@code socket} and {@code init}
+     * (true only for the first run). A result equal to "success" ends the
+     * conversation; other non-null results are echoed when auto-output is on.
+     */
+    static class GroovyClientConnection implements Runnable {
+        private final Script script;
+        private final Socket socket;
+        private final BufferedReader reader;
+        private final PrintWriter writer;
+        private final boolean autoOutputFlag;
+
+        GroovyClientConnection(Script script, boolean autoOutput, Socket socket) throws IOException {
+            this.script = script;
+            this.autoOutputFlag = autoOutput;
+            this.socket = socket;
+            this.reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));
+            this.writer = new PrintWriter(socket.getOutputStream());
+            final String threadName = "Groovy client connection - " + socket.getInetAddress().getHostAddress();
+            new Thread(this, threadName).start();
+        }
+
+        public void run() {
+            try {
+                script.setProperty("out", writer);
+                script.setProperty("socket", socket);
+                script.setProperty("init", Boolean.TRUE);
+                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+                    script.setProperty("line", line);
+                    final Object result = script.run();
+                    script.setProperty("init", Boolean.FALSE);
+                    if ("success".equals(result)) {
+                        break; // to close sockets gracefully etc...
+                    }
+                    if (result != null && autoOutputFlag) {
+                        writer.println(result);
+                    }
+                    writer.flush();
+                }
+            } catch (IOException e) {
+                e.printStackTrace();
+            } finally {
+                try {
+                    writer.flush();
+                    writer.close();
+                } finally {
+                    try {
+                        socket.close();
+                    } catch (IOException closeFailure) {
+                        closeFailure.printStackTrace();
+                    }
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/groovy/blob/d638ca43/src/main/groovy/groovy/util/AbstractFactory.java
----------------------------------------------------------------------
diff --git a/src/main/groovy/groovy/util/AbstractFactory.java 
b/src/main/groovy/groovy/util/AbstractFactory.java
new file mode 100644
index 0000000..54e68e1
--- /dev/null
+++ b/src/main/groovy/groovy/util/AbstractFactory.java
@@ -0,0 +1,63 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.util;
+
+import groovy.lang.Closure;
+
+import java.util.Map;
+
+/**
+ * @author <a href="mailto:[email protected]">Andres Almiray</a>
+ * @author Danno Ferrin
+ */
+public abstract class AbstractFactory implements Factory {
+    /**
+     * Default implementation; subclasses override it to answer differently.
+     *
+     * @return always {@code false}
+     */
+    public boolean isLeaf() {
+        return false;
+    }
+
+    /**
+     * Default implementation; subclasses override it to answer differently.
+     *
+     * @return always {@code false}
+     */
+    public boolean isHandlesNodeChildren() {
+        return false;
+    }
+
+    /**
+     * Registration callback; the default implementation ignores all arguments.
+     */
+    public void onFactoryRegistration(FactoryBuilderSupport builder, String 
registeredName, String group) {
+        // do nothing
+    }
+
+    /**
+     * Attribute-handling callback; the default implementation inspects nothing.
+     *
+     * @return always {@code true}
+     */
+    public boolean onHandleNodeAttributes( FactoryBuilderSupport builder, 
Object node,
+            Map attributes ) {
+        return true;
+    }
+
+    /**
+     * Child-content callback; the default implementation inspects nothing.
+     *
+     * @return always {@code true}
+     */
+    public boolean onNodeChildren( FactoryBuilderSupport builder, Object node, 
Closure childContent) {
+        return true;
+    }
+
+    /**
+     * Node-completion callback; the default implementation is a no-op.
+     */
+    public void onNodeCompleted( FactoryBuilderSupport builder, Object parent, 
Object node ) {
+        // do nothing
+    }
+
+    /**
+     * Parent-wiring callback; the default implementation is a no-op.
+     */
+    public void setParent( FactoryBuilderSupport builder, Object parent, 
Object child ) {
+        // do nothing
+    }
+
+    /**
+     * Child-wiring callback; the default implementation is a no-op.
+     */
+    public void setChild( FactoryBuilderSupport builder, Object parent, Object 
child ) {
+        // do nothing
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/groovy/blob/d638ca43/src/main/groovy/groovy/util/BufferedIterator.java
----------------------------------------------------------------------
diff --git a/src/main/groovy/groovy/util/BufferedIterator.java 
b/src/main/groovy/groovy/util/BufferedIterator.java
new file mode 100644
index 0000000..6fa50a9
--- /dev/null
+++ b/src/main/groovy/groovy/util/BufferedIterator.java
@@ -0,0 +1,31 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.util;
+
+import java.util.Iterator;
+
+/**
+ * An iterator that allows examining the next element without consuming it.
+ *
+ * @author Andrew Taylor
+ * @since 2.5.0
+ */
+public interface BufferedIterator<T> extends Iterator<T> {
+    /**
+     * Examines the next element without consuming it.
+     *
+     * @return the upcoming element; behaviour on an exhausted iterator is
+     *         left to the implementation
+     */
+    T head();
+}

http://git-wip-us.apache.org/repos/asf/groovy/blob/d638ca43/src/main/groovy/groovy/util/BuilderSupport.java
----------------------------------------------------------------------
diff --git a/src/main/groovy/groovy/util/BuilderSupport.java 
b/src/main/groovy/groovy/util/BuilderSupport.java
new file mode 100644
index 0000000..f634f1f
--- /dev/null
+++ b/src/main/groovy/groovy/util/BuilderSupport.java
@@ -0,0 +1,228 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.util;
+
+import groovy.lang.Closure;
+import groovy.lang.GroovyObjectSupport;
+import groovy.lang.GroovyRuntimeException;
+import groovy.lang.MissingMethodException;
+import org.codehaus.groovy.runtime.InvokerHelper;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An abstract base class for creating arbitrary nested trees of objects
+ * or events
+ *
+ * @author <a href="mailto:[email protected]">James Strachan</a>
+ */
+public abstract class BuilderSupport extends GroovyObjectSupport {
+
+    /** Node whose closure is currently executing; {@code null} until one is set. */
+    private Object current;
+    /** Optional translation from method names to node names; may be {@code null}. */
+    private Closure nameMappingClosure;
+    /** Builder that actually creates and wires nodes; {@code this} by default. */
+    private final BuilderSupport proxyBuilder;
+
+    /**
+     * Creates a builder that acts as its own proxy and applies no name mapping.
+     */
+    public BuilderSupport() {
+        this.proxyBuilder = this;
+    }
+
+    /**
+     * Creates a builder that delegates node creation to another builder.
+     *
+     * @param proxyBuilder the builder receiving the createNode/setParent/
+     *                     nodeCompleted calls
+     */
+    public BuilderSupport(BuilderSupport proxyBuilder) {
+        this(null, proxyBuilder);
+    }
+
+    /**
+     * Creates a builder with a name mapping and a proxy builder.
+     *
+     * @param nameMappingClosure closure converting a method name into the node
+     *                           name, or {@code null} to use the method name as is
+     * @param proxyBuilder       the builder receiving the createNode/setParent/
+     *                           nodeCompleted calls
+     */
+    public BuilderSupport(Closure nameMappingClosure, BuilderSupport proxyBuilder) {
+        this.nameMappingClosure = nameMappingClosure;
+        this.proxyBuilder = proxyBuilder;
+    }
+
+    /**
+     * Convenience method when no arguments are required
+     *
+     * @param methodName the name of the method to invoke
+     * @return the result of the call
+     */
+    public Object invokeMethod(String methodName) {
+        return invokeMethod(methodName, null);
+    }
+
+    /**
+     * Maps the method name through {@link #getName(String)} and builds the
+     * corresponding node.
+     *
+     * @param methodName the name of the invoked method
+     * @param args       the call arguments, converted to a list internally
+     * @return the node returned by {@link #postNodeCompletion(Object, Object)}
+     */
+    public Object invokeMethod(String methodName, Object args) {
+        Object name = getName(methodName);
+        return doInvokeMethod(methodName, name, args);
+    }
+
+    /**
+     * Creates a node from the call arguments, attaches it to the current
+     * node, runs the optional trailing closure with the new node as current
+     * node, and finally fires the completion hooks.
+     * <p>
+     * Accepted argument shapes:
+     * <ul>
+     * <li>none</li>
+     * <li>one of: a Map of attributes, a Closure, or a value</li>
+     * <li>(Map, Closure), (Map, value), (value, Closure) or (value, Map)</li>
+     * <li>(Map, value, Closure) or (value, Map, Closure)</li>
+     * </ul>
+     *
+     * @param methodName the name of the invoked method
+     * @param name       the node name derived from {@code methodName}
+     * @param args       the call arguments
+     * @return the node returned by {@link #postNodeCompletion(Object, Object)}
+     * @throws MissingMethodException if the arguments match none of the shapes above
+     * @throws GroovyRuntimeException wrapping any exception thrown by the closure
+     */
+    protected Object doInvokeMethod(String methodName, Object name, Object args) {
+        Object node = null;
+        Closure closure = null;
+        List list = InvokerHelper.asList(args);
+
+        switch (list.size()) {
+            case 0:
+                node = proxyBuilder.createNode(name);
+                break;
+            case 1: {
+                Object object = list.get(0);
+                if (object instanceof Map) {
+                    node = proxyBuilder.createNode(name, (Map) object);
+                } else if (object instanceof Closure) {
+                    closure = (Closure) object;
+                    node = proxyBuilder.createNode(name);
+                } else {
+                    node = proxyBuilder.createNode(name, object);
+                }
+            }
+            break;
+            case 2: {
+                Object object1 = list.get(0);
+                Object object2 = list.get(1);
+                if (object1 instanceof Map) {
+                    if (object2 instanceof Closure) {
+                        closure = (Closure) object2;
+                        node = proxyBuilder.createNode(name, (Map) object1);
+                    } else {
+                        node = proxyBuilder.createNode(name, (Map) object1, object2);
+                    }
+                } else {
+                    if (object2 instanceof Closure) {
+                        closure = (Closure) object2;
+                        node = proxyBuilder.createNode(name, object1);
+                    } else if (object2 instanceof Map) {
+                        node = proxyBuilder.createNode(name, (Map) object2, object1);
+                    } else {
+                        throw new MissingMethodException(name.toString(), getClass(), list.toArray(), false);
+                    }
+                }
+            }
+            break;
+            case 3: {
+                Object arg0 = list.get(0);
+                Object arg1 = list.get(1);
+                Object arg2 = list.get(2);
+                if (arg0 instanceof Map && arg2 instanceof Closure) {
+                    closure = (Closure) arg2;
+                    node = proxyBuilder.createNode(name, (Map) arg0, arg1);
+                } else if (arg1 instanceof Map && arg2 instanceof Closure) {
+                    closure = (Closure) arg2;
+                    node = proxyBuilder.createNode(name, (Map) arg1, arg0);
+                } else {
+                    throw new MissingMethodException(name.toString(), getClass(), list.toArray(), false);
+                }
+            }
+            break;
+            default: {
+                throw new MissingMethodException(name.toString(), getClass(), list.toArray(), false);
+            }
+
+        }
+
+        if (current != null) {
+            proxyBuilder.setParent(current, node);
+        }
+
+        if (closure != null) {
+            // push new node on stack
+            Object oldCurrent = getCurrent();
+            setCurrent(node);
+            // let's register the builder as the delegate
+            setClosureDelegate(closure, node);
+            try {
+                closure.call();
+            } catch (Exception e) {
+                throw new GroovyRuntimeException(e);
+            } finally {
+                // pop the node even when the closure fails, otherwise later
+                // nodes would be attached to the stale node
+                setCurrent(oldCurrent);
+            }
+        }
+
+        proxyBuilder.nodeCompleted(current, node);
+        return proxyBuilder.postNodeCompletion(current, node);
+    }
+
+    /**
+     * A strategy method to allow derived builders to use
+     * builder-trees and switch in different kinds of builders.
+     * This method should call the setDelegate() method on the closure
+     * which by default passes in this but if node is-a builder
+     * we could pass that in instead (or do something wacky too)
+     *
+     * @param closure the closure on which to call setDelegate()
+     * @param node    the node value that we've just created, which could be
+     *                a builder
+     */
+    protected void setClosureDelegate(Closure closure, Object node) {
+        closure.setDelegate(this);
+    }
+
+    /**
+     * Attaches a freshly created node to the node that was current when it
+     * was created.
+     *
+     * @param parent the current node at creation time
+     * @param child  the newly created node
+     */
+    protected abstract void setParent(Object parent, Object child);
+
+    /**
+     * Creates a node from a name only.
+     *
+     * @param name the node name
+     * @return the new node
+     */
+    protected abstract Object createNode(Object name);
+
+    /**
+     * Creates a node from a name and a value.
+     *
+     * @param name  the node name
+     * @param value the single non-Map, non-Closure argument of the call
+     * @return the new node
+     */
+    protected abstract Object createNode(Object name, Object value);
+
+    /**
+     * Creates a node from a name and attributes.
+     *
+     * @param name       the node name
+     * @param attributes the Map argument of the call
+     * @return the new node
+     */
+    protected abstract Object createNode(Object name, Map attributes);
+
+    /**
+     * Creates a node from a name, attributes and a value.
+     *
+     * @param name       the node name
+     * @param attributes the Map argument of the call
+     * @param value      the remaining non-Closure argument of the call
+     * @return the new node
+     */
+    protected abstract Object createNode(Object name, Map attributes, Object value);
+
+    /**
+     * A hook to allow names to be converted into some other object
+     * such as a QName in XML or ObjectName in JMX.
+     *
+     * @param methodName the name of the desired method
+     * @return the object representing the name
+     */
+    protected Object getName(String methodName) {
+        if (nameMappingClosure != null) {
+            return nameMappingClosure.call(methodName);
+        }
+        return methodName;
+    }
+
+
+    /**
+     * A hook to allow nodes to be processed once they have had all of their
+     * children applied.
+     *
+     * @param parent the parent of the node being processed
+     * @param node   the current node being processed
+     */
+    protected void nodeCompleted(Object parent, Object node) {
+    }
+
+    /**
+     * A hook to allow nodes to be processed once they have had all of their
+     * children applied and allows the actual node object that represents
+     * the Markup element to be changed
+     *
+     * @param parent the parent of the node being processed
+     * @param node   the current node being processed
+     * @return the node, possibly new, that represents the markup element
+     */
+    protected Object postNodeCompletion(Object parent, Object node) {
+        return node;
+    }
+
+    /**
+     * @return the node whose closure is currently executing, or {@code null}
+     *         if none has been set
+     */
+    protected Object getCurrent() {
+        return current;
+    }
+
+    /**
+     * @param current the node to treat as the parent of subsequently created nodes
+     */
+    protected void setCurrent(Object current) {
+        this.current = current;
+    }
+}

http://git-wip-us.apache.org/repos/asf/groovy/blob/d638ca43/src/main/groovy/groovy/util/CharsetToolkit.java
----------------------------------------------------------------------
diff --git a/src/main/groovy/groovy/util/CharsetToolkit.java 
b/src/main/groovy/groovy/util/CharsetToolkit.java
new file mode 100644
index 0000000..e127459
--- /dev/null
+++ b/src/main/groovy/groovy/util/CharsetToolkit.java
@@ -0,0 +1,419 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.nio.charset.Charset;
+import java.util.Collection;
+
+/**
+ * Utility class to guess the encoding of a given text file.
+ * <p>
+ * Unicode files encoded in UTF-16 (little or big endian) or UTF-8 files
+ * with a Byte Order Marker are correctly discovered. For UTF-8 files with no
+ * BOM, if the buffer is wide enough, the charset should also be discovered.
+ * <p>
+ * A byte buffer of 4KB is used to be able to guess the encoding.
+ * <p>
+ * Usage:
+ * <pre>
+ * CharsetToolkit toolkit = new CharsetToolkit(file);
+ *
+ * // guess the encoding
+ * Charset guessedCharset = toolkit.getCharset();
+ *
+ * // create a reader with the correct charset
+ * BufferedReader reader = toolkit.getReader();
+ *
+ * // read the file content
+ * String line;
+ * while ((line = reader.readLine()) != null)
+ * {
+ *     System.out.println(line);
+ * }
+ * </pre>
+ *
+ * @author Guillaume Laforge
+ */
+public class CharsetToolkit {
+    /** Maximum number of leading bytes of the file that are sampled. */
+    private static final int SAMPLE_SIZE = 4096;
+    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+
+    /** The sampled leading bytes of the file (at most {@link #SAMPLE_SIZE}). */
+    private final byte[] buffer;
+    private Charset defaultCharset;
+    /** Lazily computed guess; {@code null} until {@link #getCharset()} is called. */
+    private Charset charset;
+    private boolean enforce8Bit = true;
+    private final File file;
+
+    /**
+     * Constructor of the <code>CharsetToolkit</code> utility class.
+     * Reads up to the first 4KB of the file; the guess itself is made lazily.
+     *
+     * @param file of which we want to know the encoding.
+     * @throws IOException if the file cannot be opened or read.
+     */
+    public CharsetToolkit(File file) throws IOException {
+        this.file = file;
+        this.defaultCharset = getDefaultSystemCharset();
+        this.charset = null;
+        InputStream input = new FileInputStream(file);
+        try {
+            byte[] bytes = new byte[SAMPLE_SIZE];
+            // a single read() may return fewer bytes than requested, so keep
+            // reading until the sample is full or the end of file is reached
+            int total = 0;
+            while (total < SAMPLE_SIZE) {
+                int bytesRead = input.read(bytes, total, SAMPLE_SIZE - total);
+                if (bytesRead == -1) {
+                    break;
+                }
+                total += bytesRead;
+            }
+            if (total == 0) {
+                this.buffer = EMPTY_BYTE_ARRAY;
+            }
+            else if (total < SAMPLE_SIZE) {
+                byte[] bytesToGuess = new byte[total];
+                System.arraycopy(bytes, 0, bytesToGuess, 0, total);
+                this.buffer = bytesToGuess;
+            }
+            else {
+                this.buffer = bytes;
+            }
+        } finally {
+            try {input.close();} catch (IOException e){
+                // IGNORE: the sample has been read, a failing close is harmless
+            }
+        }
+    }
+
+    /**
+     * Defines the default <code>Charset</code> used in case the buffer represents
+     * an 8-bit <code>Charset</code>.
+     *
+     * @param defaultCharset the default <code>Charset</code> to be returned
+     * if an 8-bit <code>Charset</code> is encountered; <code>null</code> resets
+     * it to the default system charset.
+     */
+    public void setDefaultCharset(Charset defaultCharset) {
+        if (defaultCharset != null)
+            this.defaultCharset = defaultCharset;
+        else
+            this.defaultCharset = getDefaultSystemCharset();
+    }
+
+    /**
+     * Returns the guessed charset. The guess is computed on the first call and
+     * cached, so later changes of the default charset or of the enforce8Bit
+     * flag do not affect it.
+     *
+     * @return the guessed <code>Charset</code>.
+     */
+    public Charset getCharset() {
+        if (this.charset == null)
+            this.charset = guessEncoding();
+        return charset;
+    }
+
+    /**
+     * If US-ASCII is recognized, enforce to return the default encoding, rather than US-ASCII.
+     * It might be a file without any special character in the range 128-255, but that may be or become
+     * a file encoded with the default <code>charset</code> rather than US-ASCII.
+     *
+     * @param enforce a boolean specifying the use or not of US-ASCII.
+     */
+    public void setEnforce8Bit(boolean enforce) {
+        this.enforce8Bit = enforce;
+    }
+
+    /**
+     * Gets the enforce8Bit flag, in case we do not want to ever get a US-ASCII encoding.
+     *
+     * @return a boolean representing the flag of use of US-ASCII.
+     */
+    public boolean getEnforce8Bit() {
+        return this.enforce8Bit;
+    }
+
+    /**
+     * Retrieves the default Charset
+     *
+     * @return the <code>Charset</code> returned for 8-bit content.
+     */
+    public Charset getDefaultCharset() {
+        return defaultCharset;
+    }
+
+    /**
+     * Guess the encoding of the provided buffer.
+     * If Byte Order Markers are encountered at the beginning of the buffer, we immediately
+     * return the charset implied by this BOM. Otherwise, the file would not be a human
+     * readable text file.
+     * <p>
+     * If there is no BOM, this method tries to discern whether the file is UTF-8 or not.
+     * If it is not UTF-8, we assume the encoding is the default system encoding
+     * (of course, it might be any 8-bit charset, but usually, an 8-bit charset is the default one).
+     * <p>
+     * It is possible to discern UTF-8 thanks to the pattern of characters with a multi-byte sequence.
+     * <pre>
+     * UCS-4 range (hex.)        UTF-8 octet sequence (binary)
+     * 0000 0000-0000 007F       0xxxxxxx
+     * 0000 0080-0000 07FF       110xxxxx 10xxxxxx
+     * 0000 0800-0000 FFFF       1110xxxx 10xxxxxx 10xxxxxx
+     * 0001 0000-001F FFFF       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+     * 0020 0000-03FF FFFF       111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+     * 0400 0000-7FFF FFFF       1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+     * </pre>
+     * With UTF-8, 0xFE and 0xFF never appear.
+     * <p>
+     * The whole buffer is inspected. A multi-byte sequence that is cut off by the
+     * end of the buffer is tolerated only when the buffer is full, i.e. when the
+     * file may continue beyond the sample.
+     *
+     * @return the Charset recognized.
+     */
+    private Charset guessEncoding() {
+        // if the file has a Byte Order Marker, we can assume the file is in UTF-xx
+        // otherwise, the file would not be human readable
+        if (hasUTF8Bom())
+            return Charset.forName("UTF-8");
+        if (hasUTF16LEBom())
+            return Charset.forName("UTF-16LE");
+        if (hasUTF16BEBom())
+            return Charset.forName("UTF-16BE");
+
+        // if a byte has its most significant bit set, the file is in UTF-8 or in the default encoding
+        // otherwise, the file is in US-ASCII
+        boolean highOrderBit = false;
+
+        // if the file is in UTF-8, high order bytes must have a certain value, in order to be valid
+        // if it's not the case, we can assume the encoding is the default encoding of the system
+        boolean validU8Char = true;
+
+        int length = buffer.length;
+        int i = 0;
+        while (validU8Char && i < length) {
+            byte lead = buffer[i];
+            if (lead < 0) {
+                // a high order bit was encountered, thus the encoding is not US-ASCII
+                // it may be either an 8-bit encoding or UTF-8
+                highOrderBit = true;
+                int expected = continuationBytesAfter(lead);
+                if (expected < 0) {
+                    // stray continuation byte, 0xFE or 0xFF: not a valid UTF-8 construct
+                    validU8Char = false;
+                } else {
+                    int available = Math.min(expected, length - i - 1);
+                    if (available < expected && length < SAMPLE_SIZE) {
+                        // the whole file was sampled and it ends in the middle of a sequence
+                        validU8Char = false;
+                    }
+                    // every following byte must be a continuation byte of the form 10xxxxxx
+                    for (int k = 1; validU8Char && k <= available; k++) {
+                        validU8Char = isContinuationChar(buffer[i + k]);
+                    }
+                    i += available;
+                }
+            }
+            i++;
+        }
+        // if no byte with an high order bit set, the encoding is US-ASCII
+        // (it might have been UTF-7, but this encoding is usually internally used only by mail systems)
+        if (!highOrderBit) {
+            // returns the default charset rather than US-ASCII if the enforce8Bit flag is set.
+            if (this.enforce8Bit)
+                return this.defaultCharset;
+            else
+                return Charset.forName("US-ASCII");
+        }
+        // if no invalid UTF-8 were encountered, we can assume the encoding is UTF-8,
+        // otherwise the file would not be human readable
+        if (validU8Char)
+            return Charset.forName("UTF-8");
+        // finally, if it's not UTF-8 nor US-ASCII, let's assume the encoding is the default encoding
+        return this.defaultCharset;
+    }
+
+    /**
+     * Number of continuation bytes announced by the first byte of a multi-byte sequence.
+     *
+     * @param lead a byte with its high order bit set.
+     * @return 1 to 5, or -1 if the byte cannot start a sequence.
+     */
+    private static int continuationBytesAfter(byte lead) {
+        if (isTwoBytesSequence(lead))
+            return 1;
+        if (isThreeBytesSequence(lead))
+            return 2;
+        if (isFourBytesSequence(lead))
+            return 3;
+        if (isFiveBytesSequence(lead))
+            return 4;
+        if (isSixBytesSequence(lead))
+            return 5;
+        return -1;
+    }
+
+    /**
+     * If the byte has the form 10xxxxxx, then it's a continuation byte of a multiple byte character;
+     *
+     * @param b a byte.
+     * @return true if it's a continuation char.
+     */
+    private static boolean isContinuationChar(byte b) {
+        return -128 <= b && b <= -65;
+    }
+
+    /**
+     * If the byte has the form 110xxxxx, then it's the first byte of a two-bytes sequence character.
+     *
+     * @param b a byte.
+     * @return true if it's the first byte of a two-bytes sequence.
+     */
+    private static boolean isTwoBytesSequence(byte b) {
+        return -64 <= b && b <= -33;
+    }
+
+    /**
+     * If the byte has the form 1110xxxx, then it's the first byte of a three-bytes sequence character.
+     *
+     * @param b a byte.
+     * @return true if it's the first byte of a three-bytes sequence.
+     */
+    private static boolean isThreeBytesSequence(byte b) {
+        return -32 <= b && b <= -17;
+    }
+
+    /**
+     * If the byte has the form 11110xxx, then it's the first byte of a four-bytes sequence character.
+     *
+     * @param b a byte.
+     * @return true if it's the first byte of a four-bytes sequence.
+     */
+    private static boolean isFourBytesSequence(byte b) {
+        return -16 <= b && b <= -9;
+    }
+
+    /**
+     * If the byte has the form 111110xx, then it's the first byte of a five-bytes sequence character.
+     *
+     * @param b a byte.
+     * @return true if it's the first byte of a five-bytes sequence.
+     */
+    private static boolean isFiveBytesSequence(byte b) {
+        return -8 <= b && b <= -5;
+    }
+
+    /**
+     * If the byte has the form 1111110x, then it's the first byte of a six-bytes sequence character.
+     *
+     * @param b a byte.
+     * @return true if it's the first byte of a six-bytes sequence.
+     */
+    private static boolean isSixBytesSequence(byte b) {
+        return -4 <= b && b <= -3;
+    }
+
+    /**
+     * Retrieve the default charset of the system, as named by the
+     * <code>file.encoding</code> system property.
+     *
+     * @return the default <code>Charset</code>.
+     */
+    public static Charset getDefaultSystemCharset() {
+        return Charset.forName(System.getProperty("file.encoding"));
+    }
+
+    /**
+     * Has a Byte Order Marker for UTF-8 (Used by Microsoft's Notepad and other editors).
+     *
+     * @return true if the buffer has a BOM for UTF8.
+     */
+    public boolean hasUTF8Bom() {
+        if (buffer.length >= 3)
+            return (buffer[0] == -17 && buffer[1] == -69 && buffer[2] == -65);
+        else
+            return false;
+    }
+
+    /**
+     * Has a Byte Order Marker for UTF-16 Little Endian
+     * (ucs-2le, ucs-4le, and ucs-16le).
+     *
+     * @return true if the buffer has a BOM for UTF-16 Little Endian.
+     */
+    public boolean hasUTF16LEBom() {
+        if (buffer.length >= 2)
+            return (buffer[0] == -1 && buffer[1] == -2);
+        else
+            return false;
+    }
+
+    /**
+     * Has a Byte Order Marker for UTF-16 Big Endian
+     * (utf-16 and ucs-2).
+     *
+     * @return true if the buffer has a BOM for UTF-16 Big Endian.
+     */
+    public boolean hasUTF16BEBom() {
+        if (buffer.length >= 2)
+            return (buffer[0] == -2 && buffer[1] == -1);
+        else
+            return false;
+    }
+
+    /**
+     * Gets a <code>BufferedReader</code> (indeed a <code>LineNumberReader</code>) from the <code>File</code>
+     * specified in the constructor of <code>CharsetToolkit</code> using the charset discovered or the default
+     * charset if an 8-bit <code>Charset</code> is encountered.
+     * When the file starts with a BOM, the BOM character is consumed so that the
+     * caller only sees the actual content.
+     *
+     * @return a <code>BufferedReader</code>
+     * @throws FileNotFoundException if the file is not found.
+     */
+    public BufferedReader getReader() throws FileNotFoundException {
+        LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), getCharset()));
+        if (hasUTF8Bom() || hasUTF16LEBom() || hasUTF16BEBom()) {
+            try {
+                // skip the single character the BOM decodes to
+                reader.read();
+            }
+            catch (IOException e) {
+                // should never happen, as a file with no content
+                // but with a BOM has at least one char
+            }
+        }
+        return reader;
+    }
+
+    /**
+     * Retrieves all the available <code>Charset</code>s on the platform,
+     * among which the default <code>charset</code>.
+     *
+     * @return an array of <code>Charset</code>s.
+     */
+    public static Charset[] getAvailableCharsets() {
+        Collection<Charset> collection = Charset.availableCharsets().values();
+        return collection.toArray(new Charset[collection.size()]);
+    }
+}

Reply via email to