Author: grossws
Date: Wed Sep 30 16:38:24 2015
New Revision: 1706073

URL: http://svn.apache.org/viewvc?rev=1706073&view=rev
Log:
Reformat to avoid tabs and use JUL for logging

Related to #TIKA-1752

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NNExampleModelDetector.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/TrainedModelDetector.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NNExampleModelDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NNExampleModelDetector.java?rev=1706073&r1=1706072&r2=1706073&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NNExampleModelDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/NNExampleModelDetector.java Wed Sep 30 16:38:24 2015
@@ -24,136 +24,137 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.URL;
 import java.nio.file.Path;
+import java.util.Objects;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import org.apache.tika.mime.MediaType;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 public class NNExampleModelDetector extends TrainedModelDetector {
-       private static final String EXAMPLE_NNMODEL_FILE = 
"tika-example.nnmodel";
+    private static final String EXAMPLE_NNMODEL_FILE = "tika-example.nnmodel";
 
-       private static final long serialVersionUID = 1L;
+    private static final long serialVersionUID = 1L;
 
-       public NNExampleModelDetector() {
-               super();
-       }
-
-       public NNExampleModelDetector(final Path modelFile) {
-               loadDefaultModels(modelFile);
-       }
-
-       public NNExampleModelDetector(final File modelFile) {
-               loadDefaultModels(modelFile);
-       }
-
-       @Override
-       public void loadDefaultModels(InputStream modelStream) {
-           BufferedReader bReader =
-                new BufferedReader(new InputStreamReader(modelStream, UTF_8));
-
-               NNTrainedModelBuilder nnBuilder = new NNTrainedModelBuilder();
-               String line;
-               try {
-                       while ((line = bReader.readLine()) != null) {
-                               line = line.trim();
-                               if (line.startsWith("#")) {
-                                       readDescription(nnBuilder, line);
-                               } else {
-                                       readNNParams(nnBuilder, line);
-                                       // add this model into map of trained 
models.
-                                       
super.registerModels(nnBuilder.getType(), nnBuilder.build());
-                               }
-
-                       }
-               } catch (IOException e) {
-                       throw new RuntimeException(
-                                       "Unable to read the default media type 
registry", e);
-               }
-       }
-
-       /**
-        * this method gets overwritten to register load neural network models
-        */
-       @Override
-       public void loadDefaultModels(ClassLoader classLoader) {
-               if (classLoader == null) {
-                       classLoader = 
TrainedModelDetector.class.getClassLoader();
-               }
-
-               // This allows us to replicate class.getResource() when using
-               // the classloader directly
-               String classPrefix = 
TrainedModelDetector.class.getPackage().getName()
-                               .replace('.', '/')
-                               + "/";
-
-               // Get the core URL, and all the extensions URLs
-               URL modelURL = classLoader.getResource(classPrefix
-                               + EXAMPLE_NNMODEL_FILE);
-               try (InputStream stream = modelURL.openStream()) {
-                       loadDefaultModels(stream);
-               } catch (IOException e) {
-                       throw new RuntimeException(
-                                       "Unable to read the default media type 
registry", e);
-               }
-
-       }
-
-       /**
-        * read the comments where the model configuration is written, e.g the
-        * number of inputs, hiddens and output please ensure the first char in 
the
-        * given string is # In this example grb model file, there are 4 
elements 1)
-        * type 2) number of input units 3) number of hidden units. 4) number of
-        * output units.
-        * 
-        */
-       private void readDescription(final NNTrainedModelBuilder builder,
-                       final String line) {
-               int numInputs;
-               int numHidden;
-               int numOutputs;
-               String[] sarr = line.split("\t");
-
-               try {
-                       MediaType type = MediaType.parse(sarr[1]);
-                       numInputs = Integer.parseInt(sarr[2]);
-                       numHidden = Integer.parseInt(sarr[3]);
-                       numOutputs = Integer.parseInt(sarr[4]);
-                       builder.setNumOfInputs(numInputs);
-                       builder.setNumOfHidden(numHidden);
-                       builder.setNumOfOutputs(numOutputs);
-                       builder.setType(type);
-               } catch (Exception e) {
-                       e.printStackTrace();
-                       throw new RuntimeException(
-                                       "Unable to parse the model 
configuration", e);
-               }
-       }
-
-       /**
-        * Read the next line for the model parameters and populate the build 
which
-        * later will be used to instantiate the instance of TrainedModel
-        * 
-        * @param builder
-        * @param line
-        */
-       private void readNNParams(final NNTrainedModelBuilder builder,
-                       final String line) {
-               String[] sarr = line.split("\t");
-               int n = sarr.length;
-               float[] params = new float[n];
-               try {
-                       int i = 0;
-                       for (String fstr : sarr) {
-                               params[i] = Float.parseFloat(fstr);
-                               i++;
-                       }
-                       builder.setParams(params);
-               } catch (Exception e) {
-                       e.printStackTrace();
-                       throw new RuntimeException(
-                                       "Unable to parse the model 
configuration", e);
-               }
-
-       }
+    private static final Logger log = 
Logger.getLogger(NNExampleModelDetector.class.getName());
 
+    public NNExampleModelDetector() {
+        super();
+    }
+
+    public NNExampleModelDetector(final Path modelFile) {
+        loadDefaultModels(modelFile);
+    }
+
+    public NNExampleModelDetector(final File modelFile) {
+        loadDefaultModels(modelFile);
+    }
+
+    @Override
+    public void loadDefaultModels(InputStream modelStream) {
+        BufferedReader bReader = new BufferedReader(new 
InputStreamReader(modelStream, UTF_8));
+
+        NNTrainedModelBuilder nnBuilder = new NNTrainedModelBuilder();
+        String line;
+        try {
+            while ((line = bReader.readLine()) != null) {
+                line = line.trim();
+                if (line.startsWith("#")) {
+                    readDescription(nnBuilder, line);
+                } else {
+                    readNNParams(nnBuilder, line);
+                    // add this model into map of trained models.
+                    super.registerModels(nnBuilder.getType(), 
nnBuilder.build());
+                }
+
+            }
+        } catch (IOException e) {
+            throw new RuntimeException("Unable to read the default media type 
registry", e);
+        }
+    }
+
+    /**
+     * this method gets overwritten to register load neural network models
+     */
+    @Override
+    public void loadDefaultModels(ClassLoader classLoader) {
+        if (classLoader == null) {
+            classLoader = TrainedModelDetector.class.getClassLoader();
+        }
+
+        // This allows us to replicate class.getResource() when using
+        // the classloader directly
+        String classPrefix = TrainedModelDetector.class.getPackage().getName()
+                .replace('.', '/')
+                + "/";
+
+        // Get the core URL, and all the extensions URLs
+        URL modelURL = classLoader.getResource(classPrefix + 
EXAMPLE_NNMODEL_FILE);
+        Objects.requireNonNull(modelURL, "required resource " + classPrefix + 
EXAMPLE_NNMODEL_FILE + " not found");
+        try (InputStream stream = modelURL.openStream()) {
+            loadDefaultModels(stream);
+        } catch (IOException e) {
+            throw new RuntimeException("Unable to read the default media type 
registry", e);
+        }
+
+    }
+
+    /**
+     * read the comments where the model configuration is written, e.g the
+     * number of inputs, hiddens and output please ensure the first char in the
+     * given string is # In this example grb model file, there are 4 elements 
1)
+     * type 2) number of input units 3) number of hidden units. 4) number of
+     * output units.
+     */
+    private void readDescription(final NNTrainedModelBuilder builder,
+                                 final String line) {
+        int numInputs;
+        int numHidden;
+        int numOutputs;
+        String[] sarr = line.split("\t");
+
+        try {
+            MediaType type = MediaType.parse(sarr[1]);
+            numInputs = Integer.parseInt(sarr[2]);
+            numHidden = Integer.parseInt(sarr[3]);
+            numOutputs = Integer.parseInt(sarr[4]);
+            builder.setNumOfInputs(numInputs);
+            builder.setNumOfHidden(numHidden);
+            builder.setNumOfOutputs(numOutputs);
+            builder.setType(type);
+        } catch (Exception e) {
+            if (log.isLoggable(Level.WARNING)) {
+                log.log(Level.WARNING, "Unable to parse the model 
configuration", e);
+            }
+            throw new RuntimeException("Unable to parse the model 
configuration", e);
+        }
+    }
+
+    /**
+     * Read the next line for the model parameters and populate the build which
+     * later will be used to instantiate the instance of TrainedModel
+     *
+     * @param builder
+     * @param line
+     */
+    private void readNNParams(final NNTrainedModelBuilder builder,
+                              final String line) {
+        String[] sarr = line.split("\t");
+        int n = sarr.length;
+        float[] params = new float[n];
+        try {
+            int i = 0;
+            for (String fstr : sarr) {
+                params[i] = Float.parseFloat(fstr);
+                i++;
+            }
+            builder.setParams(params);
+        } catch (Exception e) {
+            if (log.isLoggable(Level.WARNING)) {
+                log.log(Level.WARNING, "Unable to parse the model 
configuration", e);
+            }
+            throw new RuntimeException("Unable to parse the model 
configuration", e);
+        }
+    }
 }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/TrainedModelDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/TrainedModelDetector.java?rev=1706073&r1=1706072&r2=1706073&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/TrainedModelDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/TrainedModelDetector.java Wed Sep 30 16:38:24 2015
@@ -14,6 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.tika.detect;
 
 import java.io.File;
@@ -29,150 +30,147 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
-import org.apache.tika.io.TemporaryResources;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 public abstract class TrainedModelDetector implements Detector {
-       private final Map<MediaType, TrainedModel> MODEL_MAP = new 
HashMap<MediaType, TrainedModel>();
-
-       private static final long serialVersionUID = 1L;
+    private final Map<MediaType, TrainedModel> MODEL_MAP = new HashMap<>();
 
-       public TrainedModelDetector() {
-               loadDefaultModels(getClass().getClassLoader());
-       }
-
-       public int getMinLength() {
-               return Integer.MAX_VALUE;
-       }
-
-       public MediaType detect(InputStream input, Metadata metadata)
-                       throws IOException {
-               // convert to byte-histogram
-               if (input != null) {
-                       input.mark(getMinLength());
-                       float[] histogram = readByteFrequencies(input);
-                       // writeHisto(histogram); //on testing purpose
-                       /*
-                        * iterate the map to find out the one that gives the 
higher
-                        * prediction value.
-                        */
-                       Iterator<MediaType> iter = 
MODEL_MAP.keySet().iterator();
-                       float threshold = 0.5f;// probability threshold, any 
value below the
-                                                                       // 
threshold will be considered as
-                                                                       // 
MediaType.OCTET_STREAM
-                       float maxprob = threshold;
-                       MediaType maxType = MediaType.OCTET_STREAM;
-                       while (iter.hasNext()) {
-                               MediaType key = iter.next();
-                               TrainedModel model = MODEL_MAP.get(key);
-                               float prob = model.predict(histogram);
-                               if (maxprob < prob) {
-                                       maxprob = prob;
-                                       maxType = key;
-                               }
-                       }
-                       input.reset();
-                       return maxType;
-               }
-               return null;
-       }
-
-       /**
-        * read the inputstream and build a byte frequence histogram
-        *
-        * @param input
-        * @return
-        * @throws IOException
-        */
-       protected float[] readByteFrequencies(final InputStream input)
-                       throws IOException {
-
-               ReadableByteChannel inputChannel;
-               try {
-                       inputChannel = Channels.newChannel(input);
-                       // long inSize = inputChannel.size();
-                       float histogram[] = new float[257];
-                       histogram[0] = 1;
-
-                       // create buffer with capacity of maxBufSize bytes
-                       ByteBuffer buf = ByteBuffer.allocate(1024 * 5);
-                       int bytesRead = inputChannel.read(buf); // read into 
buffer.
-
-                       float max = -1;
-                       while (bytesRead != -1) {
-
-                               buf.flip(); // make buffer ready for read
-
-                               while (buf.hasRemaining()) {
-                                       byte byt = buf.get();
-                                       int idx = byt;
-                                       idx++;
-                                       if (byt < 0) {
-                                               idx = 256 + idx;
-                                               histogram[idx]++;
-                                       } else {
-                                               histogram[idx]++;
-                                       }
-                                       max = max < histogram[idx] ? 
histogram[idx] : max;
-                               }
-
-                               buf.clear(); // make buffer ready for writing
-                               bytesRead = inputChannel.read(buf);
-                       }
-
-                       int i;
-                       for (i = 1; i < histogram.length; i++) {
-                               histogram[i] /= max;
-                               histogram[i] = (float) Math.sqrt(histogram[i]);
-                       }
-
-                       return histogram;
-               } finally {
-                       // inputChannel.close();
-               }
-
-       }
-
-       /**
-        * for testing purposes; this method write the histogram vector to a 
file.
-        *
-        * @param histogram
-        * @throws IOException
-        */
-       private void writeHisto(final float[] histogram)
-                       throws IOException {
-               Path histPath = new TemporaryResources().createTempFile();
-               try (Writer writer = Files.newBufferedWriter(histPath, UTF_8)) {
-                       for (float bin : histogram) {
-                               writer.write(String.valueOf(bin) + "\t");
-                               // writer.write(i + "\t");
-                       }
-                       writer.write("\r\n");
-               }
-       }
-
-       public void loadDefaultModels(Path modelFile) {
-               try (InputStream in = Files.newInputStream(modelFile)) {
-                       loadDefaultModels(in);
-               } catch (IOException e) {
-                       throw new RuntimeException(
-                                       "Unable to read the default media type 
registry", e);
-               }
-       }
-
-       public void loadDefaultModels(File modelFile) {
-               loadDefaultModels(modelFile.toPath());
-       }
-
-       public abstract void loadDefaultModels(final InputStream modelStream);
-
-       public abstract void loadDefaultModels(final ClassLoader classLoader);
-
-       protected void registerModels(MediaType type, TrainedModel model) {
-               MODEL_MAP.put(type, model);
-       }
+    private static final long serialVersionUID = 1L;
 
+    public TrainedModelDetector() {
+        loadDefaultModels(getClass().getClassLoader());
+    }
+
+    public int getMinLength() {
+        return Integer.MAX_VALUE;
+    }
+
+    public MediaType detect(InputStream input, Metadata metadata)
+            throws IOException {
+        // convert to byte-histogram
+        if (input != null) {
+            input.mark(getMinLength());
+            float[] histogram = readByteFrequencies(input);
+            // writeHisto(histogram); //on testing purpose
+            /*
+             * iterate the map to find out the one that gives the higher
+             * prediction value.
+             */
+            Iterator<MediaType> iter = MODEL_MAP.keySet().iterator();
+            float threshold = 0.5f;// probability threshold, any value below 
the
+            // threshold will be considered as
+            // MediaType.OCTET_STREAM
+            float maxprob = threshold;
+            MediaType maxType = MediaType.OCTET_STREAM;
+            while (iter.hasNext()) {
+                MediaType key = iter.next();
+                TrainedModel model = MODEL_MAP.get(key);
+                float prob = model.predict(histogram);
+                if (maxprob < prob) {
+                    maxprob = prob;
+                    maxType = key;
+                }
+            }
+            input.reset();
+            return maxType;
+        }
+        return null;
+    }
+
+    /**
+     * Read the {@code inputstream} and build a byte frequency histogram
+     *
+     * @param input stream to read from
+     * @return byte frequencies array
+     * @throws IOException
+     */
+    protected float[] readByteFrequencies(final InputStream input)
+            throws IOException {
+        ReadableByteChannel inputChannel;
+        // TODO: any reason to avoid closing of input & inputChannel?
+        try {
+            inputChannel = Channels.newChannel(input);
+            // long inSize = inputChannel.size();
+            float histogram[] = new float[257];
+            histogram[0] = 1;
+
+            // create buffer with capacity of maxBufSize bytes
+            ByteBuffer buf = ByteBuffer.allocate(1024 * 5);
+            int bytesRead = inputChannel.read(buf); // read into buffer.
+
+            float max = -1;
+            while (bytesRead != -1) {
+
+                buf.flip(); // make buffer ready for read
+
+                while (buf.hasRemaining()) {
+                    byte byt = buf.get();
+                    int idx = byt;
+                    idx++;
+                    if (byt < 0) {
+                        idx = 256 + idx;
+                        histogram[idx]++;
+                    } else {
+                        histogram[idx]++;
+                    }
+                    max = max < histogram[idx] ? histogram[idx] : max;
+                }
+
+                buf.clear(); // make buffer ready for writing
+                bytesRead = inputChannel.read(buf);
+            }
+
+            int i;
+            for (i = 1; i < histogram.length; i++) {
+                histogram[i] /= max;
+                histogram[i] = (float) Math.sqrt(histogram[i]);
+            }
+
+            return histogram;
+        } finally {
+            // inputChannel.close();
+        }
+    }
+
+    /**
+     * for testing purposes; this method write the histogram vector to a file.
+     *
+     * @param histogram
+     * @throws IOException
+     */
+    private void writeHisto(final float[] histogram)
+            throws IOException {
+        Path histPath = new TemporaryResources().createTempFile();
+        try (Writer writer = Files.newBufferedWriter(histPath, UTF_8)) {
+            for (float bin : histogram) {
+                writer.write(String.valueOf(bin) + "\t");
+                // writer.write(i + "\t");
+            }
+            writer.write("\r\n");
+        }
+    }
+
+    public void loadDefaultModels(Path modelFile) {
+        try (InputStream in = Files.newInputStream(modelFile)) {
+            loadDefaultModels(in);
+        } catch (IOException e) {
+            throw new RuntimeException("Unable to read the default media type 
registry", e);
+        }
+    }
+
+    public void loadDefaultModels(File modelFile) {
+        loadDefaultModels(modelFile.toPath());
+    }
+
+    public abstract void loadDefaultModels(final InputStream modelStream);
+
+    public abstract void loadDefaultModels(final ClassLoader classLoader);
+
+    protected void registerModels(MediaType type, TrainedModel model) {
+        MODEL_MAP.put(type, model);
+    }
 }


Reply via email to