Author: tpalsulich
Date: Wed Dec 24 08:16:45 2014
New Revision: 1647743

URL: http://svn.apache.org/r1647743
Log:
Pure whitespace change. Reformat the GDALParser and its test.

Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java?rev=1647743&r1=1647742&r2=1647743&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java Wed Dec 24 08:16:45 2014
@@ -31,7 +31,6 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-
 //Tika imports
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
@@ -42,6 +41,7 @@ import org.apache.tika.parser.AbstractPa
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.sax.XHTMLContentHandler;
+
 import static org.apache.tika.parser.external.ExternalParser.INPUT_FILE_TOKEN;
 
 //SAX imports
@@ -49,376 +49,373 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 /**
- * 
  * Wraps execution of the <a href="http//gdal.org/">Geospatial Data Abstraction
  * Library (GDAL)</a> <code>gdalinfo</code> tool used to extract geospatial
  * information out of hundreds of geo file formats.
- * 
+ * <p/>
  * The parser requires the installation of GDAL and for <code>gdalinfo</code> to
  * be located on the path.
- * 
+ * <p/>
  * Basic information (Size, Coordinate System, Bounding Box, Driver, and
  * resource info) are extracted as metadata, and the remaining metadata patterns
  * are extracted and added.
- * 
+ * <p/>
  * The output of the command is available from the provided
  * {@link ContentHandler} in the
  * {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)} method.
- * 
  */
 public class GDALParser extends AbstractParser {
 
-       private static final long serialVersionUID = -3869130527323941401L;
+    private static final long serialVersionUID = -3869130527323941401L;
 
-       private String command;
+    private String command;
 
-       public GDALParser() {
-               setCommand("gdalinfo ${INPUT}");
-       }
-
-       public void setCommand(String command) {
-               this.command = command;
-       }
-
-       public String getCommand() {
-               return this.command;
-       }
-
-       public String processCommand(InputStream stream) {
-               TikaInputStream tis = (TikaInputStream) stream;
-               String pCommand = this.command;
-               try {
-                       if (this.command.indexOf(INPUT_FILE_TOKEN) != -1) {
-                               pCommand = this.command.replace(INPUT_FILE_TOKEN, tis.getFile()
-                                               .getPath());
-                       }
-               } catch (Exception e) {
-                       e.printStackTrace();
-               }
-
-               return pCommand;
-       }
-
-       @Override
-       public Set<MediaType> getSupportedTypes(ParseContext context) {
-               Set<MediaType> types = new HashSet<MediaType>();
-               types.add(MediaType.application("x-netcdf"));
-               types.add(MediaType.application("vrt"));
-               types.add(MediaType.image("geotiff"));
-               types.add(MediaType.image("ntif"));
-               types.add(MediaType.application("x-rpf-toc"));
-               types.add(MediaType.application("x-ecrg-toc"));
-               types.add(MediaType.image("hfa"));
-               types.add(MediaType.image("sar-ceos"));
-               types.add(MediaType.image("ceos"));
-               types.add(MediaType.application("jaxa-pal-sar"));
-               types.add(MediaType.application("gff"));
-               types.add(MediaType.application("elas"));
-               types.add(MediaType.application("aig"));
-               types.add(MediaType.application("aaigrid"));
-               types.add(MediaType.application("grass-ascii-grid"));
-               types.add(MediaType.application("sdts-raster"));
-               types.add(MediaType.application("dted"));
-               types.add(MediaType.image("png"));
-               types.add(MediaType.image("jpeg"));
-               types.add(MediaType.image("raster"));
-               types.add(MediaType.application("jdem"));
-               types.add(MediaType.image("gif"));
-               types.add(MediaType.image("big-gif"));
-               types.add(MediaType.image("envisat"));
-               types.add(MediaType.image("fits"));
-               types.add(MediaType.application("fits"));
-               types.add(MediaType.image("bsb"));
-               types.add(MediaType.application("xpm"));
-               types.add(MediaType.image("bmp"));
-               types.add(MediaType.image("x-dimap"));
-               types.add(MediaType.image("x-airsar"));
-               types.add(MediaType.application("x-rs2"));
-               types.add(MediaType.application("x-pcidsk"));
-               types.add(MediaType.application("pcisdk"));
-               types.add(MediaType.image("x-pcraster"));
-               types.add(MediaType.image("ilwis"));
-               types.add(MediaType.image("sgi"));
-               types.add(MediaType.application("x-srtmhgt"));
-               types.add(MediaType.application("leveller"));
-               types.add(MediaType.application("terragen"));
-               types.add(MediaType.application("x-gmt"));
-               types.add(MediaType.application("x-isis3"));
-               types.add(MediaType.application("x-isis2"));
-               types.add(MediaType.application("x-pds"));
-               types.add(MediaType.application("x-til"));
-               types.add(MediaType.application("x-ers"));
-               types.add(MediaType.application("x-l1b"));
-               types.add(MediaType.image("fit"));
-               types.add(MediaType.application("x-grib"));
-               types.add(MediaType.image("jp2"));
-               types.add(MediaType.application("x-rmf"));
-               types.add(MediaType.application("x-wcs"));
-               types.add(MediaType.application("x-wms"));
-               types.add(MediaType.application("x-msgn"));
-               types.add(MediaType.application("x-wms"));
-               types.add(MediaType.application("x-wms"));
-               types.add(MediaType.application("x-rst"));
-               types.add(MediaType.application("x-ingr"));
-               types.add(MediaType.application("x-gsag"));
-               types.add(MediaType.application("x-gsbg"));
-               types.add(MediaType.application("x-gs7bg"));
-               types.add(MediaType.application("x-cosar"));
-               types.add(MediaType.application("x-tsx"));
-               types.add(MediaType.application("x-coasp"));
-               types.add(MediaType.application("x-r"));
-               types.add(MediaType.application("x-map"));
-               types.add(MediaType.application("x-pnm"));
-               types.add(MediaType.application("x-doq1"));
-               types.add(MediaType.application("x-doq2"));
-               types.add(MediaType.application("x-envi"));
-               types.add(MediaType.application("x-envi-hdr"));
-               types.add(MediaType.application("x-generic-bin"));
-               types.add(MediaType.application("x-p-aux"));
-               types.add(MediaType.image("x-mff"));
-               types.add(MediaType.image("x-mff2"));
-               types.add(MediaType.image("x-fujibas"));
-               types.add(MediaType.application("x-gsc"));
-               types.add(MediaType.application("x-fast"));
-               types.add(MediaType.application("x-bt"));
-               types.add(MediaType.application("x-lan"));
-               types.add(MediaType.application("x-cpg"));
-               types.add(MediaType.image("ida"));
-               types.add(MediaType.application("x-ndf"));
-               types.add(MediaType.image("eir"));
-               types.add(MediaType.application("x-dipex"));
-               types.add(MediaType.application("x-lcp"));
-               types.add(MediaType.application("x-gtx"));
-               types.add(MediaType.application("x-los-las"));
-               types.add(MediaType.application("x-ntv2"));
-               types.add(MediaType.application("x-ctable2"));
-               types.add(MediaType.application("x-ace2"));
-               types.add(MediaType.application("x-snodas"));
-               types.add(MediaType.application("x-kro"));
-               types.add(MediaType.image("arg"));
-               types.add(MediaType.application("x-rik"));
-               types.add(MediaType.application("x-usgs-dem"));
-               types.add(MediaType.application("x-gxf"));
-               types.add(MediaType.application("x-dods"));
-               types.add(MediaType.application("x-http"));
-               types.add(MediaType.application("x-bag"));
-               types.add(MediaType.application("x-hdf"));
-               types.add(MediaType.image("x-hdf5-image"));
-               types.add(MediaType.application("x-nwt-grd"));
-               types.add(MediaType.application("x-nwt-grc"));
-               types.add(MediaType.image("adrg"));
-               types.add(MediaType.image("x-srp"));
-               types.add(MediaType.application("x-blx"));
-               types.add(MediaType.application("x-rasterlite"));
-               types.add(MediaType.application("x-epsilon"));
-               types.add(MediaType.application("x-sdat"));
-               types.add(MediaType.application("x-kml"));
-               types.add(MediaType.application("x-xyz"));
-               types.add(MediaType.application("x-geo-pdf"));
-               types.add(MediaType.image("x-ozi"));
-               types.add(MediaType.application("x-ctg"));
-               types.add(MediaType.application("x-e00-grid"));
-               types.add(MediaType.application("x-zmap"));
-               types.add(MediaType.application("x-webp"));
-               types.add(MediaType.application("x-ngs-geoid"));
-               types.add(MediaType.application("x-mbtiles"));
-               types.add(MediaType.application("x-ppi"));
-               types.add(MediaType.application("x-cappi"));
-               return types;
-       }
-
-       @Override
-       public void parse(InputStream stream, ContentHandler handler,
-                       Metadata metadata, ParseContext context) throws IOException,
-                       SAXException, TikaException {
-
-               if (!ExternalParser.check("gdalinfo")){
-                       return;
-               }
-               
-               // first set up and run GDAL
-               // process the command
-               TemporaryResources tmp = new TemporaryResources();
-               TikaInputStream tis = TikaInputStream.get(stream, tmp);
-
-               String runCommand = processCommand(tis);
-               String output = execCommand(new String[] { runCommand });
-
-               // now extract the actual metadata params
-               // from the GDAL output in the content stream
-               // to do this, we need to literally process the output
-               // from the invoked command b/c we can't read metadata and
-               // output text from the handler in ExternalParser
-               // at the same time, so for now, we can't use the
-               // ExternalParser to do this and I've had to bring some of
-               // that functionality directly into this class
-               // TODO: investigate a way to do both using ExternalParser
-
-               extractMetFromOutput(output, metadata);
-               applyPatternsToOutput(output, metadata, getPatterns());
-
-               // make the content handler and provide output there
-               // now that we have metadata
-               processOutput(handler, metadata, output);
-       }
-
-       private Map<Pattern, String> getPatterns() {
-               Map<Pattern, String> patterns = new HashMap<Pattern, String>();
-               this.addPatternWithColon("Driver", patterns);
-               this.addPatternWithColon("Files", patterns);
-               this.addPatternWithIs("Size", patterns);
-               this.addPatternWithIs("Coordinate System", patterns);
-               this.addBoundingBoxPattern("Upper Left", patterns);
-               this.addBoundingBoxPattern("Lower Left", patterns);
-               this.addBoundingBoxPattern("Upper Right", patterns);
-               this.addBoundingBoxPattern("Lower Right", patterns);
-               return patterns;
-       }
-
-       private void addPatternWithColon(String name, Map<Pattern, String> patterns) {
-               patterns.put(
-                               Pattern.compile(name + "\\:\\s*([A-Za-z0-9/ _\\-\\.]+)\\s*"),
-                               name);
-       }
-
-       private void addPatternWithIs(String name, Map<Pattern, String> patterns) {
-               patterns.put(Pattern.compile(name + " is ([A-Za-z0-9\\.,\\s`']+)"),
-                               name);
-       }
-
-       private void addBoundingBoxPattern(String name,
-                       Map<Pattern, String> patterns) {
-               patterns.put(
-                               Pattern.compile(name
-                                               + 
"\\s*\\(\\s*([0-9]+\\.[0-9]+\\s*,\\s*[0-9]+\\.[0-9]+\\s*)\\)\\s*"),
-                               name);
-       }
-
-       private void extractMetFromOutput(String output, Metadata met) {
-               Scanner scanner = new Scanner(output);
-               String currentKey = null;
-               String[] headings = {"Subdatasets", "Corner Coordinates"};
-               StringBuilder metVal = new StringBuilder();
-               while (scanner.hasNextLine()) {
-                       String line = scanner.nextLine();
-                       if (line.contains("=") || hasHeadings(line, headings)) {
-                               if (currentKey != null) {
-                                       // time to flush this key and met val
-                                       met.add(currentKey, metVal.toString());
-                               }
-                               metVal.setLength(0);
-
-                               String[] lineToks = line.split("=");
-                               currentKey = lineToks[0].trim();
-                               if (lineToks.length == 2) {
-                                       metVal.append(lineToks[1]);
-                               } else {
-                                       metVal.append("");
-                               }
-                       } else {
-                               metVal.append(line);
-                       }
-
-               }
-       }
-       
-       private boolean hasHeadings(String line, String[] headings){
-               if (headings != null && headings.length > 0){
-                       for(String heading: headings){
-                               if(line.contains(heading)){
-                                       return true;
-                               }
-                       }
-                       return false;
-               }
-               else return false;
-       }
-
-       private void applyPatternsToOutput(String output, Metadata metadata,
-                       Map<Pattern, String> metadataPatterns) {
-               Scanner scanner = new Scanner(output);
-               while (scanner.hasNextLine()) {
-                       String line = scanner.nextLine();
-                       for (Pattern p : metadataPatterns.keySet()) {
-                               Matcher m = p.matcher(line);
-                               if (m.find()) {
-                                       if (metadataPatterns.get(p) != null
-                                                       && !metadataPatterns.get(p).equals("")) {
-                                               metadata.add(metadataPatterns.get(p), m.group(1));
-                                       } else {
-                                               metadata.add(m.group(1), m.group(2));
-                                       }
-                               }
-                       }
-               }
-
-       }
-
-       private String execCommand(String[] cmd) throws IOException {
-               // Execute
-               Process process;
-               String output = null;
-               if (cmd.length == 1) {
-                       process = Runtime.getRuntime().exec(cmd[0]);
-               } else {
-                       process = Runtime.getRuntime().exec(cmd);
-               }
-
-               try {
-                       InputStream out = process.getInputStream();
-
-                       try {
-                               output = extractOutput(out);
-                       } catch (Exception e) {
-                               e.printStackTrace();
-                               output = "";
-                       }
-
-               } finally {
-                       try {
-                               process.waitFor();
-                       } catch (InterruptedException ignore) {
-                       }
-
-                       return output;
-               }
-
-       }
-
-       private String extractOutput(InputStream stream) throws SAXException,
-                       IOException {
-               StringBuffer sb = new StringBuffer();
-               Reader reader = new InputStreamReader(stream, "UTF-8");
-               try {
-                       char[] buffer = new char[1024];
-                       for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
-                               sb.append(buffer, 0, n);
-                       }
-               } finally {
-                       reader.close();
-                       return sb.toString();
-               }
-       }
-
-       private void processOutput(ContentHandler handler, Metadata metadata,
-                       String output) throws SAXException, IOException {
-               XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-               InputStream stream = new ByteArrayInputStream(output.getBytes("UTF-8"));
-               Reader reader = new InputStreamReader(stream, "UTF-8");
-               try {
-                       xhtml.startDocument();
-                       xhtml.startElement("p");
-                       char[] buffer = new char[1024];
-                       for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
-                               xhtml.characters(buffer, 0, n);
-                       }
-                       xhtml.endElement("p");
-
-               } finally {
-                       reader.close();
-                       xhtml.endDocument();
-               }
+    public GDALParser() {
+        setCommand("gdalinfo ${INPUT}");
+    }
+
+    public void setCommand(String command) {
+        this.command = command;
+    }
+
+    public String getCommand() {
+        return this.command;
+    }
+
+    public String processCommand(InputStream stream) {
+        TikaInputStream tis = (TikaInputStream) stream;
+        String pCommand = this.command;
+        try {
+            if (this.command.indexOf(INPUT_FILE_TOKEN) != -1) {
+                pCommand = this.command.replace(INPUT_FILE_TOKEN, tis.getFile()
+                        .getPath());
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+
+        return pCommand;
+    }
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        Set<MediaType> types = new HashSet<MediaType>();
+        types.add(MediaType.application("x-netcdf"));
+        types.add(MediaType.application("vrt"));
+        types.add(MediaType.image("geotiff"));
+        types.add(MediaType.image("ntif"));
+        types.add(MediaType.application("x-rpf-toc"));
+        types.add(MediaType.application("x-ecrg-toc"));
+        types.add(MediaType.image("hfa"));
+        types.add(MediaType.image("sar-ceos"));
+        types.add(MediaType.image("ceos"));
+        types.add(MediaType.application("jaxa-pal-sar"));
+        types.add(MediaType.application("gff"));
+        types.add(MediaType.application("elas"));
+        types.add(MediaType.application("aig"));
+        types.add(MediaType.application("aaigrid"));
+        types.add(MediaType.application("grass-ascii-grid"));
+        types.add(MediaType.application("sdts-raster"));
+        types.add(MediaType.application("dted"));
+        types.add(MediaType.image("png"));
+        types.add(MediaType.image("jpeg"));
+        types.add(MediaType.image("raster"));
+        types.add(MediaType.application("jdem"));
+        types.add(MediaType.image("gif"));
+        types.add(MediaType.image("big-gif"));
+        types.add(MediaType.image("envisat"));
+        types.add(MediaType.image("fits"));
+        types.add(MediaType.application("fits"));
+        types.add(MediaType.image("bsb"));
+        types.add(MediaType.application("xpm"));
+        types.add(MediaType.image("bmp"));
+        types.add(MediaType.image("x-dimap"));
+        types.add(MediaType.image("x-airsar"));
+        types.add(MediaType.application("x-rs2"));
+        types.add(MediaType.application("x-pcidsk"));
+        types.add(MediaType.application("pcisdk"));
+        types.add(MediaType.image("x-pcraster"));
+        types.add(MediaType.image("ilwis"));
+        types.add(MediaType.image("sgi"));
+        types.add(MediaType.application("x-srtmhgt"));
+        types.add(MediaType.application("leveller"));
+        types.add(MediaType.application("terragen"));
+        types.add(MediaType.application("x-gmt"));
+        types.add(MediaType.application("x-isis3"));
+        types.add(MediaType.application("x-isis2"));
+        types.add(MediaType.application("x-pds"));
+        types.add(MediaType.application("x-til"));
+        types.add(MediaType.application("x-ers"));
+        types.add(MediaType.application("x-l1b"));
+        types.add(MediaType.image("fit"));
+        types.add(MediaType.application("x-grib"));
+        types.add(MediaType.image("jp2"));
+        types.add(MediaType.application("x-rmf"));
+        types.add(MediaType.application("x-wcs"));
+        types.add(MediaType.application("x-wms"));
+        types.add(MediaType.application("x-msgn"));
+        types.add(MediaType.application("x-wms"));
+        types.add(MediaType.application("x-wms"));
+        types.add(MediaType.application("x-rst"));
+        types.add(MediaType.application("x-ingr"));
+        types.add(MediaType.application("x-gsag"));
+        types.add(MediaType.application("x-gsbg"));
+        types.add(MediaType.application("x-gs7bg"));
+        types.add(MediaType.application("x-cosar"));
+        types.add(MediaType.application("x-tsx"));
+        types.add(MediaType.application("x-coasp"));
+        types.add(MediaType.application("x-r"));
+        types.add(MediaType.application("x-map"));
+        types.add(MediaType.application("x-pnm"));
+        types.add(MediaType.application("x-doq1"));
+        types.add(MediaType.application("x-doq2"));
+        types.add(MediaType.application("x-envi"));
+        types.add(MediaType.application("x-envi-hdr"));
+        types.add(MediaType.application("x-generic-bin"));
+        types.add(MediaType.application("x-p-aux"));
+        types.add(MediaType.image("x-mff"));
+        types.add(MediaType.image("x-mff2"));
+        types.add(MediaType.image("x-fujibas"));
+        types.add(MediaType.application("x-gsc"));
+        types.add(MediaType.application("x-fast"));
+        types.add(MediaType.application("x-bt"));
+        types.add(MediaType.application("x-lan"));
+        types.add(MediaType.application("x-cpg"));
+        types.add(MediaType.image("ida"));
+        types.add(MediaType.application("x-ndf"));
+        types.add(MediaType.image("eir"));
+        types.add(MediaType.application("x-dipex"));
+        types.add(MediaType.application("x-lcp"));
+        types.add(MediaType.application("x-gtx"));
+        types.add(MediaType.application("x-los-las"));
+        types.add(MediaType.application("x-ntv2"));
+        types.add(MediaType.application("x-ctable2"));
+        types.add(MediaType.application("x-ace2"));
+        types.add(MediaType.application("x-snodas"));
+        types.add(MediaType.application("x-kro"));
+        types.add(MediaType.image("arg"));
+        types.add(MediaType.application("x-rik"));
+        types.add(MediaType.application("x-usgs-dem"));
+        types.add(MediaType.application("x-gxf"));
+        types.add(MediaType.application("x-dods"));
+        types.add(MediaType.application("x-http"));
+        types.add(MediaType.application("x-bag"));
+        types.add(MediaType.application("x-hdf"));
+        types.add(MediaType.image("x-hdf5-image"));
+        types.add(MediaType.application("x-nwt-grd"));
+        types.add(MediaType.application("x-nwt-grc"));
+        types.add(MediaType.image("adrg"));
+        types.add(MediaType.image("x-srp"));
+        types.add(MediaType.application("x-blx"));
+        types.add(MediaType.application("x-rasterlite"));
+        types.add(MediaType.application("x-epsilon"));
+        types.add(MediaType.application("x-sdat"));
+        types.add(MediaType.application("x-kml"));
+        types.add(MediaType.application("x-xyz"));
+        types.add(MediaType.application("x-geo-pdf"));
+        types.add(MediaType.image("x-ozi"));
+        types.add(MediaType.application("x-ctg"));
+        types.add(MediaType.application("x-e00-grid"));
+        types.add(MediaType.application("x-zmap"));
+        types.add(MediaType.application("x-webp"));
+        types.add(MediaType.application("x-ngs-geoid"));
+        types.add(MediaType.application("x-mbtiles"));
+        types.add(MediaType.application("x-ppi"));
+        types.add(MediaType.application("x-cappi"));
+        return types;
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+
+        if (!ExternalParser.check("gdalinfo")) {
+            return;
+        }
+
+        // first set up and run GDAL
+        // process the command
+        TemporaryResources tmp = new TemporaryResources();
+        TikaInputStream tis = TikaInputStream.get(stream, tmp);
+
+        String runCommand = processCommand(tis);
+        String output = execCommand(new String[]{runCommand});
+
+        // now extract the actual metadata params
+        // from the GDAL output in the content stream
+        // to do this, we need to literally process the output
+        // from the invoked command b/c we can't read metadata and
+        // output text from the handler in ExternalParser
+        // at the same time, so for now, we can't use the
+        // ExternalParser to do this and I've had to bring some of
+        // that functionality directly into this class
+        // TODO: investigate a way to do both using ExternalParser
+
+        extractMetFromOutput(output, metadata);
+        applyPatternsToOutput(output, metadata, getPatterns());
+
+        // make the content handler and provide output there
+        // now that we have metadata
+        processOutput(handler, metadata, output);
+    }
+
+    private Map<Pattern, String> getPatterns() {
+        Map<Pattern, String> patterns = new HashMap<Pattern, String>();
+        this.addPatternWithColon("Driver", patterns);
+        this.addPatternWithColon("Files", patterns);
+        this.addPatternWithIs("Size", patterns);
+        this.addPatternWithIs("Coordinate System", patterns);
+        this.addBoundingBoxPattern("Upper Left", patterns);
+        this.addBoundingBoxPattern("Lower Left", patterns);
+        this.addBoundingBoxPattern("Upper Right", patterns);
+        this.addBoundingBoxPattern("Lower Right", patterns);
+        return patterns;
+    }
+
+    private void addPatternWithColon(String name, Map<Pattern, String> patterns) {
+        patterns.put(
+                Pattern.compile(name + "\\:\\s*([A-Za-z0-9/ _\\-\\.]+)\\s*"),
+                name);
+    }
+
+    private void addPatternWithIs(String name, Map<Pattern, String> patterns) {
+        patterns.put(Pattern.compile(name + " is ([A-Za-z0-9\\.,\\s`']+)"),
+                name);
+    }
+
+    private void addBoundingBoxPattern(String name,
+                                       Map<Pattern, String> patterns) {
+        patterns.put(
+                Pattern.compile(name
+                        + "\\s*\\(\\s*([0-9]+\\.[0-9]+\\s*,\\s*[0-9]+\\.[0-9]+\\s*)\\)\\s*"),
+                name);
+    }
+
+    private void extractMetFromOutput(String output, Metadata met) {
+        Scanner scanner = new Scanner(output);
+        String currentKey = null;
+        String[] headings = {"Subdatasets", "Corner Coordinates"};
+        StringBuilder metVal = new StringBuilder();
+        while (scanner.hasNextLine()) {
+            String line = scanner.nextLine();
+            if (line.contains("=") || hasHeadings(line, headings)) {
+                if (currentKey != null) {
+                    // time to flush this key and met val
+                    met.add(currentKey, metVal.toString());
+                }
+                metVal.setLength(0);
+
+                String[] lineToks = line.split("=");
+                currentKey = lineToks[0].trim();
+                if (lineToks.length == 2) {
+                    metVal.append(lineToks[1]);
+                } else {
+                    metVal.append("");
+                }
+            } else {
+                metVal.append(line);
+            }
+
+        }
+    }
+
+    private boolean hasHeadings(String line, String[] headings) {
+        if (headings != null && headings.length > 0) {
+            for (String heading : headings) {
+                if (line.contains(heading)) {
+                    return true;
+                }
+            }
+            return false;
+        } else return false;
+    }
+
+    private void applyPatternsToOutput(String output, Metadata metadata,
+                                       Map<Pattern, String> metadataPatterns) {
+        Scanner scanner = new Scanner(output);
+        while (scanner.hasNextLine()) {
+            String line = scanner.nextLine();
+            for (Pattern p : metadataPatterns.keySet()) {
+                Matcher m = p.matcher(line);
+                if (m.find()) {
+                    if (metadataPatterns.get(p) != null
+                            && !metadataPatterns.get(p).equals("")) {
+                        metadata.add(metadataPatterns.get(p), m.group(1));
+                    } else {
+                        metadata.add(m.group(1), m.group(2));
+                    }
+                }
+            }
+        }
+
+    }
+
+    private String execCommand(String[] cmd) throws IOException {
+        // Execute
+        Process process;
+        String output = null;
+        if (cmd.length == 1) {
+            process = Runtime.getRuntime().exec(cmd[0]);
+        } else {
+            process = Runtime.getRuntime().exec(cmd);
+        }
+
+        try {
+            InputStream out = process.getInputStream();
+
+            try {
+                output = extractOutput(out);
+            } catch (Exception e) {
+                e.printStackTrace();
+                output = "";
+            }
+
+        } finally {
+            try {
+                process.waitFor();
+            } catch (InterruptedException ignore) {
+            }
+
+            return output;
+        }
+
+    }
+
+    private String extractOutput(InputStream stream) throws SAXException,
+            IOException {
+        StringBuffer sb = new StringBuffer();
+        Reader reader = new InputStreamReader(stream, "UTF-8");
+        try {
+            char[] buffer = new char[1024];
+            for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
+                sb.append(buffer, 0, n);
+            }
+        } finally {
+            reader.close();
+            return sb.toString();
+        }
+    }
+
+    private void processOutput(ContentHandler handler, Metadata metadata,
+                               String output) throws SAXException, IOException {
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        InputStream stream = new ByteArrayInputStream(output.getBytes("UTF-8"));
+        Reader reader = new InputStreamReader(stream, "UTF-8");
+        try {
+            xhtml.startDocument();
+            xhtml.startElement("p");
+            char[] buffer = new char[1024];
+            for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
+                xhtml.characters(buffer, 0, n);
+            }
+            xhtml.endElement("p");
+
+        } finally {
+            reader.close();
+            xhtml.endDocument();
+        }
 
-       }
+    }
 
 }

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java?rev=1647743&r1=1647742&r2=1647743&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java Wed Dec 24 08:16:45 2014
@@ -18,6 +18,7 @@
 package org.apache.tika.parser.gdal;
 
 //JDK imports
+
 import java.io.InputStream;
 
 //Tika imports
@@ -29,6 +30,7 @@ import org.apache.tika.sax.BodyContentHa
 
 //Junit imports
 import org.junit.Test;
+
 import static org.junit.Assert.fail;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertEquals;
@@ -36,140 +38,138 @@ import static org.junit.Assert.assertNot
 import static org.junit.Assume.assumeTrue;
 
 /**
- * 
  * Test harness for the GDAL parser.
- * 
  */
 public class TestGDALParser extends TikaTest {
 
-       private boolean canRun() {
-               String[] checkCmd = { "gdalinfo" };
-               // If GDAL is not on the path, do not run the test.
-               return ExternalParser.check(checkCmd);
-       }
-
-       @Test
-       public void testParseBasicInfo() {
-               assumeTrue(canRun());
-               final String expectedDriver = "netCDF/Network Common Data 
Format";
-               final String expectedUpperRight = "512.0,    0.0";
-               final String expectedUpperLeft = "0.0,    0.0";
-               final String expectedLowerLeft = "0.0,  512.0";
-               final String expectedLowerRight = "512.0,  512.0";
-               final String expectedCoordinateSystem = "`'";
-               final String expectedSize = "512, 512";
-
-               GDALParser parser = new GDALParser();
-               InputStream stream = TestGDALParser.class
-                               
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
-               Metadata met = new Metadata();
-               BodyContentHandler handler = new BodyContentHandler();
-               try {
-                       parser.parse(stream, handler, met, new ParseContext());
-                       assertNotNull(met);
-                       assertNotNull(met.get("Driver"));
-                       assertEquals(expectedDriver, met.get("Driver"));
-                       assertNotNull(met.get("Files"));
-                       assertNotNull(met.get("Coordinate System"));
-                       assertEquals(expectedCoordinateSystem, 
met.get("Coordinate System"));
-                       assertNotNull(met.get("Size"));
-                       assertEquals(expectedSize, met.get("Size"));
-                       assertNotNull(met.get("Upper Right"));
-                       assertEquals(expectedUpperRight, met.get("Upper 
Right"));
-                       assertNotNull(met.get("Upper Left"));
-                       assertEquals(expectedUpperLeft, met.get("Upper Left"));
-                       assertNotNull(met.get("Upper Right"));
-                       assertEquals(expectedLowerRight, met.get("Lower 
Right"));
-                       assertNotNull(met.get("Upper Right"));
-                       assertEquals(expectedLowerLeft, met.get("Lower Left"));
-               } catch (Exception e) {
-                       e.printStackTrace();
-                       fail(e.getMessage());
-               }
-       }
-
-       @Test
-       public void testParseMetadata() {
-               assumeTrue(canRun());
-               final String expectedNcInst = "NCAR (National Center for 
Atmospheric Research, Boulder, CO, USA)";
-               final String expectedModelNameEnglish = "NCAR CCSM";
-               final String expectedProgramId = "Source file unknown Version 
unknown Date unknown";
-               final String expectedProjectId = "IPCC Fourth Assessment";
-               final String expectedRealization = "1";
-               final String expectedTitle = "model output prepared for IPCC 
AR4";
-               final String expectedSub8Name = "\":ua";
-               final String expectedSub8Desc = "[1x17x128x256] eastward_wind 
(32-bit floating-point)";
-
-               GDALParser parser = new GDALParser();
-               InputStream stream = TestGDALParser.class
-                               
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
-               Metadata met = new Metadata();
-               BodyContentHandler handler = new BodyContentHandler();
-               try {
-                       parser.parse(stream, handler, met, new ParseContext());
-                       assertNotNull(met);
-                       assertNotNull(met.get("NC_GLOBAL#institution"));
-                       assertEquals(expectedNcInst, 
met.get("NC_GLOBAL#institution"));
-                       assertNotNull(met.get("NC_GLOBAL#model_name_english"));
-                       assertEquals(expectedModelNameEnglish,
-                                       
met.get("NC_GLOBAL#model_name_english"));
-                       assertNotNull(met.get("NC_GLOBAL#prg_ID"));
-                       assertEquals(expectedProgramId, 
met.get("NC_GLOBAL#prg_ID"));
-                       assertNotNull(met.get("NC_GLOBAL#prg_ID"));
-                       assertEquals(expectedProgramId, 
met.get("NC_GLOBAL#prg_ID"));
-                       assertNotNull(met.get("NC_GLOBAL#project_id"));
-                       assertEquals(expectedProjectId, 
met.get("NC_GLOBAL#project_id"));
-                       assertNotNull(met.get("NC_GLOBAL#realization"));
-                       assertEquals(expectedRealization, 
met.get("NC_GLOBAL#realization"));
-                       assertNotNull(met.get("NC_GLOBAL#title"));
-                       assertEquals(expectedTitle, met.get("NC_GLOBAL#title"));
-                       assertNotNull(met.get("SUBDATASET_8_NAME"));
-                       
assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
-                       assertNotNull(met.get("SUBDATASET_8_DESC"));
-                       assertEquals(expectedSub8Desc, 
met.get("SUBDATASET_8_DESC"));
-               } catch (Exception e) {
-                       e.printStackTrace();
-                       fail(e.getMessage());
-               }
-       }
+    /**
+     * Asks {@link ExternalParser#check(String[], int...)} about the bare
+     * <code>gdalinfo</code> command. The tests feed the result into a JUnit
+     * assumption, so they do not run when GDAL is not on the path.
+     */
+    private boolean canRun() {
+        return ExternalParser.check(new String[]{"gdalinfo"});
+    }
+
+    /**
+     * Parses the sample .nc file and checks the driver, files, coordinate
+     * system, size and the four corner coordinates stored in the metadata.
+     * Guarded by a JUnit assumption on <code>canRun()</code>.
+     */
+    @Test
+    public void testParseBasicInfo() {
+        assumeTrue(canRun());
+        final String expectedDriver = "netCDF/Network Common Data Format";
+        final String expectedUpperRight = "512.0,    0.0";
+        final String expectedUpperLeft = "0.0,    0.0";
+        final String expectedLowerLeft = "0.0,  512.0";
+        final String expectedLowerRight = "512.0,  512.0";
+        final String expectedCoordinateSystem = "`'";
+        final String expectedSize = "512, 512";
+
+        GDALParser parser = new GDALParser();
+        InputStream stream = TestGDALParser.class
+                .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
+        Metadata met = new Metadata();
+        BodyContentHandler handler = new BodyContentHandler();
+        try {
+            parser.parse(stream, handler, met, new ParseContext());
+            assertNotNull(met);
+            assertNotNull(met.get("Driver"));
+            assertEquals(expectedDriver, met.get("Driver"));
+            assertNotNull(met.get("Files"));
+            assertNotNull(met.get("Coordinate System"));
+            assertEquals(expectedCoordinateSystem, met.get("Coordinate System"));
+            assertNotNull(met.get("Size"));
+            assertEquals(expectedSize, met.get("Size"));
+            assertNotNull(met.get("Upper Right"));
+            assertEquals(expectedUpperRight, met.get("Upper Right"));
+            assertNotNull(met.get("Upper Left"));
+            assertEquals(expectedUpperLeft, met.get("Upper Left"));
+            // Each null-check names the same key as the comparison after it.
+            assertNotNull(met.get("Lower Right"));
+            assertEquals(expectedLowerRight, met.get("Lower Right"));
+            assertNotNull(met.get("Lower Left"));
+            assertEquals(expectedLowerLeft, met.get("Lower Left"));
+        } catch (Exception e) {
+            // Any exception caught here is reported as a test failure.
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
+
+    /**
+     * Parses the sample .nc file and checks a selection of
+     * <code>NC_GLOBAL#...</code> and <code>SUBDATASET_8_...</code> entries
+     * stored in the metadata. Guarded by a JUnit assumption on
+     * <code>canRun()</code>.
+     */
+    @Test
+    public void testParseMetadata() {
+        assumeTrue(canRun());
+        final String expectedNcInst = "NCAR (National Center for Atmospheric Research, Boulder, CO, USA)";
+        final String expectedModelNameEnglish = "NCAR CCSM";
+        final String expectedProgramId = "Source file unknown Version unknown Date unknown";
+        final String expectedProjectId = "IPCC Fourth Assessment";
+        final String expectedRealization = "1";
+        final String expectedTitle = "model output prepared for IPCC AR4";
+        final String expectedSub8Name = "\":ua";
+        final String expectedSub8Desc = "[1x17x128x256] eastward_wind (32-bit floating-point)";
+
+        GDALParser parser = new GDALParser();
+        InputStream stream = TestGDALParser.class
+                .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
+        Metadata met = new Metadata();
+        BodyContentHandler handler = new BodyContentHandler();
+        try {
+            parser.parse(stream, handler, met, new ParseContext());
+            assertNotNull(met);
+            assertNotNull(met.get("NC_GLOBAL#institution"));
+            assertEquals(expectedNcInst, met.get("NC_GLOBAL#institution"));
+            assertNotNull(met.get("NC_GLOBAL#model_name_english"));
+            assertEquals(expectedModelNameEnglish,
+                    met.get("NC_GLOBAL#model_name_english"));
+            assertNotNull(met.get("NC_GLOBAL#prg_ID"));
+            assertEquals(expectedProgramId, met.get("NC_GLOBAL#prg_ID"));
+            assertNotNull(met.get("NC_GLOBAL#project_id"));
+            assertEquals(expectedProjectId, met.get("NC_GLOBAL#project_id"));
+            assertNotNull(met.get("NC_GLOBAL#realization"));
+            assertEquals(expectedRealization, met.get("NC_GLOBAL#realization"));
+            assertNotNull(met.get("NC_GLOBAL#title"));
+            assertEquals(expectedTitle, met.get("NC_GLOBAL#title"));
+            assertNotNull(met.get("SUBDATASET_8_NAME"));
+            // Only the suffix of the subdataset name is compared.
+            assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
+            assertNotNull(met.get("SUBDATASET_8_DESC"));
+            assertEquals(expectedSub8Desc, met.get("SUBDATASET_8_DESC"));
+        } catch (Exception e) {
+            // Any exception caught here is reported as a test failure.
+            e.printStackTrace();
+            fail(e.getMessage());
+        }
+    }
 
-       @Test
-       public void testParseFITS() {
+    @Test
+    public void testParseFITS() { // parses the FITS sample and checks five metadata keys
         String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits";
 
-               assumeTrue(canRun());
+        assumeTrue(canRun()); // not run unless the gdalinfo check passes
         // If the exit code is 1 (meaning FITS isn't supported by the 
installed version of gdalinfo, don't run this test.
-        String[] fitsCommand = { "gdalinfo", 
TestGDALParser.class.getResource(fitsFilename).getPath() };
+        String[] fitsCommand = {"gdalinfo", 
TestGDALParser.class.getResource(fitsFilename).getPath()};
         assumeTrue(ExternalParser.check(fitsCommand, 1));
 
-               String expectedAllgMin = "-7.319537E1";
-               String expectedAtodcorr = "COMPLETE";
-               String expectedAtodfile = "uref$dbu1405iu.r1h";
-               String expectedCalVersion = "                        ";
-               String expectedCalibDef = "1466";
-
-               GDALParser parser = new GDALParser();
-               InputStream stream = TestGDALParser.class
-                               .getResourceAsStream(fitsFilename);
-               Metadata met = new Metadata();
-               BodyContentHandler handler = new BodyContentHandler();
-               try {
-                       parser.parse(stream, handler, met, new ParseContext());
-                       assertNotNull(met);
-                       assertNotNull(met.get("ALLG-MIN"));
-                       assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
-                       assertNotNull(met.get("ATODCORR"));
-                       assertEquals(expectedAtodcorr, met.get("ATODCORR"));
-                       assertNotNull(met.get("ATODFILE"));
-                       assertEquals(expectedAtodfile, met.get("ATODFILE"));
-                       assertNotNull(met.get("CAL_VER"));
-                       assertEquals(expectedCalVersion, met.get("CAL_VER"));
-                       assertNotNull(met.get("CALIBDEF"));
-                       assertEquals(expectedCalibDef, met.get("CALIBDEF"));
-
-               } catch (Exception e) {
-                       e.printStackTrace();
-                       fail(e.getMessage());
-               }
-       }
+        String expectedAllgMin = "-7.319537E1";
+        String expectedAtodcorr = "COMPLETE";
+        String expectedAtodfile = "uref$dbu1405iu.r1h";
+        String expectedCalVersion = "                        "; // expected CAL_VER value consists of spaces only
+        String expectedCalibDef = "1466";
+
+        GDALParser parser = new GDALParser();
+        InputStream stream = TestGDALParser.class
+                .getResourceAsStream(fitsFilename);
+        Metadata met = new Metadata();
+        BodyContentHandler handler = new BodyContentHandler();
+        try {
+            parser.parse(stream, handler, met, new ParseContext()); // met is inspected by the assertions below
+            assertNotNull(met);
+            assertNotNull(met.get("ALLG-MIN"));
+            assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
+            assertNotNull(met.get("ATODCORR"));
+            assertEquals(expectedAtodcorr, met.get("ATODCORR"));
+            assertNotNull(met.get("ATODFILE"));
+            assertEquals(expectedAtodfile, met.get("ATODFILE"));
+            assertNotNull(met.get("CAL_VER"));
+            assertEquals(expectedCalVersion, met.get("CAL_VER"));
+            assertNotNull(met.get("CALIBDEF"));
+            assertEquals(expectedCalibDef, met.get("CALIBDEF"));
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail(e.getMessage()); // any exception caught here fails the test
+        }
+    }
 }


Reply via email to