JENA-959 : Add --compress. Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/43efe529 Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/43efe529 Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/43efe529
Branch: refs/heads/JENA-491-construct-quads Commit: 43efe529cb99aeaca6e94e91ac819e5ce4c20c2c Parents: b2045b2 Author: Andy Seaborne <[email protected]> Authored: Sun Aug 16 17:01:41 2015 +0100 Committer: Andy Seaborne <[email protected]> Committed: Sun Aug 16 17:01:41 2015 +0100 ---------------------------------------------------------------------- .../main/java/arq/cmdline/ModLangOutput.java | 33 +++----- .../src/main/java/riotcmd/CmdLangParse.java | 79 ++++++++++---------- 2 files changed, 51 insertions(+), 61 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/43efe529/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java b/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java index 7b12231..a4ed89d 100644 --- a/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java +++ b/jena-arq/src/main/java/arq/cmdline/ModLangOutput.java @@ -39,6 +39,8 @@ public class ModLangOutput extends ModBase protected ArgDecl argOutput = new ArgDecl(ArgDecl.HasValue, "out", "output") ; protected ArgDecl argPretty = new ArgDecl(ArgDecl.HasValue, "formatted", "pretty", "fmt") ; protected ArgDecl argStream = new ArgDecl(ArgDecl.HasValue, "stream") ; + protected ArgDecl argCompress = new ArgDecl(ArgDecl.NoValue, "compress") ; + private boolean compressedOutput = false ; private RDFFormat streamOutput = null ; private RDFFormat formattedOutput = null ; @@ -48,6 +50,7 @@ public class ModLangOutput extends ModBase cmdLine.add(argOutput, "--output=FMT", "Output in the given format, streaming if possible.") ; cmdLine.add(argPretty, "--formatted=FMT", "Output, using pretty printing (consumes memory)") ; cmdLine.add(argStream, "--stream=FMT", "Output, using a streaming format") ; + cmdLine.add(argCompress, "--compress=FMT", "Compress the output with gzip") ; } @Override @@ -96,6 +99,9 @@ public class ModLangOutput extends ModBase } } + if ( cmdLine.contains(argCompress)) + compressedOutput = true ; + if ( streamOutput == null && formattedOutput == null ) streamOutput = RDFFormat.NQUADS ; } @@ -129,28 +135,7 @@ public class ModLangOutput extends ModBase out.println(" "+lang.getLabel()) ; } } - // Stream-only code. -// if ( ! StreamRDFWriter.registered(output) ) { -// // ** Java8 -//// StreamRDFWriter.registered().stream() -//// .map(fmt -> fmt.getLang()) -//// .distinct() -//// .forEach(x -> System.err.println(" "+x.getLabel())) ; -// -// System.err.println("Language '"+output.getLabel()+"' can not be used for streamed out (try rdfcat)") ; -// System.err.println("Streaming languages are:") ; -// Set<Lang> seen = new HashSet<>() ; -// for ( RDFFormat fmt : StreamRDFWriter.registered()) { -// if ( seen.contains(fmt.getLang()) ) -// continue ; -// seen.add(fmt.getLang()) ; -// System.err.println(" "+fmt.getLang().getLabel()) ; -// } -// -// throw new CmdException("Not a streaming RDF language : '"+langName+"'") ; -// } -// format = StreamRDFWriter.defaultSerialization(output) ; - + public RDFFormat getOutputStreamFormat() { return streamOutput ; } @@ -158,4 +143,8 @@ public class ModLangOutput extends ModBase public RDFFormat getOutputFormatted() { return formattedOutput ; } + + public boolean compressedOutput() { + return compressedOutput ; + } } http://git-wip-us.apache.org/repos/asf/jena/blob/43efe529/jena-arq/src/main/java/riotcmd/CmdLangParse.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/riotcmd/CmdLangParse.java b/jena-arq/src/main/java/riotcmd/CmdLangParse.java index 754f749..aa04052 100644 --- a/jena-arq/src/main/java/riotcmd/CmdLangParse.java +++ b/jena-arq/src/main/java/riotcmd/CmdLangParse.java @@ -18,9 +18,15 @@ package riotcmd; +import java.io.IOException ; import java.io.InputStream ; import java.io.OutputStream ; +import java.util.zip.GZIPOutputStream ; +import arq.cmdline.ModLangOutput ; +import arq.cmdline.ModLangParse ; +import arq.cmdline.ModSymbol ; +import arq.cmdline.ModTime ; import jena.cmd.ArgDecl ; import jena.cmd.CmdException; import jena.cmd.CmdGeneral ; @@ -42,7 +48,6 @@ import org.apache.jena.riot.tokens.Tokenizer ; import org.apache.jena.riot.tokens.TokenizerFactory ; import org.apache.jena.sparql.core.DatasetGraph ; import org.apache.jena.sparql.core.DatasetGraphFactory ; -import arq.cmdline.* ; /** Common framework for running RIOT parsers */ public abstract class CmdLangParse extends CmdGeneral @@ -62,22 +67,19 @@ public abstract class CmdLangParse extends CmdGeneral String getRateName() ; } - static LangHandler langHandlerQuads = new LangHandler() - { + static LangHandler langHandlerQuads = new LangHandler() { @Override public String getItemsName() { return "quads" ; } @Override public String getRateName() { return "QPS" ; } } ; - static LangHandler langHandlerTriples = new LangHandler() - { + static LangHandler langHandlerTriples = new LangHandler() { @Override public String getItemsName() { return "triples" ; } @Override public String getRateName() { return "TPS" ; } } ; - static LangHandler langHandlerAny = new LangHandler() - { + static LangHandler langHandlerAny = new LangHandler() { @Override public String getItemsName() { return "tuples" ; } @Override @@ -104,10 +106,8 @@ public abstract class CmdLangParse extends CmdGeneral } @Override - protected String getSummary() - { - //return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--skip | --stopOnError] file ..." ; - return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--out=FORMAT] file ..." ; + protected String getSummary() { + return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--out=FORMAT] [--compress] file ..." ; } protected long totalMillis = 0 ; @@ -115,7 +115,6 @@ public abstract class CmdLangParse extends CmdGeneral OutputStream output = System.out ; StreamRDF outputStream = null ; - @Override protected void processModulesAndArgs() { @@ -125,14 +124,18 @@ public abstract class CmdLangParse extends CmdGeneral protected interface PostParseHandler { void postParse(); } @Override - protected void exec() - { + protected void exec() { if ( modLangParse.strictMode() ) RIOT.setStrictMode(true) ; if ( modLangParse.getRDFSVocab() != null ) setup = new InferenceSetupRDFS(modLangParse.getRDFSVocab()) ; + if ( modLangOutput.compressedOutput() ) { + try { output = new GZIPOutputStream(output, true) ; } + catch (IOException e) { IO.exception(e);} + } + outputStream = null ; PostParseHandler postParse = null ; @@ -145,20 +148,23 @@ public abstract class CmdLangParse extends CmdGeneral try { if ( super.getPositional().isEmpty() ) - parseFile("-") ; - else - { - boolean b = super.getPositional().size() > 1 ; - for ( String fn : super.getPositional() ) - { - if ( b && ! super.isQuiet() ) - SysRIOT.getLogger().info("File: "+fn) ; - parseFile(fn) ; + parseFile("-"); + else { + boolean b = super.getPositional().size() > 1; + for ( String fn : super.getPositional() ) { + if ( b && !super.isQuiet() ) + SysRIOT.getLogger().info("File: " + fn); + parseFile(fn); } } + if ( postParse != null ) + postParse.postParse(); } finally { + if ( output != System.out ) + IO.close(output) ; + else + IO.flush(output); System.err.flush() ; - System.out.flush() ; if ( super.getPositional().size() > 1 && modTime.timingEnabled() ) output("Total", totalTuples, totalMillis, langHandlerOverall) ; } @@ -167,8 +173,7 @@ public abstract class CmdLangParse extends CmdGeneral postParse.postParse() ; } - public void parseFile(String filename) - { + public void parseFile(String filename) { TypedInputStream in = null ; if ( filename.equals("-") ) { in = new TypedInputStream(System.in) ; @@ -182,11 +187,11 @@ public abstract class CmdLangParse extends CmdGeneral } parseFile(null, filename, in) ; IO.close(in) ; + } } - public void parseFile(String defaultBaseURI, String filename, TypedInputStream in) - { + public void parseFile(String defaultBaseURI, String filename, TypedInputStream in) { String baseURI = modLangParse.getBaseIRI() ; if ( baseURI == null ) baseURI = defaultBaseURI ; @@ -195,8 +200,7 @@ public abstract class CmdLangParse extends CmdGeneral protected abstract Lang selectLang(String filename, ContentType contentType, Lang dftLang ) ; - protected void parseRIOT(String baseURI, String filename, TypedInputStream in) - { + protected void parseRIOT(String baseURI, String filename, TypedInputStream in) { ContentType ct = in.getMediaType() ; baseURI = SysRIOT.chooseBaseIRI(baseURI, filename) ; @@ -293,7 +297,6 @@ public abstract class CmdLangParse extends CmdGeneral totalTuples += n ; } - /** Create a streaming output sink if possible */ protected StreamRDF createStreamSink() { if ( modLangParse.toBitBucket() ) @@ -302,7 +305,8 @@ public abstract class CmdLangParse extends CmdGeneral RDFFormat fmt = modLangOutput.getOutputStreamFormat() ; if ( fmt == null ) return null ; - return StreamRDFWriter.getWriterStream(System.out, fmt) ; + /** Create an accumulating output stream for later pretty printing */ + return StreamRDFWriter.getWriterStream(output, fmt) ; } /** Create an accumulating output stream for later pretty printing */ @@ -316,7 +320,7 @@ public abstract class CmdLangParse extends CmdGeneral // Try as dataset, then as graph. WriterDatasetRIOTFactory w = RDFWriterRegistry.getWriterDatasetFactory(fmt) ; if ( w != null ) { - RDFDataMgr.write(System.out, dsg, fmt) ; + RDFDataMgr.write(output, dsg.getDefaultGraph(), fmt) ; return ; } WriterGraphRIOTFactory wg = RDFWriterRegistry.getWriterGraphFactory(fmt) ; @@ -330,14 +334,12 @@ public abstract class CmdLangParse extends CmdGeneral return Pair.create(sink, handler) ; } - protected Tokenizer makeTokenizer(InputStream in) - { + protected Tokenizer makeTokenizer(InputStream in) { Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ; return tokenizer ; } - protected void output(String label, long numberTriples, long timeMillis, LangHandler handler) - { + protected void output(String label, long numberTriples, long timeMillis, LangHandler handler) { double timeSec = timeMillis/1000.0 ; System.out.flush() ; @@ -349,8 +351,7 @@ public abstract class CmdLangParse extends CmdGeneral handler.getRateName()) ; } - protected void output(String label) - { + protected void output(String label) { System.err.printf("%s : \n", label) ; } }
