http://www.mediawiki.org/wiki/Special:Code/MediaWiki/73651
Revision: 73651
Author: daniel
Date: 2010-09-24 10:18:07 +0000 (Fri, 24 Sep 2010)
Log Message:
-----------
DumpTable app
Modified Paths:
--------------
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
Added Paths:
-----------
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamOutputApp.java
Added:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java
(rev 0)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java
2010-09-24 10:18:07 UTC (rev 73651)
@@ -0,0 +1,40 @@
+package de.brightbyte.wikiword.dump;
+
+import de.brightbyte.util.PersistenceException;
+
+/**
+ * Command line application that dumps the rows of a single database table.
+ * <p>
+ * The first positional parameter (at the offset passed to {@link #open(int)})
+ * names the table; the following parameter is handed to
+ * {@code openSink} to select the output destination. The columns to dump can
+ * be restricted with the {@code fields} option.
+ */
+public class DumpTable extends SQLDumperApp {
+
+    /** Logical name of the table to dump; set from the command line in {@link #open(int)}. */
+    protected String table;
+
+    /** Creates the app, allowing both global and local datasets. */
+    public DumpTable() {
+        super(true, true);
+    }
+
+    /**
+     * Declares the {@code fields} option in addition to the inherited options.
+     */
+    @Override
+    protected void declareOptions() {
+        super.declareOptions();
+
+        // "fields" carries a value that is read back with getOption(), so it has to be
+        // declared as a String-valued option (like "output-format"), not as a Boolean flag.
+        args.declare("fields", null, true, String.class, "Database fields to dump, as a comma-separated list. Supports SQL syntax, like \"AS\".");
+    }
+
+    /**
+     * Builds the SELECT statement for the configured table.
+     *
+     * @return a query selecting the requested fields (default {@code *}) from
+     *         the SQL table name resolved via {@code conceptStoreDB}
+     */
+    @Override
+    protected String getQuerySQL() {
+        //TODO: split, sanitize and quote to avoid injection!
+        // NOTE(review): the option value is concatenated into the query verbatim.
+        String fields = args.getOption("fields", "*");
+
+        String t = conceptStoreDB.getSQLTableName(table, true);
+        return "SELECT " + fields + " FROM " + t;
+    }
+
+    /**
+     * Reads the table name from the parameter at {@code paramOffset} and opens
+     * the output sink using the parameter that follows it.
+     *
+     * @param paramOffset index of the positional parameter holding the table name
+     * @throws PersistenceException if the sink cannot be opened
+     */
+    @Override
+    protected void open(int paramOffset) throws PersistenceException {
+        this.table = args.getParameter(paramOffset);
+
+        sink = openSink(paramOffset + 1);
+    }
+
+
+    /**
+     * Program entry point.
+     *
+     * @param argv command line arguments, passed to {@code launch}
+     * @throws Exception any failure raised while launching or running the app
+     */
+    public static void main(String[] argv) throws Exception {
+        DumpTable app = new DumpTable();
+        app.launch(argv);
+    }
+}
Property changes on:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java
___________________________________________________________________
Added: svn:mergeinfo
+
Added:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java
(rev 0)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java
2010-09-24 10:18:07 UTC (rev 73651)
@@ -0,0 +1,112 @@
+package de.brightbyte.wikiword.dump;
+
+import java.io.IOException;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+
+import de.brightbyte.data.cursor.DataSink;
+import de.brightbyte.data.cursor.JoiningSink;
+import de.brightbyte.db.QueryDumper;
+import de.brightbyte.io.LineSink;
+import de.brightbyte.job.ChunkedProgressRateTracker;
+import de.brightbyte.text.CsvLineJoiner;
+import de.brightbyte.text.Joiner;
+import de.brightbyte.util.PersistenceException;
+import de.brightbyte.wikiword.extract.StreamOutputApp;
+import de.brightbyte.wikiword.schema.GlobalConceptStoreSchema;
+import de.brightbyte.wikiword.schema.LocalConceptStoreSchema;
+import de.brightbyte.wikiword.schema.WikiWordConceptStoreSchema;
+import de.brightbyte.wikiword.store.DatabaseConceptStores;
+import de.brightbyte.wikiword.store.DatabaseWikiWordStore;
+import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
+
+/**
+ * Base class for command line applications that run one SQL query against the
+ * concept store database and write the result rows to the output sink.
+ * <p>
+ * Subclasses supply the query via {@link #getQuerySQL()} and may adjust the
+ * dumper in {@link #configureDumper(QueryDumper)}. The output is joined into
+ * lines in either csv or tsv format, selected by the {@code output-format}
+ * option.
+ */
+public abstract class SQLDumperApp extends StreamOutputApp<List<String>, WikiWordConceptStoreBase> {
+
+    /** Progress listener attached to the dumper in {@link #run()}. */
+    protected ChunkedProgressRateTracker dumpTracker;
+
+    /** Database schema used to execute the query; initialized by {@link #createStores()}. */
+    protected WikiWordConceptStoreSchema conceptStoreDB;
+
+    /** Line joiner for the chosen output format; created lazily in {@link #openSink(int)}. */
+    private Joiner joiner;
+
+    /**
+     * @param allowGlobal passed through to the superclass constructor
+     * @param allowLocal passed through to the superclass constructor
+     */
+    public SQLDumperApp(boolean allowGlobal, boolean allowLocal) {
+        super(allowGlobal, allowLocal);
+
+        //TODO: init later, get values from tweaks
+        dumpTracker = new ChunkedProgressRateTracker("dumping", 10000, 10);
+        dumpTracker.setLogOutput(out);
+    }
+
+    /**
+     * Declares the {@code no-output-header} flag and the {@code output-format}
+     * option in addition to the inherited options.
+     */
+    @Override
+    protected void declareOptions() {
+        super.declareOptions();
+
+        args.declare("no-output-header", null, false, Boolean.class, "The first line of the output file will not be a column header");
+        args.declare("output-format", null, true, String.class, "Format of the output file. May be csv or tsv, default is csv.");
+    }
+
+    /**
+     * Creates and registers the concept store and determines the schema object
+     * used for querying: the store's own database access if the store is a
+     * {@link DatabaseWikiWordStore}, otherwise a freshly created local or
+     * global schema depending on {@code isDatasetLocal()}.
+     *
+     * @throws IOException propagated from store creation
+     * @throws PersistenceException if store creation fails, or wrapping an
+     *         {@link SQLException} raised while creating the schema
+     */
+    @Override
+    protected void createStores() throws IOException, PersistenceException {
+        conceptStore = DatabaseConceptStores.createConceptStore(getConfiguredDataSource(), getConfiguredDataset(), tweaks, true, true);
+
+        registerStore(conceptStore);
+
+        if (conceptStore instanceof DatabaseWikiWordStore) {
+            conceptStoreDB = (WikiWordConceptStoreSchema)((DatabaseWikiWordStore)conceptStore).getDatabaseAccess();
+        } else {
+            try {
+                if ( isDatasetLocal() ) conceptStoreDB = new LocalConceptStoreSchema(getCorpus(), getConfiguredDataSource(), this.tweaks, false);
+                else conceptStoreDB = new GlobalConceptStoreSchema(getConfiguredDataset(), getConfiguredDataSource(), this.tweaks, false);
+            } catch (SQLException e) {
+                throw new PersistenceException(e);
+            }
+        }
+    }
+
+    /**
+     * Executes the query from {@link #getQuerySQL()} and dumps its result to
+     * the sink, preceded by a header line unless {@code no-output-header} is set.
+     *
+     * @throws Exception any failure while querying or dumping
+     */
+    @Override
+    public void run() throws Exception {
+        boolean outputHasHeader = !args.isSet("no-output-header");
+
+        String sql = getQuerySQL();
+
+        info("Running query...");
+        ResultSet rs = conceptStoreDB.executeBigQuery("dumpList", sql);
+
+        int c;
+        try {
+            QueryDumper dumper = new QueryDumper(sink, (String[])null);
+            dumper.addProgressListener(dumpTracker);
+            configureDumper(dumper);
+
+            info("dumping rows...");
+
+            if (outputHasHeader) dumper.dumpHeader(rs);
+            c = dumper.dumpRows(rs);
+        } finally {
+            // release the result set even if dumping fails part way through
+            rs.close();
+        }
+
+        info("complete, dumped "+c+" rows.");
+    }
+
+    /**
+     * @return the SQL query whose result is to be dumped
+     */
+    protected abstract String getQuerySQL();
+
+    /**
+     * Hook for subclasses to customize the dumper before rows are written.
+     * The default implementation does nothing.
+     *
+     * @param dumper the dumper about to be used by {@link #run()}
+     */
+    protected void configureDumper(QueryDumper dumper) {
+        // NOOP
+    }
+
+    /**
+     * Opens a sink that joins each record into one line, in the format given
+     * by the {@code output-format} option (case-insensitive, default csv), and
+     * writes it to the output writer for {@code paramOffset}.
+     *
+     * @param paramOffset index of the positional parameter naming the output
+     * @return the sink to write records to
+     * @throws IllegalArgumentException if the format is neither csv nor tsv
+     * @throws PersistenceException wrapping any {@link IOException} raised
+     *         while opening the output writer
+     */
+    @Override
+    protected DataSink<? super List<String>> openSink(int paramOffset) throws PersistenceException {
+        if (joiner==null) {
+            String format = args.getOption("output-format", "csv").toLowerCase();
+
+            if (format.equals("csv")) joiner = new CsvLineJoiner(",", null, '"', false);
+            else if (format.equals("tsv")) joiner = new CsvLineJoiner("\t", null, '\0', true);
+            else throw new IllegalArgumentException("bad output format: "+format);
+        }
+
+        try {
+            return new JoiningSink(new LineSink(getOutputWriter(paramOffset)), joiner);
+        } catch (IOException e) {
+            // keep the underlying I/O failure as the cause instead of discarding it
+            throw new PersistenceException(e);
+        }
+    }
+
+}
Property changes on:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java
___________________________________________________________________
Added: svn:mergeinfo
+
Added:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamOutputApp.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamOutputApp.java
(rev 0)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamOutputApp.java
2010-09-24 10:18:07 UTC (rev 73651)
@@ -0,0 +1,108 @@
+package de.brightbyte.wikiword.extract;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import de.brightbyte.data.cursor.DataSink;
+import de.brightbyte.io.ConsoleIO;
+import de.brightbyte.util.PersistenceException;
+import de.brightbyte.wikiword.StoreBackedApp;
+import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
+
+/**
+ * Base class for store-backed applications that write records to an output
+ * sink backed by a file or by standard output.
+ *
+ * @param <O> type of the records committed to the sink
+ * @param <S> type of the concept store the application works on
+ */
+public abstract class StreamOutputApp<O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> {
+
+    /** Sink that receives the output records; assigned in {@link #open(int)}. */
+    protected DataSink<? super O> sink;
+
+    /** Whether output goes to standard output rather than a file. */
+    protected boolean usingStdout;
+
+    /** Output file resolved from the command line; stays {@code null} when no file is used. */
+    protected File outputFile;
+    /** Lazily created writer for the output. */
+    protected Writer outputWriter;
+    /** Lazily created byte stream for the output. */
+    protected OutputStream outputStream;
+
+    /**
+     * @param allowGlobal passed through to the superclass constructor
+     * @param allowLocal passed through to the superclass constructor
+     */
+    public StreamOutputApp(boolean allowGlobal, boolean allowLocal) {
+        super(allowGlobal, allowLocal);
+    }
+
+
+    /**
+     * Resolves the output file from the positional parameter at
+     * {@code paramIndex}. Once a file has been resolved it is reused on
+     * later calls.
+     *
+     * @param paramIndex index of the positional parameter naming the file
+     * @return the output file, or {@code null} if the parameter is absent or
+     *         is {@code "-"}
+     */
+    protected File getOutputFile(int paramIndex) {
+        if (outputFile==null) {
+            if (args.getParameterCount()>paramIndex) {
+                String f = args.getParameter(paramIndex);
+                if (!f.equals("-")) outputFile = new File(f);
+            }
+        }
+        return outputFile;
+    }
+
+    /**
+     * @return the value of the {@code output-encoding} option, or
+     *         {@code "UTF-8"} if it is not given
+     */
+    protected String getOutputFileEncoding() {
+        return args.getStringOption("output-encoding", "UTF-8");
+    }
+
+    /**
+     * Declares the {@code output-encoding} option in addition to the
+     * inherited options.
+     */
+    @Override
+    protected void declareOptions() {
+        super.declareOptions();
+
+        args.declare("output-encoding", null, true, String.class, "Encoding to use for the output file");
+    }
+
+    /**
+     * Returns the writer for the output, creating it on first use: the
+     * console writer when no output file is configured, otherwise a writer
+     * over {@link #getOutputStream(int)} using {@link #getOutputFileEncoding()}.
+     * <p>
+     * When writing to standard output while log output still targets
+     * {@code ConsoleIO.output}, log output is switched to
+     * {@code ConsoleIO.errorOutput} (presumably so log messages do not mix
+     * with the data).
+     *
+     * @param paramIndex index of the positional parameter naming the output file
+     * @return the output writer
+     * @throws FileNotFoundException if the output file cannot be opened
+     * @throws UnsupportedEncodingException if the configured encoding is not supported
+     */
+    protected Writer getOutputWriter(int paramIndex) throws FileNotFoundException, UnsupportedEncodingException {
+        if (outputWriter==null) {
+            File f = getOutputFile(paramIndex);
+            if (f==null) {
+                outputWriter = ConsoleIO.writer;
+                usingStdout = true;
+            } else {
+                // named "stream" so it does not shadow the log output field "out" used below
+                OutputStream stream = getOutputStream(paramIndex);
+                outputWriter = new OutputStreamWriter(stream, getOutputFileEncoding());
+                usingStdout = stream == System.out;
+            }
+        }
+
+        if (usingStdout && out.getOutput() == ConsoleIO.output) {
+            out.setOutput(ConsoleIO.errorOutput);
+        }
+
+        return outputWriter;
+    }
+
+    /**
+     * Returns the byte stream for the output, creating it on first use:
+     * {@code System.out} when no output file is configured, otherwise a
+     * buffered stream over the file, opened in append mode if the
+     * {@code append} option is set.
+     *
+     * @param paramIndex index of the positional parameter naming the output file
+     * @return the output stream
+     * @throws FileNotFoundException if the output file cannot be opened
+     */
+    protected OutputStream getOutputStream(int paramIndex) throws FileNotFoundException {
+        if (outputStream==null) {
+            File f = getOutputFile(paramIndex);
+            if (f==null) {
+                outputStream = System.out;
+                usingStdout = true;
+            } else {
+                outputStream = new BufferedOutputStream(new FileOutputStream(f, args.isSet("append")));
+                usingStdout = false;
+                info("Writing output to "+f);
+            }
+        }
+
+        return outputStream;
+    }
+
+
+    /**
+     * Opens the output sink.
+     *
+     * @param paramOffset index of the positional parameter naming the output
+     * @throws PersistenceException if the sink cannot be opened
+     */
+    protected void open(int paramOffset) throws PersistenceException {
+        sink = openSink(paramOffset);
+    }
+
+    /**
+     * Creates the sink that output records are committed to.
+     *
+     * @param paramIndex index of the positional parameter naming the output
+     * @return the sink to use
+     * @throws PersistenceException if the sink cannot be created
+     */
+    protected abstract DataSink<? super O> openSink(int paramIndex) throws PersistenceException;
+
+    /**
+     * Initialization hook; does nothing by default.
+     *
+     * @throws Exception declared so that overriding implementations may fail
+     */
+    protected void init() throws Exception {
+        // noop
+    }
+
+    /**
+     * Closes the sink, if one has been opened.
+     *
+     * @throws PersistenceException if closing the sink fails
+     */
+    protected void close() throws PersistenceException {
+        // the sink is still null if open() was never reached or failed
+        if (sink != null) sink.close();
+    }
+
+    /**
+     * Writes one record to the sink.
+     *
+     * @param rec the record to write
+     * @throws PersistenceException if the sink rejects the record
+     */
+    protected void commit(O rec) throws PersistenceException {
+        sink.commit(rec);
+    }
+
+}
Modified:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
===================================================================
---
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
2010-09-24 00:21:57 UTC (rev 73650)
+++
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
2010-09-24 10:18:07 UTC (rev 73651)
@@ -1,51 +1,27 @@
package de.brightbyte.wikiword.extract;
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-import java.io.Writer;
import de.brightbyte.data.cursor.DataCursor;
-import de.brightbyte.data.cursor.DataSink;
import de.brightbyte.io.ConsoleIO;
import de.brightbyte.util.PersistenceException;
-import de.brightbyte.wikiword.StoreBackedApp;
import de.brightbyte.wikiword.builder.InputFileHelper;
import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
-public abstract class StreamProcessorApp<I, O, S extends
WikiWordConceptStoreBase> extends StoreBackedApp<S> {
+public abstract class StreamProcessorApp<I, O, S extends
WikiWordConceptStoreBase> extends StreamOutputApp<O, S> {
protected DataCursor<? extends I> cursor;
- protected DataSink<? super O> sink;
protected boolean usingStdin;
- protected boolean usingStdout;
-
protected InputFileHelper inputHelper;
public StreamProcessorApp(boolean allowGlobal, boolean allowLocal) {
super(allowGlobal, allowLocal);
}
-
- protected File getOutputFile(int paramIndex) {
- if (outputFile==null) {
- if (args.getParameterCount()>paramIndex) {
- String f = args.getParameter(paramIndex);
- if (!f.equals("-")) outputFile = new File(f);
- }
- }
- return outputFile;
- }
-
protected String getInputPath(int paramIndex) {
if (inputPath==null) {
if (args.getParameterCount()>paramIndex) {
@@ -55,59 +31,11 @@
return inputPath;
}
- protected String getOutputFileEncoding() {
- return args.getStringOption("output-encoding", "UTF-8");
- }
-
- protected void declareOptions() {
- super.declareOptions();
-
- args.declare("output-encoding", null, true, String.class,
"Encoding to use for the poutput file");
- }
-
protected String inputPath;
- protected File outputFile;
- protected Writer outputWriter;
- protected OutputStream outputStream;
private InputStream inputStream;
private Reader inputReader;
- protected Writer getOutputWriter(int paramIndex) throws
FileNotFoundException, UnsupportedEncodingException {
- if (outputWriter==null) {
- File f = getOutputFile(paramIndex);
- if (f==null) {
- outputWriter = ConsoleIO.writer;
- usingStdout = true;
- } else {
- OutputStream out = getOutputStream(paramIndex);
- outputWriter = new OutputStreamWriter(out,
getOutputFileEncoding());
- usingStdout = out == System.out;
- }
- }
-
- if (usingStdout && out.getOutput() == ConsoleIO.output) {
- out.setOutput(ConsoleIO.errorOutput);
- }
-
- return outputWriter;
- }
- protected OutputStream getOutputStream(int paramIndex) throws
FileNotFoundException {
- if (outputStream==null) {
- File f = getOutputFile(paramIndex);
- if (f==null) {
- outputStream = System.out;
- usingStdout = true;
- } else {
- outputStream = new BufferedOutputStream(new
FileOutputStream(f, args.isSet("append")));
- usingStdout = false;
- info("Writing output to "+f);
- }
- }
-
- return outputStream;
- }
-
protected Reader getInputReader(int paramIndex) throws IOException {
if (inputReader==null) {
String path = getInputPath(paramIndex);
@@ -157,15 +85,7 @@
}
protected abstract DataCursor<? extends I> openCursor(int paramIndex)
throws PersistenceException;
- protected abstract DataSink<? super O> openSink(int paramIndex) throws
PersistenceException;
- protected void init() throws Exception {
- // noop
- }
- protected void close() throws PersistenceException {
- sink.close();
- }
-
public void runTransfer(DataCursor<? extends I> cursor) throws
Exception {
I rec;
while ((rec = cursor.next()) != null) {
@@ -174,10 +94,6 @@
}
}
- protected void commit(O rec) throws PersistenceException {
- sink.commit(rec);
- }
-
protected abstract void process(I rec) throws Exception;
}
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs