http://www.mediawiki.org/wiki/Special:Code/MediaWiki/73793
Revision: 73793
Author: daniel
Date: 2010-09-26 21:36:46 +0000 (Sun, 26 Sep 2010)
Log Message:
-----------
DumpThesaurus
Added Paths:
-----------
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpThesaurus.java
Added:
trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpThesaurus.java
===================================================================
--- trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpThesaurus.java (rev 0)
+++ trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpThesaurus.java 2010-09-26 21:36:46 UTC (rev 73793)
@@ -0,0 +1,182 @@
+package de.brightbyte.wikiword.dump;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import de.brightbyte.db.DatabaseConnectionInfo;
+import de.brightbyte.io.IOUtil;
+import de.brightbyte.util.PersistenceException;
+import de.brightbyte.util.StringUtils;
+import de.brightbyte.util.SystemUtils;
+import de.brightbyte.wikiword.CliApp;
+import de.brightbyte.wikiword.DatasetIdentifier;
+
+public class DumpThesaurus extends CliApp {
+
+ @Override
+ protected void declareOptions() {
+ super.declareOptions();
+
+ declareOption("outdir", "O", true, String.class, "the directory
to write the output to. Defaults to the current dir");
+ }
+
+ private List<String> getThesaurusFields(String t, boolean local) {
+ return Arrays.asList( new String[] { "*" } );
+ }
+
+ private List<String> getThesaurusTables(boolean local) {
+ ArrayList<String> tables = new ArrayList<String>();
+
+ tables.add( "about" );
+ tables.add( "concept" );
+ tables.add( "broader" );
+ tables.add( "relation" );
+ tables.add( "meaning" );
+
+ tables.add( "property" );
+ tables.add( "feature" );
+ tables.add( "degree" );
+
+ if ( local ) {
+ tables.add( "resource" );
+ } else {
+ tables.add( "origin" );
+ }
+
+ return tables;
+ }
+
+ @Override
+ protected void run() throws Exception {
+ DatabaseConnectionInfo dbSpec =
(DatabaseConnectionInfo)getConfiguredDataSource() ;
+ DatasetIdentifier dataset = getConfiguredDataset();
+
+ File dir = new File( args.getStringOption("outdir", ".") );
+
+ if ( !dir.exists() ) throw new IOException("not found: "+dir);
+ if ( !dir.isDirectory() ) throw new IOException("not a
directory: "+dir);
+
+ List<String> tables;
+ if ( args.getParameterCount() > 1 ) {
+ tables = args.getParameters().subList(1,
args.getParameters().size());
+ } else {
+ tables = getThesaurusTables( isDatasetLocal() );
+ }
+
+ for (String table: tables) {
+ List<String> fields = getThesaurusFields(
table, isDatasetLocal() );
+
+ table = dataset.getDbPrefix() + table;
+ dumpTable( dbSpec, table, fields, dir);
+ }
+
+ }
+
+ protected File dumpTable(DatabaseConnectionInfo dbCredentials, String
table, List<String> fields, File outFile) throws IOException,
PersistenceException {
+ String dbName = dbCredentials.getDatabaseName();
+ if ( dbName == null ) throw new PersistenceException( "no
database name configured" );
+
+ if (outFile==null) outFile =
File.createTempFile(dbName+"-"+table, ".csv");
+ else if (outFile.isDirectory()) outFile = new File(outFile,
dbName+"-"+table+"-"+SystemUtils.timestamp()+".csv");
+
+ info("Dumping table "+table+" to "+outFile+"...");
+
+ List<String> cmd = getDumpCommand( dbCredentials, table,
fields, outFile );
+ String input = getDumpCommandInput( dbCredentials, table,
fields, outFile );
+
+ runCommand(cmd, input);
+
+ //info("Table "+table+" dumped to "+outFile);
+ return outFile;
+ }
+
+ protected void runCommand(List<String> cmd, String input) throws
IOException, PersistenceException {
+ String enc = tweaks.getTweak("console.encoding", "UTF-8");
+
+ ProcessBuilder pb = new ProcessBuilder(cmd);
+
+ debug("Running Command: "+pb.command().toString());
+ Process p = pb.start();
+
+ if (input!=null) {
+ OutputStream toProcess = p.getOutputStream();
+ toProcess.write( input.getBytes(enc) );
+ toProcess.flush();
+ }
+
+ IOUtil.pump(p.getInputStream(), System.out);
+ String error = IOUtil.slurp(p.getErrorStream(), enc); //just
hope the buffer is big enough...
+
+ try {
+ p.waitFor();
+ } catch (InterruptedException e) {
+ throw (InterruptedIOException)new
InterruptedIOException(e.getMessage()).initCause(e);
+ }
+
+ if ( p.exitValue() > 0 ) {
+ throw new IOException( "dump command returned error
code #"+ p.exitValue() + ". Output: " + error );
+ }
+ }
+
+ private String getDumpCommandInput(DatabaseConnectionInfo
dbCredentials, String table, List<String> fields, File tmp) {
+ return dbCredentials.getPassword() + "\n";
+ }
+
+ private List<String> getDumpCommand(DatabaseConnectionInfo
dbCredentials, String table, List<String> fields, File tmp) throws
PersistenceException {
+ String dbName = dbCredentials.getDatabaseName();
+ if ( dbName == null ) throw new PersistenceException( "no
database name configured" );
+
+ List<String> shell = getShellCommand();
+ String sql = getDumpSQL( dbName, table, fields );
+
+ StringBuilder command = new StringBuilder(
tweaks.getTweak("mysql.command", "mysql") );
+
+ command.append( " " ).append( shellQuote( dbName ) );
+ command.append( " -u " ).append( shellQuote(
dbCredentials.getUser() ) );
+ command.append( " -p " );
+ command.append( " -e " ).append( shellQuote( sql ) );
+ command.append( " > " ).append( shellQuote(
tmp.getAbsolutePath() ) );
+
+ List<String> cmd = new ArrayList<String>( shell );
+ cmd.add( command.toString() );
+ return cmd;
+ }
+
+ private List<String> getShellCommand() {
+ String s = tweaks.getTweak("console.shell", null);
+ if ( s==null ) {
+ if ( SystemUtils.isWindows() ) s = "cmd.exe /c";
+ else s = "/bin/sh -c";
+ }
+
+ String[] ss = s.split(" ");
+ List<String> shell = Arrays.asList( ss );
+ return shell;
+ }
+
+ private String getDumpSQL(String dbName, String table, List<String>
fields) {
+ String f = StringUtils.join(", ", fields);
+ String sql = "SELECT " + f + " FROM " + table;
+ return sql;
+ }
+
+ private static Pattern shellQuotePattern =
Pattern.compile("([\"!\\\\])");
+
+ protected static String shellQuote(String s) {
+ s = shellQuotePattern.matcher(s).replaceAll("\\\\\\1");
+ return "\"" + s + "\"";
+ }
+
+ public static void main(String[] argv) throws Exception {
+ DumpThesaurus app = new DumpThesaurus();
+ app.launch(argv);
+ }
+
+}
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs