Author: reschke
Date: Fri Nov 20 13:47:39 2015
New Revision: 1715361
URL: http://svn.apache.org/viewvc?rev=1715361&view=rev
Log:
OAK-3652: RDB support: extend RDB export tool for CSV export (ported to 1.2)
Modified:
jackrabbit/oak/branches/1.2/ (props changed)
jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java
Propchange: jackrabbit/oak/branches/1.2/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Nov 20 13:47:39 2015
@@ -1,3 +1,3 @@
/jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673713,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678124,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680172,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1681921,1681955,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618
,1684820,1684868,1685023,1685075,1685370,1685552,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688634,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689831,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690249,1690634-1690637,1690650,1690669,1690674,1690885,1690941,1691139,1691151,1691159,1691167,1691183,1691188,1691210,1691280,1691307,1691331-1691333,1691345,1691384-1691385,1691401,1691509,1692133-1692134,1692156,1692250,1692274,1692363,1692382,1692478,1692955,1693002,1693030,1693050,1693209,1693421,1693525-1693526,1694007,1694393-1694394,1694651,1694653-1694654,1695032,1695050,1695122,1695280,1695299,1695420,1695457,1695482,1695492,1695507,1695521,1695540,1695905,1696
190,1696194,1696242,1696285,1696375,1696522,1696578,1696759,1696916,1697363,1697373,1697410,1697582,1697589,1697616,1697672,1700191,1700231,1700397,1700403,1700506,1700571,1700718,1700727,1700749,1700769,1700775,1701065,1701619,1701733,1701743,1701750,1701768,1701806,1701810,1701814,1701948,1701955,1701959,1701965,1701986,1702014,1702022,1702045,1702051,1702241,1702272,1702387,1702405,1702423,1702860,1702942,1702960,1703212,1703382,1703395,1703411,1703428,1703430,1703568,1703592,1703758,1703858,1703878,1704256,1704282,1704285,1704457,1704479,1704490,1704614,1704629,1704636,1704655,1704670,1704886,1705005,1705027,1705043,1705055,1705250,1705268,1705273,1705323,1705677,1705701,1705871,1705992,1705998,1706009,1706037,1706059,1706212,1706218,1706270,1706764,1706772,1707049,1707191,1707435,1708105,1708315,1708546,1708592,1708766,1709012,1709852,1709978,1710013,1710031,1710049,1710205,1710242,1710559,1710575,1710590,1710614,1710637,1710789,1710811,1710816,1710972,1711248,1711282,1711296,1
711498,1712042,1712319,1712490,1712531,1712730,1712785,1712963,1713008,1713439,1713461,1713580,1713586,1713599-1713600,1713626,1713698,1713803,1713809,1714034,1714061,1714084,1714170,1714213,1714229,1714238,1714519-1714520,1714543-1714544,1714730,1714739,1714779,1714956,1714961,1715191
+/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673713,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678124,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680172,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1681921,1681955,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618
,1684820,1684868,1685023,1685075,1685370,1685552,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688634,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689831,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690249,1690634-1690637,1690650,1690669,1690674,1690885,1690941,1691139,1691151,1691159,1691167,1691183,1691188,1691210,1691280,1691307,1691331-1691333,1691345,1691384-1691385,1691401,1691509,1692133-1692134,1692156,1692250,1692274,1692363,1692382,1692478,1692955,1693002,1693030,1693050,1693209,1693421,1693525-1693526,1694007,1694393-1694394,1694651,1694653-1694654,1695032,1695050,1695122,1695280,1695299,1695420,1695457,1695482,1695492,1695507,1695521,1695540,1695905,1696
190,1696194,1696242,1696285,1696375,1696522,1696578,1696759,1696916,1697363,1697373,1697410,1697582,1697589,1697616,1697672,1700191,1700231,1700397,1700403,1700506,1700571,1700718,1700727,1700749,1700769,1700775,1701065,1701619,1701733,1701743,1701750,1701768,1701806,1701810,1701814,1701948,1701955,1701959,1701965,1701986,1702014,1702022,1702045,1702051,1702241,1702272,1702387,1702405,1702423,1702860,1702942,1702960,1703212,1703382,1703395,1703411,1703428,1703430,1703568,1703592,1703758,1703858,1703878,1704256,1704282,1704285,1704457,1704479,1704490,1704614,1704629,1704636,1704655,1704670,1704886,1705005,1705027,1705043,1705055,1705250,1705268,1705273,1705323,1705677,1705701,1705871,1705992,1705998,1706009,1706037,1706059,1706212,1706218,1706270,1706764,1706772,1707049,1707191,1707435,1708105,1708315,1708546,1708592,1708766,1709012,1709852,1709978,1710013,1710031,1710049,1710205,1710242,1710559,1710575,1710590,1710614,1710637,1710789,1710811,1710816,1710972,1711248,1711282,1711296,1
711498,1712042,1712319,1712490,1712531,1712730,1712785,1712963,1713008,1713439,1713461,1713580,1713586,1713599-1713600,1713626,1713698,1713803,1713809,1714034,1714061,1714084,1714170,1714213,1714229,1714238,1714519-1714520,1714543-1714544,1714730,1714739,1714779,1714956,1714961,1715010,1715191
/jackrabbit/trunk:1345480
Modified:
jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java?rev=1715361&r1=1715360&r2=1715361&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java (original)
+++ jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java Fri Nov 20 13:47:39 2015
@@ -34,11 +34,21 @@ import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
+import org.apache.jackrabbit.oak.commons.json.JsopReader;
+import org.apache.jackrabbit.oak.commons.json.JsopTokenizer;
import org.apache.jackrabbit.oak.plugins.document.Collection;
import org.apache.jackrabbit.oak.plugins.document.Document;
import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
@@ -53,10 +63,15 @@ public class RDBExport {
private static final Charset UTF8 = Charset.forName("UTF-8");
+ private enum Format {
+ JSON, JSONARRAY, CSV
+ };
+
public static void main(String[] args) throws ClassNotFoundException,
SQLException, IOException {
String url = null, user = null, pw = null, table = "nodes", query =
null, dumpfile = null, lobdir = null;
- boolean asArray = false;
+ List<String> fieldList = Collections.emptyList();
+ Format format = Format.JSON;
PrintStream out = System.out;
Set<String> excl = new HashSet<String>();
excl.add(Document.ID);
@@ -84,7 +99,12 @@ public class RDBExport {
} else if ("--lobdir".equals(param)) {
lobdir = args[++i];
} else if ("--jsonArray".equals(param)) {
- asArray = true;
+ format = Format.JSONARRAY;
+ } else if ("--csv".equals(param)) {
+ format = Format.CSV;
+ } else if ("--fields".equals(param)) {
+ String fields = args[++i];
+ fieldList = Arrays.asList(fields.split(","));
} else if ("--version".equals(param)) {
System.out.println(RDBExport.class.getName() + " version "
+ OakVersion.getVersion());
System.exit(0);
@@ -103,32 +123,45 @@ public class RDBExport {
System.exit(2);
}
- if (dumpfile != null && url != null) {
+ if (format == Format.CSV && fieldList.isEmpty()) {
+ System.err.println(RDBExport.class.getName() + ": csv output
requires specification of field list");
+ System.exit(2);
+ }
+
+ // JSON output with fieldList missing "_id"
+ if ((format == Format.JSON || format == Format.JSONARRAY) &&
!fieldList.isEmpty() && !fieldList.contains("_id")) {
+ fieldList = new ArrayList<String>(fieldList);
+ fieldList.add(0, "_id");
+ }
+
+ if (dumpfile == null && url == null) {
System.err.println(RDBExport.class.getName() + ": must use either
dump file or JDBC URL");
printUsage();
System.exit(2);
} else if (dumpfile != null) {
- dumpFile(dumpfile, lobdir, asArray, out, ser);
+ dumpFile(dumpfile, lobdir, format, out, fieldList, ser);
} else {
- dumpJDBC(url, user, pw, table, query, asArray, out, ser);
+ dumpJDBC(url, user, pw, table, query, format, out, fieldList, ser);
}
out.flush();
out.close();
}
- private static void dumpFile(String filename, String lobdir, boolean
asArray, PrintStream out, RDBDocumentSerializer ser)
- throws IOException {
+ private static void dumpFile(String filename, String lobdir, Format
format, PrintStream out, List<String> fieldNames,
+ RDBDocumentSerializer ser) throws IOException {
File f = new File(filename);
File lobDirectory = lobdir == null ? new File(f.getParentFile(),
"lobdir") : new File(lobdir);
FileInputStream fis = new FileInputStream(f);
InputStreamReader ir = new InputStreamReader(fis, UTF8);
BufferedReader br = new BufferedReader(ir);
- if (asArray) {
+ if (format == Format.JSONARRAY) {
out.println("[");
+ } else if (format == Format.CSV) {
+ out.println(dumpFieldNames(fieldNames));
}
- boolean needComma = asArray;
+ boolean needComma = format == Format.JSONARRAY;
String line = br.readLine();
while (line != null) {
ArrayList<String> fields = parseDel(line);
@@ -164,18 +197,23 @@ public class RDBExport {
smodified.length() == 0 ? 0 :
Long.parseLong(smodified), Long.parseLong(smodcount),
Long.parseLong(scmodcount), sdata, bytes);
StringBuilder fulljson = dumpRow(ser, id, row);
- if (asArray && needComma) {
- fulljson.append(",");
+ if (format == Format.CSV) {
+ out.println(asCSV(fieldNames, fulljson));
+ } else {
+ fulljson = asJSON(fieldNames, fulljson);
+ if (format == Format.JSONARRAY && needComma) {
+ fulljson.append(",");
+ }
+ out.println(fulljson);
+ needComma = true;
}
- out.println(fulljson);
- needComma = true;
} catch (DocumentStoreException ex) {
System.err.println("Error: skipping line for ID " + id + "
because of " + ex.getMessage());
}
line = br.readLine();
}
br.close();
- if (asArray) {
+ if (format == Format.JSONARRAY) {
out.println("]");
}
}
@@ -219,8 +257,8 @@ public class RDBExport {
return result;
}
- private static void dumpJDBC(String url, String user, String pw, String
table, String query, boolean asArray, PrintStream out,
- RDBDocumentSerializer ser) throws SQLException {
+ private static void dumpJDBC(String url, String user, String pw, String
table, String query, Format format, PrintStream out,
+ List<String> fieldNames, RDBDocumentSerializer ser) throws
SQLException {
String driver =
RDBJDBCTools.driverForDBType(RDBJDBCTools.jdbctype(url));
try {
Class.forName(driver);
@@ -230,17 +268,19 @@ public class RDBExport {
Connection c = DriverManager.getConnection(url, user, pw);
c.setReadOnly(true);
Statement stmt = c.createStatement();
- String sql = "select ID, MODIFIED, MODCOUNT, CMODCOUNT, HASBINARY,
DELETEDONCE, DATA, BDATA from " + table;
+ String sql = "select ID, MODIFIED, MODCOUNT, CMODCOUNT, HASBINARY,
DELETEDONCE, DATA, BDATA from " + table;
if (query != null) {
sql += " where " + query;
}
sql += " order by id";
ResultSet rs = stmt.executeQuery(sql);
- if (asArray) {
+ if (format == Format.JSONARRAY) {
out.println("[");
+ } else if (format == Format.CSV) {
+ out.println(dumpFieldNames(fieldNames));
}
- boolean needComma = asArray;
+ boolean needComma = format == Format.JSONARRAY;
ResultSetMetaData rsm = null;
boolean idIsAscii = true;
while (rs.next()) {
@@ -259,13 +299,18 @@ public class RDBExport {
RDBRow row = new RDBRow(id, hasBinary == 1, deletedOnce == 1,
modified, modcount, cmodcount, data, bdata);
StringBuilder fulljson = dumpRow(ser, id, row);
- if (asArray && needComma && !rs.isLast()) {
- fulljson.append(",");
+ if (format == Format.CSV) {
+ out.println(asCSV(fieldNames, fulljson));
+ } else {
+ fulljson = asJSON(fieldNames, fulljson);
+ if (format == Format.JSONARRAY && needComma && !rs.isLast()) {
+ fulljson.append(",");
+ }
+ out.println(fulljson);
+ needComma = true;
}
- out.println(fulljson);
- needComma = true;
}
- if (asArray) {
+ if (format == Format.JSONARRAY) {
out.println("]");
}
out.close();
@@ -274,6 +319,7 @@ public class RDBExport {
c.close();
}
+ @Nonnull
private static StringBuilder dumpRow(RDBDocumentSerializer ser, String id,
RDBRow row) {
NodeDocument doc = ser.fromRow(Collection.NODES, row);
String docjson = ser.asString(doc);
@@ -285,15 +331,177 @@ public class RDBExport {
return fulljson;
}
+ @Nonnull
+ private static String dumpFieldNames(List<String> fieldNames) {
+ StringBuilder result = new StringBuilder();
+ for (String f : fieldNames) {
+ if (result.length() != 0) {
+ result.append(',');
+ }
+ result.append(f);
+ }
+ return result.toString();
+ }
+
+ @Nonnull
+ private static StringBuilder asJSON(List<String> fieldNames, StringBuilder
fulljson) {
+ if (fieldNames.isEmpty()) {
+ return fulljson;
+ } else {
+ JsopTokenizer t = new JsopTokenizer(fulljson.toString());
+ Map<String, Object> doc = (Map<String, Object>)
readValueFromJson(t);
+ StringBuilder buf = new StringBuilder();
+ buf.append('{');
+ String delim = "";
+ for (String field : fieldNames) {
+ buf.append(delim);
+ delim = ",";
+ String[] fn = field.split("\\.");
+ if (doc.containsKey(fn[0])) {
+ Object o = doc.get(fn[0]);
+ appendJsonMember(buf, fn[0], o);
+ }
+ }
+ buf.append('}');
+ return buf;
+ }
+ }
+
+ private static void appendJsonMember(StringBuilder sb, String key, Object
value) {
+ appendJsonString(sb, key);
+ sb.append(":");
+ appendJsonValue(sb, value);
+ }
+
+ private static void appendJsonString(StringBuilder sb, String s) {
+ sb.append('"');
+ JsopBuilder.escape(s, sb);
+ sb.append('"');
+ }
+
+ private static void appendJsonMap(StringBuilder sb, Map<Object, Object>
map) {
+ sb.append("{");
+ boolean needComma = false;
+ for (Map.Entry<Object, Object> e : map.entrySet()) {
+ if (needComma) {
+ sb.append(",");
+ }
+ appendJsonMember(sb, e.getKey().toString(), e.getValue());
+ needComma = true;
+ }
+ sb.append("}");
+ }
+
+ private static void appendJsonValue(StringBuilder sb, Object value) {
+ if (value == null) {
+ sb.append("null");
+ } else if (value instanceof Number) {
+ sb.append(value.toString());
+ } else if (value instanceof Boolean) {
+ sb.append(value.toString());
+ } else if (value instanceof String) {
+ appendJsonString(sb, (String) value);
+ } else if (value instanceof Map) {
+ appendJsonMap(sb, (Map<Object, Object>) value);
+ } else {
+ throw new IllegalArgumentException("unexpected type: " +
value.getClass());
+ }
+ }
+
+ @Nonnull
+ private static StringBuilder asCSV(List<String> csvFieldNames,
StringBuilder fulljson) {
+ JsopTokenizer t = new JsopTokenizer(fulljson.toString());
+ Map<String, Object> doc = (Map<String, Object>) readValueFromJson(t);
+ StringBuilder buf = new StringBuilder();
+ String delim = "";
+ for (String field : csvFieldNames) {
+ buf.append(delim);
+ delim = ",";
+ String[] fn = field.split("\\.");
+ boolean checkMember = fn.length > 1;
+ if (doc.containsKey(fn[0])) {
+ Object o = doc.get(fn[0]);
+ if (checkMember) {
+ if (o instanceof Map) {
+ Map<String, Object> m = (Map<String, Object>) o;
+ if (m.containsKey(fn[1])) {
+ dumpJsonValuetoCsv(buf, m.get(fn[1]));
+ }
+ }
+ } else {
+ dumpJsonValuetoCsv(buf, o);
+ }
+ }
+ }
+ return buf;
+ }
+
+ private static void dumpJsonValuetoCsv(StringBuilder buf, Object o) {
+ if (o == null) {
+ buf.append("null");
+ } else if (o instanceof Boolean) {
+ buf.append(o.toString());
+ } else if (o instanceof Long) {
+ buf.append(((Long) o).longValue());
+ } else {
+ buf.append('"');
+ buf.append(o.toString().replace("\"", "\"\""));
+ buf.append('"');
+ }
+ }
+
+ @Nullable
+ private static Object readValueFromJson(@Nonnull JsopTokenizer json) {
+ switch (json.read()) {
+ case JsopReader.NULL:
+ return null;
+ case JsopReader.TRUE:
+ return true;
+ case JsopReader.FALSE:
+ return false;
+ case JsopReader.NUMBER:
+ return Long.parseLong(json.getToken());
+ case JsopReader.STRING:
+ return json.getToken();
+ case '{':
+ Map<String, Object> map = new HashMap<String, Object>();
+ while (true) {
+ if (json.matches('}')) {
+ break;
+ }
+ String k = json.readString();
+ if (k == null) {
+ throw new IllegalArgumentException();
+ }
+ json.read(':');
+ map.put(k, readValueFromJson(json));
+ json.matches(',');
+ }
+ return map;
+ case '[':
+ List<Object> list = new ArrayList<Object>();
+ while (true) {
+ if (json.matches(']')) {
+ break;
+ }
+ list.add(readValueFromJson(json));
+ json.matches(',');
+ }
+ return list;
+ default:
+ throw new IllegalArgumentException(json.readRawValue());
+ }
+ }
+
private static boolean isBinaryType(int sqlType) {
return sqlType == Types.VARBINARY || sqlType == Types.BINARY ||
sqlType == Types.LONGVARBINARY;
}
private static void printUsage() {
System.err.println("Usage: " + RDBExport.class.getName()
- + " -j/--jdbc-url JDBC-URL [-u/--username username]
[-p/--password password] [-c/--collection table] [-q/--query query] [-o/--out
file] [--jsonArray]");
+ + " -j/--jdbc-url JDBC-URL [-u/--username username]
[-p/--password password] [-c/--collection table] [-q/--query query] [-o/--out
file] [--fields list] [--csv] [--jsonArray]");
System.err.println(
- "Usage: " + RDBExport.class.getName() + " --from-db2-dump file
[--lobdir lobdir] [-o/--out file] [--jsonArray]");
+ "Usage: " + RDBExport.class.getName() + " --from-db2-dump file
[--lobdir lobdir] [-o/--out file] [--fields list] [--csv] [--jsonArray]");
System.err.println("Usage: " + RDBExport.class.getName() + "
--version");
System.err.println("Usage: " + RDBExport.class.getName() + " --help");
}
@@ -319,6 +527,10 @@ public class RDBExport {
System.err.println("");
System.err.println("Output options:");
System.err.println(" -o/--out file Output to
name file (instead of stdout)");
- System.err.println(" --jsonArray Output a JSON
array (instead of one JSON doc per line)");
+ System.err.println(" --jsonArray Output a JSON
array (instead of one");
+ System.err.println(" JSON doc per
line)");
+ System.err.println(" --csv Output in CSV
format (requires --fields");
+ System.err.println(" --fields names field names
(comma separated); required");
+ System.err.println(" for CSV
output");
}
}