Author: reschke
Date: Wed Nov 18 15:22:42 2015
New Revision: 1715010

URL: http://svn.apache.org/viewvc?rev=1715010&view=rev
Log:
OAK-3652: RDB support: extend RDB export tool for CSV export

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java?rev=1715010&r1=1715009&r2=1715010&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBExport.java
 Wed Nov 18 15:22:42 2015
@@ -34,11 +34,21 @@ import java.sql.SQLException;
 import java.sql.Statement;
 import java.sql.Types;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
+import org.apache.jackrabbit.oak.commons.json.JsopReader;
+import org.apache.jackrabbit.oak.commons.json.JsopTokenizer;
 import org.apache.jackrabbit.oak.plugins.document.Collection;
 import org.apache.jackrabbit.oak.plugins.document.Document;
 import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
@@ -53,10 +63,15 @@ public class RDBExport {
 
     private static final Charset UTF8 = Charset.forName("UTF-8");
 
+    /**
+     * Output formats the export tool can produce; picked from the command
+     * line in {@code main}.
+     */
+    private enum Format {
+        // JSON: default when neither --jsonArray nor --csv is given
+        // JSONARRAY: selected by --jsonArray; output is wrapped in "[" ... "]"
+        // CSV: selected by --csv; main() rejects it without a --fields list
+        JSON, JSONARRAY, CSV
+    };
+
     public static void main(String[] args) throws ClassNotFoundException, 
SQLException, IOException {
 
         String url = null, user = null, pw = null, table = "nodes", query = 
null, dumpfile = null, lobdir = null;
-        boolean asArray = false;
+        List<String> fieldList = Collections.emptyList();
+        Format format = Format.JSON;
         PrintStream out = System.out;
         Set<String> excl = new HashSet<String>();
         excl.add(Document.ID);
@@ -84,7 +99,12 @@ public class RDBExport {
                 } else if ("--lobdir".equals(param)) {
                     lobdir = args[++i];
                 } else if ("--jsonArray".equals(param)) {
-                    asArray = true;
+                    format = Format.JSONARRAY;
+                } else if ("--csv".equals(param)) {
+                    format = Format.CSV;
+                } else if ("--fields".equals(param)) {
+                    String fields = args[++i];
+                    fieldList = Arrays.asList(fields.split(","));
                 } else if ("--version".equals(param)) {
                     System.out.println(RDBExport.class.getName() + " version " 
+ OakVersion.getVersion());
                     System.exit(0);
@@ -103,32 +123,45 @@ public class RDBExport {
             System.exit(2);
         }
 
-        if (dumpfile != null && url != null) {
+        if (format == Format.CSV && fieldList.isEmpty()) {
+            System.err.println(RDBExport.class.getName() + ": csv output 
requires specification of field list");
+            System.exit(2);
+        }
+
+        // JSON output with fieldList missing "_id"
+        if ((format == Format.JSON || format == Format.JSONARRAY) && 
!fieldList.isEmpty() && !fieldList.contains("_id")) {
+            fieldList = new ArrayList<String>(fieldList);
+            fieldList.add(0, "_id");
+        }
+
+        if (dumpfile == null && url == null) {
             System.err.println(RDBExport.class.getName() + ": must use either 
dump file or JDBC URL");
             printUsage();
             System.exit(2);
         } else if (dumpfile != null) {
-            dumpFile(dumpfile, lobdir, asArray, out, ser);
+            dumpFile(dumpfile, lobdir, format, out, fieldList, ser);
         } else {
-            dumpJDBC(url, user, pw, table, query, asArray, out, ser);
+            dumpJDBC(url, user, pw, table, query, format, out, fieldList, ser);
         }
 
         out.flush();
         out.close();
     }
 
-    private static void dumpFile(String filename, String lobdir, boolean 
asArray, PrintStream out, RDBDocumentSerializer ser)
-            throws IOException {
+    private static void dumpFile(String filename, String lobdir, Format 
format, PrintStream out, List<String> fieldNames,
+            RDBDocumentSerializer ser) throws IOException {
         File f = new File(filename);
         File lobDirectory = lobdir == null ? new File(f.getParentFile(), 
"lobdir") : new File(lobdir);
         FileInputStream fis = new FileInputStream(f);
         InputStreamReader ir = new InputStreamReader(fis, UTF8);
         BufferedReader br = new BufferedReader(ir);
 
-        if (asArray) {
+        if (format == Format.JSONARRAY) {
             out.println("[");
+        } else if (format == Format.CSV) {
+            out.println(dumpFieldNames(fieldNames));
         }
-        boolean needComma = asArray;
+        boolean needComma = format == Format.JSONARRAY;
         String line = br.readLine();
         while (line != null) {
             ArrayList<String> fields = parseDel(line);
@@ -164,18 +197,23 @@ public class RDBExport {
                         smodified.length() == 0 ? 0 : 
Long.parseLong(smodified), Long.parseLong(smodcount),
                         Long.parseLong(scmodcount), sdata, bytes);
                 StringBuilder fulljson = dumpRow(ser, id, row);
-                if (asArray && needComma) {
-                    fulljson.append(",");
+                if (format == Format.CSV) {
+                    out.println(asCSV(fieldNames, fulljson));
+                } else {
+                    fulljson = asJSON(fieldNames, fulljson);
+                    if (format == Format.JSONARRAY && needComma) {
+                        fulljson.append(",");
+                    }
+                    out.println(fulljson);
+                    needComma = true;
                 }
-                out.println(fulljson);
-                needComma = true;
             } catch (DocumentStoreException ex) {
                 System.err.println("Error: skipping line for ID " + id + " 
because of " + ex.getMessage());
             }
             line = br.readLine();
         }
         br.close();
-        if (asArray) {
+        if (format == Format.JSONARRAY) {
             out.println("]");
         }
     }
@@ -219,8 +257,8 @@ public class RDBExport {
         return result;
     }
 
-    private static void dumpJDBC(String url, String user, String pw, String 
table, String query, boolean asArray, PrintStream out,
-            RDBDocumentSerializer ser) throws SQLException {
+    private static void dumpJDBC(String url, String user, String pw, String 
table, String query, Format format, PrintStream out,
+            List<String> fieldNames, RDBDocumentSerializer ser) throws 
SQLException {
         String driver = 
RDBJDBCTools.driverForDBType(RDBJDBCTools.jdbctype(url));
         try {
             Class.forName(driver);
@@ -230,17 +268,19 @@ public class RDBExport {
         Connection c = DriverManager.getConnection(url, user, pw);
         c.setReadOnly(true);
         Statement stmt = c.createStatement();
-        String sql = "select ID, MODIFIED, MODCOUNT, CMODCOUNT, HASBINARY, 
DELETEDONCE, DATA, BDATA  from " + table;
+        String sql = "select ID, MODIFIED, MODCOUNT, CMODCOUNT, HASBINARY, 
DELETEDONCE, DATA, BDATA from " + table;
         if (query != null) {
             sql += " where " + query;
         }
         sql += " order by id";
         ResultSet rs = stmt.executeQuery(sql);
 
-        if (asArray) {
+        if (format == Format.JSONARRAY) {
             out.println("[");
+        } else if (format == Format.CSV) {
+            out.println(dumpFieldNames(fieldNames));
         }
-        boolean needComma = asArray;
+        boolean needComma = format == Format.JSONARRAY;
         ResultSetMetaData rsm = null;
         boolean idIsAscii = true;
         while (rs.next()) {
@@ -259,13 +299,18 @@ public class RDBExport {
 
             RDBRow row = new RDBRow(id, hasBinary == 1, deletedOnce == 1, 
modified, modcount, cmodcount, data, bdata);
             StringBuilder fulljson = dumpRow(ser, id, row);
-            if (asArray && needComma && !rs.isLast()) {
-                fulljson.append(",");
+            if (format == Format.CSV) {
+                out.println(asCSV(fieldNames, fulljson));
+            } else {
+                fulljson = asJSON(fieldNames, fulljson);
+                if (format == Format.JSONARRAY && needComma && !rs.isLast()) {
+                    fulljson.append(",");
+                }
+                out.println(fulljson);
+                needComma = true;
             }
-            out.println(fulljson);
-            needComma = true;
         }
-        if (asArray) {
+        if (format == Format.JSONARRAY) {
             out.println("]");
         }
         out.close();
@@ -274,6 +319,7 @@ public class RDBExport {
         c.close();
     }
 
+    @Nonnull
     private static StringBuilder dumpRow(RDBDocumentSerializer ser, String id, 
RDBRow row) {
         NodeDocument doc = ser.fromRow(Collection.NODES, row);
         String docjson = ser.asString(doc);
@@ -285,15 +331,177 @@ public class RDBExport {
         return fulljson;
     }
 
+    /**
+     * Builds the CSV header line: the field names joined by commas, in list
+     * order.
+     * <p>
+     * A separator is written before every name except the first, regardless
+     * of what has been written so far. That way an empty leading name still
+     * occupies its own column and the header has the same number of columns
+     * as the rows produced by {@code asCSV}, which delimits fields the same
+     * way.
+     *
+     * @param fieldNames names of the exported fields
+     * @return the comma-separated names; the empty string for an empty list
+     */
+    @Nonnull
+    private static String dumpFieldNames(List<String> fieldNames) {
+        StringBuilder result = new StringBuilder();
+        String delim = "";
+        for (String f : fieldNames) {
+            result.append(delim).append(f);
+            delim = ",";
+        }
+        return result.toString();
+    }
+
+    /**
+     * Reduces a serialized document to the requested top-level members.
+     * <p>
+     * Only the part of a field name before the first {@code '.'} is used to
+     * select a member; the whole member is copied. Members are written in
+     * field-list order. A field that is absent from the document is skipped
+     * without emitting a separator, and a top-level member is written at
+     * most once even if several dotted fields refer to it, so the result is
+     * always a well-formed JSON object.
+     *
+     * @param fieldNames fields to keep; when empty, {@code fulljson} is
+     *            returned unchanged
+     * @param fulljson the complete document as JSON text (expected to be a
+     *            JSON object; anything else fails the cast below)
+     * @return the filtered JSON object, or {@code fulljson} itself when no
+     *         fields were requested
+     */
+    @Nonnull
+    private static StringBuilder asJSON(List<String> fieldNames, StringBuilder fulljson) {
+        if (fieldNames.isEmpty()) {
+            return fulljson;
+        }
+        JsopTokenizer t = new JsopTokenizer(fulljson.toString());
+        @SuppressWarnings("unchecked")
+        Map<String, Object> doc = (Map<String, Object>) readValueFromJson(t);
+        // names already emitted, so "a.b" and "a.c" do not duplicate member "a"
+        Set<String> written = new HashSet<String>();
+        StringBuilder buf = new StringBuilder();
+        buf.append('{');
+        String delim = "";
+        for (String field : fieldNames) {
+            String name = field.split("\\.")[0];
+            if (!doc.containsKey(name) || !written.add(name)) {
+                continue;
+            }
+            // the separator is only written once we know a member follows
+            buf.append(delim);
+            delim = ",";
+            appendJsonMember(buf, name, doc.get(name));
+        }
+        buf.append('}');
+        return buf;
+    }
+
+    /**
+     * Appends one JSON object member, {@code "key":value}, to {@code sb}.
+     * The key is written as an escaped, double-quoted string; the value is
+     * delegated to {@code appendJsonValue}.
+     */
+    private static void appendJsonMember(StringBuilder sb, String key, Object value) {
+        sb.append('"');
+        JsopBuilder.escape(key, sb);
+        sb.append("\":");
+        appendJsonValue(sb, value);
+    }
+
+    /**
+     * Appends {@code s} to {@code sb} as a double-quoted string, with the
+     * content passed through {@code JsopBuilder.escape}.
+     */
+    private static void appendJsonString(StringBuilder sb, String s) {
+        // opening quote is appended first, then the escaped content
+        JsopBuilder.escape(s, sb.append('"'));
+        sb.append('"');
+    }
+
+    /**
+     * Appends {@code map} to {@code sb} as a JSON object. Members are
+     * written in the map's iteration order, separated by commas; each key is
+     * converted with {@code toString()}.
+     */
+    private static void appendJsonMap(StringBuilder sb, Map<Object, Object> map) {
+        sb.append('{');
+        String separator = "";
+        for (Map.Entry<Object, Object> member : map.entrySet()) {
+            sb.append(separator);
+            separator = ",";
+            appendJsonMember(sb, member.getKey().toString(), member.getValue());
+        }
+        sb.append('}');
+    }
+
+    /**
+     * Appends a parsed JSON value to {@code sb} in JSON syntax.
+     * <p>
+     * Handles every type that {@code readValueFromJson} can return:
+     * {@code null}, numbers, booleans, strings, maps (objects) and lists
+     * (arrays). Lists are serialized element by element so that documents
+     * containing arrays no longer abort the export.
+     *
+     * @param sb buffer to append to
+     * @param value the value to serialize; may be {@code null}
+     * @throws IllegalArgumentException if {@code value} is of any other type
+     */
+    private static void appendJsonValue(StringBuilder sb, Object value) {
+        if (value == null) {
+            sb.append("null");
+        } else if (value instanceof Number || value instanceof Boolean) {
+            sb.append(value.toString());
+        } else if (value instanceof String) {
+            appendJsonString(sb, (String) value);
+        } else if (value instanceof Map) {
+            @SuppressWarnings("unchecked")
+            Map<Object, Object> map = (Map<Object, Object>) value;
+            appendJsonMap(sb, map);
+        } else if (value instanceof List) {
+            sb.append('[');
+            String delim = "";
+            for (Object element : (List<?>) value) {
+                sb.append(delim);
+                delim = ",";
+                appendJsonValue(sb, element);
+            }
+            sb.append(']');
+        } else {
+            throw new IllegalArgumentException("unexpected type: " + value.getClass());
+        }
+    }
+
+    /**
+     * Renders one document as a CSV row.
+     * <p>
+     * One column is produced per entry of {@code csvFieldNames}, in list
+     * order, separated by commas. A name of the form {@code a.b} selects
+     * member {@code b} of the top-level member {@code a} when {@code a} is a
+     * JSON object; components after the second are not looked at. A field
+     * that cannot be resolved leaves its column empty.
+     *
+     * @param csvFieldNames the columns to output
+     * @param fulljson the complete document as JSON text
+     * @return the CSV row, without a line terminator
+     */
+    @Nonnull
+    private static StringBuilder asCSV(List<String> csvFieldNames, StringBuilder fulljson) {
+        Map<String, Object> doc = (Map<String, Object>) readValueFromJson(new JsopTokenizer(fulljson.toString()));
+        StringBuilder row = new StringBuilder();
+        boolean first = true;
+        for (String column : csvFieldNames) {
+            if (!first) {
+                row.append(",");
+            }
+            first = false;
+
+            String[] path = column.split("\\.");
+            String top = path[0];
+            if (!doc.containsKey(top)) {
+                continue;
+            }
+            Object value = doc.get(top);
+            if (path.length == 1) {
+                // plain field name: output the member itself
+                dumpJsonValuetoCsv(row, value);
+                continue;
+            }
+            if (!(value instanceof Map)) {
+                // dotted name, but the member is not an object: empty column
+                continue;
+            }
+            Map<String, Object> nested = (Map<String, Object>) value;
+            String sub = path[1];
+            if (nested.containsKey(sub)) {
+                dumpJsonValuetoCsv(row, nested.get(sub));
+            }
+        }
+        return row;
+    }
+
+    /**
+     * Appends a single value to a CSV row.
+     * <p>
+     * {@code null} is written as the bare word {@code null}; {@code Boolean}
+     * and {@code Long} values are written unquoted. Everything else is
+     * written as its {@code toString()} text inside double quotes, with each
+     * embedded double quote doubled.
+     */
+    private static void dumpJsonValuetoCsv(StringBuilder buf, Object o) {
+        if (o == null) {
+            buf.append("null");
+            return;
+        }
+        if (o instanceof Boolean || o instanceof Long) {
+            buf.append(o.toString());
+            return;
+        }
+        String quoted = o.toString().replace("\"", "\"\"");
+        buf.append('"').append(quoted).append('"');
+    }
+
+    /**
+     * Reads the next JSON value from the tokenizer and converts it to a Java
+     * object: {@code null}, {@code Boolean}, {@code Long}, {@code String},
+     * a {@code Map} for an object or a {@code List} for an array. Nested
+     * values are read recursively.
+     * <p>
+     * Number tokens are converted with {@code Long} parsing. Inside objects
+     * and arrays the result of {@code matches(',')} is not checked, so a
+     * separator after an entry is consumed when present but not required.
+     *
+     * @param json tokenizer positioned at the start of a value
+     * @return the converted value; {@code null} for a JSON null
+     * @throws IllegalArgumentException if an object key is not a string or
+     *             the next token does not start a value
+     */
+    @Nullable
+    private static Object readValueFromJson(@Nonnull JsopTokenizer json) {
+        switch (json.read()) {
+            case JsopReader.NULL:
+                return null;
+            case JsopReader.TRUE:
+                return Boolean.TRUE;
+            case JsopReader.FALSE:
+                return Boolean.FALSE;
+            case JsopReader.NUMBER:
+                return Long.valueOf(json.getToken());
+            case JsopReader.STRING:
+                return json.getToken();
+            case '{': {
+                Map<String, Object> members = new HashMap<String, Object>();
+                while (!json.matches('}')) {
+                    String key = json.readString();
+                    if (key == null) {
+                        throw new IllegalArgumentException();
+                    }
+                    json.read(':');
+                    Object memberValue = readValueFromJson(json);
+                    members.put(key, memberValue);
+                    json.matches(',');
+                }
+                return members;
+            }
+            case '[': {
+                List<Object> elements = new ArrayList<Object>();
+                while (!json.matches(']')) {
+                    elements.add(readValueFromJson(json));
+                    json.matches(',');
+                }
+                return elements;
+            }
+            default:
+                throw new IllegalArgumentException(json.readRawValue());
+        }
+    }
+
     /**
      * Tells whether a {@code java.sql.Types} code is one of the binary
      * column types ({@code VARBINARY}, {@code BINARY} or
      * {@code LONGVARBINARY}).
      */
     private static boolean isBinaryType(int sqlType) {
         switch (sqlType) {
             case Types.VARBINARY:
             case Types.BINARY:
             case Types.LONGVARBINARY:
                 return true;
             default:
                 return false;
         }
     }
 
     private static void printUsage() {
         System.err.println("Usage: " + RDBExport.class.getName()
-                + " -j/--jdbc-url JDBC-URL [-u/--username username] 
[-p/--password password] [-c/--collection table] [-q/--query query] [-o/--out 
file] [--jsonArray]");
+                + " -j/--jdbc-url JDBC-URL [-u/--username username] 
[-p/--password password] [-c/--collection table] [-q/--query query] [-o/--out 
file] [--fields list] [--csv] [--jsonArray]");
         System.err.println(
-                "Usage: " + RDBExport.class.getName() + " --from-db2-dump file 
[--lobdir lobdir] [-o/--out file] [--jsonArray]");
+                "Usage: " + RDBExport.class.getName() + " --from-db2-dump file 
[--lobdir lobdir] [-o/--out file] [--fields list] [--csv] [--jsonArray]");
         System.err.println("Usage: " + RDBExport.class.getName() + " 
--version");
         System.err.println("Usage: " + RDBExport.class.getName() + " --help");
     }
@@ -319,6 +527,10 @@ public class RDBExport {
         System.err.println("");
         System.err.println("Output options:");
         System.err.println("  -o/--out file                      Output to 
name file (instead of stdout)");
-        System.err.println("  --jsonArray                        Output a JSON 
array (instead of one JSON doc per line)");
+        System.err.println("  --jsonArray                        Output a JSON 
array (instead of one");
+        System.err.println("                                     JSON doc per 
line)");
+        System.err.println("  --csv                              Output in CSV 
format (requires --fields");
+        System.err.println("  --fields names                     field names 
(comma separated); required");
+        System.err.println("                                     for CSV 
output");
     }
 }


Reply via email to