Sorry, the source code follows:
public class InitialDBCreator {
private static final SimpleDateFormat DATE_PARSER = new
SimpleDateFormat("dd/MM/yyyy");
private static final SimpleDateFormat DATE_FORMATTER = new
SimpleDateFormat("yyyyMMdd");
private static final int GRP_DEST_DOC = 1;
private static final int GRP_DEST_NAME = 2;
private static final int GRP_SRC_DOC = 3;
private static final int GRP_SRC_NAME = 5;
private static final int GRP_QUAL = 6;
private static final int GRP_ENTRY_DATE = 7;
private static final int GRP_PART_INT = 8;
private static final int GRP_PART_DEC = 9;
private static final Pattern PTRN_LINE =
Pattern.compile("(\\d{11,14})\\t([^\\t]+)\\t(\\d{11,14})\\t"+
"([^\\t]+)\\t([^\\t]+)\\t([^\\t]+)\\t(\\d{2}/\\d{2}/"+
"\\d{4})\\t(\\d{1,3}),(\\d{2})%\\t(\\d{2}/\\d{2}/\\d{4})");
private final BatchInserter inserter;
private final GraphDatabaseService dbService;
private final BatchInserterIndexProvider indexProvider;
private final BatchInserterIndex index;
public InitialDBCreator(final String storeDir, final Map<String, String>
config, final String indexName) {
System.out.println("Iniciando inserter...");
inserter = new BatchInserterImpl(storeDir, config);
dbService = inserter.getGraphDbService();
System.out.println("Iniciando indexProvider...");
indexProvider = new LuceneBatchInserterIndexProvider(inserter);
System.out.println("Iniciando index...");
index = indexProvider.nodeIndex(indexName, MapUtil.stringMap("type",
"exact"));
System.out.println("DB iniciado!");
Runtime.getRuntime().addShutdownHook(
new Thread() {
@Override
public void run() {
indexProvider.shutdown();
inserter.shutdown();
}
});
}
public void shutdown() {
index.flush();
indexProvider.shutdown();
inserter.shutdown();
}
private File prepareNodesFile(final File initialFile) {
File nodesFile = null;
int count;
int countErr;
try {
System.out.println("Extracting nodes...");
File tmpFile = File.createTempFile("qsa-tempnodes", ".txt");
BufferedWriter writer = new BufferedWriter(new FileWriter(tmpFile));
InputStream in = FUtils.getInputStream(initialFile);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String line = null;
count = 0;
countErr = 0;
while ((line = reader.readLine()) != null) {
Matcher matcher = PTRN_LINE.matcher(line);
if (matcher.matches()) {
String docOne = matcher.group(GRP_SRC_DOC);
String nameOne = matcher.group(GRP_SRC_NAME);
if (!docOne.equals("") && !nameOne.equals("")) {
writer.write(docOne+"|"+nameOne+"\n");
}
String docTwo = matcher.group(GRP_DEST_DOC);
String nameTwo = matcher.group(GRP_DEST_NAME);
if (!docTwo.equals("") && !nameTwo.equals("")) {
writer.write(docTwo+"|"+nameTwo+"\n");
}
count++;
} else {
System.err.println("ERRO: the line '"+line+"' doesn't match the pattern.");
System.err.println("---");
countErr++;
}
if (((count > 0) && (count % 5000 == 0)) || ((countErr > 0) && (countErr %
500 == 0))) {
System.out.print("\r"+count+" rows processed, "+countErr+" erroneous
lines.");
}
}
System.out.println("\r"+count+" rows processed, "+countErr+" erroneous
lines.");
in.close();
reader.close();
writer.close();
File sortedFile = FUtils.sortFile(tmpFile);
System.out.println("Unifying nodes...");
nodesFile = File.createTempFile("qsa-nodes", ".txt");
writer = new BufferedWriter(new FileWriter(nodesFile));
in = FUtils.getInputStream(sortedFile);
reader = new BufferedReader(new InputStreamReader(in));
line = null;
count = 0;
String lastDoc = "-1";
String lastLine = "";
while ((line = reader.readLine()) != null) {
String doc = line.substring(0, line.indexOf("|"));
if (!doc.equals(lastDoc) && !lastDoc.equals("-1")) {
writer.write(lastLine+"\n");
}
lastDoc = doc;
lastLine = line;
count++;
if ((count > 0) && (count % 5000 == 0)) {
System.out.print("\r"+count+" rows processed.");
}
}
writer.write(lastLine+"\n");
System.out.println("\r"+count+" rows processed.");
in.close();
reader.close();
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
return nodesFile;
}
private void addPerson(final String doc, final String name) {
PersonType tipo = (doc.length() <= 11) ? PersonType.INDIVIDUAL :
PersonType.LEGAL;
Map<String, Object> pessoaProperties = new HashMap<String, Object>();
pessoaProperties.put(Person.KEY_DOC , doc);
pessoaProperties.put(Person.KEY_NAME, name);
pessoaProperties.put(Person.KEY_TYPE, tipo.toString());
Map<String, Object> indexInfo = new HashMap<String, Object>();
indexInfo.put(Person.KEY_DOC, doc);
index.add(inserter.createNode(pessoaProperties), indexInfo);
tipo = null;
pessoaProperties = null;
indexInfo = null;
}
private void addSociety(final String srcDoc, final String destDoc, final
long entryDate,
final String qualification, final double participation) {
Person source = null;
Person destination = null;
try {
IndexHits<Long> hits = index.get(Person.KEY_DOC, srcDoc);
source = new Person(dbService.getNodeById(hits.getSingle()));
hits = index.get(Person.KEY_DOC, destDoc);
destination = new Person(dbService.getNodeById(hits.getSingle()));
CorporateRelationship sociedade = source.getSociety(destination);
if (sociedade == null) {
sociedade = source.addSociety(destination, qualification,
participation, entryDate);
} else {
sociedade.setQualification(qualification);
sociedade.setParticipation(participation);
sociedade.setEntryDate(entryDate);
}
} catch (Exception e) {
System.err.println("Error creating society between '"+srcDoc+"' and
'"+destDoc+"'.");
System.err.println("Source : "+source);
System.err.println("Destination: "+destination);
System.err.println(e.getMessage());
System.err.println("---");
}
}
public void createNodes(final File nodesFile) {
System.out.println("Creating nodes...");
int count = 0;
InputStream in = FUtils.getInputStream(nodesFile);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String line = null;
try {
while ((line = reader.readLine()) != null) {
int i = line.indexOf("|");
if (i != -1) {
String doc = line.substring(0, i);
String name = line.substring(i+1);
addPerson(doc, name);
doc = null;
name = null;
count++;
} else {
System.err.println("ERROR: invalid line '"+line+"'");
}
if (count % 5000 == 0) {
System.out.print("\r"+count+" added nodes.");
}
}
System.out.println("\r"+count+" added nodes.");
} catch (IOException e) {
e.printStackTrace();
}
}
public void createRelationships(final File relationshipsFile) {
System.out.println("Creating edges...");
int count = 0;
int countErr = 0;
InputStream in = FUtils.getInputStream(relationshipsFile);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String line = null;
try {
while ((line = reader.readLine()) != null) {Matcher matcher =
PTRN_LINE.matcher(line);
if (matcher.matches()) {
String srcDoc = matcher.group(GRP_SRC_DOC);
String destDoc = matcher.group(GRP_DEST_DOC);
long entryDate =
Long.parseLong(DATE_FORMATTER.format(DATE_PARSER.parse(matcher.group(GRP_ENTRY_DATE))));
String qualification = matcher.group(GRP_QUAL);
double participation =
Double.parseDouble(matcher.group(GRP_PART_INT)) / 100 +
Double.parseDouble(matcher.group(GRP_PART_DEC)) / 10000;
if (!srcDoc.equals(destDoc)) {
addSociety(srcDoc, destDoc, entryDate, qualification,
participation);
count++;
} else {
System.err.println("ERROR: invalid society.");
System.err.println("linha: '"+line+"'");
System.err.println("---");
countErr++;
}
} else {
System.err.println("ERROR: the line '"+line+"' doesn't match the pattern.");
System.err.println("---");
countErr++;
}
if (((count > 0) && (count % 5000 == 0)) || ((countErr > 0) && (countErr %
500 == 0))) {
System.out.print("\r"+count+" edges added, "+countErr+" invalid
societies.");
}
}
System.out.println("\r"+count+" edges added, "+countErr+" invalid
societies.");
} catch (IOException e) {
e.printStackTrace();
} catch (NumberFormatException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
}
}
public void updateDB(final File file) {
InputStream in = FUtils.getInputStream(file);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String line = null;
try {
while ((line = reader.readLine()) != null) {
Matcher matcher = PTRN_LINE.matcher(line);
if (matcher.matches()) {
String srcDoc = matcher.group(GRP_SRC_DOC);
String srcName = matcher.group(GRP_SRC_NAME);
IndexHits<Long> srcNode = index.get(Person.KEY_DOC, srcDoc);
String destDoc = matcher.group(GRP_DEST_DOC);
String destName = matcher.group(GRP_DEST_NAME);
long entryDate =
Long.parseLong(DATE_FORMATTER.format(DATE_PARSER.parse(matcher.group(GRP_ENTRY_DATE))));
String qualification = matcher.group(GRP_QUAL);
double participation =
Double.parseDouble(matcher.group(GRP_PART_INT)) / 100 +
Double.parseDouble(matcher.group(GRP_PART_DEC)) / 10000;
} else {
System.err.println("ERRO: the line '"+line+"' doesn't match the pattern.");
System.err.println("---");
}
}
} catch (IOException e) {
e.printStackTrace();
} catch (NumberFormatException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void createDB(final File initialFile) {
File nodesFile = prepareNodesFile(initialFile);
createNodes(nodesFile);
index.flush();
createRelationships(initialFile);
}
}
_______________________________________________
Neo4j mailing list
[email protected]
https://lists.neo4j.org/mailman/listinfo/user