I'm importing the dataset for Texas. My version is a few weeks old, but 
you can find the newest here:

http://downloads.cloudmade.com/americas/northern_america/united_states/texas/texas.osm.bz2

My import code, more or less, let me know if you need more 
implementation details:

// Imports an OSM (OpenStreetMap) file into a Neo4j Spatial layer via the
// batch-inserter API, keeping a running count of created entities so that
// import progress can be reported through `processedEntities`.
// NOTE(review): `processed` is a plain var mutated by the batch inserter and
// read via `processedEntities`; if progress is polled from another thread
// (e.g. an actor) the count may be stale — confirm single-threaded access.
class Neo4jImport(filename:String, layer:String = "map") extends Import {

   // Neo4j Spatial OSM importer bound to the target layer name.
   val importer = new OSMImporter(layer)

   // Running count of nodes + relationships created through MyBatchInserter.
   private var processed = 0

   // Number of entities created so far during the import.
   def processedEntities = processed

   // `dataset` (inherited from Import, not visible here) is assumed to be
   // Neo4j-backed; the cast throws ClassCastException otherwise.
   private def ds = dataset.asInstanceOf[Neo4JDataSet]
   // Deliberately a def, not a val: re-read on every access so it stays
   // valid after the dataset is re-initialized in performImport below.
   private def database = ds.database

   // BatchInserterImpl subclass that bumps `processed` on every node and
   // relationship creation, giving a cheap progress counter.
   class MyBatchInserter extends BatchInserterImpl(database.getStoreDir) {

     override def createNode(properties:JMap[String, Object]) = {
       // Counted before delegating; see the id-taking overload below, which
       // counts after — the asymmetry only matters if super throws.
       processed += 1
       super.createNode(properties)
     }

     override def createNode(id:Long, properties:JMap[String, Object]){
       super.createNode(id, properties)
       processed += 1
     }

     override def createRelationship(n1:Long, n2:Long, 
rt:RelationshipType, properties:JMap[String, Object]) = {
       processed += 1
       super.createRelationship(n1, n2, rt, properties)
     }

   }

   // Runs the full import. The statement order is load-bearing:
   //  1. shut down the running database — the batch inserter needs exclusive
   //     access to the store directory;
   //  2. stream the OSM file through the counting batch inserter;
   //  3. shut down the inserter so its stores and Lucene batch indexes flush
   //     (this is where the OOM in the attached stack trace surfaces);
   //  4. re-open the dataset so `database` resolves to a live instance again;
   //  5. re-index in batches of 1000 entities per transaction.
   def performImport() {
     database.shutdown()
     val batchInserter = new MyBatchInserter
     importer.importFile(batchInserter, filename)
     batchInserter.shutdown()
     ds.init(true)
     importer.reIndex(database, 1000)
   }

}

Console output:

Fri May 13 10:22:20 CDT 2011: Saving node 6525309     
(13713.904715468341 node/second)
Fri May 13 10:22:21 CDT 2011: Saving node 6539916     
(13703.333682556313 node/second)
java.lang.OutOfMemoryError: Java heap space
Dumping heap to java_pid13506.hprof ...
Heap dump file created [1426787760 bytes in 30.001 secs]
scala.actors.Actor$$anon$1@764e2837: caught 
java.lang.IllegalStateException: this writer hit an OutOfMemoryError; 
cannot flush
java.lang.IllegalStateException: this writer hit an OutOfMemoryError; 
cannot flush
     at org.apache.lucene.index.IndexWriter.doFlush(IndexWriter.java:3307)
     at org.apache.lucene.index.IndexWriter.flush(IndexWriter.java:3296)
     at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:2376)
     at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:2352)
     at 
org.neo4j.index.impl.lucene.LuceneBatchInserterIndex.closeWriter(LuceneBatchInserterIndex.java:279)
 

     at 
org.neo4j.index.impl.lucene.LuceneBatchInserterIndex.shutdown(LuceneBatchInserterIndex.java:354)
 

     at 
org.neo4j.index.impl.lucene.LuceneBatchInserterIndexProvider.shutdown(LuceneBatchInserterIndexProvider.java:145)
 

     at 
org.neo4j.gis.spatial.osm.OSMImporter$OSMBatchWriter.finish(OSMImporter.java:1144)
 

     at 
org.neo4j.gis.spatial.osm.OSMImporter.importFile(OSMImporter.java:1320)
     at 
org.neo4j.gis.spatial.osm.OSMImporter.importFile(OSMImporter.java:1219)
     at 
org.neo4j.gis.spatial.osm.OSMImporter.importFile(OSMImporter.java:1215)
     at 
info.hermesnav.core.model.data.impl.neo4j.Neo4jImport.performImport(neo4j.scala:54)
 

     at 
info.hermesnav.core.model.data.Import$$anonfun$start$1.apply$mcV$sp(data.scala:25)
 

     at scala.actors.Actor$$anon$1.act(Actor.scala:135)
     at scala.actors.Reactor$$anonfun$dostart$1.apply(Reactor.scala:222)
     at scala.actors.Reactor$$anonfun$dostart$1.apply(Reactor.scala:222)
     at scala.actors.ReactorTask.run(ReactorTask.scala:36)
     at 
scala.concurrent.forkjoin.ForkJoinPool$AdaptedRunnable.exec(ForkJoinPool.java:611)
 

     at 
scala.concurrent.forkjoin.ForkJoinTask.quietlyExec(ForkJoinTask.java:422)
     at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.mainLoop(ForkJoinWorkerThread.java:340)
 

     at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:325)
 



On 05/13/2011 09:34 AM, Peter Neubauer wrote:
> Nolan,
> do you have the importing code and what dataset are you importing? Also, do
> you have any console output? It could be very big transactions or other
> database settings not adjusted to the size of your import ...
>
> Cheers,
>
> /peter neubauer
>
> GTalk:      neubauer.peter
> Skype       peter.neubauer
> Phone       +46 704 106975
> LinkedIn   http://www.linkedin.com/in/neubauer
> Twitter      http://twitter.com/peterneubauer
>
> http://www.neo4j.org               - Your high performance graph database.
> http://startupbootcamp.org/    - Öresund - Innovation happens HERE.
> http://www.thoughtmade.com - Scandinavia's coolest Bring-a-Thing party.
>
>
> On Fri, May 13, 2011 at 4:13 AM, Nolan Darilek<no...@thewordnerd.info>wrote:
>
>> Picking up my slow port to Neo4j Spatial again, and am hitting an
>> out-of-memory error when trying to import large datasets. Given that
>> this code works fine if I use a different database and swap out the
>> implementations, I suspect Neo4j as the issue. This is Neo4j
>> 1.4-SNAPSHOT and Spatial 0.6-SNAPSHOT.
>>
>> Not sure if this is enough to diagnose the issue, but I have a heap dump:
>>
>> http://dl.dropbox.com/u/147071/java_pid7405.hprof.bz2
>>
>> It's currently uploading to Dropbox, so maybe grab it in an hour or two.
>> It's something like 185M compressed, 1.5G uncompressed.
>>
>> Thanks, please let me know if I might provide any more details.
>> The exact error was "this writer hit an OutOfMemoryError; cannot flush",
>> at org.apache.lucene.index.IndexWriter.doFlush():3307. Not sure how relevant
>> the stacktrace is, as my experience with OutOfMemoryErrors is that the
>> code just fails wherever.
>> _______________________________________________
>> Neo4j mailing list
>> User@lists.neo4j.org
>> https://lists.neo4j.org/mailman/listinfo/user
>>
> _______________________________________________
> Neo4j mailing list
> User@lists.neo4j.org
> https://lists.neo4j.org/mailman/listinfo/user

_______________________________________________
Neo4j mailing list
User@lists.neo4j.org
https://lists.neo4j.org/mailman/listinfo/user

Reply via email to