Hi @osmosis-dev,

During my recent work I noticed that --merge, --merge-change, --apply-change, --derive-change and --append-change use input buffers (DataPostbox) but do not let the user set the buffer size; it is hard-coded to very low values (10 or 20). By modifying those tasks to accept an additional command line parameter and setting the input buffer size to something more appropriate for my hardware, such as 10000, I achieved significant performance improvements on big data sets (up to 3x faster with --apply-change).

The attached patch contains my modifications. It is mostly trivial: the input buffer size is already a constructor parameter for most of these tasks (only ChangeAppender needed a new one), so I mainly had to add argument handling in the task factories.

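This change should be backwards compatible with respect to both the behavior and the user interface of the tasks (I believe, at least); the only difference when the new argument is omitted is that the buffer capacity defaults to 20 where the replaced code hard-coded 10.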

Needless to say, if the patch gets applied, I volunteer to update the wiki pages :)

Greetings from Stuttgart
Igor
diff --git a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppender.java b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppender.java
index 2b0ea9f..2505d67 100644
--- a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppender.java
+++ b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppender.java
@@ -21,19 +21,20 @@ public class ChangeAppender implements 
MultiChangeSinkRunnableChangeSource {
        
        private List<DataPostbox<ChangeContainer>> sources;
        private ChangeSink changeSink;
-       
-       
+
        /**
         * Creates a new instance.
         * 
         * @param sourceCount
         *            The number of sources to be appended.
+        * @param inputBufferCapacity
+        *            The capacity of the buffer to use for each source, in 
objects.
         */
-       public ChangeAppender(int sourceCount) {
+       public ChangeAppender(int sourceCount, int inputBufferCapacity) {
                sources = new 
ArrayList<DataPostbox<ChangeContainer>>(sourceCount);
                
                for (int i = 0; i < sourceCount; i++) {
-                       sources.add(new DataPostbox<ChangeContainer>(10));
+                       sources.add(new 
DataPostbox<ChangeContainer>(inputBufferCapacity));
                }
        }
        
diff --git a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppenderFactory.java b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppenderFactory.java
index 3c4f553..61a6c26 100644
--- a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppenderFactory.java
+++ b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeAppenderFactory.java
@@ -16,6 +16,8 @@ public class ChangeAppenderFactory extends TaskManagerFactory 
{
        private static final String ARG_SOURCE_COUNT = "sourceCount";
        private static final int DEFAULT_SOURCE_COUNT = 2;
        
+       private static final String ARG_BUFFER_CAPACITY = "bufferCapacity";
+       private static final int DEFAULT_BUFFER_CAPACITY = 20;
        
        /**
         * {@inheritDoc}
@@ -26,9 +28,16 @@ public class ChangeAppenderFactory extends 
TaskManagerFactory {
                
                sourceCount = getIntegerArgument(taskConfig, ARG_SOURCE_COUNT, 
DEFAULT_SOURCE_COUNT);
                
+               int bufferCapacity = getIntegerArgument(
+                               taskConfig,
+                               ARG_BUFFER_CAPACITY,
+                               getDefaultIntegerArgument(taskConfig, 
DEFAULT_BUFFER_CAPACITY)
+                       );
+
+               
                return new MultiChangeSinkRunnableChangeSourceManager(
                        taskConfig.getId(),
-                       new ChangeAppender(sourceCount),
+                       new ChangeAppender(sourceCount, bufferCapacity),
                        taskConfig.getPipeArgs()
                );
        }
diff --git a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeApplierFactory.java b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeApplierFactory.java
index f52c373..832776b 100644
--- a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeApplierFactory.java
+++ b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeApplierFactory.java
@@ -13,15 +13,25 @@ import 
org.openstreetmap.osmosis.core.pipeline.v0_6.MultiSinkMultiChangeSinkRunn
  * @author Brett Henderson
  */
 public class ChangeApplierFactory extends TaskManagerFactory {
+
+       private static final String ARG_BUFFER_CAPACITY = "bufferCapacity";
+       private static final int DEFAULT_BUFFER_CAPACITY = 20;
        
        /**
         * {@inheritDoc}
         */
        @Override
        protected TaskManager createTaskManagerImpl(TaskConfiguration 
taskConfig) {
+               
+               int bufferCapacity = getIntegerArgument(
+                               taskConfig,
+                               ARG_BUFFER_CAPACITY,
+                               getDefaultIntegerArgument(taskConfig, 
DEFAULT_BUFFER_CAPACITY)
+                       );
+               
                return new MultiSinkMultiChangeSinkRunnableSourceManager(
                        taskConfig.getId(),
-                       new ChangeApplier(10),
+                       new ChangeApplier(bufferCapacity),
                        taskConfig.getPipeArgs()
                );
        }
diff --git a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeDeriverFactory.java b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeDeriverFactory.java
index c2e1b67..87d1464 100644
--- a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeDeriverFactory.java
+++ b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeDeriverFactory.java
@@ -14,14 +14,25 @@ import 
org.openstreetmap.osmosis.core.pipeline.v0_6.MultiSinkRunnableChangeSourc
  */
 public class ChangeDeriverFactory extends TaskManagerFactory {
        
+       private static final String ARG_BUFFER_CAPACITY = "bufferCapacity";
+       private static final int DEFAULT_BUFFER_CAPACITY = 20;
+       
        /**
         * {@inheritDoc}
         */
        @Override
        protected TaskManager createTaskManagerImpl(TaskConfiguration 
taskConfig) {
+               
+               int bufferCapacity = getIntegerArgument(
+                               taskConfig,
+                               ARG_BUFFER_CAPACITY,
+                               getDefaultIntegerArgument(taskConfig, 
DEFAULT_BUFFER_CAPACITY)
+                       );
+
+               
                return new MultiSinkRunnableChangeSourceManager(
                        taskConfig.getId(),
-                       new ChangeDeriver(10),
+                       new ChangeDeriver(bufferCapacity),
                        taskConfig.getPipeArgs()
                );
        }
diff --git a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeMergerFactory.java b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeMergerFactory.java
index c24dad9..e3aa594 100644
--- a/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeMergerFactory.java
+++ b/set/src/org/openstreetmap/osmosis/set/v0_6/ChangeMergerFactory.java
@@ -24,6 +24,9 @@ public class ChangeMergerFactory extends TaskManagerFactory {
        private static final String ALTERNATIVE_CONFLICT_RESOLUTION_METHOD_2 = 
"lastSource";
        private static final Map<String, ConflictResolutionMethod> 
CONFLICT_RESOLUTION_METHOD_MAP =
                new HashMap<String, ConflictResolutionMethod>();
+
+       private static final String ARG_BUFFER_CAPACITY = "bufferCapacity";
+       private static final int DEFAULT_BUFFER_CAPACITY = 20;
        
        static {
                CONFLICT_RESOLUTION_METHOD_MAP.put(
@@ -51,9 +54,15 @@ public class ChangeMergerFactory extends TaskManagerFactory {
                                        + " has value \"" + 
conflictResolutionMethod + "\" which is unrecognised.");
                }
                
+               int bufferCapacity = getIntegerArgument(
+                               taskConfig,
+                               ARG_BUFFER_CAPACITY,
+                               getDefaultIntegerArgument(taskConfig, 
DEFAULT_BUFFER_CAPACITY)
+                       );
+               
                return new MultiChangeSinkRunnableChangeSourceManager(
                        taskConfig.getId(),
-                       new 
ChangeMerger(CONFLICT_RESOLUTION_METHOD_MAP.get(conflictResolutionMethod), 10),
+                       new 
ChangeMerger(CONFLICT_RESOLUTION_METHOD_MAP.get(conflictResolutionMethod), 
bufferCapacity),
                        taskConfig.getPipeArgs()
                );
        }
diff --git a/set/src/org/openstreetmap/osmosis/set/v0_6/EntityMergerFactory.java b/set/src/org/openstreetmap/osmosis/set/v0_6/EntityMergerFactory.java
index d0c3bfc..99650a6 100644
--- a/set/src/org/openstreetmap/osmosis/set/v0_6/EntityMergerFactory.java
+++ b/set/src/org/openstreetmap/osmosis/set/v0_6/EntityMergerFactory.java
@@ -28,6 +28,9 @@ public class EntityMergerFactory extends TaskManagerFactory {
        private static final String ARG_BOUND_REMOVED_ACTION = 
"boundRemovedAction";
        private static final String DEFAULT_BOUND_REMOVED_ACTION = "warn";
        
+       private static final String ARG_BUFFER_CAPACITY = "bufferCapacity";
+       private static final int DEFAULT_BUFFER_CAPACITY = 20;
+       
        static {
                CONFLICT_RESOLUTION_METHOD_MAP.put(
                                DEFAULT_CONFLICT_RESOLUTION_METHOD, 
ConflictResolutionMethod.Version);
@@ -53,6 +56,12 @@ public class EntityMergerFactory extends TaskManagerFactory {
                boundRemovedAction = BoundRemovedAction.parse(
                                getStringArgument(taskConfig, 
ARG_BOUND_REMOVED_ACTION, DEFAULT_BOUND_REMOVED_ACTION));
                
+               int bufferCapacity = getIntegerArgument(
+                               taskConfig,
+                               ARG_BUFFER_CAPACITY,
+                               getDefaultIntegerArgument(taskConfig, 
DEFAULT_BUFFER_CAPACITY)
+                       );
+               
                if 
(!CONFLICT_RESOLUTION_METHOD_MAP.containsKey(conflictResolutionMethod)) {
                        throw new OsmosisRuntimeException(
                                        "Argument " + 
ARG_CONFLICT_RESOLUTION_METHOD + " for task " + taskConfig.getId()
@@ -61,7 +70,7 @@ public class EntityMergerFactory extends TaskManagerFactory {
                
                return new MultiSinkRunnableSourceManager(
                        taskConfig.getId(),
-                       new 
EntityMerger(CONFLICT_RESOLUTION_METHOD_MAP.get(conflictResolutionMethod),  10,
+                       new 
EntityMerger(CONFLICT_RESOLUTION_METHOD_MAP.get(conflictResolutionMethod),  
bufferCapacity,
                                        boundRemovedAction),
                        taskConfig.getPipeArgs()
                );
_______________________________________________
osmosis-dev mailing list
[email protected]
http://lists.openstreetmap.org/listinfo/osmosis-dev

Reply via email to