[
https://issues.apache.org/jira/browse/HDFS-2091?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Alberto Andreotti updated HDFS-2091:
------------------------------------
Description:
The more nodes I add to this application, the slower it goes. This is the app's
map,
/**
 * Forwards the incoming heat value to four target positions derived from the
 * incoming linear position: one before, one after, and one
 * {@code MatrixData.Length()} before and after it.
 *
 * <p>The incoming key object is reused as the output key, so when this method
 * returns it holds the last emitted position, not the position it arrived with.
 *
 * @param linearPos linear position of the current cell; overwritten with each
 *        target position in turn
 * @param heat value emitted unchanged for every target position
 * @param context receives the four (position, heat) pairs
 * @throws IOException if {@code context.write} fails
 * @throws InterruptedException if {@code context.write} is interrupted
 */
public void map(IntWritable linearPos, FloatWritable heat, Context context)
    throws IOException, InterruptedException {
  final int origin = linearPos.get();
  final int rowLength = MatrixData.Length();

  //Emission order: previous, next, then the cells above and below
  final int[] offsets = {-1, 1, -rowLength, rowLength};
  for (int offset : offsets) {
    linearPos.set(origin + offset);
    context.write(linearPos, heat);
  }
}//end map
and this is the reduce,
/**
 * Produces the next value for one linear position from the values emitted for it.
 *
 * <p>Positions below zero or above {@code MatrixData.LinearSize()} produce no
 * output. The position equal to {@code MatrixData.HeatSourceLinearPos()} is
 * written with {@code MatrixData.HeatSourceTemperature()} regardless of the
 * received values. Any other position is written with the sum of its received
 * values divided by four.
 *
 * @param linearPos linear position being reduced; reused as the output key
 * @param fwValues values received for this position
 * @param context receives at most one (position, value) pair
 * @throws IOException if {@code context.write} fails
 * @throws InterruptedException if {@code context.write} is interrupted
 */
public void reduce(IntWritable linearPos, Iterable<FloatWritable> fwValues,
    Context context) throws IOException, InterruptedException {
  final int pos = linearPos.get();

  //First and last "cold" boundaries: out-of-range positions emit nothing
  final boolean inRange = pos >= 0 && pos <= MatrixData.LinearSize();
  if (!inRange) {
    return;
  }

  final FloatWritable updated;
  if (pos == MatrixData.HeatSourceLinearPos()) {
    //The heat source keeps its fixed temperature
    updated = new FloatWritable(MatrixData.HeatSourceTemperature());
  } else {
    //Add all the values, then divide by the fixed count of four
    float total = 0.0f;
    for (FloatWritable received : fwValues) {
      total += received.get();
    }
    updated = new FloatWritable(total / 4);
  }
  context.write(linearPos, updated);
}
For example, with 6 nodes I get a running time of 15 minutes, and with 4 nodes I
get a running time of 8 minutes!
This is how I generated the input,
/**
 * Generates the input sequence file at {@code /home/beto/mySeq}.
 *
 * <p>One record is appended for every index from 0 through
 * {@code MatrixData.LinearSize()} inclusive. The value is {@code 0.0f} except at
 * {@code MatrixData.HeatSourceLinearPos()}, which gets
 * {@code MatrixData.HeatSourceTemperature()}. A {@code *} line is printed each
 * time the index passes the next progress threshold (steps of one tenth of
 * {@code MatrixData.LinearSize()}).
 *
 * @param args ignored
 * @throws IOException if the file cannot be created or written
 */
public static void main(String[] args) throws IOException {
  //Write file in the local dir
  //NOTE(review): the URI has no scheme, so the filesystem is chosen by conf;
  //confirm it resolves to the local filesystem as intended
  final String uri = "/home/beto/mySeq";
  final Configuration conf = new Configuration();
  final FileSystem fs = FileSystem.get(URI.create(uri), conf);
  final Path target = new Path(uri);

  final IntWritable key = new IntWritable();
  final FloatWritable zero = new FloatWritable(0.0f);

  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, target, IntWritable.class,
        FloatWritable.class);

    final int step = MatrixData.LinearSize() / 10;
    int nextMark = step;
    for (int i = 0; i <= MatrixData.LinearSize(); i++) {
      key.set(i);

      //Progress indicator
      if (i > nextMark) {
        System.out.println("*");
        nextMark += step;
      }

      final boolean isHeatSource = i == MatrixData.HeatSourceLinearPos();
      writer.append(key,
          isHeatSource ? new FloatWritable(MatrixData.HeatSourceTemperature()) : zero);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
I'm basically solving a heat transfer problem in a square section. Pretty
simple. The input data is stored as (key, value) pairs; it is read in that
form, processed, and written back in the same format.
Any thoughts?
Alberto.
was:
The more nodes I add to this application, the slower it goes. This is the app's
map,
/**
 * Forwards the incoming heat value to four target positions derived from the
 * incoming linear position: one before, one after, and one
 * {@code MatrixData.Length()} before and after it.
 *
 * <p>The incoming key object is reused as the output key, so when this method
 * returns it holds the last emitted position, not the position it arrived with.
 *
 * @param linearPos linear position of the current cell; overwritten with each
 *        target position in turn
 * @param heat value emitted unchanged for every target position
 * @param context receives the four (position, heat) pairs
 * @throws IOException if {@code context.write} fails
 * @throws InterruptedException if {@code context.write} is interrupted
 */
public void map(IntWritable linearPos, FloatWritable heat, Context context)
    throws IOException, InterruptedException {
  final int origin = linearPos.get();
  final int rowLength = MatrixData.Length();

  //Emission order: previous, next, then the cells above and below
  final int[] offsets = {-1, 1, -rowLength, rowLength};
  for (int offset : offsets) {
    linearPos.set(origin + offset);
    context.write(linearPos, heat);
  }
}//end map
and this is the reduce,
/**
 * Produces the next value for one linear position from the values emitted for it.
 *
 * <p>Positions below zero or above {@code MatrixData.LinearSize()} produce no
 * output. The position equal to {@code MatrixData.HeatSourceLinearPos()} is
 * written with {@code MatrixData.HeatSourceTemperature()} regardless of the
 * received values. Any other position is written with the sum of its received
 * values divided by four.
 *
 * @param linearPos linear position being reduced; reused as the output key
 * @param fwValues values received for this position
 * @param context receives at most one (position, value) pair
 * @throws IOException if {@code context.write} fails
 * @throws InterruptedException if {@code context.write} is interrupted
 */
public void reduce(IntWritable linearPos, Iterable<FloatWritable> fwValues,
    Context context) throws IOException, InterruptedException {
  final int pos = linearPos.get();

  //First and last "cold" boundaries: out-of-range positions emit nothing
  final boolean inRange = pos >= 0 && pos <= MatrixData.LinearSize();
  if (!inRange) {
    return;
  }

  final FloatWritable updated;
  if (pos == MatrixData.HeatSourceLinearPos()) {
    //The heat source keeps its fixed temperature
    updated = new FloatWritable(MatrixData.HeatSourceTemperature());
  } else {
    //Add all the values, then divide by the fixed count of four
    float total = 0.0f;
    for (FloatWritable received : fwValues) {
      total += received.get();
    }
    updated = new FloatWritable(total / 4);
  }
  context.write(linearPos, updated);
}
For example, with 6 nodes I get a running time of 15 minutes, and with 4 nodes I
get a running time of 8 minutes!
This is how I generated the input,
/**
 * Generates the input sequence file at {@code /home/beto/mySeq}.
 *
 * <p>One record is appended for every index from 0 through
 * {@code MatrixData.LinearSize()} inclusive. The value is {@code 0.0f} except at
 * {@code MatrixData.HeatSourceLinearPos()}, which gets
 * {@code MatrixData.HeatSourceTemperature()}. A {@code *} line is printed each
 * time the index passes the next progress threshold (steps of one tenth of
 * {@code MatrixData.LinearSize()}).
 *
 * @param args ignored
 * @throws IOException if the file cannot be created or written
 */
public static void main(String[] args) throws IOException {
  //Write file in the local dir
  //NOTE(review): the URI has no scheme, so the filesystem is chosen by conf;
  //confirm it resolves to the local filesystem as intended
  final String uri = "/home/beto/mySeq";
  final Configuration conf = new Configuration();
  final FileSystem fs = FileSystem.get(URI.create(uri), conf);
  final Path target = new Path(uri);

  final IntWritable key = new IntWritable();
  final FloatWritable zero = new FloatWritable(0.0f);

  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, target, IntWritable.class,
        FloatWritable.class);

    final int step = MatrixData.LinearSize() / 10;
    int nextMark = step;
    for (int i = 0; i <= MatrixData.LinearSize(); i++) {
      key.set(i);

      //Progress indicator
      if (i > nextMark) {
        System.out.println("*");
        nextMark += step;
      }

      final boolean isHeatSource = i == MatrixData.HeatSourceLinearPos();
      writer.append(key,
          isHeatSource ? new FloatWritable(MatrixData.HeatSourceTemperature()) : zero);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
I'm basically solving a heat transfer problem in a square section. Pretty
simple. The input data is stored as (key, value) pairs; it is read in that
form, processed, and written back in the same format.
Any thoughts?
Alberto.
> Hadoop does not scale as expected
> ---------------------------------
>
> Key: HDFS-2091
> URL: https://issues.apache.org/jira/browse/HDFS-2091
> Project: Hadoop HDFS
> Issue Type: Bug
> Environment: Linux, 8 nodes.
> Reporter: Alberto Andreotti
> Original Estimate: 504h
> Remaining Estimate: 504h
>
> The more nodes I add to this application, the slower it goes. This is the
> app's map,
> public void map(IntWritable linearPos, FloatWritable heat, Context context
> ) throws IOException, InterruptedException {
> int myLinearPos = linearPos.get();
> //Distribute my value to the previous and the next
> linearPos.set(myLinearPos - 1);
> context.write(linearPos, heat);
> linearPos.set(myLinearPos + 1);
> context.write(linearPos, heat);
> //Distribute my value to the cells above and below
> linearPos.set(myLinearPos - MatrixData.Length());
> context.write(linearPos, heat);
> linearPos.set(myLinearPos + MatrixData.Length());
> context.write(linearPos, heat);
> }//end map
> and this is the reduce,
> public void reduce(IntWritable linearPos, Iterable<FloatWritable> fwValues,
> Context context) throws IOException,
> InterruptedException {
> //Handle first and last "cold" boundaries
> if(linearPos.get()<0 || linearPos.get()>MatrixData.LinearSize()){
> return;
> }
> if(linearPos.get()==MatrixData.HeatSourceLinearPos()){
> context.write(linearPos, new
> FloatWritable(MatrixData.HeatSourceTemperature()));
> return;
> }
> float result = 0.0f;
> //Add all the values
> for(FloatWritable heat : fwValues) {
> result += heat.get();
> }
> context.write(linearPos, new FloatWritable(result/4) );
> }
> For example, with 6 nodes I get a running time of 15 minutes, and with 4 nodes
> I get a running time of 8 minutes!
> This is how I generated the input,
> public static void main(String[] args) throws IOException {
> //Write file in the local dir
> String uri = "/home/beto/mySeq";
> Configuration conf = new Configuration();
> FileSystem fs = FileSystem.get(URI.create(uri), conf);
> Path path = new Path(uri);
> IntWritable key = new IntWritable();
> FloatWritable value = new FloatWritable(0.0f);
> SequenceFile.Writer writer = null;
> try {
> writer = SequenceFile.createWriter(fs, conf, path, key.getClass(),
> value.getClass());
> int step = MatrixData.LinearSize()/10;
> int limit = step;
> for (int i = 0; i <= MatrixData.LinearSize(); i++) {
> key.set(i);
> if(i>limit){
> System.out.println("*");
> limit +=step;
> }
> if(i==MatrixData.HeatSourceLinearPos()) {
> writer.append(key, new
> FloatWritable(MatrixData.HeatSourceTemperature()));
> continue;
> }
> writer.append(key, value);
> }
> } finally {
> IOUtils.closeStream(writer);
> }
> }
> I'm basically solving a heat transfer problem in a square section. Pretty
> simple. The input data is stored as (key, value) pairs; it is read in that
> form, processed, and written back in the same format.
> Any thoughts?
> Alberto.
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira