[ https://issues.apache.org/jira/browse/MAPREDUCE-7376?focusedWorklogId=765911&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-765911 ]
ASF GitHub Bot logged work on MAPREDUCE-7376:
---------------------------------------------

Author: ASF GitHub Bot
Created on: 04/May/22 10:14
Start Date: 04/May/22 10:14
Worklog Time Spent: 10m
Work Description: steveloughran commented on code in PR #4257:
URL: https://github.com/apache/hadoop/pull/4257#discussion_r864666792


##########
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestAggregateWordCount.java:
##########

@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.examples;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.security.Permission;
+
+import org.junit.After;
+import org.junit.Test;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.HadoopTestCase;
+import org.apache.hadoop.util.ExitUtil.ExitException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestAggregateWordCount extends HadoopTestCase {
+  public TestAggregateWordCount() throws IOException {
+    super(LOCAL_MR, LOCAL_FS, 1, 1);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    getFileSystem().delete(TEST_DIR, true);

Review Comment:
   check for fs being null, though here it is less of an issue than with the cloud stores, which may fail during setup
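A minimal sketch of the null-guarded teardown the review comment asks for, under the assumption that HadoopTestCase.getFileSystem() can return null if setup fails before the filesystem is created; this is illustrative only, not the committed change:

{noformat}
  @After
  public void tearDown() throws Exception {
    // Guard against a null filesystem: setup may fail before the FS is
    // created, which matters more for cloud stores than for the local FS.
    FileSystem fs = getFileSystem();
    if (fs != null) {
      fs.delete(TEST_DIR, true);
    }
    super.tearDown();
  }
{noformat}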
##########
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestAggregateWordCount.java:
##########

@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.examples;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.security.Permission;
+
+import org.junit.After;
+import org.junit.Test;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.HadoopTestCase;
+import org.apache.hadoop.util.ExitUtil.ExitException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestAggregateWordCount extends HadoopTestCase {
+  public TestAggregateWordCount() throws IOException {
+    super(LOCAL_MR, LOCAL_FS, 1, 1);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    getFileSystem().delete(TEST_DIR, true);
+    super.tearDown();
+  }
+
+  // Input/Output paths for sort
+  private static final Path TEST_DIR = new Path(
+      new File(System.getProperty("test.build.data", "/tmp"),
+          "aggregatewordcount").getAbsoluteFile().toURI().toString());
+
+  private static final Path INPUT_PATH = new Path(TEST_DIR, "inPath");
+  private static final Path OUTPUT_PATH = new Path(TEST_DIR, "outPath");
+
+  @Test
+  public void testAggregateTestCount()
+      throws IOException, ClassNotFoundException, InterruptedException {
+    SecurityManager securityManager = System.getSecurityManager();
+    System.setSecurityManager(new NoExitSecurityManager());

Review Comment:
   use `ExitUtil.disableSystemExit()`
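A hedged sketch of what that suggestion could look like, assuming the example's exit path goes through org.apache.hadoop.util.ExitUtil.terminate() so that ExitUtil.disableSystemExit() turns the exit into a catchable ExitException (if main() calls System.exit() directly, this approach would not apply); the input-file setup and assertions from the test above are elided:

{noformat}
  @Test
  public void testAggregateTestCount()
      throws IOException, ClassNotFoundException, InterruptedException {
    // Convert exits routed through ExitUtil.terminate() into ExitExceptions
    // instead of installing a custom SecurityManager.
    ExitUtil.disableSystemExit();
    try {
      // ... create INPUT_PATH files as in the test above ...
      String[] args =
          new String[] {INPUT_PATH.toString(), OUTPUT_PATH.toString(), "1",
              "textinputformat"};
      try {
        AggregateWordCount.main(args);
      } catch (ExitException e) {
        // Expected: the job driver exits once the job completes.
      }
      // ... assertions on the job output ...
    } finally {
      // Clear any recorded exit so later tests start clean.
      ExitUtil.resetFirstExitException();
    }
  }
{noformat}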
##########
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/TestAggregateWordCount.java:
##########

@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.examples;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.security.Permission;
+
+import org.junit.After;
+import org.junit.Test;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.HadoopTestCase;
+import org.apache.hadoop.util.ExitUtil.ExitException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestAggregateWordCount extends HadoopTestCase {
+  public TestAggregateWordCount() throws IOException {
+    super(LOCAL_MR, LOCAL_FS, 1, 1);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    getFileSystem().delete(TEST_DIR, true);
+    super.tearDown();
+  }
+
+  // Input/Output paths for sort
+  private static final Path TEST_DIR = new Path(
+      new File(System.getProperty("test.build.data", "/tmp"),
+          "aggregatewordcount").getAbsoluteFile().toURI().toString());
+
+  private static final Path INPUT_PATH = new Path(TEST_DIR, "inPath");
+  private static final Path OUTPUT_PATH = new Path(TEST_DIR, "outPath");
+
+  @Test
+  public void testAggregateTestCount()
+      throws IOException, ClassNotFoundException, InterruptedException {
+    SecurityManager securityManager = System.getSecurityManager();
+    System.setSecurityManager(new NoExitSecurityManager());
+    try {
+      FileSystem fs = getFileSystem();
+      fs.mkdirs(INPUT_PATH);
+      Path file1 = new Path(INPUT_PATH, "file1");
+      Path file2 = new Path(INPUT_PATH, "file2");
+      FileUtil.write(fs, file1, "Hello World");
+      FileUtil.write(fs, file2, "Hello Hadoop");
+
+      String[] args =
+          new String[] {INPUT_PATH.toString(), OUTPUT_PATH.toString(), "1",
+              "textinputformat"};
+
+      // Run AggregateWordCount Job.
+      try {
+        AggregateWordCount.main(args);
+      } catch (ExitException e) {
+        // Ignore
+      }
+
+      String allEntries;
+      try (FSDataInputStream stream = fs
+          .open(new Path(OUTPUT_PATH, "part-r-00000"));) {
+        allEntries = IOUtils.toString(stream, Charset.defaultCharset());
+      }
+
+      assertEquals("Hadoop\t1\n" + "Hello\t2\n" + "World\t1\n", allEntries);

Review Comment:
   is it always sorted? it may be better to split to lines (i think commons-* can help there) then use assertj to assert on the list contents


Issue Time Tracking
-------------------

    Worklog Id:     (was: 765911)
    Time Spent: 50m  (was: 40m)

> AggregateWordCount fetches wrong results
> ----------------------------------------
>
>                 Key: MAPREDUCE-7376
>                 URL: https://issues.apache.org/jira/browse/MAPREDUCE-7376
>             Project: Hadoop Map/Reduce
>          Issue Type: Bug
>            Reporter: Ayush Saxena
>            Assignee: Ayush Saxena
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 50m
>  Remaining Estimate: 0h
>
> AggregateWordCount rather than counting the words, gives a single line
> output counting the number of rows
> Wrong Result Looks Like:
> {noformat}
> hadoop-3.4.0-SNAPSHOT % bin/hdfs dfs -cat /testOut1/part-r-00000
> record_count 2
> {noformat}
> Correct Should Look Like:
> {noformat}
> hadoop-3.4.0-SNAPSHOT % bin/hdfs dfs -cat /testOut1/part-r-00000
>
> Bye 1
> Goodbye 1
> Hadoop 2
> Hello 2
> World 2
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

---------------------------------------------------------------------
To unsubscribe, e-mail: mapreduce-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: mapreduce-issues-h...@hadoop.apache.org
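Following up on the last review comment above (about whether the output is always sorted): a minimal sketch, assuming AssertJ is on the test classpath and that commons-io's IOUtils.readLines() is acceptable for splitting the output into lines (a java.util.List import would also be needed), so the assertion no longer depends on the reducer output ordering:

{noformat}
      // Read the reducer output as a list of lines rather than one string.
      List<String> lines;
      try (FSDataInputStream stream =
          fs.open(new Path(OUTPUT_PATH, "part-r-00000"))) {
        lines = IOUtils.readLines(stream, Charset.defaultCharset());
      }
      // Assert on the contents without assuming any particular ordering.
      org.assertj.core.api.Assertions.assertThat(lines)
          .containsExactlyInAnyOrder("Hadoop\t1", "Hello\t2", "World\t1");
{noformat}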