http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java index 912cae1..3ccb34c 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java @@ -1,30 +1,30 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.split; -import org.apache.jena.datatypes.xsd.XSDDatatype ; -import org.apache.jena.graph.NodeFactory ; -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.sparql.core.Quad ; +import org.apache.jena.datatypes.xsd.XSDDatatype ; +import org.apache.jena.graph.NodeFactory ; +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.sparql.core.Quad ; /** * Abstract tests for {@link AbstractNodeTupleSplitToNodesMapper}
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java index 91d671e..5d21bde 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java @@ -1,29 +1,29 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.split; -import org.apache.jena.datatypes.xsd.XSDDatatype ; -import org.apache.jena.graph.NodeFactory ; -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.datatypes.xsd.XSDDatatype ; +import org.apache.jena.graph.NodeFactory ; +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; /** * Abstract tests for {@link AbstractNodeTupleSplitToNodesMapper} http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java index 327a821..4648a83 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java @@ -1,29 +1,29 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.split; -import org.apache.jena.datatypes.xsd.XSDDatatype ; -import org.apache.jena.graph.NodeFactory ; -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.datatypes.xsd.XSDDatatype ; +import org.apache.jena.graph.NodeFactory ; +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; /** * Abstract tests for {@link AbstractNodeTupleSplitToNodesMapper} http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java index 79f73de..61058c6 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java @@ -1,28 +1,28 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.split; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; -import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitToNodesMapper; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitToNodesMapper; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java index b50cdbb..a171ffb 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java @@ -1,28 +1,28 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.split; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; -import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitWithNodesMapper; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitWithNodesMapper; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java index 38b6c72..d91efca 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java @@ -1,28 +1,28 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.split; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; -import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java index 9731f07..3b71f40 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java @@ -1,29 +1,29 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.split; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; -import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper; -import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitWithNodesMapper; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper; +import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitWithNodesMapper; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java index ad7b0f2..e6167c0 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java @@ -1,37 +1,37 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.transform; import java.io.IOException; - + import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.types.Pair; -import org.apache.jena.datatypes.xsd.XSDDatatype ; -import org.apache.jena.graph.NodeFactory ; -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; -import org.apache.jena.hadoop.rdf.mapreduce.transform.QuadsToTriplesMapper; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.sparql.core.Quad ; +import org.apache.jena.datatypes.xsd.XSDDatatype ; +import org.apache.jena.graph.NodeFactory ; +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; +import org.apache.jena.hadoop.rdf.mapreduce.transform.QuadsToTriplesMapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.sparql.core.Quad ; import org.junit.Test; /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java index 6fb1279..e52ea6f 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java @@ -1,37 +1,37 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.transform; import java.io.IOException; - + import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.types.Pair; -import org.apache.jena.datatypes.xsd.XSDDatatype ; -import org.apache.jena.graph.NodeFactory ; -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; -import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsBySubjectMapper; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.sparql.core.Quad ; +import org.apache.jena.datatypes.xsd.XSDDatatype ; +import org.apache.jena.graph.NodeFactory ; +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; +import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsBySubjectMapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.sparql.core.Quad ; import org.junit.Test; /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java index af9cf20..0a73623 100644 --- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java @@ -1,37 +1,37 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.mapreduce.transform; import java.io.IOException; - + import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.types.Pair; -import org.apache.jena.datatypes.xsd.XSDDatatype ; -import org.apache.jena.graph.NodeFactory ; -import org.apache.jena.graph.Triple ; -import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; -import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.sparql.core.Quad ; +import org.apache.jena.datatypes.xsd.XSDDatatype ; +import org.apache.jena.graph.NodeFactory ; +import org.apache.jena.graph.Triple ; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests; +import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.sparql.core.Quad ; import org.junit.Test; /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java index 7425f42..c62c50b 100644 --- a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java +++ b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java @@ -1,421 +1,421 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.jena.hadoop.rdf.stats; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import javax.inject.Inject; - -import org.apache.commons.io.output.CloseShieldOutputStream; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.jena.hadoop.rdf.stats.jobs.JobFactory; - -import com.github.rvesse.airline.HelpOption; -import com.github.rvesse.airline.SingleCommand; -import com.github.rvesse.airline.annotations.Arguments; -import com.github.rvesse.airline.annotations.Command; -import com.github.rvesse.airline.annotations.Option; -import com.github.rvesse.airline.annotations.restrictions.AllowedRawValues; -import com.github.rvesse.airline.annotations.restrictions.Required; -import com.github.rvesse.airline.help.Help; -import com.github.rvesse.airline.io.colors.BasicColor; -import com.github.rvesse.airline.io.output.AnsiBasicColorizedOutputStream; -import com.github.rvesse.airline.io.output.ColorizedOutputStream; -import com.github.rvesse.airline.model.CommandMetadata; -import com.github.rvesse.airline.parser.errors.ParseException; - -/** - * Entry point for the Hadoop job, handles launching all the relevant Hadoop - * jobs - */ -@Command(name = "hadoop jar PATH_TO_JAR org.apache.jena.hadoop.rdf.stats.RdfStats", description = "A command which computes statistics on RDF data using Hadoop") -public class RdfStats implements Tool { - //@formatter:off - private static final String DATA_TYPE_TRIPLES = "triples", - DATA_TYPE_QUADS = "quads", - DATA_TYPE_MIXED = "mixed"; - //@formatter:on - - /** - * Help option - */ - @Inject - public HelpOption<RdfStats> helpOption; - - /** - * Gets/Sets whether all available statistics will be calculated - */ - @Option(name = { "-a", "--all" }, description = "Requests that all available statistics be calculated") - public boolean all = false; - - /** - * Gets/Sets whether node usage counts will be calculated - */ - @Option(name = { "-n", "--node-count" }, description = "Requests that node usage counts be calculated") - public boolean nodeCount = false; - - /** - * Gets/Sets whether characteristic sets will be calculated - */ - @Option(name = { "-c", - "--characteristic-sets" }, hidden = true, description = "Requests that characteristic sets be calculated (hidden as this has scalability issues)") - public boolean characteristicSets = false; - - /** - * Gets/Sets whether type counts will be calculated - */ - @Option(name = { "-t", "--type-count" }, description = "Requests that rdf:type usage counts be calculated") - public boolean typeCount = false; - - /** - * Gets/Sets whether data type counts will be calculated - */ - @Option(name = { "-d", "--data-types" }, description = "Requests that literal data type usage counts be calculated") - public boolean dataTypeCount = false; - - /** - * Gets/Sets whether namespace counts will be calculated - */ - @Option(name = { "--namespaces" }, description = "Requests that namespace usage counts be calculated") - public boolean namespaceCount = false; - - @Option(name = { "-g", "--graph-sizes" }, description = "Requests that the size of each named graph be counted") - public boolean graphSize = false; - - /** - * Gets/Sets the input data type used - */ - @Option(name = { - "--input-type" }, description = "Specifies whether the input data is a mixture of quads and triples, just quads or just triples. Using the most specific data type will yield the most accurate statistics") - @AllowedRawValues(allowedValues = { DATA_TYPE_MIXED, DATA_TYPE_QUADS, DATA_TYPE_TRIPLES }) - public String inputType = DATA_TYPE_MIXED; - - /** - * Gets/Sets the output path - */ - @Option(name = { "-o", "--output" }, title = "OutputPath", description = "Sets the output path", arity = 1) - @Required - public String outputPath = null; - - /** - * Gets/Sets the input path(s) - */ - @Arguments(description = "Sets the input path(s)", title = "InputPath") - @Required - public List<String> inputPaths = new ArrayList<String>(); - - private Configuration config; - - /** - * Entry point method - * - * @param args - * Arguments - */ - public static void main(String[] args) { - ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream( - new CloseShieldOutputStream(System.err)); - try { - // Run and exit with result code if no errors bubble up - // Note that the exit code may still be a error code - int res = ToolRunner.run(new Configuration(true), new RdfStats(), args); - System.exit(res); - } catch (Throwable e) { - // This will only happen if Hadoop option parsing errors - // The run() method will handle its error itself - error.setForegroundColor(BasicColor.RED); - error.println(e.getMessage()); - e.printStackTrace(error); - } finally { - error.close(); - } - // If any errors bubble up exit with non-zero code - System.exit(1); - } - - private static void showUsage() throws IOException { - CommandMetadata metadata = SingleCommand.singleCommand(RdfStats.class).getCommandMetadata(); - Help.help(metadata, System.err); - System.exit(1); - } - - @Override - public void setConf(Configuration conf) { - this.config = conf; - } - - @Override - public Configuration getConf() { - return this.config; - } - - @Override - public int run(String[] args) { - ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream( - new CloseShieldOutputStream(System.err)); - try { - if (args.length == 0) { - showUsage(); - } - - // Parse custom arguments - RdfStats cmd = SingleCommand.singleCommand(RdfStats.class).parse(args); - - // Copy Hadoop configuration across - cmd.setConf(this.getConf()); - - // Show help if requested and exit with success - if (cmd.helpOption.showHelpIfRequested()) { - return 0; - } - - // Run the command and exit with success - cmd.run(); - return 0; - } catch (ParseException e) { - error.setForegroundColor(BasicColor.RED); - error.println(e.getMessage()); - error.println(); - } catch (Throwable e) { - error.setForegroundColor(BasicColor.RED); - error.println(e.getMessage()); - e.printStackTrace(error); - error.println(); - } finally { - error.close(); - } - return 1; - } - - private void run() throws Throwable { - if (!this.outputPath.endsWith("/")) { - this.outputPath += "/"; - } - - // If all statistics requested turn on all statistics - if (this.all) { - this.nodeCount = true; - this.characteristicSets = true; - this.typeCount = true; - this.dataTypeCount = true; - this.namespaceCount = true; - } - - // How many statistics were requested? - int statsRequested = 0; - if (this.nodeCount) - statsRequested++; - if (this.characteristicSets) - statsRequested++; - if (this.typeCount) - statsRequested++; - if (this.dataTypeCount) - statsRequested++; - if (this.namespaceCount) - statsRequested++; - if (this.graphSize) - statsRequested++; - - // Error if no statistics requested - if (statsRequested == 0) { - System.err.println( - "You did not request any statistics to be calculated, please use one/more of the relevant options to select the statistics to be computed"); - return; - } - int statsComputed = 1; - - // Compute statistics - if (this.nodeCount) { - Job job = this.selectNodeCountJob(); - statsComputed = this.computeStatistic(job, statsComputed, statsRequested); - } - if (this.graphSize) { - Job job = this.selectGraphSizeJob(); - statsComputed = this.computeStatistic(job, statsComputed, statsRequested); - } - if (this.typeCount) { - Job[] jobs = this.selectTypeCountJobs(); - statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested); - } - if (this.dataTypeCount) { - Job job = this.selectDataTypeCountJob(); - statsComputed = this.computeStatistic(job, statsComputed, statsRequested); - } - if (this.namespaceCount) { - Job job = this.selectNamespaceCountJob(); - statsComputed = this.computeStatistic(job, statsComputed, statsRequested); - } - if (this.characteristicSets) { - Job[] jobs = this.selectCharacteristicSetJobs(); - statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested); - } - } - - private int computeStatistic(Job job, int statsComputed, int statsRequested) throws Throwable { - System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested)); - this.runJob(job); - System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested)); - System.out.println(); - return ++statsComputed; - } - - private int computeStatistic(Job[] jobs, boolean continueOnFailure, boolean continueOnError, int statsComputed, - int statsRequested) { - System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested)); - this.runJobSequence(jobs, continueOnFailure, continueOnError); - System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested)); - System.out.println(); - return ++statsComputed; - } - - private boolean runJob(Job job) throws Throwable { - System.out.println("Submitting Job " + job.getJobName()); - long start = System.nanoTime(); - try { - job.submit(); - if (job.monitorAndPrintJob()) { - System.out.println("Job " + job.getJobName() + " succeeded"); - return true; - } else { - System.out.println("Job " + job.getJobName() + " failed"); - return false; - } - } catch (Throwable e) { - System.out.println("Unexpected failure in Job " + job.getJobName()); - throw e; - } finally { - long end = System.nanoTime(); - System.out.println("Job " + job.getJobName() + " finished after " - + String.format("%,d milliseconds", TimeUnit.NANOSECONDS.toMillis(end - start))); - System.out.println(); - } - } - - private void runJobSequence(Job[] jobs, boolean continueOnFailure, boolean continueOnError) { - for (int i = 0; i < jobs.length; i++) { - Job job = jobs[i]; - try { - boolean success = this.runJob(job); - if (!success && !continueOnFailure) - throw new IllegalStateException( - "Unable to complete job sequence because Job " + job.getJobName() + " failed"); - } catch (IllegalStateException e) { - throw e; - } catch (Throwable e) { - if (!continueOnError) - throw new IllegalStateException( - "Unable to complete job sequence because job " + job.getJobName() + " errorred", e); - } - } - } - - private Job selectNodeCountJob() throws IOException { - String realOutputPath = outputPath + "node-counts/"; - String[] inputs = new String[this.inputPaths.size()]; - this.inputPaths.toArray(inputs); - - if (DATA_TYPE_QUADS.equals(this.inputType)) { - return JobFactory.getQuadNodeCountJob(this.config, inputs, realOutputPath); - } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { - return JobFactory.getTripleNodeCountJob(this.config, inputs, realOutputPath); - } else { - return JobFactory.getNodeCountJob(this.config, inputs, realOutputPath); - } - } - - private Job selectGraphSizeJob() throws IOException { - String realOutputPath = outputPath + "graph-sizes/"; - String[] inputs = new String[this.inputPaths.size()]; - this.inputPaths.toArray(inputs); - - if (DATA_TYPE_QUADS.equals(this.inputType)) { - return JobFactory.getQuadGraphSizesJob(this.config, inputs, realOutputPath); - } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { - return JobFactory.getTripleGraphSizesJob(this.config, inputs, realOutputPath); - } else { - return JobFactory.getGraphSizesJob(this.config, inputs, realOutputPath); - } - } - - private Job selectDataTypeCountJob() throws IOException { - String realOutputPath = outputPath + "data-type-counts/"; - String[] inputs = new String[this.inputPaths.size()]; - this.inputPaths.toArray(inputs); - - if (DATA_TYPE_QUADS.equals(this.inputType)) { - return JobFactory.getQuadDataTypeCountJob(this.config, inputs, realOutputPath); - } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { - return JobFactory.getTripleDataTypeCountJob(this.config, inputs, realOutputPath); - } else { - return JobFactory.getDataTypeCountJob(this.config, inputs, realOutputPath); - } - } - - private Job selectNamespaceCountJob() throws IOException { - String realOutputPath = outputPath + "namespace-counts/"; - String[] inputs = new String[this.inputPaths.size()]; - this.inputPaths.toArray(inputs); - - if (DATA_TYPE_QUADS.equals(this.inputType)) { - return JobFactory.getQuadNamespaceCountJob(this.config, inputs, realOutputPath); - } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { - return JobFactory.getTripleNamespaceCountJob(this.config, inputs, realOutputPath); - } else { - return JobFactory.getNamespaceCountJob(this.config, inputs, realOutputPath); - } - } - - private Job[] selectCharacteristicSetJobs() throws IOException { - String intermediateOutputPath = outputPath + "characteristics/intermediate/"; - String finalOutputPath = outputPath + "characteristics/final/"; - String[] inputs = new String[this.inputPaths.size()]; - this.inputPaths.toArray(inputs); - - if (DATA_TYPE_QUADS.equals(this.inputType)) { - return JobFactory.getQuadCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, - finalOutputPath); - } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { - return JobFactory.getTripleCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, - finalOutputPath); - } else { - return JobFactory.getCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); - } - } - - private Job[] selectTypeCountJobs() throws IOException { - String intermediateOutputPath = outputPath + "type-declarations/"; - String finalOutputPath = outputPath + "type-counts/"; - String[] inputs = new String[this.inputPaths.size()]; - this.inputPaths.toArray(inputs); - - if (DATA_TYPE_QUADS.equals(this.inputType)) { - return JobFactory.getQuadTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); - } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { - return JobFactory.getTripleTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); - } else { - return JobFactory.getTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.stats; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import javax.inject.Inject; + +import org.apache.commons.io.output.CloseShieldOutputStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.jena.hadoop.rdf.stats.jobs.JobFactory; + +import com.github.rvesse.airline.HelpOption; +import com.github.rvesse.airline.SingleCommand; +import com.github.rvesse.airline.annotations.Arguments; +import com.github.rvesse.airline.annotations.Command; +import com.github.rvesse.airline.annotations.Option; +import com.github.rvesse.airline.annotations.restrictions.AllowedRawValues; +import com.github.rvesse.airline.annotations.restrictions.Required; +import com.github.rvesse.airline.help.Help; +import com.github.rvesse.airline.io.colors.BasicColor; +import com.github.rvesse.airline.io.output.AnsiBasicColorizedOutputStream; +import com.github.rvesse.airline.io.output.ColorizedOutputStream; +import com.github.rvesse.airline.model.CommandMetadata; +import com.github.rvesse.airline.parser.errors.ParseException; + +/** + * Entry point for the Hadoop job, handles launching all the relevant Hadoop + * jobs + */ +@Command(name = "hadoop jar PATH_TO_JAR org.apache.jena.hadoop.rdf.stats.RdfStats", description = "A command which computes statistics on RDF data using Hadoop") +public class RdfStats implements Tool { + //@formatter:off + private static final String DATA_TYPE_TRIPLES = "triples", + DATA_TYPE_QUADS = "quads", + DATA_TYPE_MIXED = "mixed"; + //@formatter:on + + /** + * Help option + */ + @Inject + public HelpOption<RdfStats> helpOption; + + /** + * Gets/Sets whether all available statistics will be calculated + */ + @Option(name = { "-a", "--all" }, description = "Requests that all available statistics be calculated") + public boolean all = false; + + /** + * Gets/Sets whether node usage counts will be calculated + */ + @Option(name = { "-n", "--node-count" }, description = "Requests that node usage counts be calculated") + public boolean nodeCount = false; + + /** + * Gets/Sets whether characteristic sets will be calculated + */ + @Option(name = { "-c", + "--characteristic-sets" }, hidden = true, description = "Requests that characteristic sets be calculated (hidden as this has scalability issues)") + public boolean characteristicSets = false; + + /** + * Gets/Sets whether type counts will be calculated + */ + @Option(name = { "-t", "--type-count" }, description = "Requests that rdf:type usage counts be calculated") + public boolean typeCount = false; + + /** + * Gets/Sets whether data type counts will be calculated + */ + @Option(name = { "-d", "--data-types" }, description = "Requests that literal data type usage counts be calculated") + public boolean dataTypeCount = false; + + /** + * Gets/Sets whether namespace counts will be calculated + */ + @Option(name = { "--namespaces" }, description = "Requests that namespace usage counts be calculated") + public boolean namespaceCount = false; + + @Option(name = { "-g", "--graph-sizes" }, description = "Requests that the size of each named graph be counted") + public boolean graphSize = false; + + /** + * Gets/Sets the input data type used + */ + @Option(name = { + "--input-type" }, description = "Specifies whether the input data is a mixture of quads and triples, just quads or just triples. Using the most specific data type will yield the most accurate statistics") + @AllowedRawValues(allowedValues = { DATA_TYPE_MIXED, DATA_TYPE_QUADS, DATA_TYPE_TRIPLES }) + public String inputType = DATA_TYPE_MIXED; + + /** + * Gets/Sets the output path + */ + @Option(name = { "-o", "--output" }, title = "OutputPath", description = "Sets the output path", arity = 1) + @Required + public String outputPath = null; + + /** + * Gets/Sets the input path(s) + */ + @Arguments(description = "Sets the input path(s)", title = "InputPath") + @Required + public List<String> inputPaths = new ArrayList<String>(); + + private Configuration config; + + /** + * Entry point method + * + * @param args + * Arguments + */ + public static void main(String[] args) { + ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream( + new CloseShieldOutputStream(System.err)); + try { + // Run and exit with result code if no errors bubble up + // Note that the exit code may still be a error code + int res = ToolRunner.run(new Configuration(true), new RdfStats(), args); + System.exit(res); + } catch (Throwable e) { + // This will only happen if Hadoop option parsing errors + // The run() method will handle its error itself + error.setForegroundColor(BasicColor.RED); + error.println(e.getMessage()); + e.printStackTrace(error); + } finally { + error.close(); + } + // If any errors bubble up exit with non-zero code + System.exit(1); + } + + private static void showUsage() throws IOException { + CommandMetadata metadata = SingleCommand.singleCommand(RdfStats.class).getCommandMetadata(); + Help.help(metadata, System.err); + System.exit(1); + } + + @Override + public void setConf(Configuration conf) { + this.config = conf; + } + + @Override + public Configuration getConf() { + return this.config; + } + + @Override + public int run(String[] args) { + ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream( + new CloseShieldOutputStream(System.err)); + try { + if (args.length == 0) { + showUsage(); + } + + // Parse custom arguments + RdfStats cmd = SingleCommand.singleCommand(RdfStats.class).parse(args); + + // Copy Hadoop configuration across + cmd.setConf(this.getConf()); + + // Show help if requested and exit with success + if (cmd.helpOption.showHelpIfRequested()) { + return 0; + } + + // Run the command and exit with success + cmd.run(); + return 0; + } catch (ParseException e) { + error.setForegroundColor(BasicColor.RED); + error.println(e.getMessage()); + error.println(); + } catch (Throwable e) { + error.setForegroundColor(BasicColor.RED); + error.println(e.getMessage()); + e.printStackTrace(error); + error.println(); + } finally { + error.close(); + } + return 1; + } + + private void run() throws Throwable { + if (!this.outputPath.endsWith("/")) { + this.outputPath += "/"; + } + + // If all statistics requested turn on all statistics + if (this.all) { + this.nodeCount = true; + this.characteristicSets = true; + this.typeCount = true; + this.dataTypeCount = true; + this.namespaceCount = true; + } + + // How many statistics were requested? + int statsRequested = 0; + if (this.nodeCount) + statsRequested++; + if (this.characteristicSets) + statsRequested++; + if (this.typeCount) + statsRequested++; + if (this.dataTypeCount) + statsRequested++; + if (this.namespaceCount) + statsRequested++; + if (this.graphSize) + statsRequested++; + + // Error if no statistics requested + if (statsRequested == 0) { + System.err.println( + "You did not request any statistics to be calculated, please use one/more of the relevant options to select the statistics to be computed"); + return; + } + int statsComputed = 1; + + // Compute statistics + if (this.nodeCount) { + Job job = this.selectNodeCountJob(); + statsComputed = this.computeStatistic(job, statsComputed, statsRequested); + } + if (this.graphSize) { + Job job = this.selectGraphSizeJob(); + statsComputed = this.computeStatistic(job, statsComputed, statsRequested); + } + if (this.typeCount) { + Job[] jobs = this.selectTypeCountJobs(); + statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested); + } + if (this.dataTypeCount) { + Job job = this.selectDataTypeCountJob(); + statsComputed = this.computeStatistic(job, statsComputed, statsRequested); + } + if (this.namespaceCount) { + Job job = this.selectNamespaceCountJob(); + statsComputed = this.computeStatistic(job, statsComputed, statsRequested); + } + if (this.characteristicSets) { + Job[] jobs = this.selectCharacteristicSetJobs(); + statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested); + } + } + + private int computeStatistic(Job job, int statsComputed, int statsRequested) throws Throwable { + System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested)); + this.runJob(job); + System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested)); + System.out.println(); + return ++statsComputed; + } + + private int computeStatistic(Job[] jobs, boolean continueOnFailure, boolean continueOnError, int statsComputed, + int statsRequested) { + System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested)); + this.runJobSequence(jobs, continueOnFailure, continueOnError); + System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested)); + System.out.println(); + return ++statsComputed; + } + + private boolean runJob(Job job) throws Throwable { + System.out.println("Submitting Job " + job.getJobName()); + long start = System.nanoTime(); + try { + job.submit(); + if (job.monitorAndPrintJob()) { + System.out.println("Job " + job.getJobName() + " succeeded"); + return true; + } else { + System.out.println("Job " + job.getJobName() + " failed"); + return false; + } + } catch (Throwable e) { + System.out.println("Unexpected failure in Job " + job.getJobName()); + throw e; + } finally { + long end = System.nanoTime(); + System.out.println("Job " + job.getJobName() + " finished after " + + String.format("%,d milliseconds", TimeUnit.NANOSECONDS.toMillis(end - start))); + System.out.println(); + } + } + + private void runJobSequence(Job[] jobs, boolean continueOnFailure, boolean continueOnError) { + for (int i = 0; i < jobs.length; i++) { + Job job = jobs[i]; + try { + boolean success = this.runJob(job); + if (!success && !continueOnFailure) + throw new IllegalStateException( + "Unable to complete job sequence because Job " + job.getJobName() + " failed"); + } catch (IllegalStateException e) { + throw e; + } catch (Throwable e) { + if (!continueOnError) + throw new IllegalStateException( + "Unable to complete job sequence because job " + job.getJobName() + " errorred", e); + } + } + } + + private Job selectNodeCountJob() throws IOException { + String realOutputPath = outputPath + "node-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadNodeCountJob(this.config, inputs, realOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleNodeCountJob(this.config, inputs, realOutputPath); + } else { + return JobFactory.getNodeCountJob(this.config, inputs, realOutputPath); + } + } + + private Job selectGraphSizeJob() throws IOException { + String realOutputPath = outputPath + "graph-sizes/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadGraphSizesJob(this.config, inputs, realOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleGraphSizesJob(this.config, inputs, realOutputPath); + } else { + return JobFactory.getGraphSizesJob(this.config, inputs, realOutputPath); + } + } + + private Job selectDataTypeCountJob() throws IOException { + String realOutputPath = outputPath + "data-type-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadDataTypeCountJob(this.config, inputs, realOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleDataTypeCountJob(this.config, inputs, realOutputPath); + } else { + return JobFactory.getDataTypeCountJob(this.config, inputs, realOutputPath); + } + } + + private Job selectNamespaceCountJob() throws IOException { + String realOutputPath = outputPath + "namespace-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadNamespaceCountJob(this.config, inputs, realOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleNamespaceCountJob(this.config, inputs, realOutputPath); + } else { + return JobFactory.getNamespaceCountJob(this.config, inputs, realOutputPath); + } + } + + private Job[] selectCharacteristicSetJobs() throws IOException { + String intermediateOutputPath = outputPath + "characteristics/intermediate/"; + String finalOutputPath = outputPath + "characteristics/final/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, + finalOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, + finalOutputPath); + } else { + return JobFactory.getCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } + } + + private Job[] selectTypeCountJobs() throws IOException { + String intermediateOutputPath = outputPath + "type-declarations/"; + String finalOutputPath = outputPath + "type-counts/"; + String[] inputs = new String[this.inputPaths.size()]; + this.inputPaths.toArray(inputs); + + if (DATA_TYPE_QUADS.equals(this.inputType)) { + return JobFactory.getQuadTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) { + return JobFactory.getTripleTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } else { + return JobFactory.getTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath); + } + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java index b0ed898..0d6bf18 100644 --- a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java +++ b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java @@ -1,25 +1,25 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.jena.hadoop.rdf.stats.jobs; import java.io.IOException; - + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; @@ -28,7 +28,7 @@ import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.BZip2Codec; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.chain.ChainMapper; +import org.apache.hadoop.mapreduce.lib.chain.ChainMapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; @@ -36,40 +36,40 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.StringUtils; -import org.apache.jena.hadoop.rdf.io.input.QuadsInputFormat; -import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat; -import org.apache.jena.hadoop.rdf.io.input.TriplesOrQuadsInputFormat; -import org.apache.jena.hadoop.rdf.io.input.nquads.NQuadsInputFormat; -import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat; -import org.apache.jena.hadoop.rdf.io.output.nquads.NQuadsOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesNodeOutputFormat; -import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesOutputFormat; -import org.apache.jena.hadoop.rdf.mapreduce.KeyMapper; -import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants; -import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer; -import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer; -import org.apache.jena.hadoop.rdf.mapreduce.characteristics.QuadCharacteristicSetGeneratingReducer; -import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer; -import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer; -import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.QuadDataTypeCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.TripleDataTypeCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.QuadNamespaceCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.TripleNamespaceCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadGraphCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadObjectCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.count.positional.TripleObjectCountMapper; -import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.QuadFilterByPredicateMapper; -import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.TripleFilterByPredicateUriMapper; -import org.apache.jena.hadoop.rdf.mapreduce.group.QuadGroupBySubjectMapper; -import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper; -import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper; -import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; -import org.apache.jena.hadoop.rdf.types.NodeWritable; -import org.apache.jena.hadoop.rdf.types.QuadWritable; -import org.apache.jena.hadoop.rdf.types.TripleWritable; -import org.apache.jena.vocabulary.RDF ; +import org.apache.jena.hadoop.rdf.io.input.QuadsInputFormat; +import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat; +import org.apache.jena.hadoop.rdf.io.input.TriplesOrQuadsInputFormat; +import org.apache.jena.hadoop.rdf.io.input.nquads.NQuadsInputFormat; +import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat; +import org.apache.jena.hadoop.rdf.io.output.nquads.NQuadsOutputFormat; +import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesNodeOutputFormat; +import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesOutputFormat; +import org.apache.jena.hadoop.rdf.mapreduce.KeyMapper; +import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants; +import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.QuadCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer; +import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.QuadDataTypeCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.TripleDataTypeCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.QuadNamespaceCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.TripleNamespaceCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadGraphCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadObjectCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.count.positional.TripleObjectCountMapper; +import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.QuadFilterByPredicateMapper; +import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.TripleFilterByPredicateUriMapper; +import org.apache.jena.hadoop.rdf.mapreduce.group.QuadGroupBySubjectMapper; +import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper; +import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.apache.jena.vocabulary.RDF ; /** * Factory that can produce {@link Job} instances for computing various RDF @@ -101,7 +101,7 @@ public class JobFactory { public static Job getTripleNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { Job job = Job.getInstance(config); job.setJarByClass(JobFactory.class); - job.setJobName("RDF Triples Node Usage Count"); + job.setJobName("RDF Triples Node Usage Count"); // Map/Reduce classes job.setMapperClass(TripleNodeCountMapper.class); @@ -180,67 +180,67 @@ public class JobFactory { FileOutputFormat.setOutputPath(job, new Path(outputPath)); return job; - } - - public static Job getTripleGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { - Job job = Job.getInstance(config); - job.setJarByClass(JobFactory.class); - job.setJobName("RDF Triples Graph Sizes"); - - // Map/Reduce classes - ChainMapper.addMapper(job, TriplesToQuadsConstantGraphMapper.class, LongWritable.class, TripleWritable.class, LongWritable.class, QuadWritable.class, config); - ChainMapper.addMapper(job, QuadGraphCountMapper.class, LongWritable.class, QuadWritable.class, NodeWritable.class, LongWritable.class, config); - job.setMapOutputKeyClass(NodeWritable.class); - job.setMapOutputValueClass(LongWritable.class); - job.setReducerClass(NodeCountReducer.class); - - // Input and Output - job.setInputFormatClass(TriplesInputFormat.class); - job.setOutputFormatClass(NTriplesNodeOutputFormat.class); - FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); - FileOutputFormat.setOutputPath(job, new Path(outputPath)); - - return job; - } - - public static Job getQuadGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { - Job job = Job.getInstance(config); - job.setJarByClass(JobFactory.class); - job.setJobName("RDF Quads Graph Sizes"); - - // Map/Reduce classes - job.setMapperClass(QuadGraphCountMapper.class); - job.setMapOutputKeyClass(NodeWritable.class); - job.setMapOutputValueClass(LongWritable.class); - job.setReducerClass(NodeCountReducer.class); - - // Input and Output - job.setInputFormatClass(QuadsInputFormat.class); - job.setOutputFormatClass(NTriplesNodeOutputFormat.class); - FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); - FileOutputFormat.setOutputPath(job, new Path(outputPath)); - - return job; - } - - public static Job getGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { - Job job = Job.getInstance(config); - job.setJarByClass(JobFactory.class); - job.setJobName("RDF Graph Sizes"); - - // Map/Reduce classes - job.setMapperClass(QuadGraphCountMapper.class); - job.setMapOutputKeyClass(NodeWritable.class); - job.setMapOutputValueClass(LongWritable.class); - job.setReducerClass(NodeCountReducer.class); - - // Input and Output - job.setInputFormatClass(TriplesOrQuadsInputFormat.class); - job.setOutputFormatClass(NTriplesNodeOutputFormat.class); - FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); - FileOutputFormat.setOutputPath(job, new Path(outputPath)); - - return job; + } + + public static Job getTripleGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Triples Graph Sizes"); + + // Map/Reduce classes + ChainMapper.addMapper(job, TriplesToQuadsConstantGraphMapper.class, LongWritable.class, TripleWritable.class, LongWritable.class, QuadWritable.class, config); + ChainMapper.addMapper(job, QuadGraphCountMapper.class, LongWritable.class, QuadWritable.class, NodeWritable.class, LongWritable.class, config); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + public static Job getQuadGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Quads Graph Sizes"); + + // Map/Reduce classes + job.setMapperClass(QuadGraphCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(QuadsInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; + } + + public static Job getGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException { + Job job = Job.getInstance(config); + job.setJarByClass(JobFactory.class); + job.setJobName("RDF Graph Sizes"); + + // Map/Reduce classes + job.setMapperClass(QuadGraphCountMapper.class); + job.setMapOutputKeyClass(NodeWritable.class); + job.setMapOutputValueClass(LongWritable.class); + job.setReducerClass(NodeCountReducer.class); + + // Input and Output + job.setInputFormatClass(TriplesOrQuadsInputFormat.class); + job.setOutputFormatClass(NTriplesNodeOutputFormat.class); + FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths)); + FileOutputFormat.setOutputPath(job, new Path(outputPath)); + + return job; } /** http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-fuseki1/Data/books.ttl ---------------------------------------------------------------------- diff --git a/jena-fuseki1/Data/books.ttl b/jena-fuseki1/Data/books.ttl index f341edc..7957323 100644 --- a/jena-fuseki1/Data/books.ttl +++ b/jena-fuseki1/Data/books.ttl @@ -1,62 +1,62 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -@prefix dc: <http://purl.org/dc/elements/1.1/> . -@prefix vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> . -@prefix ns: <http://example.org/ns#> . - -@prefix : <http://example.org/book/> . - -# A small dataset for usage examples of Joseki -# This data is intentionaly irregular (e.g. different ways to -# record the book creator) as if the information is either an -# aggregation or was created at different times. - -:book1 - dc:title "Harry Potter and the Philosopher's Stone" ; - dc:creator "J.K. Rowling" ; - . - -:book2 - dc:title "Harry Potter and the Chamber of Secrets" ; - dc:creator _:a . - -:book3 - dc:title "Harry Potter and the Prisoner Of Azkaban" ; - dc:creator _:a . - -:book4 - dc:title "Harry Potter and the Goblet of Fire" . - -:book5 - dc:title "Harry Potter and the Order of the Phoenix"; - dc:creator "J.K. Rowling" ; - . - -:book6 - dc:title "Harry Potter and the Half-Blood Prince"; - dc:creator "J.K. Rowling" . - -:book7 - dc:title "Harry Potter and the Deathly Hallows" ; - dc:creator "J.K. Rowling" . -_:a - vcard:FN "J.K. Rowling" ; - vcard:N - [ vcard:Family "Rowling" ; - vcard:Given "Joanna" - ] - . +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +@prefix dc: <http://purl.org/dc/elements/1.1/> . +@prefix vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> . +@prefix ns: <http://example.org/ns#> . + +@prefix : <http://example.org/book/> . + +# A small dataset for usage examples of Joseki +# This data is intentionaly irregular (e.g. different ways to +# record the book creator) as if the information is either an +# aggregation or was created at different times. + +:book1 + dc:title "Harry Potter and the Philosopher's Stone" ; + dc:creator "J.K. Rowling" ; + . + +:book2 + dc:title "Harry Potter and the Chamber of Secrets" ; + dc:creator _:a . + +:book3 + dc:title "Harry Potter and the Prisoner Of Azkaban" ; + dc:creator _:a . + +:book4 + dc:title "Harry Potter and the Goblet of Fire" . + +:book5 + dc:title "Harry Potter and the Order of the Phoenix"; + dc:creator "J.K. Rowling" ; + . + +:book6 + dc:title "Harry Potter and the Half-Blood Prince"; + dc:creator "J.K. Rowling" . + +:book7 + dc:title "Harry Potter and the Deathly Hallows" ; + dc:creator "J.K. Rowling" . +_:a + vcard:FN "J.K. Rowling" ; + vcard:N + [ vcard:Family "Rowling" ; + vcard:Given "Joanna" + ] + .
