[ https://issues.apache.org/jira/browse/FLINK-1040?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14206120#comment-14206120 ]
ASF GitHub Bot commented on FLINK-1040: --------------------------------------- Github user fhueske commented on a diff in the pull request: https://github.com/apache/incubator-flink/pull/194#discussion_r20136871 --- Diff: flink-examples/flink-java-examples/src/main/java/org/apache/flink/examples/java/relational/WebLogAnalysisWithoutType.java --- @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.examples.java.relational; + + +import org.apache.flink.api.common.functions.CoGroupFunction; +import org.apache.flink.api.common.functions.FilterFunction; +import org.apache.flink.api.java.tuple.Tuple; +import org.apache.flink.api.java.tuple.Tuple1; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.tuple.Tuple3; +import org.apache.flink.util.Collector; +import org.apache.flink.api.java.DataSet; +import org.apache.flink.api.java.ExecutionEnvironment; +import org.apache.flink.examples.java.relational.util.WebLogData; +import org.apache.flink.examples.java.relational.util.WebLogDataGenerator; + +/** + * This program processes web logs and relational data. 
+ * It implements the following relational query: + * + * <code><pre> + * SELECT + * r.pageURL, + * r.pageRank, + * r.avgDuration + * FROM documents d JOIN rankings r + * ON d.url = r.url + * WHERE CONTAINS(d.text, [keywords]) + * AND r.rank > [rank] + * AND NOT EXISTS + * ( + * SELECT * FROM Visits v + * WHERE v.destUrl = d.url + * AND v.visitDate < [date] + * ); + * </pre></code> + * + * <p> + * Input files are plain text CSV files using the pipe character ('|') as field separator. + * The tables referenced in the query can be generated using the {@link WebLogDataGenerator} and + * have the following schemas: + * <code><pre> + * CREATE TABLE Documents ( + * url VARCHAR(100) PRIMARY KEY, + * contents TEXT ); + * + * CREATE TABLE Rankings ( + * pageRank INT, + * pageURL VARCHAR(100) PRIMARY KEY, + * avgDuration INT ); + * + * CREATE TABLE Visits ( + * sourceIP VARCHAR(16), + * destURL VARCHAR(100), + * visitDate DATE, + * adRevenue FLOAT, + * userAgent VARCHAR(64), + * countryCode VARCHAR(3), + * languageCode VARCHAR(6), + * searchWord VARCHAR(32), + * duration INT ); + * </pre></code> + * + * <p> + * Usage: <code>WebLogAnalysisWithoutType <documents path> <ranks path> <visits path> <result path></code><br> + * If no parameters are provided, the program is run with default data from {@link WebLogData}. 
+ * + * <p> + * This example shows how to use: + * <ul> + * <li> tuple data types + * <li> projection and join projection + * <li> the CoGroup transformation for an anti-join + * </ul> + * + */ +@SuppressWarnings("serial") +public class WebLogAnalysisWithoutType { + + // ************************************************************************* + // PROGRAM + // ************************************************************************* + + public static void main(String[] args) throws Exception { + + if(!parseParameters(args)) { + return; + } + + final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); + + // get input data + DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env); + DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env); + DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env); + + // Retain documents with keywords + /*DataSet<Tuple1<String>> filterDocs = documents + .filter(new FilterDocByKeyWords()) + .projection(0); */ --- End diff -- Why did you comment this block out? > Make type() call in projections optional (or remove it) > ------------------------------------------------------- > > Key: FLINK-1040 > URL: https://issues.apache.org/jira/browse/FLINK-1040 > Project: Flink > Issue Type: Improvement > Components: Java API > Reporter: Stephan Ewen > Priority: Minor > Labels: simple, starter > > I think the type() call should be optional. The compiler can also cast the > data set directly and the result type is computed from the input types > anyway. -- This message was sent by Atlassian JIRA (v6.3.4#6332)