
import com.tdunning.math.stats.TDigest;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
//import org.apache.spark.sql.*;


public class BatchProcess {

    /** Default CSV input directory, used when no path is supplied by the caller. */
    private static final String DEFAULT_CSV_PATH = "J:\\csv_path\\T_EN";

    public static void main(String[] args) {
        // Spark master URL and application name; hard-coded for this client,
        // could be taken from args if needed.
        String master = "spark://172.31.48.232:7077";
        String appName = "java_client";
        startProcess(master, appName);
    }

    /**
     * Backward-compatible entry point: processes the default CSV path.
     *
     * @param master  Spark master URL (e.g. "spark://host:7077")
     * @param appName Spark application name
     */
    public static void startProcess(String master, String appName) {
        startProcess(master, appName, DEFAULT_CSV_PATH);
    }

    /**
     * Connects to the given Spark master, reads the CSV at {@code csvPath}
     * (header row expected, schema inferred), extracts the "Duration" column
     * as a typed integer dataset and prints it.
     *
     * @param master  Spark master URL (e.g. "spark://host:7077")
     * @param appName Spark application name
     * @param csvPath path to the CSV input directory or file
     */
    public static void startProcess(String master, String appName, String csvPath) {
        SparkSession session = SparkSession.builder().master(master).appName(appName).getOrCreate();
        try {
            Dataset<Row> tEnData = session.read()
                    .option("header", "true")
                    .option("inferSchema", "true")
                    .csv(csvPath);

            // Project the "Duration" column into a Dataset<Integer>.
            // NOTE(review): rows whose "Duration" is null will make Encoders.INT()
            // fail at show() time — confirm the input has no null durations.
            Dataset<Integer> mappedDataset = tEnData.map(
                    (MapFunction<Row, Integer>) row -> row.<Integer>getAs("Duration"),
                    Encoders.INT());
            mappedDataset.show(false);

            // NOTE(review): an earlier draft accumulated values into a driver-side
            // TDigest via foreach; that closure is serialized to the executors and
            // mutates executor-local copies, leaving the driver's digest empty.
            // Use a proper Aggregator (or collect()) if quantiles are needed.
        } finally {
            // Release the SparkSession and its resources even if reading fails.
            session.stop();
        }
    }
}
