hadoop data structures

steven Tue, 09 Dec 2014 00:50:59 -0800

hi,

i got this code which extracts timeframes frome a logfile and does somecalculation on it.

input lines looks like this:

1000,T,0,104,1000,1100,27147,80,80,80,80,81,81,98,98,98,101,137,137,139,177,177,177,173,166,149,134,130,124,119,111,104,921000,T,1,743,300,300,4976,492,492,492,492,492,497,497,856,856,863,866,875,875,954,954,954,954,954,954,954,954,770,770,770,770,7431000,T,2,40,800,1000,11922,29,29,29,29,29,29,29,44,46,46,50,51,51,65,65,65,61,52,47,47,47,44,42,40,32,302001,T,0,103,6700,7000,44658,80,80,80,80,80,81,96,98,98,101,134,137,139,220,192,176,168,162,156,149,144,132,122,112,104,95

1002,U,....


the first value being the time in ms,
T being the lines im interrested in
0,1,2 being a product ID,
104,743,40,103 being the price i want.

now i need to extract all prices for some specific timeframe, lets say3000ms.the code at the end works but has the problem that the variable"numberOfRuns" is counted up and used to calculate the time and i guessusing this system in hadoop doesnt work.so i need a way to extract the "timeframes" in the mapper and what datastructure would you use?







import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

import java.util.List;

public class Test {

public List<ArrayList<String>> splitFileByTime(List<String> lines,int timeFrame) {List<ArrayList<String>> myTimes = newArrayList<ArrayList<String>>();



        ArrayList<String> lines_new = new ArrayList<String>();


        for (String z: lines) {
            //System.out.println(z);
        }

        int numberOfRuns = 1;

        for (String current : lines) {
            String[] parts = current.split(",");

            int time = Integer.parseInt(parts[0]);


            if (time < 0) {
                // Zeiten vor Beginn der Simulation, uninteressant
            } else {



                if (parts[1].contains("T")) {

                    lines_new.add(current);
                }
                else {

                }
                if (time >= timeFrame * numberOfRuns) {
                    numberOfRuns++;
                    myTimes.add(lines_new);


                    lines_new = new ArrayList<String>();
                }

            }
        }
        return myTimes;
    }



    public void getOpenAndClose(List<ArrayList<String>> lines) {

        int abschnitt = 1;
        for (ArrayList<String> x: lines) {
            System.out.println("Abschnitt: " + abschnitt);
            List<Integer> tmp = new ArrayList<Integer>();
            int high = 0;
            int low = 10000;
            for (String y:x) {

                String[] parts = y.split(",");
                if (parts[2].contains("0")) {
                    int kurs = Integer.parseInt(parts[3]);
                    if (kurs > high) {
                        high = kurs;
                    }

                    if (kurs < low) {
                        low = kurs;
                    }

System.out.println("Produkt: " + parts[2] + " wurdeum " + parts[0] + " gehandelt mit kurs: " + kurs );

                    tmp.add(kurs);


                }

            }
            System.out.println("open: " + tmp.get(0));
            System.out.println("close: " + tmp.get(tmp.size()-1));
            System.out.println("high: " + high);
            System.out.println("low: " + low);
            abschnitt++;
        }

    }


    public List<String> readFile(String filename) {

        List<String> lines = new ArrayList<String>();


        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filename));
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
        }


        String line;

        try {
            while ((line = reader.readLine()) != null) {
                lines.add(line);

            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        try {
            reader.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }


        return lines;
    }




    public static void main(String[] args) {
        //String filename = "Standard-2014-04-29-12-04.csv";
        String filename = "Standard-small.txt";
        //Zeitspanne für Zeilen in Millisekunden
        int timeFrame = 3000;

        Test x = new Test();

        List<String> lines = x.readFile(filename);

List<ArrayList<String>> lines_split = x.splitFileByTime(lines,timeFrame);




        x.getOpenAndClose(lines_split);

    }
}

hadoop data structures

Reply via email to