hi,
i got this code which extracts timeframes frome a logfile and does some
calculation on it.
input lines looks like this:
1000,T,0,104,1000,1100,27147,80,80,80,80,81,81,98,98,98,101,137,137,139,177,177,177,173,166,149,134,130,124,119,111,104,92
1000,T,1,743,300,300,4976,492,492,492,492,492,497,497,856,856,863,866,875,875,954,954,954,954,954,954,954,954,770,770,770,770,743
1000,T,2,40,800,1000,11922,29,29,29,29,29,29,29,44,46,46,50,51,51,65,65,65,61,52,47,47,47,44,42,40,32,30
2001,T,0,103,6700,7000,44658,80,80,80,80,80,81,96,98,98,101,134,137,139,220,192,176,168,162,156,149,144,132,122,112,104,95
1002,U,....
the first value being the time in ms,
T being the lines im interrested in
0,1,2 being a product ID,
104,743,40,103 being the price i want.
now i need to extract all prices for some specific timeframe, lets say
3000ms.
the code at the end works but has the problem that the variable
"numberOfRuns" is counted up and used to calculate the time and i guess
using this system in hadoop doesnt work.
so i need a way to extract the "timeframes" in the mapper and what data
structure would you use?
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class Test {
public List<ArrayList<String>> splitFileByTime(List<String> lines,
int timeFrame) {
List<ArrayList<String>> myTimes = new
ArrayList<ArrayList<String>>();
ArrayList<String> lines_new = new ArrayList<String>();
for (String z: lines) {
//System.out.println(z);
}
int numberOfRuns = 1;
for (String current : lines) {
String[] parts = current.split(",");
int time = Integer.parseInt(parts[0]);
if (time < 0) {
// Zeiten vor Beginn der Simulation, uninteressant
} else {
if (parts[1].contains("T")) {
lines_new.add(current);
}
else {
}
if (time >= timeFrame * numberOfRuns) {
numberOfRuns++;
myTimes.add(lines_new);
lines_new = new ArrayList<String>();
}
}
}
return myTimes;
}
public void getOpenAndClose(List<ArrayList<String>> lines) {
int abschnitt = 1;
for (ArrayList<String> x: lines) {
System.out.println("Abschnitt: " + abschnitt);
List<Integer> tmp = new ArrayList<Integer>();
int high = 0;
int low = 10000;
for (String y:x) {
String[] parts = y.split(",");
if (parts[2].contains("0")) {
int kurs = Integer.parseInt(parts[3]);
if (kurs > high) {
high = kurs;
}
if (kurs < low) {
low = kurs;
}
System.out.println("Produkt: " + parts[2] + " wurde
um " + parts[0] + " gehandelt mit kurs: " + kurs );
tmp.add(kurs);
}
}
System.out.println("open: " + tmp.get(0));
System.out.println("close: " + tmp.get(tmp.size()-1));
System.out.println("high: " + high);
System.out.println("low: " + low);
abschnitt++;
}
}
public List<String> readFile(String filename) {
List<String> lines = new ArrayList<String>();
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(filename));
} catch (FileNotFoundException e1) {
e1.printStackTrace();
}
String line;
try {
while ((line = reader.readLine()) != null) {
lines.add(line);
}
} catch (IOException e) {
e.printStackTrace();
}
try {
reader.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return lines;
}
public static void main(String[] args) {
//String filename = "Standard-2014-04-29-12-04.csv";
String filename = "Standard-small.txt";
//Zeitspanne für Zeilen in Millisekunden
int timeFrame = 3000;
Test x = new Test();
List<String> lines = x.readFile(filename);
List<ArrayList<String>> lines_split = x.splitFileByTime(lines,
timeFrame);
x.getOpenAndClose(lines_split);
}
}