Hi all,
I am trying to produce Avro files from a TSV file. We had an
original schema, which is defined as follows:
{ "type": "record",
"name": "accessLog",
"namespace": "avro_access_log",
"fields": [
{"name": "SquidIP" , "type": "string" },
{"name": "Timestamp" , "type": "long" },
{"name": "Hostname", "type": "string" }
]
}
Now that we have added additional fields, I would like to change the
schema to the following:
{ "type": "record",
"name": "accessLog",
"namespace": "avro_access_log",
"fields": [
{"name": "SquidIP" , "type": "string" },
{"name": "Timestamp" , "type": "long" },
{"name": "Hostname", "type": "string" },
{"name": "ClientIP", "type": "string" }
]
}
public static Object generateDatumBasedOnSchema(Schema schema, String
line, Map<String, Integer> badConversions){
GenericRecord record = new GenericData.Record(schema);
int fieldLength = schema.getFields().size();
int col =0;
String[] fields = line.trim().split("\t");
while(col < fieldLength){
try{
String name = getColumnName(col);
String v = "-";
try{
v = fields[col];
}catch(ArrayIndexOutOfBoundsException e){
if (alertedAIOOBE < 5){
System.err.println("index "+col+" is not in
fields");
}
alertedAIOOBE++;
return null;
}
Object value = ConvertFieldToType(getColumnType(col), v,
col);
record.put(name, value);
col++;
}catch(NullPointerException npe){ //this is threw when there is
no matching name for the column which indicates our schema is older than the
data.
System.err.println("Schema: "+schema.toString()+" does not
match line "+line);
return null;
}
catch(RuntimeException re){
System.err.println("Unknown option at "+col);
return null;
}
catch(Exception e){
e.printStackTrace();
return null;
}
}
return record;
}