Hi all,

I am trying to produce Avro files from a TSV file.  We had an
original schema, which is defined as follows:
{   "type": "record",
    "name": "accessLog",
    "namespace": "avro_access_log",
    "fields": [
      {"name": "SquidIP" , "type": "string" },
      {"name": "Timestamp" , "type": "long"  },
      {"name": "Hostname", "type": "string" }
    ]
}

Now that we have added additional fields, I would like to change the
schema to:

{   "type": "record",
    "name": "accessLog",
    "namespace": "avro_access_log",
    "fields": [
      {"name": "SquidIP" , "type": "string" },
      {"name": "Timestamp" , "type": "long"  },
      {"name": "Hostname", "type": "string" },
      {"name": "ClientIP", "type": "string" }
    ]
}







    public static Object generateDatumBasedOnSchema(Schema schema, String
line, Map<String, Integer> badConversions){
        GenericRecord record = new GenericData.Record(schema);
        int fieldLength = schema.getFields().size();
        int col =0;
        String[] fields = line.trim().split("\t");
        while(col < fieldLength){
            try{
                String name = getColumnName(col);
                String v = "-";
                try{
                    v = fields[col];
                }catch(ArrayIndexOutOfBoundsException e){
                    if (alertedAIOOBE < 5){
                        System.err.println("index "+col+" is not in
fields");
                    }
                    alertedAIOOBE++;
                    return null;
                }
                Object value = ConvertFieldToType(getColumnType(col), v,
col);
                record.put(name, value);
                col++;
            }catch(NullPointerException npe){ //this is threw when there is
no matching name for the column which indicates our schema is older than the
data.
                System.err.println("Schema: "+schema.toString()+" does not
match line "+line);
                return null;
            }
            catch(RuntimeException re){
                System.err.println("Unknown option at "+col);
                return null;
            }
            catch(Exception e){
                e.printStackTrace();
                return null;
            }

        }
        return record;
    }

Reply via email to