[
https://issues.apache.org/jira/browse/HIVE-12955?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Luis Gonzalez updated HIVE-12955:
---------------------------------
Description:
Hi!
We think we have hit a bug. We have tested this in many ways for a couple of
hours now, and there is apparently a problem with avro.schema.literal when you
specify more than 50 fields. In our tests we have found that, regardless of the
Avro file (table) we want to load, it fails with 60 fields. Some tables we use
have more than 400 fields and have the same problem.
For instance if we launch the command
{code:none}
hive> drop table tableName;
OK
Time taken: 0.162 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields":
[
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null",
"boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null",
"boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null",
"boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null",
"string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null",
"string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null",
"string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null",
"string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket.../path/to/avro';
OK
Time taken: 0.412 seconds
hive>
> select * from tableName limit 10;
OK
{code}
but when using the same AVRO file and more fields it fails
{code:none}
hive> drop table tableName;
OK
Time taken: 0.146 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields":
[
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null",
"boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null",
"boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null",
"boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null",
"string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null",
"string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null",
"string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null",
"string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null},
> {"name": "Customer_Service_Centre__c", "type":["null",
"string"],"default":null},
> {"name": "Acquisition_Channel_Type__c", "type":["null",
"string"],"default":null},
> {"name": "Acquisition_Channel_Description__c", "type":["null",
"string"],"default":null},
> {"name": "Comments__c", "type":["null", "string"],"default":null},
> {"name": "Street_Commercial__c", "type":["null",
"string"],"default":null},
> {"name": "Country_Commercial__c", "type":["null",
"string"],"default":null},
> {"name": "City_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_Commercial__c", "type":["null",
"string"],"default":null},
> {"name": "Atlas_Branch_Number__c", "type":["null",
"double"],"default":null},
> {"name": "Timezone__c", "type":["null", "string"],"default":null},
> {"name": "Billing_Language__c", "type":["null",
"string"],"default":null},
> {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket/path/to/avro';
OK
Time taken: 0.48 seconds
hive>
> select * from tableName limit 10;
OK
Failed with exception java.io.IOException:org.apache.avro.AvroTypeException:
Found cdr.avro.Account, expecting
org.apache.hadoop.hive.CannotDetermineSchemaSentinel
Time taken: 0.028 seconds
{code}
This problem does not occur when we store the Avro schema with 400 fields in a
file in S3 and use the avro.schema.url property instead.
was:
Hi!
We think we have hit a bug. We have tested this in many ways for a couple of
hours now, and there is apparently a problem with avro.schema.literal when you
specify more than 50 fields. In our tests we have found that, regardless of the
Avro file (table) we want to load, it fails with 60 fields. Some tables we use
have more than 400 fields and have the same problem.
For instance if we launch the command
{code:shell}
hive> drop table tableName;
OK
Time taken: 0.162 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields":
[
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null",
"boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null",
"boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null",
"boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null",
"string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null",
"string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null",
"string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null",
"string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket.../path/to/avro';
OK
Time taken: 0.412 seconds
hive>
> select * from tableName limit 10;
OK
{code}
but when using the same AVRO file and more fields it fails
{code:shell}
hive> drop table tableName;
OK
Time taken: 0.146 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields":
[
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null",
"boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null",
"boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null",
"boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null",
"string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null",
"string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null",
"string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null",
"string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null},
> {"name": "Customer_Service_Centre__c", "type":["null",
"string"],"default":null},
> {"name": "Acquisition_Channel_Type__c", "type":["null",
"string"],"default":null},
> {"name": "Acquisition_Channel_Description__c", "type":["null",
"string"],"default":null},
> {"name": "Comments__c", "type":["null", "string"],"default":null},
> {"name": "Street_Commercial__c", "type":["null",
"string"],"default":null},
> {"name": "Country_Commercial__c", "type":["null",
"string"],"default":null},
> {"name": "City_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_Commercial__c", "type":["null",
"string"],"default":null},
> {"name": "Atlas_Branch_Number__c", "type":["null",
"double"],"default":null},
> {"name": "Timezone__c", "type":["null", "string"],"default":null},
> {"name": "Billing_Language__c", "type":["null",
"string"],"default":null},
> {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket/path/to/avro';
OK
Time taken: 0.48 seconds
hive>
> select * from tableName limit 10;
OK
Failed with exception java.io.IOException:org.apache.avro.AvroTypeException:
Found cdr.avro.Account, expecting
org.apache.hadoop.hive.CannotDetermineSchemaSentinel
Time taken: 0.028 seconds
{code}
This problem does not occur when we store the Avro schema with 400 fields in a
file in S3 and use the avro.schema.url property instead.
> avro.schema.literal doesn't support more than 50 fields.
> --------------------------------------------------------
>
> Key: HIVE-12955
> URL: https://issues.apache.org/jira/browse/HIVE-12955
> Project: Hive
> Issue Type: Bug
> Components: Hive
> Affects Versions: 1.0.0
> Reporter: Luis Gonzalez
> Priority: Minor
>
> Hi!
> We think we have hit a bug. We have tested this in many ways for a couple
> of hours now, and there is apparently a problem with avro.schema.literal when
> you specify more than 50 fields. In our tests we have found that, regardless
> of the Avro file (table) we want to load, it fails with 60 fields. Some tables
> we use have more than 400 fields and have the same problem.
> For instance if we launch the command
> {code:none}
> hive> drop table tableName;
> OK
> Time taken: 0.162 seconds
> hive>
> > CREATE EXTERNAL TABLE tableName
> > ROW FORMAT
> > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> > WITH SERDEPROPERTIES ('avro.schema.literal'='
> > { "namespace": "cdr.avro", "type": "record", "name": "Account",
> "fields": [
> > {"name": "Id", "type":["null", "string"],"default":null},
> > {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> > {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> > {"name": "Name", "type":["null", "string"],"default":null},
> > {"name": "Type", "type":["null", "string"],"default":null},
> > {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> > {"name": "ParentId", "type":["null", "string"],"default":null},
> > {"name": "Phone", "type":["null", "string"],"default":null},
> > {"name": "Fax", "type":["null", "string"],"default":null},
> > {"name": "AccountNumber", "type":["null", "string"],"default":null},
> > {"name": "Website", "type":["null", "string"],"default":null},
> > {"name": "Industry", "type":["null", "string"],"default":null},
> > {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> > {"name": "Description", "type":["null", "string"],"default":null},
> > {"name": "OwnerId", "type":["null", "string"],"default":null},
> > {"name": "CreatedDate", "type":["null", "string"],"default":null},
> > {"name": "CreatedById", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> > {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> > {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> > {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> > {"name": "IsCustomerPortal", "type":["null",
> "boolean"],"default":null},
> > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> > {"name": "IT_Developer_Fee__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> > {"name": "Shortname__c", "type":["null", "string"],"default":null},
> > {"name": "Excluir_compensacion_por_desvio__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Commercial_Area__c", "type":["null",
> "string"],"default":null},
> > {"name": "Account_Status__c", "type":["null",
> "string"],"default":null},
> > {"name": "Active_Fiscal_Details__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Commercial_Brand__c", "type":["null",
> "string"],"default":null},
> > {"name": "Agreed_payment_method__c", "type":["null",
> "string"],"default":null},
> > {"name": "Division__c", "type":["null", "string"],"default":null},
> > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> > {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Country_fiscal__c", "type":["null",
> "string"],"default":null},
> > {"name": "Fiscal_Number_1__c", "type":["null",
> "string"],"default":null},
> > {"name": "Fiscal_Number_2__c", "type":["null",
> "string"],"default":null},
> > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Post_Code_fiscal__c", "type":["null",
> "string"],"default":null},
> > {"name": "Web_Prepayment__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Customer_Subtype__c", "type":["null",
> "string"],"default":null},
> > {"name": "Relationship__c", "type":["null", "string"],"default":null},
> > {"name": "Market_Country__c", "type":["null", "string"],"default":null}
> > ] }
> > ')
> > STORED AS
> > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> > OUTPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> > LOCATION 's3://bucket.../path/to/avro';
> OK
> Time taken: 0.412 seconds
> hive>
> > select * from tableName limit 10;
> OK
> {code}
> but when using the same AVRO file and more fields it fails
> {code:none}
> hive> drop table tableName;
> OK
> Time taken: 0.146 seconds
> hive>
> > CREATE EXTERNAL TABLE tableName
> > ROW FORMAT
> > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> > WITH SERDEPROPERTIES ('avro.schema.literal'='
> > { "namespace": "cdr.avro", "type": "record", "name": "Account",
> "fields": [
> > {"name": "Id", "type":["null", "string"],"default":null},
> > {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> > {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> > {"name": "Name", "type":["null", "string"],"default":null},
> > {"name": "Type", "type":["null", "string"],"default":null},
> > {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> > {"name": "ParentId", "type":["null", "string"],"default":null},
> > {"name": "Phone", "type":["null", "string"],"default":null},
> > {"name": "Fax", "type":["null", "string"],"default":null},
> > {"name": "AccountNumber", "type":["null", "string"],"default":null},
> > {"name": "Website", "type":["null", "string"],"default":null},
> > {"name": "Industry", "type":["null", "string"],"default":null},
> > {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> > {"name": "Description", "type":["null", "string"],"default":null},
> > {"name": "OwnerId", "type":["null", "string"],"default":null},
> > {"name": "CreatedDate", "type":["null", "string"],"default":null},
> > {"name": "CreatedById", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> > {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> > {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> > {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> > {"name": "IsCustomerPortal", "type":["null",
> "boolean"],"default":null},
> > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> > {"name": "IT_Developer_Fee__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> > {"name": "Shortname__c", "type":["null", "string"],"default":null},
> > {"name": "Excluir_compensacion_por_desvio__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Commercial_Area__c", "type":["null",
> "string"],"default":null},
> > {"name": "Account_Status__c", "type":["null",
> "string"],"default":null},
> > {"name": "Active_Fiscal_Details__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Commercial_Brand__c", "type":["null",
> "string"],"default":null},
> > {"name": "Agreed_payment_method__c", "type":["null",
> "string"],"default":null},
> > {"name": "Division__c", "type":["null", "string"],"default":null},
> > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> > {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Country_fiscal__c", "type":["null",
> "string"],"default":null},
> > {"name": "Fiscal_Number_1__c", "type":["null",
> "string"],"default":null},
> > {"name": "Fiscal_Number_2__c", "type":["null",
> "string"],"default":null},
> > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Post_Code_fiscal__c", "type":["null",
> "string"],"default":null},
> > {"name": "Web_Prepayment__c", "type":["null",
> "boolean"],"default":null},
> > {"name": "Customer_Subtype__c", "type":["null",
> "string"],"default":null},
> > {"name": "Relationship__c", "type":["null", "string"],"default":null},
> > {"name": "Market_Country__c", "type":["null",
> "string"],"default":null},
> > {"name": "Customer_Service_Centre__c", "type":["null",
> "string"],"default":null},
> > {"name": "Acquisition_Channel_Type__c", "type":["null",
> "string"],"default":null},
> > {"name": "Acquisition_Channel_Description__c", "type":["null",
> "string"],"default":null},
> > {"name": "Comments__c", "type":["null", "string"],"default":null},
> > {"name": "Street_Commercial__c", "type":["null",
> "string"],"default":null},
> > {"name": "Country_Commercial__c", "type":["null",
> "string"],"default":null},
> > {"name": "City_Commercial__c", "type":["null",
> "string"],"default":null},
> > {"name": "Post_Code_Commercial__c", "type":["null",
> "string"],"default":null},
> > {"name": "Atlas_Branch_Number__c", "type":["null",
> "double"],"default":null},
> > {"name": "Timezone__c", "type":["null", "string"],"default":null},
> > {"name": "Billing_Language__c", "type":["null",
> "string"],"default":null},
> > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
> > ] }
> > ')
> > STORED AS
> > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> > OUTPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> > LOCATION 's3://bucket/path/to/avro';
> OK
> Time taken: 0.48 seconds
> hive>
> > select * from tableName limit 10;
> OK
> Failed with exception java.io.IOException:org.apache.avro.AvroTypeException:
> Found cdr.avro.Account, expecting
> org.apache.hadoop.hive.CannotDetermineSchemaSentinel
> Time taken: 0.028 seconds
> {code}
> This problem does not occur when we store the Avro schema with 400 fields in
> a file in S3 and use the avro.schema.url property instead.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)