[
https://issues.apache.org/jira/browse/HIVE-9962?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Johndee Burks updated HIVE-9962:
--------------------------------
Description:
To reproduce the limitation do the following.
Create two tables, the first with the full schema and the second with a partial
schema.
{code}
add jar
/opt/cloudera/parcels/CDH/lib/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar;
CREATE TABLE json_full
(autopolicy struct<is_active:boolean, policy_holder_name:string,
policy_num:string, vehicle:struct<brand:struct<model:string, year:int>,
price:double, vin:string>>)
ROW FORMAT SERDE 'org.apache.hcatalog.data.JsonSerDe';
CREATE TABLE json_part
(autopolicy struct<is_active:boolean, policy_holder_name:string,
policy_num:string, vehicle:struct<brand:struct<model:string, year:int>,
price:double>>)
ROW FORMAT SERDE 'org.apache.hcatalog.data.JsonSerDe';
{code}
The data for the table is below:
{code}
{"autopolicy": {"policy_holder_name": "someone", "policy_num": "20141012",
"is_active": true, "vehicle": {"brand": {"model": "Lexus", "year": 2012},
"vin": "RANDOM123", "price": 23450.50}}}
{code}
I put that data into a file and load it into the tables like this:
{code}
load data local inpath 'data.json' into table json_full;
load data local inpath 'data.json' into table json_part;
{code}
Then do a select against each table:
{code}
select * from json_full;
select * from json_part;
{code}
The second select should fail with an error similar to the one below:
{code}
15/03/12 23:19:30 [main]: ERROR CliDriver: Failed with exception
java.io.IOException:java.lang.NullPointerException
{code}
The code that throws this error is below:
{code}
172 private void populateRecord(List<Object> r, JsonToken token, JsonParser p,
HCatSchema s) throws IOException {
173 if (token != JsonToken.FIELD_NAME) {
174 throw new IOException("Field name expected");
175 }
176 String fieldName = p.getText();
177 int fpos;
178 try {
179 fpos = s.getPosition(fieldName);
180 } catch (NullPointerException npe) {
181 fpos = getPositionFromHiveInternalColumnName(fieldName);
182 LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName,
s);
183 if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) {
184 LOG.error("Hive internal column name {} and position "
185 + "encoding {} for the column name are at odds", fieldName, fpos);
186 throw npe;
187 }
188 if (fpos == -1) {
189 return; // unknown field, we return.
190 }
{code}
was:
To reproduce the limitation do the following.
Create two tables, the first with the full schema and the second with a partial
schema.
{code}
add jar
/opt/cloudera/parcels/CDH/lib/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar;
CREATE TABLE json_full
(autopolicy struct<is_active:boolean, policy_holder_name:string,
policy_num:string, vehicle:struct<brand:struct<model:string, year:int>,
price:double, vin:string>>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
CREATE TABLE json_part
(autopolicy struct<is_active:boolean, policy_holder_name:string,
policy_num:string, vehicle:struct<brand:struct<model:string, year:int>,
price:double>>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
{code}
The data for the table is below:
{code}
{"autopolicy": {"policy_holder_name": "someone", "policy_num": "20141012",
"is_active": true, "vehicle": {"brand": {"model": "Lexus", "year": 2012},
"vin": "RANDOM123", "price": 23450.50}}}
{code}
I put that data into a file and load it into the tables like this:
{code}
load data local inpath 'data.json' into table json_full;
load data local inpath 'data.json' into table json_part;
{code}
Then do a select against each table:
{code}
select * from json_full;
select * from json_part;
{code}
The second select should fail with an error similar to the one below:
{code}
15/03/12 23:19:30 [main]: ERROR CliDriver: Failed with exception
java.io.IOException:java.lang.NullPointerException
{code}
The code that throws this error is below:
{code}
172 private void populateRecord(List<Object> r, JsonToken token, JsonParser p,
HCatSchema s) throws IOException {
173 if (token != JsonToken.FIELD_NAME) {
174 throw new IOException("Field name expected");
175 }
176 String fieldName = p.getText();
177 int fpos;
178 try {
179 fpos = s.getPosition(fieldName);
180 } catch (NullPointerException npe) {
181 fpos = getPositionFromHiveInternalColumnName(fieldName);
182 LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName,
s);
183 if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) {
184 LOG.error("Hive internal column name {} and position "
185 + "encoding {} for the column name are at odds", fieldName, fpos);
186 throw npe;
187 }
188 if (fpos == -1) {
189 return; // unknown field, we return.
190 }
{code}
> JsonSerDe does not support reader schema different from data schema
> -------------------------------------------------------------------
>
> Key: HIVE-9962
> URL: https://issues.apache.org/jira/browse/HIVE-9962
> Project: Hive
> Issue Type: Improvement
> Components: HCatalog, Serializers/Deserializers
> Reporter: Johndee Burks
> Priority: Minor
>
> To reproduce the limitation do the following.
> Create two tables, the first with the full schema and the second with a partial
> schema.
> {code}
> add jar
> /opt/cloudera/parcels/CDH/lib/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar;
> CREATE TABLE json_full
> (autopolicy struct<is_active:boolean, policy_holder_name:string,
> policy_num:string, vehicle:struct<brand:struct<model:string, year:int>,
> price:double, vin:string>>)
> ROW FORMAT SERDE 'org.apache.hcatalog.data.JsonSerDe';
> CREATE TABLE json_part
> (autopolicy struct<is_active:boolean, policy_holder_name:string,
> policy_num:string, vehicle:struct<brand:struct<model:string, year:int>,
> price:double>>)
> ROW FORMAT SERDE 'org.apache.hcatalog.data.JsonSerDe';
> {code}
> The data for the table is below:
> {code}
> {"autopolicy": {"policy_holder_name": "someone", "policy_num": "20141012",
> "is_active": true, "vehicle": {"brand": {"model": "Lexus", "year": 2012},
> "vin": "RANDOM123", "price": 23450.50}}}
> {code}
> I put that data into a file and load it into the tables like this:
> {code}
> load data local inpath 'data.json' into table json_full;
> load data local inpath 'data.json' into table json_part;
> {code}
> Then do a select against each table:
> {code}
> select * from json_full;
> select * from json_part;
> {code}
> The second select should fail with an error similar to the one below:
> {code}
> 15/03/12 23:19:30 [main]: ERROR CliDriver: Failed with exception
> java.io.IOException:java.lang.NullPointerException
> {code}
> The code that throws this error is below:
> {code}
> 172 private void populateRecord(List<Object> r, JsonToken token, JsonParser
> p, HCatSchema s) throws IOException {
> 173 if (token != JsonToken.FIELD_NAME) {
> 174 throw new IOException("Field name expected");
> 175 }
> 176 String fieldName = p.getText();
> 177 int fpos;
> 178 try {
> 179 fpos = s.getPosition(fieldName);
> 180 } catch (NullPointerException npe) {
> 181 fpos = getPositionFromHiveInternalColumnName(fieldName);
> 182 LOG.debug("NPE finding position for field [{}] in schema [{}]",
> fieldName, s);
> 183 if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) {
> 184 LOG.error("Hive internal column name {} and position "
> 185 + "encoding {} for the column name are at odds", fieldName, fpos);
> 186 throw npe;
> 187 }
> 188 if (fpos == -1) {
> 189 return; // unknown field, we return.
> 190 }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)