Thanks a bunch for replying! I quickly ran this - the TAGS_US data in HDFS is
in parquet format.
select distinct typeof(cvalue)
from hdfs.drill.TAGS_US;
+----------+
| EXPR$0 |
+----------+
| VARCHAR |
+----------+
Same with the table in Redshift. I changed my query to specifically cast the
columns to VARCHAR again -
select count(*)
from redshift.reports.public.us_tags as a
join hdfs.drill.TAGS_US as b
on cast(b.cvalue as varchar) = cast(a.tag_value as varchar) ;
I see the same error again.
Here is the explain plan for the query -
select count(*)
from hdfs.drill.TAGS_US as a
join redshift.reports.public.us_tags as b
on a.cvalue = b.tag_value;
Error: SYSTEM ERROR: IllegalStateException: Already had POJO for id
(java.lang.Integer)
[com.fasterxml.jackson.annotation.ObjectIdGenerator$IdKey@3372bbe8]
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 StreamAgg(group=[{}], EXPR$0=[$SUM0($0)])
00-02 UnionExchange
01-01 StreamAgg(group=[{}], EXPR$0=[COUNT()])
01-02 Project($f0=[0])
01-03 HashJoin(condition=[=($0, $1)], joinType=[inner])
01-05 Scan(groupscan=[ParquetGroupScan
[entries=[ReadEntryWithPath
[path=hdfs://ec2-XX-XX-XX-XX.compute-1.amazonaws.com:8020/drill/TAGS_US]],
selectionRoot=hdfs://ec2-XX-XX-XX-XX.compute-1.amazonaws.com:8020/drill/TAGS_US,
numFiles=1, usedMetadataFile=false, columns=[`cvalue`]]])
01-04 BroadcastExchange
02-01 Project(tag_value=[$2])
02-02 Jdbc(sql=[SELECT *
FROM "reports"."public"."us_tags"])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "jdbc-scan",
"@id" : 0,
"sql" : "SELECT *\nFROM \"reports\".\"public\".\"us_tags\"",
"config" : {
"type" : "jdbc",
"driver" : "com.amazon.redshift.jdbc4.Driver",
"url" : "",
"username" : "",
"password" : "",
"enabled" : true
},
"userName" : "",
"cost" : 0.0
}, {
"pop" : "project",
"@id" : 131073,
"exprs" : [ {
"ref" : "`tag_value`",
"expr" : "`tag_value`"
} ],
"child" : 0,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 100.0
}, {
"pop" : "broadcast-exchange",
"@id" : 65540,
"child" : 131073,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 100.0
}, {
"pop" : "parquet-scan",
"@id" : 65541,
"userName" : "XXXX",
"entries" : [ {
"path" :
"hdfs://ec2-XX-XX-XX-XX.compute-1.amazonaws.com:8020/drill/TAGS_US"
} ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "hdfs://ec2-XX-XX-XX-XX.compute-1.amazonaws.com:8020/",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : true,
"defaultInputFormat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : null
},
"drill" : {
"location" : "/drill",
"writable" : true,
"defaultInputFormat" : "tsv"
},
"drill2" : {
"location" : "/drill",
"writable" : true,
"defaultInputFormat" : "csv"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
},
"avro" : {
"type" : "avro"
},
"sequencefile" : {
"type" : "sequencefile",
"extensions" : [ "seq" ]
},
"csvh" : {
"type" : "text",
"extensions" : [ "csvh" ],
"extractHeader" : true,
"delimiter" : ","
}
}
},
"format" : {
"type" : "parquet"
},
"columns" : [ "`cvalue`" ],
"selectionRoot" :
"hdfs://ec2-XX-XX-XX-XX.compute-1.amazonaws.com:8020/drill/TAGS_US",
"fileSet" : [ "/drill/TAGS_US/0_0_1.parquet",
"/drill/TAGS_US/0_0_0.parquet" ],
"cost" : 4.1667342E7
}, {
"pop" : "hash-join",
"@id" : 65539,
"left" : 65541,
"right" : 65540,
"conditions" : [ {
"relationship" : "EQUALS",
"left" : "`cvalue`",
"right" : "`tag_value`"
} ],
"joinType" : "INNER",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 4.1667342E7
}, {
"pop" : "project",
"@id" : 65538,
"exprs" : [ {
"ref" : "`$f0`",
"expr" : "0"
} ],
"child" : 65539,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 4.1667342E7
}, {
"pop" : "streaming-aggregate",
"@id" : 65537,
"child" : 65538,
"keys" : [ ],
"exprs" : [ {
"ref" : "`EXPR$0`",
"expr" : "count(1) "
} ],
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1.0
}, {
"pop" : "union-exchange",
"@id" : 2,
"child" : 65537,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1.0
}, {
"pop" : "streaming-aggregate",
"@id" : 1,
"child" : 2,
"keys" : [ ],
"exprs" : [ {
"ref" : "`EXPR$0`",
"expr" : "$sum0(`EXPR$0`) "
} ],
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1.0
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1.0
} ]
} |
+------+------+
> On Mon, Jan 4, 2016 at 9:42 PM, Andries Engelbrecht
> <[email protected]> wrote:
> Perhaps check the data type of all the fields being used for the join.
>
> Select cvalue, TYPEOF(cvalue) from hdfs...... limit 10
>
> and similar for tag_value on redshift.
>
> You can then do a predicate to find records where the data type may be
> different.
> where typeof(<field>) not like '<data type of field>'
>
> I believe there was a nice write up on they topic, but can't find it now.
>
>
> --Andries
>
>
>
>
> > On Jan 3, 2016, at 8:45 PM, Rohit Kulkarni <[email protected]>
> > wrote:
> >
> > Hello all,
> >
> > I am sure if not all of you, but some of you must have seen this error some
> > time -
> >
> > *Error: SYSTEM ERROR: IllegalStateException: Already had POJO for id
> > (java.lang.Integer)
> > [com.fasterxml.jackson.annotation.ObjectIdGenerator$IdKey@3372bbe8]*
> >
> > I am trying to do a join between Redshift (JDBC) and HDFS like this -
> >
> >
> >
> >
> > *select count(*)from hdfs.drill.TAGS_US as aright join
> > redshift.reports.public.us_tags as bon a.cvalue = b.tag_value;*
> >
> >
> > I don't see anything wrong in the query. The two individual tables return
> > proper data when fired a query separately. Is something missing or am I
> > doing something wrong?
> >
> > Would very much appreciate your help! Thanks!!
> >
> > --
> > Warm Regards,
> > Rohit Kulkarni
--
Warm Regards,
Rohit Kulkarni
Mo.: +91 89394 63593