[ 
https://issues.apache.org/jira/browse/IMPALA-7278?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Tim Armstrong updated IMPALA-7278:
----------------------------------
    Description: 
Distinct clause when executed with custom UDF returns unexpected results.

Custom UDF Definition:

udf.h file:
{code}
#ifndef IMPALA_UDF_SAMPLE_UDF_H
#define IMPALA_UDF_SAMPLE_UDF_H

#include "udf.h"

using namespace impala_udf;

#ifdef __cplusplus
extern "C"
{
#endif

StringVal udf_clear(FunctionContext* context, StringVal& sInput);
#ifdef __cplusplus
}
#endif
#endif
{code}

udf.cpp:

{code}
#include "clear.h"

StringVal udf_clear(
 FunctionContext* context,
 StringVal& sInput /* String to encrypt */
 )
{
 unsigned char* pReturnData = context->Allocate( 100 );
 memset( pReturnData, NULL, 100);
 memcpy(pReturnData, sInput.ptr, sInput.len );
 StringVal sResult( pReturnData );
 sResult.len = sInput.len;
 context->Free( (uint8_t*)pReturnData );
 return sResult;
}
{code}
CMakeLists.txt:
{code}
project (clear)
 ADD_LIBRARY (clear2.8_RHEL SHARED clear.cpp )
 TARGET_LINK_LIBRARIES (clear2.8_RHEL libImpalaUdf.a )
 SET_TARGET_PROPERTIES (clear2.8_RHEL PROPERTIES SUFFIX ".so")
 SET_TARGET_PROPERTIES (clear2.8_RHEL PROPERTIES PREFIX "")
 INSTALL ( TARGETS clear2.8_RHEL DESTINATION . )

Query Syntax:

CREATE TABLE clear (c1 STRING, c2 STRING) row format delimited fields 
terminated by ',' stored as textfile;
LOAD DATA INPATH '/user/clear.csv' OVERWRITE INTO TABLE clear;

Query: describe clear
+------+--------+---------+
| name | type | comment |
+------+--------+---------+
| c1 | string | |
| c2 | string | |
+------+--------+---------+
Fetched 2 row(s) in 0.04s

select * from clear;
+---------+---------+
| c1 | c2 |
+---------+---------+
| 1111111 | 1111111 |
| 1111111 | 1111111 |
| 222222 | 222222 |
| 444444 | 444444 |
| 222222 | 222222 |
| 3333333 | 3333333 |
| 3333333 | 3333333 |
+---------+---------+
Fetched 7 row(s) in 0.14s

select distinct udf_clear(c1),c2 from clear;
+-----------------------+---------+
| default.udf_clear(c1) | c2 |
+-----------------------+---------+
| {color:#d04437}*222222* {color}| 444444 |   <== this should be *444444* 
| 222222 | 222222 |
| 3333333 | 3333333 |
| 1111111 | 1111111 |
+-----------------------+---------+
Fetched 4 row(s) in 0.24s
{code}
 
Expected result:
{code}
select distinct c1,c2 from clear;
+---------+---------+
| c1 | c2 |
+---------+---------+
| 444444 | 444444 |
| 222222 | 222222 |
| 3333333 | 3333333 |
| 1111111 | 1111111 |
+---------+---------+
Fetched 4 row(s) in 0.25s
 {code}

  was:
Distinct clause when executed with custom UDF returns unexpected results.

Custom UDF Definition:

udf.h file:

==========
{code}
#ifndef IMPALA_UDF_SAMPLE_UDF_H
#define IMPALA_UDF_SAMPLE_UDF_H

#include "udf.h"

using namespace impala_udf;

#ifdef __cplusplus
extern "C"
{
#endif

 

StringVal udf_clear(FunctionContext* context, StringVal& sInput);
#ifdef __cplusplus
}
#endif
#endif
{code}

udf.cpp:

========
{code}
#include "clear.h"

StringVal udf_clear(
 FunctionContext* context,
 StringVal& sInput /* String to encrypt */
 )
{
 unsigned char* pReturnData = context->Allocate( 100 );
 memset( pReturnData, NULL, 100);
 memcpy(pReturnData, sInput.ptr, sInput.len );
 StringVal sResult( pReturnData );
 sResult.len = sInput.len;
 context->Free( (uint8_t*)pReturnData );
 return sResult;
}
{code}
CMakeLists.txt:

===============
{code}
project (clear)
 ADD_LIBRARY (clear2.8_RHEL SHARED clear.cpp )
 TARGET_LINK_LIBRARIES (clear2.8_RHEL libImpalaUdf.a )
 SET_TARGET_PROPERTIES (clear2.8_RHEL PROPERTIES SUFFIX ".so")
 SET_TARGET_PROPERTIES (clear2.8_RHEL PROPERTIES PREFIX "")
 INSTALL ( TARGETS clear2.8_RHEL DESTINATION . )

Query Syntax:

CREATE TABLE clear (c1 STRING, c2 STRING) row format delimited fields 
terminated by ',' stored as textfile;
LOAD DATA INPATH '/user/clear.csv' OVERWRITE INTO TABLE clear;

Query: describe clear
+------+--------+---------+
| name | type | comment |
+------+--------+---------+
| c1 | string | |
| c2 | string | |
+------+--------+---------+
Fetched 2 row(s) in 0.04s

select * from clear;
+---------+---------+
| c1 | c2 |
+---------+---------+
| 1111111 | 1111111 |
| 1111111 | 1111111 |
| 222222 | 222222 |
| 444444 | 444444 |
| 222222 | 222222 |
| 3333333 | 3333333 |
| 3333333 | 3333333 |
+---------+---------+
Fetched 7 row(s) in 0.14s

select distinct udf_clear(c1),c2 from clear;
+-----------------------+---------+
| default.udf_clear(c1) | c2 |
+-----------------------+---------+
| {color:#d04437}*222222* {color}| 444444 |   <== this should be *444444* 
| 222222 | 222222 |
| 3333333 | 3333333 |
| 1111111 | 1111111 |
+-----------------------+---------+
Fetched 4 row(s) in 0.24s
{code}
 
Expected result:
{code}
select distinct c1,c2 from clear;

+---------+---------+
| c1 | c2 |
+---------+---------+
| 444444 | 444444 |
| 222222 | 222222 |
| 3333333 | 3333333 |
| 1111111 | 1111111 |
+---------+---------+
Fetched 4 row(s) in 0.25s
 {code}


> distinct clause is not working as expected with custom UDFs
> -----------------------------------------------------------
>
>                 Key: IMPALA-7278
>                 URL: https://issues.apache.org/jira/browse/IMPALA-7278
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Frontend
>    Affects Versions: Impala 2.8.0
>            Reporter: shabnam perween
>            Priority: Critical
>
> Distinct clause when executed with custom UDF returns unexpected results.
> Custom UDF Definition:
> udf.h file:
> {code}
> #ifndef IMPALA_UDF_SAMPLE_UDF_H
> #define IMPALA_UDF_SAMPLE_UDF_H
> #include "udf.h"
> using namespace impala_udf;
> #ifdef __cplusplus
> extern "C"
> {
> #endif
> StringVal udf_clear(FunctionContext* context, StringVal& sInput);
> #ifdef __cplusplus
> }
> #endif
> #endif
> {code}
> udf.cpp:
> {code}
> #include "clear.h"
> StringVal udf_clear(
>  FunctionContext* context,
>  StringVal& sInput /* String to encrypt */
>  )
> {
>  unsigned char* pReturnData = context->Allocate( 100 );
>  memset( pReturnData, NULL, 100);
>  memcpy(pReturnData, sInput.ptr, sInput.len );
>  StringVal sResult( pReturnData );
>  sResult.len = sInput.len;
>  context->Free( (uint8_t*)pReturnData );
>  return sResult;
> }
> {code}
> CMakeLists.txt:
> {code}
> project (clear)
>  ADD_LIBRARY (clear2.8_RHEL SHARED clear.cpp )
>  TARGET_LINK_LIBRARIES (clear2.8_RHEL libImpalaUdf.a )
>  SET_TARGET_PROPERTIES (clear2.8_RHEL PROPERTIES SUFFIX ".so")
>  SET_TARGET_PROPERTIES (clear2.8_RHEL PROPERTIES PREFIX "")
>  INSTALL ( TARGETS clear2.8_RHEL DESTINATION . )
> Query Syntax:
> CREATE TABLE clear (c1 STRING, c2 STRING) row format delimited fields 
> terminated by ',' stored as textfile;
> LOAD DATA INPATH '/user/clear.csv' OVERWRITE INTO TABLE clear;
> Query: describe clear
> +------+--------+---------+
> | name | type | comment |
> +------+--------+---------+
> | c1 | string | |
> | c2 | string | |
> +------+--------+---------+
> Fetched 2 row(s) in 0.04s
> select * from clear;
> +---------+---------+
> | c1 | c2 |
> +---------+---------+
> | 1111111 | 1111111 |
> | 1111111 | 1111111 |
> | 222222 | 222222 |
> | 444444 | 444444 |
> | 222222 | 222222 |
> | 3333333 | 3333333 |
> | 3333333 | 3333333 |
> +---------+---------+
> Fetched 7 row(s) in 0.14s
> select distinct udf_clear(c1),c2 from clear;
> +-----------------------+---------+
> | default.udf_clear(c1) | c2 |
> +-----------------------+---------+
> | {color:#d04437}*222222* {color}| 444444 |   <== this should be *444444* 
> | 222222 | 222222 |
> | 3333333 | 3333333 |
> | 1111111 | 1111111 |
> +-----------------------+---------+
> Fetched 4 row(s) in 0.24s
> {code}
>  
> Expected result:
> {code}
> select distinct c1,c2 from clear;
> +---------+---------+
> | c1 | c2 |
> +---------+---------+
> | 444444 | 444444 |
> | 222222 | 222222 |
> | 3333333 | 3333333 |
> | 1111111 | 1111111 |
> +---------+---------+
> Fetched 4 row(s) in 0.25s
>  {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to