hi,
http://impala.apache.org/docs/build/html/topics/impala_udf.html this URL gives 
me little help, Could you see my code and then give me some useful advise. 
Could you show me a example of UDA function which input is string,intermediate 
results saved as unsigned char array, output is string.



------------------ ???????? ------------------
??????: "????????????"<[email protected]>;
????????: 2018??6??7??(??????) ????8:32
??????: "dev"<[email protected]>;

????: ?????? UDA debugging, was Re: Broken/Flaky Tests



md5_udaf.h


#ifndef _MD5_UDAF_H_ 
#define _MD5_UDAF_H_
#include"udf/udf.h"
using namespace impala_udf;
#define UDF_MD5_UPPERCASE 1
/* generate same result as previous releases
 * not correct if the length of the input is 56 + 64*N (N>= 0) bytes
 */
#define UDF_MD5_COMPAT 0

void md5(const unsigned char message[], int len, unsigned char result[]);


void init_func(FunctionContext* context, StringVal* result);

void update_func(FunctionContext*context, const StringVal& input, StringVal* 
result); 

void merge_func(FunctionContext*context,const StringVal& input, StringVal* 
result);

StringVal finalize_func(FunctionContext*context,const StringVal& val); 
#endif 






#######################
#######################
md5_udaf.cpp


#include "md5_udaf.h" 
#include "md5.h"
#include "udf/udf.h"
#include <iostream>
#include <ctype.h>
using namespace std; 


#define R1(a, b, c, d, xk, s, ti) (b + LROT((a + F(b, c, d) + xk + ti), s))
#define R2(a, b, c, d, xk, s, ti) (b + LROT((a + G(b, c, d) + xk + ti), s))
#define R3(a, b, c, d, xk, s, ti) (b + LROT((a + H(b, c, d) + xk + ti), s))
#define R4(a, b, c, d, xk, s, ti) (b + LROT((a + I(b, c, d) + xk + ti), s))
#define LROT(x, s) ((x << s) | (x >> (32 - s)))
#define F(X, Y, Z) ((X) & (Y) | (~X) & (Z))
#define G(X, Y, Z) (((X) & (Z)) | ((Y) & (~Z)))
#define H(X, Y, Z) ((X) ^ (Y) ^ (Z))
#define I(X, Y, Z) ((Y) ^ ((X) | (~Z)))
/* initial value for MD register */
#define A0 0x67452301
#define B0 0xefcdab89
#define C0 0x98badcfe
#define D0 0x10325476
#define BLOCK_SIZE 64
#define FINAL_BLOCK_SIZE (BLOCK_SIZE - 8)
static void md5_block(unsigned int register[], const unsigned int blk[]);
static unsigned int T[64] = {
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};

void md5(const unsigned char message[], int len, unsigned char result[])
{
    int pos = 0;
    int padded = 0;
    int remain = 0;
    unsigned int la[2];
    const unsigned char pad = 1 << 7; /* first byte of padding */
    unsigned int X[BLOCK_SIZE / sizeof(int)]; /* 512 bit block = 32 bit * 16 */
    unsigned char buf[BLOCK_SIZE]; /* for final and final-1 block */
    unsigned int r[4] = {A0, B0, C0, D0}; /* MD register */
    
    memset(buf, 0, sizeof(buf));
    /* Process Message in 16-word Blocks */
    while (len - pos >= BLOCK_SIZE) {
        memcpy(X, &message[pos], sizeof(X));
        md5_block(r, X);
        pos += BLOCK_SIZE;
    }
    remain = len - pos;
    if (remain > 0) {
        memcpy(buf, &message[pos], remain);
    }
#if !defined(UDF_MD5_COMPAT) || ((UDF_MD5_COMPAT) == 0)
    if (remain > FINAL_BLOCK_SIZE - 1) {
#else
    if (remain > FINAL_BLOCK_SIZE) {
#endif
        /* carry block: cannot put length field in final block */
        buf[remain] = pad;
        memcpy(X, buf, sizeof(buf));
        md5_block(r, X);
        padded = 1;
        memset(buf, 0, sizeof(buf));
    }
    /* Step 1: Append Padding Bits */
    if (!padded)
        buf[remain] = pad;
    /* Step 2: Append Length */
    la[0] = len << 3; /* byte to bit */
    la[1] = 0; /* assuming length < 4Gb */
    
    /* run final block */
    memcpy(buf + FINAL_BLOCK_SIZE, la, sizeof(la));
    memcpy(X, buf, sizeof(buf));
    md5_block(r, X);
    memcpy(result, r, sizeof(int) * 4);
    /* clear digester to maintain security */
    memset(r, 0, sizeof(r));
}

static void md5_block(unsigned int r[], const unsigned int blk[])
{
    unsigned int a = r[0];
    unsigned int b = r[1];
    unsigned int c = r[2];
    unsigned int d = r[3];
#ifdef DEBUG
    int i;
    printf("md5_block <");
    for (i = 0; i < 16; ++i) {
        printf(" %x", blk[i]);
    }
    printf("\n");
    printf("a, b, c, d = %x %x %x %x\n", a, b, c, d);
#endif
    a = R1(a, b, c, d, blk[0], 7, T[0]);
    d = R1(d, a, b, c, blk[1], 12, T[1]);
    c = R1(c, d, a, b, blk[2], 17, T[2]);
    b = R1(b, c, d, a, blk[3], 22, T[3]);
    a = R1(a, b, c, d, blk[4], 7, T[4]);
    d = R1(d, a, b, c, blk[5], 12, T[5]);
    c = R1(c, d, a, b, blk[6], 17, T[6]);
    b = R1(b, c, d, a, blk[7], 22, T[7]);
    a = R1(a, b, c, d, blk[8], 7, T[8]);
    d = R1(d, a, b, c, blk[9], 12, T[9]);
    c = R1(c, d, a, b, blk[10], 17, T[10]);
    b = R1(b, c, d, a, blk[11], 22, T[11]);
    a = R1(a, b, c, d, blk[12], 7, T[12]);
    d = R1(d, a, b, c, blk[13], 12, T[13]);
    c = R1(c, d, a, b, blk[14], 17, T[14]);
    b = R1(b, c, d, a, blk[15], 22, T[15]);
#ifdef DEBUG
    printf("round 1: %x %x %x %x\n", a, b, c, d);
#endif
    a = R2(a, b, c, d, blk[1], 5, T[16]);
    d = R2(d, a, b, c, blk[6], 9, T[17]);
    c = R2(c, d, a, b, blk[11], 14, T[18]);
    b = R2(b, c, d, a, blk[0], 20, T[19]);
    a = R2(a, b, c, d, blk[5], 5, T[20]);
    d = R2(d, a, b, c, blk[10], 9, T[21]);
    c = R2(c, d, a, b, blk[15], 14, T[22]);
    b = R2(b, c, d, a, blk[4], 20, T[23]);
    a = R2(a, b, c, d, blk[9], 5, T[24]);
    d = R2(d, a, b, c, blk[14], 9, T[25]);
    c = R2(c, d, a, b, blk[3], 14, T[26]);
    b = R2(b, c, d, a, blk[8], 20, T[27]);
    a = R2(a, b, c, d, blk[13], 5, T[28]);
    d = R2(d, a, b, c, blk[2], 9, T[29]);
    c = R2(c, d, a, b, blk[7], 14, T[30]);
    b = R2(b, c, d, a, blk[12], 20, T[31]);
#ifdef DEBUG
    printf("round 2: %x %x %x %x\n", a, b, c, d);
#endif
    a = R3(a, b, c, d, blk[5], 4, T[32]);
    d = R3(d, a, b, c, blk[8], 11, T[33]);
    c = R3(c, d, a, b, blk[11], 16, T[34]);
    b = R3(b, c, d, a, blk[14], 23, T[35]);
    a = R3(a, b, c, d, blk[1], 4, T[36]);
    d = R3(d, a, b, c, blk[4], 11, T[37]);
    c = R3(c, d, a, b, blk[7], 16, T[38]);
    b = R3(b, c, d, a, blk[10], 23, T[39]);
    a = R3(a, b, c, d, blk[13], 4, T[40]);
    d = R3(d, a, b, c, blk[0], 11, T[41]);
    c = R3(c, d, a, b, blk[3], 16, T[42]);
    b = R3(b, c, d, a, blk[6], 23, T[43]);
    a = R3(a, b, c, d, blk[9], 4, T[44]);
    d = R3(d, a, b, c, blk[12], 11, T[45]);
    c = R3(c, d, a, b, blk[15], 16, T[46]);
    b = R3(b, c, d, a, blk[2], 23, T[47]);
#ifdef DEBUG
    printf("round 3: %x %x %x %x\n", a, b, c, d);
#endif
    a = R4(a, b, c, d, blk[0], 6, T[48]);
    d = R4(d, a, b, c, blk[7], 10, T[49]);
    c = R4(c, d, a, b, blk[14], 15, T[50]);
    b = R4(b, c, d, a, blk[5], 21, T[51]);
    a = R4(a, b, c, d, blk[12], 6, T[52]);
    d = R4(d, a, b, c, blk[3], 10, T[53]);
    c = R4(c, d, a, b, blk[10], 15, T[54]);
    b = R4(b, c, d, a, blk[1], 21, T[55]);
    a = R4(a, b, c, d, blk[8], 6, T[56]);
    d = R4(d, a, b, c, blk[15], 10, T[57]);
    c = R4(c, d, a, b, blk[6], 15, T[58]);
    b = R4(b, c, d, a, blk[13], 21, T[59]);
    a = R4(a, b, c, d, blk[4], 6, T[60]);
    d = R4(d, a, b, c, blk[11], 10, T[61]);
    c = R4(c, d, a, b, blk[2], 15, T[62]);
    b = R4(b, c, d, a, blk[9], 21, T[63]);
#ifdef DEBUG
    printf("round 4: %x %x %x %x\n", a, b, c, d);
#endif
    r[0] += a;
    r[1] += b;
    r[2] += c;
    r[3] += d;
#ifdef DEBUG
    printf("md5_block > : %x %x %x %x\n", digester->r[0], digester->r[1],
           digester->r[2], digester->r[3]);
#endif
}


void init_func(FunctionContext* context, StringVal* val) {
  val->is_null = true;
}
void update_func(FunctionContext* context, const StringVal& str, StringVal* 
result) {
  if (str.is_null) return;
  if (result->is_null) {
   
 unsigned char *outbuf=context->Allocate(17);
    outbuf[16]='\0';
    md5(str.ptr, str.len, outbuf);
 
    uint8_t* copy = context->Allocate(17);
 
    if (copy == NULL) return;
    memcpy(copy, outbuf, 16);
    context->Free(outbuf);
    *result = StringVal(copy, str.len);
    return;
  }
    unsigned char *outbuf1=context->Allocate(17);
    outbuf1[16]='\0';
    md5(str.ptr, sizeof(str.ptr), outbuf1);
 
    uint8_t* copy1 = context->Allocate(17);
 
         for(int i=0;i<16;i++)
         {
                 copy1[i]=outbuf1[i] & result->ptr[i];
         }

    *result = StringVal(copy1, 17);
    return;

}
void merge_func(FunctionContext* context, const StringVal& src, StringVal* dst) 
{
    if (src.is_null) return;
         for(int i=0;i<16;i++)
         {
            dst->ptr[i]=src.ptr[i] & dst->ptr[i];
         }
}

StringVal serialize_func(FunctionContext* context, const StringVal& val) {
  if (val.is_null) return val;
   unsigned char *outbuf1=context->Allocate(17);
   outbuf1[16]='\0';
  uint8_t* copy = context->Allocate(val.len);
  memcpy(copy, val.ptr, 17);
  return StringVal(copy,17);
}

StringVal finalize_func(FunctionContext* context, const StringVal& val) {
  if (val.is_null) return val;
  unsigned char *outbuf1=context->Allocate(17);
  outbuf1[16]='\0';
  uint8_t* copy = context->Allocate(val.len);
  memcpy(copy, val.ptr, 17);
  return StringVal(copy,17);
}






########################
########################
define function SQL in impala-shell: 
create aggregate function countMD5(string) returns string  location 
'hdfs://nameservice1:8020//user/hive/udfjars/libmd5udaf.so' init_fn='init_func' 
update_fn='update_func' merge_fn='merge_func' serialize_fn='serialize_func'  
finalize_fn='finalize_func';


Maybe my C++ code has some problems, could you help me?












------------------ ???????? ------------------
??????: "Jim Apple"<[email protected]>;
????????: 2018??6??7??(??????) ????1:50
??????: "dev@impala"<[email protected]>;

????: Re: UDA debugging, was Re: Broken/Flaky Tests



You have provided the function prototype, but not its definition.

For cerr: http://impala.apache.org/docs/build/html/topics/impala_udf.html

"

To handle errors in UDFs, you call functions that are members of the initial
 FunctionContext* argument passed to your function.

A UDF can record one or more warnings, for conditions that indicate minor,
recoverable problems that do not cause the query to stop. The signature for
this function is:

bool AddWarning(const char* warning_msg);

"

On Wed, Jun 6, 2018 at 1:58 AM, ?????? <[email protected]> wrote:

> Hi
> define function SQL??
>    create aggregate function countMD5(string) returns string  location
> 'hdfs://nameservice1:8020//user/hive/udfjars/libmd5udaf.so'
> init_fn='init_func' update_fn='update_func' merge_fn='merge_func'
> serialize_fn='serialize_func'  finalize_fn='finalize_func';
> package include md5_udaf.h and md5_udaf.cpp file,
> function md5 defined as: void md5(const unsigned char message[], int len,
> unsigned char result[]);
>
> when I use countMD5 function in impala-shell, the return value is null. I
> feel confused. Perhas my code has problem, but I cann't find it.
> Another, when I write "std::cerr<<"init"; "  in the initial function(
> init_func) , the console doesn't print,Why? And Where to print?
>
> please help me and point to my error, I am a greener to C++.
> Thank you,very much!
>
>
>
>
>
> ------------------ ???????? ------------------
> *??????:* "Tim Armstrong"<[email protected]>;
> *????????:* 2018??6??6??(??????) ????12:38
> *??????:* "dev"<[email protected]>;
> *????:* Re: UDA debugging, was Re: Broken/Flaky Tests
>
> We're happy to give you pointers. If you could share your uda code and
> "create function" that would help us help you
>
> On Tue., 5 Jun. 2018, 19:31 Jim Apple, <[email protected]> wrote:
>
> > Hi ??????,
> >
> > I notice you are replying to other threads about different subjects when
> > you ask your questions. I think you will be more likely to get help if
> you
> > start new threads with relevant subjects and if you be as specific as
> > possible with your questions.
> >
> > The Impala wiki has some advice for debugging:
> > https://cwiki.apache.org/confluence/display/IMPALA/Impala+Debugging+Tips
> >
> >
> > On Tue, Jun 5, 2018 at 6:21 PM ?????? <[email protected]> wrote:
> >
> > > One:I want to know how to debug the imapla UDA function
> > > Two??I would like to return a StringVal value through finalize function,
> > > but I get the null value every time. That is why?
> > >
> > >
> > >
> > >
> > > ------------------ ???????? ------------------
> > > ??????: "Tim Armstrong"<[email protected]>;
> > > ????????: 2018??6??6??(??????) ????9:08
> > > ??????: "dev@impala"<[email protected]>;
> > >
> > > ????: Re: Broken/Flaky Tests
> > >
> > >
> > >
> > > Ok, so 2/3 of those fixes are merged and the other is being merged.
> > >
> > > We still have a long list of flaky issues but I went through and we've
> > > either mitigated them or we're blocked on being able to repro them.
> > >
> > > I'll see how things look tomorrow, but if you have some low-risk
> changes
> > in
> > > mind, let me know and I can considering whether to merge them.
> > >
> > >
> > >
> > > On Tue, Jun 5, 2018 at 10:11 AM, Tim Armstrong <
> [email protected]>
> > > wrote:
> > >
> > > > Things are starting to look healthier now.
> > > >
> > > > I went through the broken-build JIRAs and downgraded some of the
> > > > infrequent infrastructure issues to critical so we have a clearer
> idea
> > of
> > > > what's actually breaking the build now versus what's an occasional
> > infra
> > > > issue: https://issues.apache.org/jira/issues/?jql=project%
> > > > 20%3D%20IMPALA%20AND%20status%20in%20(Open%2C%20%22In%
> > > > 20Progress%22%2C%20Reopened)%20AND%20labels%20%3D%20broken-
> > > > build%20ORDER%20BY%20priority%20DESC
> > > >
> > > > I'd like to see the fixes for these three issues go in:
> > > > https://issues.apache.org/jira/browse/IMPALA-7101
> > > > https://issues.apache.org/jira/browse/IMPALA-6956
> > > > https://issues.apache.org/jira/browse/IMPALA-7008
> > > >
> > > > We still need to fix any flaky infrastructure issues but that should
> be
> > > > able to proceed in parallel with other things.
> > > >
> > > >
> > > > On Fri, Jun 1, 2018 at 11:18 AM, Thomas Tauber-Marshall <
> > > > [email protected]> wrote:
> > > >
> > > >> So while its definitely better, there are still a large number of
> > > failing
> > > >> builds. We've been hit by at least: IMPALA-6642
> > > >> <https://issues.apache.org/jira/browse/IMPALA-6642>, IMPALA-6956
> > > >> <https://issues.apache.org/jira/browse/IMPALA-6956>, IMPALA-7101
> > > >> <https://issues.apache.org/jira/browse/IMPALA-7101> and IMPALA-3040
> > > >> <https://issues.apache.org/jira/browse/IMPALA-3040>
> > > >> all within the last day, along with some mysterious crashes that I
> > > haven't
> > > >> filed anything for with Apache yet as there's very little info about
> > > >> what's
> > > >> actually going on. There are still multiple builds that haven't been
> > > green
> > > >> in over a month.
> > > >> <https://issues.apache.org/jira/browse/IMPALA-6642>
> > > >>
> > > >> Of course, if we hold commits for too long, there's a danger that
> when
> > > we
> > > >> open things back up a bunch of changes will all land at the same
> time
> > > and
> > > >> destabilize the builds again, putting back in the same situation.
> So,
> > I
> > > >> would say at a minimum that any changes that are relatively minor
> and
> > > low
> > > >> risk can go in now.
> > > >>
> > > >> My preference would be to hold off on major changes until we have
> more
> > > >> stability.
> > > >>
> > > >> On Fri, Jun 1, 2018 at 10:30 AM Lars Volker <[email protected]>
> wrote:
> > > >>
> > > >> > Hi Thomas,
> > > >> >
> > > >> > Can you give an update on where we are with the builds?
> > > >> >
> > > >> > We currently have ~15 changes with a +2:
> > > >> >
> > > >> > https://gerrit.cloudera.org/#/q/status:open+project:Impala-A
> > > >> SF+branch:master+label:Code-Review%253D2
> > > >> >
> > > >> > Thanks, Lars
> > > >> >
> > > >> > On Fri, May 25, 2018 at 5:20 PM, Henry Robinson <[email protected]
> >
> > > >> wrote:
> > > >> >
> > > >> > > +1 - thanks for worrying about build health.
> > > >> > >
> > > >> > > On 25 May 2018 at 17:18, Jim Apple <[email protected]>
> wrote:
> > > >> > >
> > > >> > > > Sounds good to me. Thanks for taking ownership!
> > > >> > > >
> > > >> > > > On Fri, May 25, 2018 at 5:10 PM Thomas Tauber-Marshall <
> > > >> > > > [email protected]> wrote:
> > > >> > > >
> > > >> > > > > Hey Impala community,
> > > >> > > > >
> > > >> > > > > There seems to have been an unusually large number of flaky
> or
> > > >> broken
> > > >> > > > tests
> > > >> > > > > <
> > > >> > > > > https://issues.apache.org/jira/browse/IMPALA-7073?jql=
> > > >> > > > project%20%3D%20IMPALA%20AND%20status%20in%20(Open%2C%20%
> > > >> > > > 22In%20Progress%22%2C%20Reopened)%20AND%20labels%
> > > >> > > > 20in%20(flaky%2C%20broken-build)
> > > >> > > > > >
> > > >> > > > > cropping up in the last few weeks. I'd like to suggest that
> we
> > > >> hold
> > > >> > off
> > > >> > > > on
> > > >> > > > > merging new changes that aren't related to fixing those
> > testing
> > > >> > issues
> > > >> > > > for
> > > >> > > > > at least a few days until things become more stable.
> > > >> > > > >
> > > >> > > > > Does anyone have any objections? If not, I'll send out
> another
> > > >> email
> > > >> > > when
> > > >> > > > > more of the issues have been addressed.
> > > >> > > > >
> > > >> > > > > Thanks,
> > > >> > > > > Thomas Tauber-Marshall
> > > >> > > > >
> > > >> > > >
> > > >> > >
> > > >> >
> > > >>
> > > >
> > > >
> >
>
>

Reply via email to