Changeset: 4095850a62b5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4095850a62b5
Modified Files:
        monetdb5/modules/mal/tokenizer.c
Branch: lodrdf
Log Message:

uncrustify


diffs (truncated from 573 to 300 lines):

diff --git a/monetdb5/modules/mal/tokenizer.c b/monetdb5/modules/mal/tokenizer.c
--- a/monetdb5/modules/mal/tokenizer.c
+++ b/monetdb5/modules/mal/tokenizer.c
@@ -15,35 +15,38 @@
  * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
  * Copyright August 2008-2012 MonetDB B.V.
  * All Rights Reserved.
-*/
+ */
 
 /*
  * author Lefteris Sidirourgos
  * Tokenizer
- * This module implements a vertical fragmented tokenizer for strings. It is based
- * on the ideas of the urlbox module by mk.
+ * This module implements a vertical fragmented tokenizer for strings.
+ * It is based on the ideas of the urlbox module by mk.
  *
- * The input string is tokenized according to a separator character. Each token is
- * inserted to the next BAT with the same order of appearance in the string. We
- * currently support 255 tokens in each string as this module is intended for use
- * with short and similar strings such as URLs. In addition we maintain a
- * 2-dimensional index that points to the depth and height of the last token of
- * each string. The 2-dimensional index is combined to one BAT where the 8 least
+ * The input string is tokenized according to a separator character.
+ * Each token is inserted to the next BAT with the same order of
+ * appearance in the string. We currently support 255 tokens in each
+ * string as this module is intended for use with short and similar
+ * strings such as URLs. In addition we maintain a 2-dimensional index
+ * that points to the depth and height of the last token of each string.
+ * The 2-dimensional index is combined to one BAT where the 8 least
  * significant bits represent the depth, and the rest bits the height.
  *
  * The tokenizer can be accessed in two ways. Given the oid retrieve the
- * re-constructed string, or given a string return its oid if present, otherwise
- * nil.
+ * re-constructed string, or given a string return its oid if present,
+ * otherwise nil.
  *
- * Strings can be added either in batch (from a file or a bat of strings) and by
- * appending a single string. Duplicate elimination is always performed.
+ * Strings can be added either in batch (from a file or a bat of
+ * strings) and by appending a single string. Duplicate elimination is
+ * always performed.
  *
- * There can be only one tokenizer open at the same time. This is achieved by
- * setting a TRANSaction bat. This might change in the future. However there
- * can be more than one tokenizers stored in the disk, each of which is identified
- * by its name (usually the name of the active schema of the db). These
- * administrative issues and security aspects (e.g., opening a tokenizer of
- * a different schema) should be addressed more thoroughly.
+ * There can be only one tokenizer open at the same time. This is
+ * achieved by setting a TRANSaction bat. This might change in the
+ * future. However there can be more than one tokenizers stored in the
+ * disk, each of which is identified by its name (usually the name of
+ * the active schema of the db). These administrative issues and
+ * security aspects (e.g., opening a tokenizer of a different schema)
+ * should be addressed more thoroughly.
  */
 #include "monetdb_config.h"
 #include "bat5.h"
@@ -53,27 +56,30 @@
 #define MAX_TKNZR_DEPTH 256
 #define INDEX MAX_TKNZR_DEPTH
 static int tokenDepth = 0;
-static BAT *tokenBAT[MAX_TKNZR_DEPTH+1];
-static BAT *TRANS = NULL;      /* the catalog of tokenizers */
+static BAT *tokenBAT[MAX_TKNZR_DEPTH + 1];
+static BAT *TRANS = NULL;   /* the catalog of tokenizers */
 static char name[128];
 
 #if SIZEOF_OID == 4 /* 32-bit oid */
-#define MAX_h ((((oid)1)<<23)-1)
+#define MAX_h ((((oid) 1) << 23) - 1)
 #else /* 64-bit oid */
-#define MAX_h ((((oid)1)<<55)-1)
+#define MAX_h ((((oid) 1) << 55) - 1)
 #endif
 
-#define COMP(h, d) ((h<<8)|(d&255))
-#define GET_d(x) ((sht)((x)&255))
-#define GET_h(x) ((x)>>8)
+#define COMP(h, d) ((h << 8) | (d & 255))
+#define GET_d(x) ((sht) ((x) & 255))
+#define GET_h(x) ((x) >> 8)
 
-static int prvlocate(BAT* b, oid *prv, str part) { 
+static int prvlocate(BAT* b, oid *prv, str part)
+{
        BAT *m = BATmirror(b);
        BATiter mi = bat_iterator(m);
        BUN p;
-       if (m->H->hash == NULL) BAThash(m, 2*BATcount(m));
-       HASHloop_str(mi, m->H->hash, p, part) {
-               if (*((oid *)BUNtail(mi,p)) == *prv) {
+       if (m->H->hash == NULL)
+               BAThash(m, 2 * BATcount(m));
+       HASHloop_str(mi, m->H->hash, p, part)
+       {
+               if (*((oid *) BUNtail(mi, p)) == *prv) {
                        *prv = (oid) p;
                        return TRUE;
                }
@@ -105,48 +111,45 @@ TKNZRopen(int *ret, str *in)
        }
        tokenDepth = 0;
 
-       TRANS = BATnew(TYPE_void, TYPE_str, MAX_TKNZR_DEPTH+1);
+       TRANS = BATnew(TYPE_void, TYPE_str, MAX_TKNZR_DEPTH + 1);
        if (TRANS == NULL) {
                MT_lock_unset(&mal_contextLock, "tokenizer");
                throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
        }
        /* now we are sure that none overwrites the tokenizer table*/
        MT_lock_unset(&mal_contextLock, "tokenizer");
-    BATseqbase(TRANS, 0);
+       BATseqbase(TRANS, 0);
 
        snprintf(name, 128, "%s", *in);
-       batname = (str) GDKmalloc(134*sizeof(char));
+       batname = (str) GDKmalloc(134 * sizeof(char));
        snprintf(batname, 134, "%s_index", name);
        idx = BBPindex(batname);
 
        if (idx == 0) { /* new tokenizer */
-
                b = BATnew(TYPE_void, TYPE_oid, 1024);
-               if (b == NULL) 
+               if (b == NULL)
                        throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
                BATkey(b, FALSE);
-               BATseqbase(b,0);
+               BATseqbase(b, 0);
                tokenBAT[INDEX] = b;
-               if (BKCsetName(&r, (int *)&(b->batCacheid), (str *) &batname) != MAL_SUCCEED) 
+               if (BKCsetName(&r, (int *) &(b->batCacheid), (str *) &batname) != MAL_SUCCEED)
                        throw(MAL, "tokenizer.open", OPERATION_FAILED);
-               if (BKCsetPersistent(&r,(int *)&(b->batCacheid)) != MAL_SUCCEED)
+               if (BKCsetPersistent(&r, (int *) &(b->batCacheid)) != MAL_SUCCEED)
                        throw(MAL, "tokenizer.open", OPERATION_FAILED);
                BUNappend(TRANS, batname, FALSE);
-
        } else { /* existing tokenizer */
-
                tokenBAT[INDEX] = BATdescriptor(idx);
                BUNappend(TRANS, batname, FALSE);
 
                for (depth = 0; depth < MAX_TKNZR_DEPTH; depth++) {
                        snprintf(batname, 128, "%s_%d", name, depth);
                        idx = BBPindex(batname);
-                       if (idx == 0) break;
+                       if (idx == 0)
+                               break;
                        tokenBAT[depth] = BATdescriptor(idx);
                        BUNappend(TRANS, batname, FALSE);
                }
                tokenDepth = depth;
-
        }
 
        GDKfree(batname);
@@ -178,24 +181,25 @@ TKNZRclose(int *r)
 
 /*
  * Tokenize operations
- * The tokenizer operation assumes a private copy to mark the
- * end of the token separators with a zero byte. Tokens are
- * separated by a single character for simplicity.
- * Might be a good scheme to assume that strings to be broken
- * are properly ended with either 0 or nl, not both.
- * It seems 0 can be assumed.
+ * The tokenizer operation assumes a private copy to mark the end of the
+ * token separators with a zero byte. Tokens are separated by a single
+ * character for simplicity.  Might be a good scheme to assume that
+ * strings to be broken are properly ended with either 0 or nl, not
+ * both.  It seems 0 can be assumed.
  */
 static int
-TKNZRtokenize(str in, str *parts, char tkn) {
+TKNZRtokenize(str in, str *parts, char tkn)
+{
        char *s, *t;
        int depth = 0;
 
        s = in;
        while (*s && *s != '\n') {
                t = s;
-               while (*t != tkn && *t != '\n' && *t) t++;
+               while (*t != tkn && *t != '\n' && *t)
+                       t++;
                parts[depth++] = s;
-               s = t + ( *t != 0);
+               s = t + (*t != 0);
                *t = 0;
                if (depth > MAX_TKNZR_DEPTH)
                        break;
@@ -238,10 +242,9 @@ TKNZRappend(oid *pos, str *s)
        }
        if (depth > tokenDepth || tokenBAT[0] == NULL) {
                new = tokenDepth;
-               for (i = tokenDepth; i < depth; i++){
-
+               for (i = tokenDepth; i < depth; i++) {
                        /* make new bat */
-                       batname = (str) GDKmalloc(128*sizeof(char));
+                       batname = (str) GDKmalloc(128 * sizeof(char));
                        snprintf(batname, 128, "%s_%d", name, i);
                        b = BATnew(TYPE_oid, TYPE_str, 1024);
                        if (b == NULL) {
@@ -253,13 +256,13 @@ TKNZRappend(oid *pos, str *s)
                        tokenBAT[i] = b;
 
                        if (BKCsetName(&r, (int *) &(b->batCacheid), (str *) &batname)
-                                       != MAL_SUCCEED) {
+                               != MAL_SUCCEED) {
                                GDKfree(batname);
                                GDKfree(url);
                                throw(MAL, "tokenizer.open", OPERATION_FAILED);
                        }
-                       if (BKCsetPersistent(&r, (int *)  &(b->batCacheid))
-                                       != MAL_SUCCEED) {
+                       if (BKCsetPersistent(&r, (int *) &(b->batCacheid))
+                               != MAL_SUCCEED) {
                                GDKfree(batname);
                                GDKfree(url);
                                throw(MAL, "tokenizer.open", OPERATION_FAILED);
@@ -274,7 +277,8 @@ TKNZRappend(oid *pos, str *s)
        if (p != BUN_NONE) {
                prv = (oid) p;
                for (i = 1; i < new; i++) {
-                       if (!prvlocate(tokenBAT[i], &prv, parts[i])) break;
+                       if (!prvlocate(tokenBAT[i], &prv, parts[i]))
+                               break;
                }
        } else {
                i = 0;
@@ -282,7 +286,7 @@ TKNZRappend(oid *pos, str *s)
 
        if (i == depth) {
                comp = COMP(prv, depth);
-               *pos = BUNfnd(BATmirror(tokenBAT[INDEX]), (ptr) &comp);
+               *pos = BUNfnd(BATmirror(tokenBAT[INDEX]), (ptr) & comp);
                if (*pos != BUN_NONE) {
                        /* the string is already there */
                        GDKfree(url);
@@ -291,14 +295,14 @@ TKNZRappend(oid *pos, str *s)
        }
 
        /* insremainder */
-       for(; i < depth; i++){
+       for (; i < depth; i++) {
                idx = BATcount(tokenBAT[i]);
                if (idx > MAX_h) {
                        GDKfree(url);
                        throw(MAL, "tokenizer.append",
                                        OPERATION_FAILED " no more free oid's");
                }
-               tokenBAT[i] = BUNins(tokenBAT[i], (ptr) &prv, parts[i], FALSE);
+               tokenBAT[i] = BUNins(tokenBAT[i], (ptr) & prv, parts[i], FALSE);
                if (tokenBAT[i] == NULL) {
                        GDKfree(url);
                        throw(MAL, "tokenizer.append",
@@ -307,33 +311,32 @@ TKNZRappend(oid *pos, str *s)
                if (tokenBAT[i]->T->hash == NULL ||
                        BATcount(tokenBAT[i]) > 4 * tokenBAT[i]->T->hash->mask) {
                        HASHdestroy(tokenBAT[i]);
-                       BAThash(BATmirror(tokenBAT[i]), 2*BATcount(tokenBAT[i]));
+                       BAThash(BATmirror(tokenBAT[i]), 2 * BATcount(tokenBAT[i]));
                }
                prv = (oid) idx;
        }
 
        *pos = (oid) BATcount(tokenBAT[INDEX]);
        comp = COMP(prv, depth);
-       BUNappend(tokenBAT[INDEX], (ptr) &comp, TRUE);
+       BUNappend(tokenBAT[INDEX], (ptr) & comp, TRUE);
        if (tokenBAT[INDEX]->T->hash == NULL ||
-                       BATcount(tokenBAT[INDEX]) > 4 * tokenBAT[INDEX]->T->hash->mask) {
+               BATcount(tokenBAT[INDEX]) > 4 * tokenBAT[INDEX]->T->hash->mask) {
                HASHdestroy(tokenBAT[INDEX]);
-               BAThash(BATmirror(tokenBAT[INDEX]), 2*BATcount(tokenBAT[INDEX]));
+               BAThash(BATmirror(tokenBAT[INDEX]), 2 * BATcount(tokenBAT[INDEX]));
        }
 
        GDKfree(url);
        return MAL_SUCCEED;
 }
 
-#define SIZE 1*1024*1024
+#define SIZE 1 * 1024 * 1024
 str
 TKNZRdepositFile(int *r, str *fnme)
 {
-
        stream *fs;
        bstream *bs;
-       char *s,*t;
-       int len=0;
+       char *s, *t;
+       int len = 0;
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to