Changeset: 4095850a62b5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4095850a62b5
Modified Files:
monetdb5/modules/mal/tokenizer.c
Branch: lodrdf
Log Message:
uncrustify
diffs (truncated from 573 to 300 lines):
diff --git a/monetdb5/modules/mal/tokenizer.c b/monetdb5/modules/mal/tokenizer.c
--- a/monetdb5/modules/mal/tokenizer.c
+++ b/monetdb5/modules/mal/tokenizer.c
@@ -15,35 +15,38 @@
* Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
* Copyright August 2008-2012 MonetDB B.V.
* All Rights Reserved.
-*/
+ */
/*
* author Lefteris Sidirourgos
* Tokenizer
- * This module implements a vertical fragmented tokenizer for strings. It is based
- * on the ideas of the urlbox module by mk.
+ * This module implements a vertical fragmented tokenizer for strings.
+ * It is based on the ideas of the urlbox module by mk.
*
- * The input string is tokenized according to a separator character. Each token is
- * inserted to the next BAT with the same order of appearance in the string. We
- * currently support 255 tokens in each string as this module is intended for use
- * with short and similar strings such as URLs. In addition we maintain a
- * 2-dimensional index that points to the depth and height of the last token of
- * each string. The 2-dimensional index is combined to one BAT where the 8 least
+ * The input string is tokenized according to a separator character.
+ * Each token is inserted to the next BAT with the same order of
+ * appearance in the string. We currently support 255 tokens in each
+ * string as this module is intended for use with short and similar
+ * strings such as URLs. In addition we maintain a 2-dimensional index
+ * that points to the depth and height of the last token of each string.
+ * The 2-dimensional index is combined to one BAT where the 8 least
* significant bits represent the depth, and the rest bits the height.
*
* The tokenizer can be accessed in two ways. Given the oid retrieve the
- * re-constructed string, or given a string return its oid if present, otherwise
- * nil.
+ * re-constructed string, or given a string return its oid if present,
+ * otherwise nil.
*
- * Strings can be added either in batch (from a file or a bat of strings) and by
- * appending a single string. Duplicate elimination is always performed.
+ * Strings can be added either in batch (from a file or a bat of
+ * strings) and by appending a single string. Duplicate elimination is
+ * always performed.
*
- * There can be only one tokenizer open at the same time. This is achieved by
- * setting a TRANSaction bat. This might change in the future. However there
- * can be more than one tokenizers stored in the disk, each of which is identified
- * by its name (usually the name of the active schema of the db). These
- * administrative issues and security aspects (e.g., opening a tokenizer of
- * a different schema) should be addressed more thoroughly.
+ * There can be only one tokenizer open at the same time. This is
+ * achieved by setting a TRANSaction bat. This might change in the
+ * future. However there can be more than one tokenizers stored in the
+ * disk, each of which is identified by its name (usually the name of
+ * the active schema of the db). These administrative issues and
+ * security aspects (e.g., opening a tokenizer of a different schema)
+ * should be addressed more thoroughly.
*/
#include "monetdb_config.h"
#include "bat5.h"
@@ -53,27 +56,30 @@
#define MAX_TKNZR_DEPTH 256
#define INDEX MAX_TKNZR_DEPTH
static int tokenDepth = 0;
-static BAT *tokenBAT[MAX_TKNZR_DEPTH+1];
-static BAT *TRANS = NULL; /* the catalog of tokenizers */
+static BAT *tokenBAT[MAX_TKNZR_DEPTH + 1];
+static BAT *TRANS = NULL; /* the catalog of tokenizers */
static char name[128];
#if SIZEOF_OID == 4 /* 32-bit oid */
-#define MAX_h ((((oid)1)<<23)-1)
+#define MAX_h ((((oid) 1) << 23) - 1)
#else /* 64-bit oid */
-#define MAX_h ((((oid)1)<<55)-1)
+#define MAX_h ((((oid) 1) << 55) - 1)
#endif
-#define COMP(h, d) ((h<<8)|(d&255))
-#define GET_d(x) ((sht)((x)&255))
-#define GET_h(x) ((x)>>8)
+#define COMP(h, d) ((h << 8) | (d & 255))
+#define GET_d(x) ((sht) ((x) & 255))
+#define GET_h(x) ((x) >> 8)
-static int prvlocate(BAT* b, oid *prv, str part) {
+static int prvlocate(BAT* b, oid *prv, str part)
+{
BAT *m = BATmirror(b);
BATiter mi = bat_iterator(m);
BUN p;
- if (m->H->hash == NULL) BAThash(m, 2*BATcount(m));
- HASHloop_str(mi, m->H->hash, p, part) {
- if (*((oid *)BUNtail(mi,p)) == *prv) {
+ if (m->H->hash == NULL)
+ BAThash(m, 2 * BATcount(m));
+ HASHloop_str(mi, m->H->hash, p, part)
+ {
+ if (*((oid *) BUNtail(mi, p)) == *prv) {
*prv = (oid) p;
return TRUE;
}
@@ -105,48 +111,45 @@ TKNZRopen(int *ret, str *in)
}
tokenDepth = 0;
- TRANS = BATnew(TYPE_void, TYPE_str, MAX_TKNZR_DEPTH+1);
+ TRANS = BATnew(TYPE_void, TYPE_str, MAX_TKNZR_DEPTH + 1);
if (TRANS == NULL) {
MT_lock_unset(&mal_contextLock, "tokenizer");
throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
}
/* now we are sure that none overwrites the tokenizer table*/
MT_lock_unset(&mal_contextLock, "tokenizer");
- BATseqbase(TRANS, 0);
+ BATseqbase(TRANS, 0);
snprintf(name, 128, "%s", *in);
- batname = (str) GDKmalloc(134*sizeof(char));
+ batname = (str) GDKmalloc(134 * sizeof(char));
snprintf(batname, 134, "%s_index", name);
idx = BBPindex(batname);
if (idx == 0) { /* new tokenizer */
-
b = BATnew(TYPE_void, TYPE_oid, 1024);
- if (b == NULL)
+ if (b == NULL)
throw(MAL, "tokenizer.open", MAL_MALLOC_FAIL);
BATkey(b, FALSE);
- BATseqbase(b,0);
+ BATseqbase(b, 0);
tokenBAT[INDEX] = b;
- if (BKCsetName(&r, (int *)&(b->batCacheid), (str *) &batname) != MAL_SUCCEED)
+ if (BKCsetName(&r, (int *) &(b->batCacheid), (str *) &batname) != MAL_SUCCEED)
throw(MAL, "tokenizer.open", OPERATION_FAILED);
- if (BKCsetPersistent(&r,(int *)&(b->batCacheid)) != MAL_SUCCEED)
+ if (BKCsetPersistent(&r, (int *) &(b->batCacheid)) != MAL_SUCCEED)
throw(MAL, "tokenizer.open", OPERATION_FAILED);
BUNappend(TRANS, batname, FALSE);
-
} else { /* existing tokenizer */
-
tokenBAT[INDEX] = BATdescriptor(idx);
BUNappend(TRANS, batname, FALSE);
for (depth = 0; depth < MAX_TKNZR_DEPTH; depth++) {
snprintf(batname, 128, "%s_%d", name, depth);
idx = BBPindex(batname);
- if (idx == 0) break;
+ if (idx == 0)
+ break;
tokenBAT[depth] = BATdescriptor(idx);
BUNappend(TRANS, batname, FALSE);
}
tokenDepth = depth;
-
}
GDKfree(batname);
@@ -178,24 +181,25 @@ TKNZRclose(int *r)
/*
* Tokenize operations
- * The tokenizer operation assumes a private copy to mark the
- * end of the token separators with a zero byte. Tokens are
- * separated by a single character for simplicity.
- * Might be a good scheme to assume that strings to be broken
- * are properly ended with either 0 or nl, not both.
- * It seems 0 can be assumed.
+ * The tokenizer operation assumes a private copy to mark the end of the
+ * token separators with a zero byte. Tokens are separated by a single
+ * character for simplicity. Might be a good scheme to assume that
+ * strings to be broken are properly ended with either 0 or nl, not
+ * both. It seems 0 can be assumed.
*/
static int
-TKNZRtokenize(str in, str *parts, char tkn) {
+TKNZRtokenize(str in, str *parts, char tkn)
+{
char *s, *t;
int depth = 0;
s = in;
while (*s && *s != '\n') {
t = s;
- while (*t != tkn && *t != '\n' && *t) t++;
+ while (*t != tkn && *t != '\n' && *t)
+ t++;
parts[depth++] = s;
- s = t + ( *t != 0);
+ s = t + (*t != 0);
*t = 0;
if (depth > MAX_TKNZR_DEPTH)
break;
@@ -238,10 +242,9 @@ TKNZRappend(oid *pos, str *s)
}
if (depth > tokenDepth || tokenBAT[0] == NULL) {
new = tokenDepth;
- for (i = tokenDepth; i < depth; i++){
-
+ for (i = tokenDepth; i < depth; i++) {
/* make new bat */
- batname = (str) GDKmalloc(128*sizeof(char));
+ batname = (str) GDKmalloc(128 * sizeof(char));
snprintf(batname, 128, "%s_%d", name, i);
b = BATnew(TYPE_oid, TYPE_str, 1024);
if (b == NULL) {
@@ -253,13 +256,13 @@ TKNZRappend(oid *pos, str *s)
tokenBAT[i] = b;
if (BKCsetName(&r, (int *) &(b->batCacheid), (str *) &batname)
- != MAL_SUCCEED) {
+ != MAL_SUCCEED) {
GDKfree(batname);
GDKfree(url);
throw(MAL, "tokenizer.open", OPERATION_FAILED);
}
- if (BKCsetPersistent(&r, (int *) &(b->batCacheid))
- != MAL_SUCCEED) {
+ if (BKCsetPersistent(&r, (int *) &(b->batCacheid))
+ != MAL_SUCCEED) {
GDKfree(batname);
GDKfree(url);
throw(MAL, "tokenizer.open", OPERATION_FAILED);
@@ -274,7 +277,8 @@ TKNZRappend(oid *pos, str *s)
if (p != BUN_NONE) {
prv = (oid) p;
for (i = 1; i < new; i++) {
- if (!prvlocate(tokenBAT[i], &prv, parts[i])) break;
+ if (!prvlocate(tokenBAT[i], &prv, parts[i]))
+ break;
}
} else {
i = 0;
@@ -282,7 +286,7 @@ TKNZRappend(oid *pos, str *s)
if (i == depth) {
comp = COMP(prv, depth);
- *pos = BUNfnd(BATmirror(tokenBAT[INDEX]), (ptr) &comp);
+ *pos = BUNfnd(BATmirror(tokenBAT[INDEX]), (ptr) & comp);
if (*pos != BUN_NONE) {
/* the string is already there */
GDKfree(url);
@@ -291,14 +295,14 @@ TKNZRappend(oid *pos, str *s)
}
/* insremainder */
- for(; i < depth; i++){
+ for (; i < depth; i++) {
idx = BATcount(tokenBAT[i]);
if (idx > MAX_h) {
GDKfree(url);
throw(MAL, "tokenizer.append",
OPERATION_FAILED " no more free oid's");
}
- tokenBAT[i] = BUNins(tokenBAT[i], (ptr) &prv, parts[i], FALSE);
+ tokenBAT[i] = BUNins(tokenBAT[i], (ptr) & prv, parts[i], FALSE);
if (tokenBAT[i] == NULL) {
GDKfree(url);
throw(MAL, "tokenizer.append",
@@ -307,33 +311,32 @@ TKNZRappend(oid *pos, str *s)
if (tokenBAT[i]->T->hash == NULL ||
BATcount(tokenBAT[i]) > 4 * tokenBAT[i]->T->hash->mask) {
HASHdestroy(tokenBAT[i]);
- BAThash(BATmirror(tokenBAT[i]), 2*BATcount(tokenBAT[i]));
+ BAThash(BATmirror(tokenBAT[i]), 2 * BATcount(tokenBAT[i]));
}
prv = (oid) idx;
}
*pos = (oid) BATcount(tokenBAT[INDEX]);
comp = COMP(prv, depth);
- BUNappend(tokenBAT[INDEX], (ptr) &comp, TRUE);
+ BUNappend(tokenBAT[INDEX], (ptr) & comp, TRUE);
if (tokenBAT[INDEX]->T->hash == NULL ||
- BATcount(tokenBAT[INDEX]) > 4 * tokenBAT[INDEX]->T->hash->mask) {
+ BATcount(tokenBAT[INDEX]) > 4 * tokenBAT[INDEX]->T->hash->mask) {
HASHdestroy(tokenBAT[INDEX]);
- BAThash(BATmirror(tokenBAT[INDEX]), 2*BATcount(tokenBAT[INDEX]));
+ BAThash(BATmirror(tokenBAT[INDEX]), 2 * BATcount(tokenBAT[INDEX]));
}
GDKfree(url);
return MAL_SUCCEED;
}
-#define SIZE 1*1024*1024
+#define SIZE 1 * 1024 * 1024
str
TKNZRdepositFile(int *r, str *fnme)
{
-
stream *fs;
bstream *bs;
- char *s,*t;
- int len=0;
+ char *s, *t;
+ int len = 0;
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list