Someone (Graham Toal?) recently asked for this. Here's a quickie in C to
print out all strings stored in a GADDAG file. Note that the same word
appears multiple times, permuted various ways; that's the way a GADDAG
works (read Steve Gordon's paper to understand this fully). If you want
just a sorted wordlist, compile the program below (I named it
"traverse_quack") and use this command:
traverse_quack infile.gaddag |\
perl -F"\\\\" -ane 'print((reverse $F[0])."$F[1]\n")' |\
sed '/^$/d' |\
sort -u
-AC
/*** ==> traverse_quack.c by Amit Chakrabarti, 2008-11-04 <==
*
* Utility to traverse a Quackle GADDAG file in depth first order,
* effectively printing out all words stored therein. Note that a "word"
* in the GADDAG is an appropriate cooked version of a lexicon word.
*/
#include<stdio.h>
#include<errno.h>
#include<string.h>
#include<unistd.h>
#include<fcntl.h>
/* Raw image of the GADDAG file; every node occupies four consecutive bytes. */
unsigned char tree[33 * 1000 * 1000]; /* Same as Quackle's size */

/* Letters on the path from the root down to the node dfs() is visiting. */
char word[50];

/*
 * Accessors for byte 3 of node x, which packs three fields:
 *   bits 0-4 (mask 31)  letter code, printed as an offset from 'A'
 *   bit  5   (mask 32)  set when the path ending at this node is a stored string
 *   bit  6   (mask 64)  set when no further sibling follows this node
 *
 * letter(x)   character for the node (a code of 27 comes out as '\\')
 * right(x)    index of the following sibling, x + 1, or 0 when there is none
 * is_term(x)  nonzero when dfs() should print the current string
 */
#define letter(x)  ('A' + (tree[(x) * 4 + 3] & 31))
#define right(x)   ((tree[(x) * 4 + 3] & 64) == 0 ? (x) + 1 : 0)
#define is_term(x) (tree[(x) * 4 + 3] & 32)
/*
 * Return the index of the first child of node x, or 0 when x has no child.
 *
 * Bytes 0..2 of a node hold a 24-bit offset, most significant byte first,
 * measured in nodes relative to x itself. An offset of zero means "no child".
 *
 * The function is static on purpose: since C99 a plain `inline` definition
 * at file scope does not emit an external definition, so any call the
 * compiler chooses not to inline (typically at -O0) fails to link with an
 * undefined reference. Internal linkage avoids that.
 */
static inline int down(int x)
{
    const unsigned char *node = &tree[4 * x];
    int offset = (node[0] << 16) | (node[1] << 8) | node[2];

    return offset ? x + offset : 0;
}
void dfs(int x, int len)
{
if(x) {
word[len++] = letter(x);
if(is_term(x))
printf("%.*s\n", len, word);
dfs(down(x), len);
dfs(right(x), --len);
}
}
/*
 * Entry point: load the GADDAG file named by argv[1] into tree[] and print
 * every string stored in it, one per line, on standard output.
 *
 * The usage text, error messages and the byte-count status line are written
 * to standard error so that standard output carries nothing but the strings
 * and can be piped straight into other tools.
 *
 * Returns 0 on success and 1 on a usage or I/O error.
 */
int main(int argc, char **argv)
{
    int fd;
    size_t total = 0;
    ssize_t got;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <gaddag_file_name>\n",
                argc > 0 ? argv[0] : "traverse_quack");
        return 1;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        fprintf(stderr, "Could not open %s for reading: %s\n",
                argv[1], strerror(errno));
        return 1;
    }

    /* read() may deliver fewer bytes than requested, so loop until
     * end of file or until the buffer is full. */
    while (total < sizeof tree) {
        got = read(fd, tree + total, sizeof tree - total);
        if (got == 0)
            break;
        if (got < 0) {
            if (errno == EINTR)
                continue;
            /* Report before close() so errno is still the read error. */
            fprintf(stderr, "Could not read from %s: %s\n",
                    argv[1], strerror(errno));
            close(fd);
            return 1;
        }
        total += (size_t)got;
    }

    /* A full buffer may mean the file is larger than tree[]; probe one more
     * byte so the truncation does not go unnoticed. */
    if (total == sizeof tree) {
        unsigned char extra;

        do {
            got = read(fd, &extra, 1);
        } while (got < 0 && errno == EINTR);
        if (got > 0)
            fprintf(stderr, "Warning: %s is larger than the %zu-byte buffer; "
                    "only the part that fits is traversed.\n",
                    argv[1], sizeof tree);
    }
    close(fd);

    fprintf(stderr, "Read %zu bytes from gaddag file.\n", total);
    dfs(down(0), 0);
    return 0;
}