Someone (Graham Toal?) recently asked for this. Here's a quickie in C to
print out all strings stored in a GADDAG file. Note that the same word
appears multiple times, permuted various ways; that's the way a GADDAG
works (read Steve Gordon's paper to understand this fully). If you want
just a sorted wordlist, compile the program below (I named it 
"traverse_quack") and use this command:

        traverse_quack infile.gaddag |\
        perl -F"\\\\" -ane 'print((reverse $F[0])."$F[1]\n")' |\
        sed '/^$/d' |\
        sort -u

-AC


/*** ==> traverse_quack.c by Amit Chakrabarti, 2008-11-04 <==
 *
 * Utility to traverse a Quackle GADDAG file in depth first order,
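 * effectively printing out all words stored therein. Note that a "word"
 * in the GADDAG is not a plain lexicon word but a permuted form of one:
 * a reversed prefix of the word, then (when anything remains) a backslash
 * separator followed by the rest of the word. Each lexicon word therefore
 * shows up several times in the output.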
 */

#include<stdio.h>
#include<errno.h>
#include<string.h>
#include<unistd.h>
#include<fcntl.h>

/*
 * In-memory image of the GADDAG file. The tree is an array of 4-byte
 * nodes; node number x occupies tree[4*x] .. tree[4*x+3]. The first three
 * bytes hold the offset to the node's first child (decoded by down()), the
 * fourth byte holds the letter and two flag bits (decoded below).
 */
unsigned char tree[33000000];   /* Same as Quackle's size */

/* Letters on the path from the root to the node currently being visited. */
char word[50];

/* Layout of a node's fourth byte. */
enum {
  NODE_SIZE      = 4,      /* bytes per node */
  LETTER_BITS    = 0x1f,   /* low five bits: letter, as an offset from 'A' */
  TERMINAL_BIT   = 0x20,   /* set when the path ending here is a stored word */
  LAST_CHILD_BIT = 0x40    /* set when the node has no sibling to its right */
};

/* The fourth (letter/flags) byte of node x. */
#define node_info(x) (tree[NODE_SIZE*(x) + 3])

/* Character stored at node x. */
#define letter(x)  ((node_info(x) & LETTER_BITS) + 'A')
/* Nonzero when a stored word ends at node x. */
#define is_term(x) (node_info(x) & TERMINAL_BIT)
/* Index of the next sibling of node x, or 0 when x is the last one. */
#define right(x)   ((node_info(x) & LAST_CHILD_BIT) ? 0 : (x)+1)


/*
 * Return the index of the first child of node x, or 0 if it has none.
 *
 * The first three bytes of a node hold a 24-bit big-endian offset, counted
 * in nodes and relative to x itself; an offset of 0 means "no children".
 * An x outside the tree, or an offset that would point past the end of the
 * tree buffer (possible only with a corrupt file), is also reported as
 * "no children" so callers never index outside `tree`.
 *
 * Declared `static inline`: a plain `inline` definition provides no
 * external definition in C99 and later, which makes unoptimised builds
 * fail at link time.
 */
static inline int down(int x)
{
  const size_t node_count = sizeof tree / 4;
  const unsigned char *node;
  unsigned int offset;

  if (x < 0 || (size_t)x >= node_count)
    return 0;

  node = &tree[4 * (size_t)x];
  offset = (unsigned int)node[0] << 16 | (unsigned int)node[1] << 8 | node[2];

  if (offset == 0 || offset >= node_count - (size_t)x)
    return 0;
  return x + (int)offset;
}


void dfs(int x, int len)
{
  if(x)  {
    word[len++] = letter(x);
    if(is_term(x))
      printf("%.*s\n", len, word);
    dfs(down(x), len);
    dfs(right(x), --len);
  }
}


/*
 * Load the GADDAG file named by argv[1] into `tree` and print every string
 * stored in it, one per line, on standard output.
 *
 * Usage and error messages and the "Read N bytes" status line are written
 * to standard error, so standard output carries only the words and can be
 * piped straight into other tools.
 *
 * Returns 0 on success, 1 on a usage error or if the file cannot be opened
 * or read. At most sizeof tree bytes are loaded; anything beyond that is
 * not read.
 */
int main(int argc, char **argv)
{
  int fd;
  size_t total = 0;

  if (argc < 2) {
    fprintf(stderr, "Usage: %s <gaddag_file_name>\n",
            argc > 0 ? argv[0] : "traverse_quack");
    return 1;
  }

  fd = open(argv[1], O_RDONLY);
  if (fd < 0) {
    fprintf(stderr, "Could not open %s for reading: %s\n",
            argv[1], strerror(errno));
    return 1;
  }

  /* read() may return fewer bytes than asked for; loop until EOF or full. */
  while (total < sizeof tree) {
    ssize_t got = read(fd, tree + total, sizeof tree - total);

    if (got < 0) {
      int saved_errno = errno;   /* close() below may overwrite errno */

      if (saved_errno == EINTR)
        continue;
      close(fd);
      fprintf(stderr, "Could not read from %s: %s\n",
              argv[1], strerror(saved_errno));
      return 1;
    }
    if (got == 0)
      break;
    total += (size_t)got;
  }
  close(fd);

  fprintf(stderr, "Read %lu bytes from gaddag file.\n", (unsigned long)total);

  /* Node 0 is the root; the traversal starts at its first child. */
  dfs(down(0), 0);
  return 0;
}

Reply via email to