This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 467c553061 [SYSTEMDS-3179] Builtin for GloVe word embedding training
467c553061 is described below

commit 467c553061b2650ab49805ceafbe8ece76c916e8
Author: Xixuan Zhang <119933243+xixuanzhang2...@users.noreply.github.com>
AuthorDate: Thu Jan 30 19:12:35 2025 +0100

    [SYSTEMDS-3179] Builtin for GloVe word embedding training
    
    Closes #2201.
    
    Co-authored-by: Samin <bassirisa...@gmail.com>
---
 scripts/builtin/glove.dml                          | 162 +++++++
 .../java/org/apache/sysds/common/Builtins.java     |   1 +
 .../functions/builtin/part1/BuiltinGloVeTest.java  | 140 ++++++
 .../datasets/GloVe/gloveExpectedTop10.csv          | 478 +++++++++++++++++++++
 src/test/scripts/functions/builtin/glove.dml       |  86 ++++
 5 files changed, 867 insertions(+)

diff --git a/scripts/builtin/glove.dml b/scripts/builtin/glove.dml
new file mode 100644
index 0000000000..fc5ee9bafb
--- /dev/null
+++ b/scripts/builtin/glove.dml
@@ -0,0 +1,162 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#-------------------------------------------------------------
+
+init = function(matrix[double] cooc_matrix, double x_max, double alpha)
+  return(matrix[double] weights, matrix[double] log_cooc_matrix){
+  # per-cell weight: min(1, (min(x, x_max) / x_max)^alpha), computed element-wise on the co-occurrence counts
+  weights = pmin(1, (pmin(cooc_matrix, x_max) / x_max) ^ alpha);
+  # natural log of the positive counts (log(X) instead of the truncated base 2.718281828); other cells become 0
+  log_cooc_matrix = ifelse(cooc_matrix > 0, log(cooc_matrix), 0);
+}
+
+gloveWithCoocMatrix = function(matrix[double] cooc_matrix, frame[Unknown] cooc_index, int seed, int vector_size, double alpha, double eta, double x_max, double tol, int iterations,int print_loss_it)
+    return (frame[Unknown] G){
+    /*
+     * Computes the vector embeddings for words by analyzing their co-occurrence statistics in a large text corpus.
+     *
+     * Inputs:
+     *  - cooc_matrix: Precomputed co-occurrence matrix of shape (N, N).
+     *  - cooc_index:  Index file mapping words to their positions in the co-occurrence matrix.
+     *                 The second column should contain the word list in the same order as the matrix.
+     *  - seed: Random seed for reproducibility.
+     *  - vector_size: Dimensionality of word vectors, V.
+     *  - eta: Learning rate for optimization, recommended value: 0.05.
+     *  - alpha: Weighting function parameter, recommended value: 0.75.
+     *  - x_max: Maximum co-occurrence value as per the GloVe paper: 100.
+     *  - tol: Convergence tolerance: training stops once the loss changes by less than tol, recommended value: 1e-4.
+     *  - iterations: Maximum number of training iterations.
+     *  - print_loss_it: Interval (in iterations) for printing the loss; values <= 0 disable the periodic print.
+     *
+     * Outputs:
+     *  - G: frame holding the words (second column of cooc_index) followed by their word vectors W + C of shape (N, V).
+     */
+
+    vocab_size = nrow(cooc_matrix);
+    W = (rand(rows=vocab_size, cols=vector_size, min=0, max=1, seed=seed)-0.5)/vector_size;
+    C = (rand(rows=vocab_size, cols=vector_size, min=0, max=1, seed=seed+1)-0.5)/vector_size;
+    bw = (rand(rows=vocab_size, cols=1, min=0, max=1, seed=seed+2)-0.5)/vector_size;
+    bc = (rand(rows=vocab_size, cols=1, min=0, max=1, seed=seed+3)-0.5)/vector_size;
+    [weights, log_cooc_matrix] = init(cooc_matrix, x_max, alpha);
+
+    # running sums of squared gradients (AdaGrad-style); they scale the step of every parameter below
+    momentum_W = 1e-8 + 0.1 * matrix(1, nrow(W), ncol(W));
+    momentum_C = 1e-8 + 0.1 * matrix(1, nrow(C), ncol(C));
+    momentum_bw = 1e-8 + 0.1 * matrix(1, nrow(bw), ncol(bw));
+    momentum_bc = 1e-8 + 0.1 * matrix(1, nrow(bc), ncol(bc));
+
+    error = 0;
+    iter = 0;
+    final_iter = 0;  # last iteration that applied an update; stays 0 if the loop stops before the first update
+    previous_error = 1e10;
+    conti = TRUE;
+
+    while (conti) {
+
+        # compute predictions for all co-occurring word pairs at once
+        predictions = W %*% t(C) + bw + t(bc);
+        diffs = predictions - log_cooc_matrix;
+        weighted_diffs = weights * diffs;
+
+        # compute gradients
+        wgrad = weighted_diffs %*% C;
+        cgrad = t(weighted_diffs) %*% W;
+        bwgrad = rowSums(weighted_diffs);
+        bcgrad = matrix(colSums(weighted_diffs), nrow(bc), ncol(bc));
+
+        error =  sum(0.5 * (weights * (diffs ^ 2)));
+        iter = iter + 1;
+
+        # update only while the loss still moves by at least tol and the iteration budget is not used up
+        if (abs(previous_error - error) >= tol) {
+            if(iter <= iterations){
+                # get steps and update
+                momentum_W = momentum_W + (wgrad ^ 2);
+                momentum_C = momentum_C + (cgrad ^ 2);
+                momentum_bw = momentum_bw + (bwgrad ^ 2);
+                momentum_bc = momentum_bc + (bcgrad ^ 2);
+
+                W = W - (eta * wgrad / (sqrt(momentum_W) + 1e-8));
+                C = C - (eta * cgrad / (sqrt(momentum_C) + 1e-8));
+                bw = bw - (eta * bwgrad / (sqrt(momentum_bw) + 1e-8));
+                bc = bc - (eta * bcgrad / (sqrt(momentum_bc) + 1e-8));
+
+                previous_error = error;
+                final_iter = iter;
+            } else {
+                conti = FALSE;
+            }
+        } else {
+          conti = FALSE;
+        }
+
+        # periodic loss report; the outer check keeps a non-positive interval away from the modulus
+        if (print_loss_it > 0) {
+            if ((iter %% print_loss_it) == 0) {
+                print("iteration: " + iter + " error: " + error);
+            }
+        }
+    }
+
+    # W + C is built once after training (so it is defined even if no update ran); prepend the word column
+    print("Given " + iterations + " iterations, " + "stopped (or converged) at the " + final_iter + " iteration / error: " + error);
+    G = cbind(cooc_index[,2], as.frame(W + C));
+}
+
+glove = function(
+    Frame[Unknown] input,
+    int seed, int vector_size,
+    double alpha, double eta,
+    double x_max,
+    double tol,
+    int iterations,
+    int print_loss_it,
+    Int maxTokens,
+    Int windowSize,
+    Boolean distanceWeighting,
+    Boolean symmetric)
+    return (frame[Unknown] G){
+
+        /*
+        * Entry point: computes word embeddings for a text corpus in two steps.
+        *   1. cooccurrenceMatrix(...) turns the corpus into a co-occurrence matrix plus its word index.
+        *   2. gloveWithCoocMatrix(...) trains the word vectors on that matrix.
+        *
+        * INPUT:
+        * ------------------------------------------------------------------------------
+        * - input (Frame[Unknown]): Input corpus, passed unchanged to cooccurrenceMatrix.
+        * - seed: Random seed for reproducibility.
+        * - vector_size: Dimensionality of word vectors, V.
+        * - alpha: Weighting function parameter, recommended value: 0.75.
+        * - eta: Learning rate for optimization, recommended value: 0.05.
+        * - x_max: Maximum co-occurrence value as per the GloVe paper: 100.
+        * - tol: Convergence tolerance on the change of the loss, recommended value: 1e-4.
+        * - iterations: Maximum number of training iterations.
+        * - print_loss_it: Interval (in iterations) for printing the loss.
+        * - maxTokens (Int): Maximum number of tokens per text entry.
+        * - windowSize (Int): Context window size.
+        * - distanceWeighting (Boolean): Whether to apply distance-based weighting.
+        * - symmetric (Boolean): Determines if the matrix is symmetric (TRUE) or asymmetric (FALSE).
+        * OUTPUT:
+        * - G (Frame[Unknown]): One row per word: the word followed by its word vector of shape (1, V).
+        */
+
+        [counts, vocab] = cooccurrenceMatrix(input, maxTokens, windowSize, distanceWeighting, symmetric);
+        G = gloveWithCoocMatrix(cooc_matrix=counts, cooc_index=vocab, seed=seed, vector_size=vector_size, alpha=alpha, eta=eta, x_max=x_max, tol=tol, iterations=iterations, print_loss_it=print_loss_it);
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index 92398a66b0..971e43a14b 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -154,6 +154,7 @@ public enum Builtins {
        GET_ACCURACY("getAccuracy", true),
        GLM("glm", true),
        GLM_PREDICT("glmPredict", true),
+       GLOVE("glove", true),
        GMM("gmm", true),
        GMM_PREDICT("gmmPredict", true),
        GNMF("gnmf", true),
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part1/BuiltinGloVeTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part1/BuiltinGloVeTest.java
new file mode 100644
index 0000000000..6641ab025c
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/part1/BuiltinGloVeTest.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin.part1;
+
+import java.io.IOException;
+import java.util.Objects;
+
+import org.apache.sysds.common.Types;
+import org.apache.sysds.common.Types.FileFormat;
+import org.apache.sysds.runtime.frame.data.FrameBlock;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.junit.Test;
+
+public class BuiltinGloVeTest extends AutomatedTestBase {
+
+       private static final String TEST_NAME = "glove";
+       private static final String TEST_DIR = "functions/builtin/";
+       private static final String RESOURCE_DIRECTORY = "./src/test/resources/datasets/";
+       private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinGloVeTest.class.getSimpleName() + "/";
+
+       private static final int TOP_K = 5;
+       private static final double ACCURACY_THRESHOLD = 0.85;
+
+       // Hyper-parameters handed to the DML script as -nvargs (each value is string-concatenated as declared here).
+       private static final double seed = 45;
+       private static final double vector_size = 100;
+       private static final double alpha = 0.75;
+       private static final double eta = 0.05;
+       private static final double x_max = 100;
+       private static final double tol = 1e-4;
+       private static final double iterations = 10000;
+       private static final double print_loss_it  = 100;
+       private static final double maxTokens = 2600;
+       private static final double windowSize = 15;
+       private static final String distanceWeighting = "TRUE";
+       private static final String symmetric = "TRUE";
+
+       @Override
+       public void setUp() {
+               addTestConfiguration(TEST_NAME,
+                               new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"out_result"}));
+       }
+
+       /** Runs the GloVe script and requires the accuracy against the stored reference to exceed the threshold. */
+       @Test
+       public void gloveTest() throws IOException{
+               // Using top-5 words for similarity comparison
+               runGloVe(TOP_K);
+               // Read the computed similarity results from SystemDS
+               FrameBlock computedSimilarity = readDMLFrameFromHDFS("out_result", FileFormat.CSV);
+               // Load expected results (precomputed in Python)
+               FrameBlock expectedSimilarity = readDMLFrameFromHDFS(RESOURCE_DIRECTORY + "/GloVe/gloveExpectedTop10.csv", FileFormat.CSV, false);
+               // Compute accuracy by comparing computed and expected results
+               double accuracy = computeAccuracy(computedSimilarity, expectedSimilarity, TOP_K);
+               System.out.println("Computed Accuracy: " + accuracy);
+               // JUnit assertion instead of the `assert` keyword, which the JVM skips unless assertions are enabled (-ea).
+               org.junit.Assert.assertTrue("Accuracy too low! Expected > " + ACCURACY_THRESHOLD + " but got " + accuracy,
+                               accuracy > ACCURACY_THRESHOLD);
+       }
+
+       /** Runs the glove.dml test script with exec type CP, passing the hyper-parameters and output location as -nvargs. */
+       public void runGloVe(int topK) {
+               // Load test configuration
+               Types.ExecMode platformOld = setExecMode(Types.ExecType.CP);
+               try {
+                       loadTestConfiguration(getTestConfiguration(TEST_NAME));
+                       String HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = HOME + TEST_NAME + ".dml";
+                       programArgs = new String[] {
+                                       "-nvargs",
+                                       "input=" + RESOURCE_DIRECTORY + "20news/20news_subset_untokenized.csv",
+                                       "seed=" + seed,
+                                       "vector_size=" + vector_size,
+                                       "alpha=" + alpha,
+                                       "eta=" + eta,
+                                       "x_max=" + x_max,
+                                       "tol=" + tol,
+                                       "iterations=" + iterations,
+                                       "print_loss_it=" + print_loss_it,
+                                       "maxTokens=" + maxTokens,
+                                       "windowSize=" + windowSize,
+                                       "distanceWeighting=" + distanceWeighting,
+                                       "symmetric=" + symmetric,
+                                       "topK=" + topK,
+                                       "out_result=" + output("out_result")
+                       };
+
+                       System.out.println("Running DML script...");
+                       runTest(true, false, null, -1);
+                       System.out.println("Test execution completed.");
+               } finally {
+                       // restore the execution mode that was active before this test
+                       rtplatform = platformOld;
+               }
+       }
+
+       /**
+        * Returns the fraction of rows of {@code computed} for which at least one word in columns 1..k-1 also occurs
+        * in columns 0..k-1 of the same row of {@code expected}. Rows are paired purely by row index.
+        * NOTE(review): column 0 presumably holds the query word, so only k-1 neighbours are examined while expected
+        * column 0 still counts as a match candidate - confirm whether 1..k on both sides was intended.
+        */
+       private double computeAccuracy(FrameBlock computed, FrameBlock expected, int k) {
+               int count = 0;
+               for (int i = 0; i < computed.getNumRows(); i++) {
+                       int matchCount = 0;
+                       for (int j = 1; j < k; j++) {
+                               String word1 = computed.getString(i, j);
+                               for (int m = 0; m < k; m++) {
+                                       if (Objects.equals(word1, expected.getString(i, m))) {
+                                               matchCount++;
+                                               break;
+                                       }
+                               }
+                       }
+                       if (matchCount > 0) {
+                               count++;
+                       }
+               }
+               return (double) count / computed.getNumRows();
+       }
+}
diff --git a/src/test/resources/datasets/GloVe/gloveExpectedTop10.csv b/src/test/resources/datasets/GloVe/gloveExpectedTop10.csv
new file mode 100644
index 0000000000..c736311e71
--- /dev/null
+++ b/src/test/resources/datasets/GloVe/gloveExpectedTop10.csv
@@ -0,0 +1,478 @@
+from,apart,decay,cfaehl,mathew,subject,discourage,prejudices,viceicotekcom,referring,words
+decay,cbnewsjcbattcom,tammy,r,deankaflowitz,healy,from,cfaehl,chris,timmbake,faehl
+cbnewsjcbattcom,decay,deankaflowitz,tammy,r,healy,might,wrong,faehl,intend,get
+deankaflowitz,cbnewsjcbattcom,faehl,wrong,decay,subject,healy,might,intend,yet,r
+subject,mantiscouk,re,deankaflowitz,faehl,clam,wrong,mathew,amusing,bake,correct
+re,subject,rushdie,yet,wrong,correct,amusing,more,deankaflowitz,tell,faehl
+about,knowledgeable,or,anything,worse,had,mathew,reference,faehl,pronoun,mantiscouk
+the,post,of,wholesome,useful,more,ark,that,university,extermination,new
+bible,quiz,example,using,reliable,baptist,wrt,answers,stated,rules,statements
+quiz,answers,bible,university,rules,boasted,whethe,bank,instead,banking,mantis
+answers,quiz,mantis,organization,university,consultants,at,boasted,bible,agnostics,law
+organization,agnostics,law,mantis,university,answers,hard,consultants,at,pretty,proof
+at,least,answers,blanket,mantis,distribution,consultants,believe,pretty,organization,world
+t,wrodhipped,wasn,they,mm,and,haven,won,provided,another,content
+distribution,na,lines,world,albuquerque,v,nntp,maybe,at,mm,lo
+na,distribution,lines,world,albuquerque,nntp,v,says,jaeger,alyosha,dogma
+lines,distribution,na,albuquerque,v,correction,jaeger,proselytize,world,conspiracy,this
+in,article,engaged,evidence,useful,my,idol,of,for,lines,that
+article,in,illustrate,engaged,allegory,guy,some,provide,healta,mere,if
+healta,saturnwwcedu,tammy,if,article,fool,r,albuquerque,wants,nntp,too
+saturnwwcedu,healta,tammy,r,if,guess,too,want,wants,created,idols
+tammy,r,saturnwwcedu,decay,cbnewsjcbattcom,healy,healta,intend,deankaflowitz,bad,misinformed
+r,healy,tammy,cbnewsjcbattcom,decay,deankaflowitz,saturnwwcedu,bad,mantiscouk,intend,writes
+healy,r,tammy,cbnewsjcbattcom,decay,deankaflowitz,writes,cheribums,intend,bad,mantiscouk
+writes,fallacy,guy,healy,timmons,viceicotekcom,mantiscouk,r,many,beauchaine,bake
+cheribums,are,on,healy,arrogant,wont,hoping,deankaflowitz,more,but,as
+are,arrogant,cheribums,wont,hoping,say,theists,who,on,arrogance,interpretation
+on,year,cheribums,ark,reliable,statements,day,university,are,arrogant,rules
+ark,covenant,of,theists,on,university,banking,new,the,atheism,rules
+of,ark,the,university,new,time,atheism,group,down,millions,theists
+covenant,when,wasn,ark,bible,waste,god,they,hard,t,example
+when,god,covenant,wasn,wrodhipped,won,said,bible,make,re,offer
+god,when,justify,does,make,followed,said,poster,exist,evidence,covenant
+said,never,make,god,original,graven,when,had,millions,killed,na
+make,said,no,god,graven,idol,questions,does,answers,when,followed
+no,reason,have,make,occasion,wholesome,and,useful,agree,they,idol
+graven,image,had,boasted,instead,aren,down,make,quiz,waste,whether
+image,graven,had,whether,waste,determine,down,put,millions,he,instead
+he,relied,by,whethe,implies,virtue,anything,upon,try,agnostics,innocent
+was,refering,university,implies,koran,something,an,that,rules,adapted,meaning
+refering,idols,was,something,implies,may,clear,prepared,position,meaning,mongols
+to,articles,references,i,reason,want,created,you,not,determine,agree
+idols,which,refering,support,out,created,pointing,kept,shines,may,itself
+which,idols,created,support,were,out,intend,pointing,bobby,justifications,healy
+were,bad,created,justifications,fool,which,reference,illustrate,compelling,very,healy
+created,which,were,bad,support,out,justifications,fool,idols,grip,pointing
+be,d,glad,must,fool,then,it,a,albuquerque,created,worse
+worshipped,him,someone,loved,year,ark,join,buphybuedu,on,faith,wants
+wasn,wrodhipped,covenant,when,t,day,won,atonement,they,bible,god
+wrodhipped,wasn,t,atonement,won,day,and,only,they,when,titles
+and,wholesome,him,useful,wrodhipped,mmm,questions,day,agnostics,amusing,titles
+only,titles,worthy,wrodhipped,respond,m,does,seriously,how,bzzt,fit
+high,priest,we,name,enter,could,defending,bzzt,virtue,fit,occasion
+priest,high,enter,could,name,we,virtue,defending,fit,occasion,extermination
+could,enter,too,priest,mongols,high,bad,therefore,d,we,am
+enter,holy,could,priest,high,therefore,bad,stating,respond,cannot,times
+holy,holies,enter,large,group,bcci,sources,kaflowitz,put,stating,down
+holies,holy,where,large,dean,brothers,kaflowitz,out,group,bcci,case
+where,post,holies,then,understand,must,it,d,call,case,bcci
+it,is,be,justify,where,that,albuquerque,a,irrelevant,gregg,must
+kept,once,shines,misinformation,intellectually,word,year,simply,beckoning,jaeger,serious
+once,kept,beckoning,wonderful,year,man,again,vestaunmedu,arrogance,faq,nntp
+a,simply,is,be,issued,faith,it,waste,albuquerque,title,lines
+year,on,day,arrogant,kept,worshipped,issued,once,buphybuedu,cheribums,ark
+day,atonement,idol,wrodhipped,whether,year,on,wasn,determine,and,his
+atonement,day,am,wrodhipped,and,wasn,idol,fit,time,look,someone
+i,not,but,am,lo,albuquerque,m,mexico,to,for,new
+am,referring,atonement,prepared,familiar,not,i,m,lo,we,words
+not,i,am,familiar,prepared,an,even,time,worthy,looking,words
+familiar,misinformed,worthy,respond,articles,am,knowledgeable,or,not,with,words
+with,articles,agree,moslems,atheists,makes,respond,amusing,familiar,you,do
+or,knowledgeable,familiar,reference,seriously,about,knows,respond,adapted,neil,bake
+knowledgeable,or,about,familiar,reference,defending,yet,deankaflowitz,clam,bake,respond
+original,language,dogma,reference,had,my,said,illustrate,point,statement,include
+language,original,but,extermination,apologize,qualifying,my,had,illustrate,want,guess
+but,language,i,guess,etc,merely,sorry,cheribums,using,can,mexico
+believe,reason,at,anyone,is,meant,there,least,iii,maybe,wont
+there,iii,know,s,believe,doubtless,at,titles,extermination,nothing,year
+is,it,a,that,reason,albuquerque,evidence,irrelevant,believe,how,useful
+word,respect,instead,idol,titles,kept,used,for,using,report,buphybuedu
+for,titles,evidence,fit,respect,real,mm,upon,word,i,occasion
+idol,day,titles,word,whether,determine,fit,provide,nothing,make,instead
+that,is,it,idea,useful,wont,was,the,evidence,s,say
+translator,would,used,justify,bcci,mmm,might,grip,his,large,occasion
+would,translator,grip,call,used,mmm,irrelevant,man,atheist,be,rule
+have,no,cannot,been,lost,referring,may,grip,reason,used,man
+used,translator,indicting,all,would,man,instead,have,word,while,grip
+instead,boasted,graven,word,all,stalin,indicting,determine,quiz,report,mere
+had,image,graven,whether,original,determine,proof,language,millions,about,said
+so,although,proof,wont,august,compelling,let,must,by,pointing,kill
+think,makes,m,may,correct,defending,my,moslems,wrong,amusing,what
+you,agree,articles,with,tell,correct,that,to,a,yet,the
+wrong,here,deankaflowitz,might,re,faehl,correct,kaflowitz,dean,subject,cbnewsjcbattcom
+here,wrong,might,dogmatic,anyway,understand,posting,deankaflowitz,see,wonderful,cbnewsjcbattcom
+then,case,must,prove,where,seriously,be,d,it,again,get
+again,wonderful,while,times,post,want,every,reader,then,lost,stating
+too,could,glad,suggesting,just,blanket,issued,saturnwwcedu,d,apart,meant
+just,mm,suggesting,ve,got,too,issued,special,looking,for,provided
+suggesting,just,mm,issued,way,too,correction,looking,discussion,conspiracy,real
+way,suggesting,mm,determine,because,this,say,just,such,occasion,another
+determine,whether,flocking,people,way,day,instead,idol,had,image,sophisticated
+whether,determine,interpretation,flocking,day,idol,innocent,had,people,image,instead
+interpretation,offer,whether,his,they,won,atheists,determine,idol,group,people
+offer,interpretation,correct,didn,won,moslems,haven,atheists,because,absolutely,wrong
+correct,dean,didn,offer,wrong,kaflowitz,re,oranges,subject,think,clam
+dean,kaflowitz,correct,wrong,today,loved,others,holies,kill,christians,absolutely
+kaflowitz,dean,others,wrong,correct,loved,mcl,unalterably,under,absolutely,holies
+cfaehl,host,vestaunmedu,chris,apart,decay,from,mathew,thought,timmbake,achieve
+vestaunmedu,host,cfaehl,chris,beckoning,did,issue,defending,discuss,given,fit
+chris,faehl,cfaehl,vestaunmedu,decay,deankaflowitz,host,cbnewsjcbattcom,might,mathew,mantiscouk
+faehl,chris,deankaflowitz,yet,subject,mantiscouk,wrong,bake,didn,clam,cbnewsjcbattcom
+amusing,yet,atheists,mclucsbedu,with,re,subject,clam,mmm,moslems,and
+atheists,moslems,do,amusing,bzzt,articles,with,today,christians,oranges,agree
+agnostics,university,organization,mantis,koran,consultants,answers,and,law,reason,whethe
+university,agnostics,new,organization,answers,mantis,quiz,ark,consultants,koran,was
+new,mexico,albuquerque,university,ark,of,cambridge,consultants,hear,uk,x
+mexico,new,albuquerque,cambridge,uk,x,lo,rusnews,hear,newsreader,i
+albuquerque,mexico,new,lines,v,rusnews,distribution,na,lo,newsreader,be
+world,nntp,distribution,na,v,maybe,rusnews,must,posting,anyway,says
+nntp,world,posting,host,v,rusnews,says,distribution,maybe,na,jaeger
+posting,nntp,host,see,world,here,anyway,wonderful,cfaehl,excuse,propaganda
+host,vestaunmedu,cfaehl,posting,nntp,beckoning,thought,achieve,chris,world,wonderful
+timmbake,mcl,mclucsbedu,decay,cfaehl,cbnewsjcbattcom,clam,chris,prove,deankaflowitz,claims
+mcl,timmbake,mclucsbedu,claims,kaflowitz,clam,justifications,chris,cfaehl,fashion,bobbe
+mclucsbedu,clam,mcl,timmbake,amusing,deankaflowitz,faehl,bobbe,didn,yet,wrong
+clam,mclucsbedu,bake,didn,timmons,subject,faehl,deankaflowitz,amusing,knowledgeable,mcl
+bake,timmons,clam,iii,faehl,subject,beauchaine,mclucsbedu,wrong,knowledgeable,explained
+timmons,bake,iii,clam,fallacy,writes,bennett,beauchaine,dostoevsky,neil,christians
+fallacy,many,writes,timmons,christians,hard,subject,atheism,bake,respond,amusing
+atheism,hard,whethe,merely,of,university,recognizing,ark,has,fallacy,new
+faith,lo,special,yes,a,mm,jaeger,someone,worshipped,place,maybe
+lo,hear,albuquerque,greater,btw,faith,apology,v,i,mexico,am
+hear,lo,faq,newsreader,v,beckoning,rusnews,mexico,albuquerque,supports,new
+faq,etc,beckoning,hear,name,guess,banking,let,times,once,burden
+beckoning,once,faq,vestaunmedu,host,supports,hear,times,kept,nntp,posting
+wonderful,job,rule,deleted,again,don,slander,although,posting,understand,here
+rule,don,deleted,wonderful,special,mmm,medium,haven,slippery,call,pronoun
+deleted,wrt,medium,rule,wonderful,slander,because,they,arrogant,slippery,agree
+didn,correct,clam,faehl,offer,mclucsbedu,anything,subject,group,mantiscouk,other
+say,such,are,other,fool,they,than,way,didn,because,arrogant
+anything,other,piece,didn,about,fool,than,he,memory,say,usenet
+conspiracy,correction,greater,mongols,discussion,looking,suggesting,waste,whethe,sorry,lo
+correction,conspiracy,looking,waste,lines,suggesting,discussion,mongols,greater,simply,sorry
+hard,atheism,indictment,fallacy,organization,correction,of,covenant,has,what,not
+yes,mmm,enough,faith,we,any,stronger,robert,what,why,agnostics
+don,rule,mix,wonderful,aren,his,apples,why,propaganda,deleted,slanderous
+mix,apples,don,aren,superior,also,supporting,oranges,unsympathetic,atheists,did
+apples,mix,oranges,superior,don,atheists,aren,unsympathetic,wonderful,moslems,also
+oranges,apples,moslems,do,state,understand,correct,atheists,respond,misinformed,how
+how,seriously,many,oranges,respond,today,can,do,religion,only,is
+can,tell,how,oranges,religion,fool,i,want,references,but,good
+extermination,mongols,by,times,language,august,conspiracy,banking,burden,the,doubtless
+by,we,enough,extermination,name,he,pointing,qualifying,bzzt,virtue,apologize
+mongols,extermination,conspiracy,clear,correction,could,worse,whethe,by,enough,hear
+worse,than,stimulating,glad,mongols,other,fool,debate,greater,be,about
+than,other,life,worse,greater,words,anything,some,say,mm,conspiracy
+stalin,looking,instead,khan,killed,kept,brought,did,issued,prepared,simply
+khan,conquered,prepared,greater,through,explained,stalin,blanket,stimulating,shines,unsympathetic
+conquered,khan,unsympathetic,prepared,nntp,through,cfaehl,world,explained,greater,blanket
+people,determine,whether,articles,koran,will,adapted,exist,of,discourage,own
+unsympathetic,conquered,aren,causethat,prove,bobby,lost,khan,apples,may,alyosha
+his,flocking,don,interpretation,all,causethat,day,baptist,translator,atrociousbut,provide
+causethat,atrociousbut,neil,unsympathetic,bennett,an,alyosha,anti,his,aren,slander
+atrociousbut,causethat,killed,millions,an,slander,his,example,stalin,mathew,arrogance
+killed,millions,atrociousbut,stalin,while,all,looking,reader,brought,said,reliable
+millions,killed,atrociousbut,reader,of,burden,graven,they,had,own,said
+own,prejudices,reader,every,millions,people,fit,your,theists,from,idol
+who,wish,baptist,boasted,loved,are,arrogant,thanks,exist,sorry,something
+loved,kaflowitz,dean,bzzt,who,worshipped,baptist,boasted,burden,propaganda,slanderous
+him,join,wholesome,stronger,and,yet,worshipped,discuss,more,rushdie,include
+atheist,state,soccultureislam,place,got,good,irrelevant,ve,mmm,would,special
+state,atheist,absolutely,unalterably,oranges,christians,today,many,religion,clam,mclucsbedu
+anyone,irrelevant,been,believe,be,respect,convinced,wont,glad,thanks,iii
+will,discourage,apologize,kill,cambridge,explain,one,uk,people,x,consultants
+explain,may,referring,apologize,will,otherwise,prepared,lo,lost,this,says
+this,contention,supports,explain,way,lines,lost,times,excuse,support,distribution
+did,nothing,vestaunmedu,mathew,discuss,higher,has,occasion,report,man,august
+nothing,has,supports,did,higher,fit,stronger,idol,times,titles,only
+name,by,under,let,virtue,any,high,bzzt,priest,faq,enough
+whethe,he,debate,doubtless,quiz,atheism,using,recognizing,relied,mongols,conspiracy
+an,indictment,causethat,not,allegory,anti,example,islamic,as,got,was
+irrelevant,get,justify,anyone,usenet,it,d,would,is,atheist,deankaflowitz
+get,irrelevant,idea,might,deankaflowitz,cbnewsjcbattcom,justify,become,grip,then,bcci
+grip,man,would,d,issued,created,have,glad,get,blanket,mantiscouk
+man,grip,while,used,did,have,issued,blanket,would,once,issue
+example,bible,as,arrogance,reliable,sources,statements,using,baptist,recognizing,koran
+brought,up,statements,looking,stalin,blanket,reliable,achieve,example,issue,killed
+up,brought,statements,look,suggesting,indictment,why,blanket,worthy,allegory,wants
+as,example,arrogance,superior,guilty,itself,reliable,merely,sources,more,indictment
+indictment,an,soccultureislam,proof,hard,as,atheism,time,burden,got,up
+merely,recognizing,using,as,atheism,but,arrogance,example,bennett,been,indictment
+another,superior,anti,down,such,because,t,beauchaine,time,they,alyosha
+kill,others,file,my,illustrate,apologize,will,point,discourage,under,dean
+others,under,kill,stimulating,file,kaflowitz,debate,pretty,dean,explained,qualifying
+under,others,stimulating,name,debate,discourage,pretty,any,btw,dogmatic,clear
+any,position,dogmatic,questions,under,name,knows,m,we,defending,yes
+s,fit,what,there,second,name,let,many,titles,that,doubtless
+fit,occasion,titles,s,for,respect,let,nothing,idol,excuse,high
+occasion,fit,titles,no,did,respect,high,for,times,priest,stating
+look,up,become,soccultureislam,special,occasion,convinced,content,atonement,brought,supports
+while,slander,never,articles,again,job,man,killed,agree,superior,mmm
+never,said,while,meant,special,place,supporting,mmm,makes,agree,looking
+implication,pretty,very,kill,sources,intellectually,proof,justifications,burden,medium,don
+pretty,clear,implication,others,intellectually,under,enough,answers,very,least,mantis
+clear,pretty,enough,mongols,refering,sorry,under,others,intellectually,btw,organization
+m,defending,think,sorry,worthy,am,only,i,any,religion,may
+sorry,least,m,proof,providing,greater,btw,conspiracy,clear,correction,whethe
+respond,therefore,misinformed,cannot,also,familiar,with,articles,seriously,religion,only
+your,contention,spend,innocent,time,words,referring,rusnews,true,own,am
+words,familiar,prepared,your,prove,than,am,referring,misinformed,true,not
+true,memory,alyosha,usenet,unalterably,meaning,want,your,words,good,dogma
+meaning,usenet,thought,justify,dogma,true,worse,simply,was,refering,others
+usenet,meaning,justify,simply,memory,irrelevant,true,dogma,fool,absolutely,alyosha
+slippery,medium,issued,simply,obviously,piece,rule,deleted,spreading,waste,justify
+medium,slippery,wrt,deleted,rule,issued,baptist,implication,provided,pronoun,intellectually
+wrt,burden,deleted,medium,baptist,bible,all,reliable,theists,pronoun,arrogant
+burden,wrt,proof,august,arrogance,theists,baptist,reliable,boasted,include,all
+proof,burden,indictment,let,sorry,so,arrogance,had,bank,organization,answers
+has,nothing,higher,prove,pointing,misinformed,did,out,put,sophisticated,atheism
+prove,then,unsympathetic,seriously,religion,misinformed,has,want,bobby,words,given
+does,justify,god,exist,only,says,cannot,make,it,case,one
+justify,does,irrelevant,god,usenet,translator,evidence,it,get,bcci,today
+exist,know,wish,does,people,prejudices,glad,god,have,who,been
+know,there,exist,apology,absolutely,btw,won,iii,conspiracy,explained,provided
+etc,faq,guess,providing,but,hear,propaganda,see,priest,sorry,extermination
+guess,etc,ass,but,justifications,if,faq,saturnwwcedu,fashion,given,burden
+what,many,ass,s,makes,think,second,m,that,i,for
+if,see,someone,healta,saturnwwcedu,do,guess,wants,lost,ass,propaganda
+those,justifications,out,very,pointing,title,sources,itself,engaged,pretty,if
+justifications,those,out,were,very,fool,intellectually,pointing,created,sources,illustrate
+compelling,aren,although,why,others,thread,out,justifications,kill,so,were
+why,thread,bank,aren,compelling,don,up,yes,special,excuse,atonement
+aren,compelling,unsympathetic,why,thread,graven,don,flocking,bank,mix,arrogance
+flocking,whether,determine,his,aren,down,put,all,has,spend,articles
+they,won,pronoun,t,because,theists,useful,wrodhipped,deleted,say,interpretation
+won,they,wrodhipped,haven,offer,recognizing,wasn,when,and,provided,know
+one,greater,discourage,boasted,nntp,anyway,misinformation,will,lo,debate,uk
+discourage,one,will,under,others,apologize,kill,bzzt,apology,from,prejudices
+pointing,out,by,put,justifications,has,those,which,claims,down,created
+out,pointing,those,justifications,which,created,support,idols,mere,compelling,has
+very,sources,intellectually,justifications,implication,those,pretty,out,title,stimulating,were
+sources,very,reliable,fashion,example,same,intellectually,as,justifications,bible,large
+reliable,statements,sources,bible,fashion,example,indicting,on,as,wrt,include
+statements,reliable,indicting,brought,up,example,bible,on,all,issue,rules
+supporting,dogmatic,charged,islam,never,wonderful,wrong,here,mix,questions,think
+dogmatic,position,any,here,might,supporting,we,understand,btw,anyway,may
+position,dogmatic,any,claims,might,under,although,wrong,intend,refering,if
+d,be,glad,fool,then,grip,must,world,irrelevant,where,created
+fool,were,justifications,be,d,created,say,worse,anything,usenet,illustrate
+large,group,holy,discuss,reliable,sources,holies,doubtless,didn,bcci,translator
+group,large,holy,didn,of,holies,down,idol,interpretation,arrogance,provide
+wish,proselytize,boasted,exist,who,prepared,world,glad,khan,conquered,says
+proselytize,wish,lines,same,conspiracy,mere,correction,implies,report,suggesting,mongols
+same,fashion,sources,proselytize,include,reliable,writes,bible,mcl,implication,allegory
+fashion,same,sources,reliable,include,above,bible,reference,allegory,statements,mcl
+religion,given,seriously,prove,defending,respond,many,worthy,can,state,how
+many,christians,ass,fallacy,what,my,today,qualifying,how,do,writes
+do,atheists,oranges,today,christians,with,wont,many,moslems,articles,sophisticated
+see,if,propaganda,posting,let,slanderous,someone,here,excuse,some,ass
+anyway,maybe,here,nntp,one,world,posting,dogmatic,misinformation,serious,intend
+maybe,anyway,mm,world,misinformation,nntp,distribution,serious,glad,lo,shines
+mm,just,suggesting,maybe,way,looking,t,faith,misinformation,for,life
+looking,correction,waste,stalin,suggesting,conspiracy,special,mm,brought,even,second
+enough,second,by,clear,pretty,yes,we,name,looking,blanket,mongols
+second,enough,makes,looking,s,what,a,lines,stalin,simply,waste
+makes,second,think,with,what,never,amusing,article,how,my,correct
+defending,given,understand,m,worthy,religion,knowledgeable,might,high,think,wrong
+given,defending,religion,understand,although,prove,oranges,many,dogmatic,others,seriously
+recognizing,using,merely,baptist,whethe,example,won,atheism,bible,reliable,sorry
+meant,never,believe,too,reason,glad,charged,worshipped,wants,be,wish
+although,understand,compelling,must,given,lost,we,wonderful,so,qualifying,oranges
+understand,although,given,might,defending,where,oranges,here,dogmatic,wonderful,kaflowitz
+might,get,idea,here,wrong,understand,deankaflowitz,cbnewsjcbattcom,dogmatic,faehl,iii
+idea,get,might,convinced,become,debate,doubtless,btw,stimulating,that,mexico
+using,recognizing,merely,bible,example,all,whethe,baptist,arrogance,but,theists
+allegory,illustrate,alyosha,article,an,memory,engaged,statement,firmly,justifications,fashion
+illustrate,allegory,point,my,article,engaged,head,kill,dogma,right,alyosha
+my,point,dogma,illustrate,many,kill,engaged,original,ass,head,think
+point,illustrate,my,bobbe,qualifying,kill,provide,christians,do,atheists,file
+we,by,qualifying,apologize,let,although,high,dogmatic,higher,referring,enough
+referring,am,lost,every,excuse,may,prepared,we,have,explain,bobby
+every,reader,referring,bobby,thread,all,lost,join,theists,own,again
+reader,every,theists,post,all,times,own,spend,thread,millions,arrogance
+post,where,the,rules,adapted,bcci,koran,reader,john,again,banking
+evidence,for,upon,justify,is,in,relied,that,won,thanks,god
+poster,stated,such,bible,doubtless,god,debate,convinced,absolutely,justify,chris
+stated,poster,doubtless,debate,bible,absolutely,such,stimulating,convinced,chris,relied
+relied,upon,he,debate,whethe,doubtless,evidence,under,stated,by,unalterably
+upon,relied,evidence,for,he,thought,discussion,unalterably,whethe,debate,priest
+may,lost,explain,referring,think,refering,have,prepared,dogmatic,btw,least
+lost,may,thread,referring,bobby,although,unsympathetic,have,supports,every,explain
+thread,lost,why,supports,aren,all,indicting,every,someone,compelling,reader
+theists,baptist,all,ark,arrogant,reader,burden,boasted,are,koran,wrt
+arrogant,are,theists,baptist,thanks,hoping,cheribums,year,arrogance,wont,deleted
+because,they,such,way,deleted,down,another,references,say,offer,life
+such,absolutely,say,poster,because,stated,unalterably,doubtless,another,and,pronoun
+absolutely,unalterably,such,state,stated,know,doubtless,spreading,kaflowitz,usenet,under
+unalterably,absolutely,state,true,such,kaflowitz,alyosha,memory,usenet,dean,others
+dogma,says,my,today,original,alyosha,memory,otherwise,illustrate,place,thought
+says,otherwise,dogma,nntp,world,jaeger,does,na,explain,wish,it
+prepared,khan,am,issue,conquered,not,wish,referring,words,may,blanket
+issue,prepared,discuss,blanket,stronger,statements,vestaunmedu,more,higher,khan,did
+blanket,issued,at,too,indicting,issue,khan,prepared,conquered,man,enough
+indicting,all,statements,used,reliable,mathew,thread,achieve,john,blanket,instead
+all,indicting,theists,thread,achieve,instead,used,john,baptist,arrogance,wrt
+arrogance,example,as,burden,all,arrogant,wont,proof,theists,wrt,reader
+wont,are,do,file,arrogant,hoping,arrogance,cheribums,so,that,kill
+bzzt,virtue,atheists,apologize,qualifying,moslems,loved,by,high,name,discourage
+virtue,bzzt,name,by,high,he,innocent,priest,idol,fit,own
+innocent,little,pronoun,whether,your,virtue,instead,he,whethe,graven,provide
+little,innocent,pronoun,apart,discuss,high,whether,priest,they,translator,some
+pronoun,little,innocent,they,wrt,rule,all,such,won,about,john
+ve,got,soccultureislam,just,issued,call,good,rule,atheist,suggesting,not
+issued,blanket,suggesting,just,slippery,simply,ve,grip,too,a,medium
+statement,illustrate,allegory,least,at,thought,my,viceicotekcom,if,head,unsympathetic
+least,at,mantis,sorry,apologize,may,pretty,consultants,anyway,statement,blanket
+apologize,qualifying,will,bzzt,must,we,kill,language,explain,by,least
+qualifying,apologize,we,many,bzzt,point,others,by,language,christians,although
+place,special,atheist,got,never,dogma,rule,causethat,mmm,obviously,faith
+call,john,baptist,would,indicting,got,slander,rule,all,job,post
+john,call,baptist,all,post,indicting,issued,banking,theists,wrt,rules
+baptist,john,theists,boasted,wrt,bible,arrogant,who,rules,call,all
+boasted,instead,graven,baptist,wish,answers,quiz,one,who,greater,theists
+greater,one,conspiracy,khan,lo,than,correction,boasted,debate,sorry,worse
+christians,many,today,ass,do,atheists,state,fallacy,qualifying,timmons,piece
+today,christians,dogma,many,dean,do,atheists,state,moslems,with,justify
+itself,claims,guilty,as,superior,justifications,those,anti,out,sources,charged
+guilty,charged,itself,as,claims,firmly,engaged,superior,example,arrogance,head
+charged,guilty,supporting,as,looking,itself,again,meant,wrong,too,brought
+other,than,anything,say,worse,some,life,didn,simply,conspiracy,fool
+thought,meaning,host,cfaehl,btw,apology,dogma,my,nntp,upon,convinced
+claims,itself,superior,guilty,mcl,position,pointing,out,justifications,as,beckoning
+superior,claims,as,another,itself,slander,apples,do,while,anti,guilty
+thanks,arrogant,baptist,apology,special,who,evidence,theists,serious,misinformation,for
+apology,btw,excuse,lo,thought,jaeger,thanks,know,misinformation,otherwise,shines
+btw,apology,excuse,lo,thought,idea,dogmatic,under,sorry,may,iii
+worthy,familiar,only,seriously,misinformed,defending,m,not,religion,higher,statements
+seriously,misinformed,worthy,how,religion,prove,then,therefore,respond,or,bad
+misinformed,seriously,familiar,respond,worthy,sophisticated,prove,join,articles,has,oranges
+sophisticated,put,down,articles,misinformed,provide,time,do,familiar,has,support
+put,down,sophisticated,provide,pointing,time,has,out,flocking,graven,determine
+down,put,sophisticated,provide,graven,time,pointing,flocking,image,of,useful
+serious,misinformation,shines,rusnews,boasted,maybe,v,theists,nntp,anyway,kept
+misinformation,serious,shines,maybe,rusnews,kept,v,through,glad,nntp,one
+shines,through,misinformation,serious,explained,kept,providing,discussion,khan,apology,rusnews
+through,explained,shines,providing,discussion,khan,misinformation,others,conquered,kept,btw
+explained,through,shines,providing,discussion,khan,above,others,bake,stimulating,conquered
+above,reference,explained,fashion,include,reliable,bake,unalterably,absolutely,or,religion
+iii,there,timmons,bake,dostoevsky,might,btw,apology,know,wrong,mathew
+higher,stronger,nothing,has,we,did,issue,supports,misinformed,worthy,join
+stronger,higher,wholesome,more,him,issue,yet,nothing,rushdie,join,yes
+more,yet,wholesome,rushdie,stronger,useful,re,seriously,him,as,the
+wholesome,stronger,more,him,and,useful,rushdie,yet,the,no,join
+useful,life,wholesome,more,and,the,rushdie,they,yet,down,that
+life,useful,than,other,they,mm,looking,some,worse,because,down
+some,guy,viceicotekcom,ass,bobbe,other,someone,wants,article,than,good
+good,memory,islam,familiar,knows,atheist,viceicotekcom,some,ve,alyosha,true
+memory,alyosha,true,good,dogma,usenet,allegory,try,illustrate,unalterably,anything
+alyosha,memory,firmly,dogma,brothers,allegory,head,true,engaged,illustrate,causethat
+brothers,karamazov,dostoevsky,alyosha,holies,obviously,otherwise,simply,kept,unalterably,providing
+karamazov,dostoevsky,brothers,file,obviously,others,otherwise,iii,kill,atrociousbut,idols
+dostoevsky,karamazov,brothers,iii,otherwise,file,timmons,others,right,obviously,arrogance
+mathew,achieve,mantiscouk,indicting,cfaehl,subject,apart,faehl,did,all,chris
+mantiscouk,mathew,subject,faehl,viceicotekcom,bobbe,guy,yet,r,chris,deankaflowitz
+yet,rushdie,more,amusing,faehl,re,wholesome,deankaflowitz,him,knowledgeable,mclucsbedu
+rushdie,yet,more,re,wholesome,useful,bcci,stronger,law,tell,amusing
+islamic,anti,slander,been,law,job,an,rushdie,followed,bcci,implies
+law,mantis,organization,islamic,answers,rushdie,agnostics,consultants,pretty,implies,anti
+mantis,consultants,law,answers,organization,agnostics,cambridge,university,least,at,quiz
+consultants,mantis,cambridge,answers,uk,university,agnostics,at,new,law,least
+cambridge,uk,consultants,x,mantis,mexico,will,new,hoping,also,bobby
+uk,x,cambridge,newsreader,consultants,mexico,new,one,will,lo,albuquerque
+x,newsreader,uk,cambridge,rusnews,mexico,albuquerque,new,hear,will,lo
+newsreader,x,rusnews,v,uk,hear,albuquerque,mexico,new,lo,serious
+rusnews,v,newsreader,albuquerque,x,misinformation,world,serious,nntp,hear,mexico
+v,rusnews,newsreader,lines,world,albuquerque,hear,nntp,distribution,jaeger,misinformation
+jaeger,buphybuedu,otherwise,obviously,v,nntp,spreading,apology,lines,simply,special
+buphybuedu,jaeger,gregg,v,worshipped,respect,year,let,someone,august,word
+gregg,buphybuedu,questions,haven,jaeger,it,respect,islam,rules,justify,where
+viceicotekcom,bobbe,robert,guy,some,beauchaine,mantiscouk,writes,mclucsbedu,good,from
+bobbe,viceicotekcom,robert,point,mantiscouk,mclucsbedu,some,yet,guy,mcl,christians
+robert,beauchaine,viceicotekcom,bobbe,guy,mantiscouk,yes,bake,faehl,provided,some
+beauchaine,robert,bennett,guy,viceicotekcom,bake,timmons,neil,writes,clam,mantiscouk
+bennett,neil,cannot,beauchaine,causethat,timmons,bake,also,knowledgeable,merely,or
+neil,bennett,cannot,causethat,adapted,beauchaine,timmons,also,or,irrelevant,been
+bcci,adapted,post,bad,rushdie,therefore,also,justify,holy,knows,translator
+adapted,koran,bcci,post,implies,something,respond,neil,or,tell,knows
+koran,adapted,rules,knows,post,managed,agnostics,include,university,baptist,theists
+rules,banking,koran,quiz,post,baptist,bible,example,followed,ark,on
+banking,rules,followed,ark,quiz,post,times,bible,john,baptist,spend
+times,stating,august,extermination,beckoning,again,reader,banking,nothing,occasion,burden
+august,let,times,discuss,burden,did,idol,extermination,so,proof,buphybuedu
+let,august,we,propaganda,fit,see,proof,someone,name,s,so
+guy,some,viceicotekcom,writes,robert,beauchaine,mantiscouk,bobbe,mere,article,someone
+piece,obviously,spreading,anything,real,content,right,moslems,slippery,next,christians
+title,implies,waste,looking,mmm,simply,even,those,very,a,something
+implies,something,title,refering,adapted,was,he,an,law,organization,people
+something,implies,refering,adapted,was,case,title,koran,who,proselytize,itself
+case,then,must,stating,something,where,holies,contention,does,wish,prove
+must,then,be,apologize,world,although,case,where,d,nntp,glad
+haven,provided,questions,won,islam,gregg,rule,wrodhipped,offer,slanderous,t
+provided,even,haven,won,bank,bake,clam,just,t,rule,viceicotekcom
+even,managed,provided,not,looking,title,include,just,knows,won,questions
+support,which,idols,spend,created,contention,out,bobby,sophisticated,job,pointing
+contention,your,support,spend,this,case,stating,respond,put,times,bobby
+intend,deankaflowitz,which,cbnewsjcbattcom,tammy,job,healy,r,anyway,christians,position
+respect,titles,word,fit,for,occasion,gregg,says,buphybuedu,anyone,have
+questions,haven,gregg,islam,any,and,knows,agnostics,titles,make,respect
+managed,even,include,koran,statements,example,implies,agnostics,provided,adapted,blanket
+include,managed,reference,koran,reliable,fashion,bible,above,burden,same,example
+reference,above,include,head,or,knowledgeable,with,original,were,fashion,alyosha
+head,firmly,engaged,reference,alyosha,illustrate,my,didn,causethat,correct,guilty
+firmly,head,engaged,alyosha,guilty,illustrate,allegory,article,causethat,my,place
+engaged,firmly,head,ass,in,article,illustrate,my,alyosha,allegory,obviously
+ass,many,what,engaged,christians,some,excuse,guess,obviously,my,spreading
+excuse,supports,referring,btw,apology,apart,ass,fit,v,might,posting
+supports,excuse,nothing,thread,beckoning,lost,hear,this,higher,might,did
+reason,believe,moslems,no,is,try,have,agnostics,to,meant,agree
+anti,slander,islamic,job,an,another,causethat,superior,itself,example,which
+slander,job,anti,islamic,while,wonderful,deleted,deankaflowitz,superior,knows,call
+job,slander,anti,wonderful,islamic,apart,while,deankaflowitz,intend,which,call
+also,respond,agree,cannot,bcci,stating,articles,bennett,atheists,arrogance,neil
+apart,from,cfaehl,job,mathew,excuse,decay,little,slander,btw,serious
+prejudices,own,report,discourage,from,exist,hear,priest,fit,mere,high
+titles,fit,respect,only,for,idol,occasion,word,wrodhipped,and,questions
+real,content,discussion,piece,providing,for,suggesting,titles,obviously,title,conspiracy
+content,real,discussion,piece,obviously,spreading,for,t,engaged,providing,look
+want,bobby,prove,again,references,true,to,join,lost,wonderful,although
+tell,knows,can,oranges,re,deankaflowitz,rushdie,you,slander,adapted,job
+bank,why,quiz,aren,whethe,provided,proof,graven,compelling,clam,answers
+mere,report,instead,out,guy,did,article,pointing,conquered,correction,proselytize
+report,mere,instead,prejudices,did,wish,otherwise,fit,proselytize,word,conquered
+stating,times,cannot,also,case,oranges,again,been,hoping,followed,enter
+followed,banking,god,rules,been,might,islamic,stating,idea,won,get
+knows,islam,tell,koran,deankaflowitz,or,any,slander,good,bcci,questions
+islam,knows,questions,good,haven,supporting,wrong,gregg,any,dean,slanderous
+bad,therefore,were,created,r,seriously,bcci,healy,could,which,cannot
+therefore,bad,cannot,respond,seriously,bcci,could,enter,r,created,misinformed
+cannot,therefore,been,neil,bennett,respond,stating,have,also,does,bcci
+been,cannot,islamic,have,followed,anyone,stating,neil,merely,glad,idea
+otherwise,says,obviously,jaeger,dogma,discussion,dostoevsky,providing,explain,apology,simply
+obviously,spreading,slanderous,piece,otherwise,jaeger,discussion,simply,content,ass,slippery
+spreading,obviously,slanderous,propaganda,piece,jaeger,discussion,ass,slippery,absolutely,content
+slanderous,propaganda,spreading,obviously,see,haven,don,posting,loved,jaeger,islam
+propaganda,slanderous,spreading,see,let,someone,don,posting,obviously,loved,through
+someone,wants,if,let,propaganda,see,thread,worshipped,some,lost,discuss
+wants,someone,discuss,provide,some,if,worshipped,determine,saturnwwcedu,meant,up
+discuss,provide,wants,issue,august,him,large,did,join,vestaunmedu,someone
+glad,d,be,too,worse,misinformation,lo,issued,maybe,wish,must
+discussion,providing,real,through,content,explained,obviously,shines,correction,conspiracy,suggesting
+providing,discussion,through,shines,explained,real,etc,sorry,otherwise,conspiracy,btw
+references,articles,provide,want,illustrate,to,try,because,bobby,can,respond
+provide,down,discuss,put,wants,references,articles,sophisticated,idol,point,article
+articles,agree,with,atheists,sophisticated,familiar,references,while,respond,you,provide
+agree,articles,with,mmm,you,also,moslems,while,atheists,amusing,deleted
+mmm,yes,agree,rule,and,amusing,title,would,atheists,try,while
+intellectually,stimulating,very,justifications,debate,kept,pretty,sources,others,file,clear
+stimulating,intellectually,debate,others,doubtless,under,worse,file,khan,idea,stated
+debate,doubtless,stimulating,others,whethe,under,stated,intellectually,idea,relied,greater
+doubtless,debate,stimulating,stated,whethe,idea,poster,absolutely,such,relied,large
+spend,time,support,your,contention,reader,bobby,respond,banking,flocking,every
+time,spend,soccultureislam,waste,try,down,sophisticated,put,of,your,not
+soccultureislam,got,ve,time,indictment,atheist,look,worthy,spend,banking,won
+got,ve,special,soccultureislam,just,call,place,atheist,an,issued,rule
+special,place,got,rule,looking,never,faith,simply,just,jaeger,thanks
+file,right,kill,others,next,stimulating,wont,intellectually,dostoevsky,hoping,piece
+right,next,file,illustrate,piece,moslems,kill,dostoevsky,didn,anything,allegory
+next,right,bobby,file,moslems,piece,illustrate,which,respond,anything,determine
+bobby,want,lost,next,unsympathetic,support,every,which,prove,familiar,referring
+join,him,misinformed,every,wholesome,bobby,yet,discuss,burden,stronger,worshipped
+become,convinced,idea,look,might,get,issue,must,issued,then,here
+convinced,become,idea,debate,stated,must,doubtless,poster,thought,issued,worse
+simply,waste,a,obviously,usenet,issued,slippery,special,looking,jaeger,kept
+waste,simply,time,correction,looking,title,conspiracy,image,graven,a,suggesting
+try,time,reason,memory,mmm,sophisticated,references,under,he,bzzt,put
+moslems,atheists,with,reason,oranges,bzzt,do,next,agree,piece,today
+hoping,achieve,are,arrogant,wont,cheribums,file,has,didn,cambridge,stating
+achieve,mathew,hoping,all,indicting,host,cfaehl,mantiscouk,which,did,thread
diff --git a/src/test/scripts/functions/builtin/glove.dml b/src/test/scripts/functions/builtin/glove.dml
new file mode 100644
index 0000000000..2dbb9979ef
--- /dev/null
+++ b/src/test/scripts/functions/builtin/glove.dml
@@ -0,0 +1,86 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cosine_similarity = function(matrix[double] trained_emb)
+    return (matrix[double] cosine_sim){
+    /*
+     * Pairwise cosine similarity between the rows of an embedding matrix.
+     *
+     * Inputs:
+     *  - trained_emb: (N, D) embeddings, one D-dimensional row per word.
+     *
+     * Outputs:
+     *  - cosine_sim: (N, N) matrix; entry (i, j) is the similarity of
+     *                rows i and j.
+     */
+    norms = rowSums(trained_emb^2) ^ 0.5;   # (N, 1) Euclidean row norms
+    gram = trained_emb %*% t(trained_emb);  # (N, N) pairwise dot products
+    cosine_sim = gram / (norms %*% t(norms));
+}
+
+get_top = function(matrix[double] trained_emb, int k, Frame[Unknown] column)
+    return (Frame[Unknown] result){
+    /*
+     * Retrieves the top k most similar words for each word.
+     *
+     * Inputs:
+     *  - trained_emb: Matrix of word embeddings of shape (N, D).
+     *  - k: Number of similar words to retrieve; only N-1 other words exist.
+     *  - column: Frame containing the word column.
+     *
+     * Outputs:
+     *  - result: Frame of shape (N, k+1). Column 1 holds the target word,
+     *            columns 2..k+1 its neighbors by decreasing similarity.
+     */
+    S = cosine_similarity(trained_emb);
+    n = nrow(S);
+    # Set the diagonal to -1 (lowest cosine value) so self-matches sort last.
+    I = diag(matrix(1, rows=n, cols=1));
+    S = S * (1 - I) + (-1.0 * I);
+    result = column;
+    for(i in 1:k){
+        result = cbind(result, column);
+    }
+    for (i in 1:n){
+        topN = order(target=S[,i], by=1, decreasing=TRUE, index.return=TRUE);
+        # Self is masked out, so topN[1] is already the nearest neighbor.
+        for(j in 1:k){
+            result[i, j+1] = column[as.integer(as.scalar(topN[j]))];
+        }
+    }
+}
+
+# Load the input frame (comma-separated CSV without a header row)
+corpus = read($input, data_type="frame", format="csv", sep=",", header=FALSE);
+# Train GloVe on the 4th input column; hyper-parameters come from script args
+emb = glove(corpus[,4], as.integer($seed), as.integer($vector_size),
+    as.double($alpha), as.double($eta), as.integer($x_max), as.double($tol),
+    as.integer($iterations), as.integer($print_loss_it),
+    as.integer($maxTokens), as.integer($windowSize), $distanceWeighting,
+    $symmetric);
+
+# Column 1 of the glove output is the word column, the rest the embeddings
+words = emb[,1];
+vectors = as.matrix(emb[,2:ncol(emb)]);
+
+# Compute the top-K similar words per word and write them out as CSV
+write(get_top(vectors, $topK, words), $out_result, data_type="frame", format="csv");
+

Reply via email to