Author: joern
Date: Tue Aug  4 07:50:26 2015
New Revision: 1694009

URL: http://svn.apache.org/r1694009
Log:
OPENNLP-791  Reads the mentioned clustering files, could also switch to 
objectstream. Thanks to Anthony Beylerian for providing a patch.

Added:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
   (with props)
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java
   (with props)
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
   (with props)
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
   (with props)

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java?rev=1694009&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
 Tue Aug  4 07:50:26 2015
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.contextclustering;
+
+import java.security.InvalidParameterException;
+
+import opennlp.tools.disambiguator.WSDParameters;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.util.Span;
+
+/**
+ * Implementation of the <b>Context Clustering</b> approach. This approach
+ * uses n-gram based clusters.
+ *
+ * This implementation is based on
+ * <a href="http://nlp.cs.rpi.edu/paper/wsd.pdf">this paper</a>.
+ *
+ * Both {@code disambiguate} methods are still unimplemented stubs that
+ * return {@code null}.
+ */
+public class ContextClusterer implements WSDisambiguator {
+
+  /** Parameters currently in use; unset until setParams is called. */
+  protected ContextClustererParameters params;
+
+  /**
+   * @return the parameters last installed through
+   *         {@link #setParams(WSDParameters)}, or {@code null} if none were
+   *         set yet
+   */
+  @Override
+  public WSDParameters getParams() {
+    return params;
+  }
+
+  /**
+   * Installs the parameters used by this disambiguator.
+   *
+   * NOTE(review): when {@code params} is {@code null} a default
+   * {@link ContextClustererParameters} is installed without validation; that
+   * default keeps {@code ngram} at 0, which its own {@code isValid()}
+   * rejects. Confirm whether a usable default is intended.
+   *
+   * @param params
+   *          the parameters to use, or {@code null} for the defaults
+   * @throws InvalidParameterException
+   *           if {@code params} is not valid, or is not a
+   *           {@link ContextClustererParameters}
+   */
+  @Override
+  public void setParams(WSDParameters params)
+      throws InvalidParameterException {
+    if (params == null) {
+      this.params = new ContextClustererParameters();
+      return;
+    }
+    if (!params.isValid()) {
+      throw new InvalidParameterException("wrong params");
+    }
+    // Reject foreign parameter types with the declared exception instead of
+    // letting the cast below fail with a ClassCastException.
+    if (!(params instanceof ContextClustererParameters)) {
+      throw new InvalidParameterException("wrong params: expected "
+          + ContextClustererParameters.class.getName() + " but got "
+          + params.getClass().getName());
+    }
+    this.params = (ContextClustererParameters) params;
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @param tokenizedContext
+   *          the tokens of the context
+   * @param ambiguousTokenIndex
+   *          index of the token to disambiguate
+   * @return always {@code null} for now
+   */
+  @Override
+  public String[] disambiguate(String[] tokenizedContext,
+      int ambiguousTokenIndex) {
+    // TODO Auto-generated method stub
+    return null;
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @param tokenizedContext
+   *          the tokens of the context
+   * @param ambiguousTokenIndexSpans
+   *          spans of the tokens to disambiguate
+   * @return always {@code null} for now
+   */
+  @Override
+  public String[][] disambiguate(String[] tokenizedContext,
+      Span[] ambiguousTokenIndexSpans) {
+    // TODO Auto-generated method stub
+    return null;
+  }
+
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java?rev=1694009&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java
 Tue Aug  4 07:50:26 2015
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.contextclustering;
+
+import opennlp.tools.disambiguator.WSDParameters;
+
+/**
+ * Parameter holder for the context clustering disambiguator. It carries a
+ * single setting, {@code ngram}.
+ *
+ * A freshly constructed instance has {@code ngram == 0} and is therefore not
+ * valid until {@link #setNgram(int)} is called with a positive value.
+ */
+public class ContextClustererParameters extends WSDParameters {
+
+  /** Presumably the n-gram order used for clustering; must be positive. */
+  protected int ngram;
+
+  /**
+   * @return {@code true} only when {@code ngram} is at least 1
+   */
+  @Override
+  public boolean isValid() {
+    return this.ngram >= 1;
+  }
+
+  /**
+   * @param ngram
+   *          the new value; stored as given, without validation
+   */
+  public void setNgram(int ngram) {
+    this.ngram = ngram;
+  }
+
+  /**
+   * @return the current {@code ngram} value
+   */
+  public int getNgram() {
+    return this.ngram;
+  }
+
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java?rev=1694009&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
 Tue Aug  4 07:50:26 2015
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.datareader;
+
+/**
+ * Plain data holder describing the membership of one phrase in one cluster.
+ * All fields are public and mutable; the constructors only initialise the
+ * cluster id and the similarity, leaving {@code phrase} and
+ * {@code phraseWords} as {@code null}.
+ */
+public class ClusterMembership {
+
+  /** Identifier of the cluster. */
+  public int clusterID;
+
+  /** Presumably the similarity of the phrase to the cluster centroid. */
+  public double centroidSimilarity;
+
+  /** The phrase this membership belongs to; not set by the constructors. */
+  public String phrase;
+
+  /** The words of {@link #phrase}; not set by the constructors. */
+  public String[] phraseWords;
+
+  /**
+   * Creates a membership with cluster id 0 and similarity 0.0, which are
+   * also the field defaults.
+   */
+  public ClusterMembership() {
+  }
+
+  /**
+   * @param clusterID
+   *          identifier of the cluster
+   * @param centroidSimilarity
+   *          similarity value to store
+   */
+  public ClusterMembership(int clusterID, double centroidSimilarity) {
+    this();
+    this.centroidSimilarity = centroidSimilarity;
+    this.clusterID = clusterID;
+  }
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java?rev=1694009&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
 Tue Aug  4 07:50:26 2015
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.datareader;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/**
+ * Reads phrase cluster files into a static in-memory map keyed by phrase.
+ *
+ * Each line is split on tabs: the first field is the phrase, and the
+ * remaining fields are consumed in pairs of (integer cluster id, decimal
+ * similarity). The map is only written to in this class; no accessor is
+ * defined here.
+ */
+public class ClustersReader {
+
+  /**
+   * Directory scanned by {@link #getNgramClusters(String)}. Written with
+   * forward slashes so it resolves on every platform, not only on Windows.
+   */
+  public static String path = "src/test/resources/phraseclusters/";
+
+  private static final HashMap<String, ArrayList<ClusterMembership>> map =
+      new HashMap<String, ArrayList<ClusterMembership>>();
+
+  /**
+   * Parses one cluster file and stores its entries in the map, replacing any
+   * earlier entry for the same phrase. I/O failures are reported on standard
+   * error and otherwise ignored.
+   *
+   * @param url
+   *          path of the file to read
+   * @throws NumberFormatException
+   *           if a cluster id or similarity field cannot be parsed
+   */
+  public void readFile(String url) {
+
+    File file = new File(url);
+
+    // NOTE(review): FileReader decodes with the platform default charset;
+    // confirm the encoding of the cluster files.
+    try (BufferedReader clusterList =
+        new BufferedReader(new FileReader(file))) {
+
+      String line;
+
+      while ((line = clusterList.readLine()) != null) {
+
+        String[] parts = line.split("\\t");
+
+        // Blank lines carry no phrase, and a separator-only line splits
+        // into an empty array, so parts[0] would be out of bounds.
+        if (line.isEmpty() || parts.length == 0) {
+          continue;
+        }
+
+        String phraseKey = parts[0];
+        String[] phraseWords = phraseKey.split("\\s");
+
+        ArrayList<ClusterMembership> memberships =
+            new ArrayList<ClusterMembership>();
+
+        // Require a complete (id, similarity) pair; an unpaired trailing
+        // field is ignored instead of overrunning the array.
+        for (int i = 1; i + 1 < parts.length; i += 2) {
+          ClusterMembership membership = new ClusterMembership(
+              Integer.parseInt(parts[i]), Double.parseDouble(parts[i + 1]));
+          membership.phrase = phraseKey;
+          membership.phraseWords = phraseWords;
+
+          memberships.add(membership);
+        }
+        map.put(phraseKey, memberships);
+      }
+    } catch (IOException e) {
+      System.err.println("Could not read cluster file " + url);
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Reads every regular file found directly inside {@link #path}.
+   *
+   * @param word
+   *          currently unused
+   * @return {@code true} if {@link #path} is a directory whose content could
+   *         be listed, {@code false} otherwise
+   */
+  public boolean getNgramClusters(String word) {
+
+    File folder = new File(path);
+    if (!folder.isDirectory()) {
+      return false;
+    }
+
+    // listFiles() returns null when the directory cannot be listed.
+    File[] files = folder.listFiles();
+    if (files == null) {
+      return false;
+    }
+
+    for (File file : files) {
+      // Sub-directories cannot be opened as cluster files.
+      if (file.isFile()) {
+        readFile(file.getAbsolutePath());
+      }
+    }
+
+    return true;
+
+  }
+
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain


Reply via email to