http://www.mediawiki.org/wiki/Special:Code/MediaWiki/73242

Revision: 73242
Author:   daniel
Date:     2010-09-17 19:27:21 +0000 (Fri, 17 Sep 2010)

Log Message:
-----------
implement forward matching disambiguation

Modified Paths:
--------------
    trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
    trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java
    trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
    trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java

Added Paths:
-----------
    trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java   2010-09-17 19:05:58 UTC (rev 73241)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java   2010-09-17 19:27:21 UTC (rev 73242)
@@ -52,7 +52,7 @@
                        return features;
                }
                
-               public ConceptFeatures<C, Integer> getFeature(int concept) {
+               public ConceptFeatures<C, Integer> getConceptFeatures(int 
concept) {
                        return getFeatures().get(concept);
                }
                
@@ -252,7 +252,7 @@
                return disambiguate(sequences, root, meanings, context);
        }
        
-       public <X extends T>CoherenceDisambiguation<X, C> 
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? 
extends C>> meanings, Collection<? extends C> context) throws 
PersistenceException {
+       protected <X extends T>CoherenceDisambiguation<X, C> 
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? 
extends C>> meanings, Collection<? extends C> context) throws 
PersistenceException {
                LabeledMatrix<C, C> similarities = new MapLabeledMatrix<C, 
C>(true);
                FeatureFetcher<C, Integer> features = getFeatureCache(meanings, 
context); 
 
@@ -570,4 +570,8 @@
                return value;
        }
 
+       public boolean exploresAllSequences() {
+               return true;
+       }
+       
 }

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java    2010-09-17 19:05:58 UTC (rev 73241)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java    2010-09-17 19:27:21 UTC (rev 73242)
@@ -169,4 +169,6 @@
        public <X extends T>Disambiguation<X, C> disambiguate(List<X> terms, 
Collection<? extends C> context) throws PersistenceException;
        public <X extends T>Disambiguation<X, C> disambiguate(PhraseNode<X> 
root, Collection<? extends C> context) throws PersistenceException;
 
+       public boolean exploresAllSequences();
+
 }
\ No newline at end of file

Added: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java                           (rev 0)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java   2010-09-17 19:27:21 UTC (rev 73242)
@@ -0,0 +1,96 @@
+package de.brightbyte.wikiword.disambig;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import de.brightbyte.data.Functors;
+import de.brightbyte.data.measure.Measure;
+import de.brightbyte.wikiword.model.PhraseNode;
+import de.brightbyte.wikiword.model.TermListNode;
+import de.brightbyte.wikiword.model.TermReference;
+import de.brightbyte.wikiword.model.WikiWordConcept;
+
+public class ForwardPopularityDisambiguator<T extends TermReference, C extends 
WikiWordConcept> extends PopularityDisambiguator<T, C> {
+
+       public ForwardPopularityDisambiguator(MeaningFetcher<? extends C> 
meaningFetcher,
+                       int cacheCapacity) {
+               super(meaningFetcher, cacheCapacity);
+       }
+
+       public ForwardPopularityDisambiguator(MeaningFetcher<? extends C> 
meaningFetcher,
+                       int cacheCapacity, Measure<? super C> 
popularityMeasure) {
+               super(meaningFetcher, cacheCapacity, popularityMeasure);
+       }
+
+       @Override
+       public <X extends T> Disambiguator.Disambiguation<X, C> 
disambiguate(List<X> sequence, Map<X, List<? extends C>> meanings, Collection<? 
extends C> context) {
+               PhraseNode<X> root = new TermListNode<X>( sequence, 0 );
+               return disambiguate(root, meanings, context);
+       }
+
+       @Override
+       public <X extends T> Disambiguator.Disambiguation<X, C> 
disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, 
Collection<? extends C> context) {
+               Map<X, C> disambig = new HashMap<X, C>();
+               List<X> sequence  = new ArrayList<X>();
+               
+               double totalScore = 0;
+               double totalPop = 0;
+               
+               PhraseNode<X> node = root; 
+               while ( true ) {
+                       Collection<? extends PhraseNode<X>> terms = 
node.getSuccessors();
+                       if ( terms==null  || terms.isEmpty() ) break;
+                       
+                       // pick the combination of term/meaning with the best 
combined popularity-weight 
+                       double bestScore = 0;
+                       double bestPop = 0;
+                       C bestMeaning = null;
+                       
+                       for (PhraseNode<X> n: terms) {
+                               C m = getBestMeaning(n.getTermReference(), 
meanings, popularityMeasure);
+                               if ( m==null ) continue;
+                               
+                               double pop = popularityMeasure.measure(m);
+                               double score = weigthCombiner.apply(pop, 
n.getTermReference().getWeight());
+                               
+                               if ( bestMeaning == null || bestScore<score) {
+                                       bestScore = score;
+                                       bestPop = pop;
+                                       bestMeaning = m;
+                                       node = n;
+                               }
+                       }
+                       
+                       if ( bestMeaning == null ) {
+                               // if no term had a best meaning, pick the term 
with the smallest weight to skip, and bugger on.  
+                               double minWeight = Double.POSITIVE_INFINITY;
+                               for (PhraseNode<X> n: terms) {
+                                       double w = 
n.getTermReference().getWeight();
+                                       
+                                       if ( minWeight>w ) {
+                                               minWeight = w;
+                                               node = n;
+                                       }
+                               }
+                       } else  {
+                               totalPop += bestPop;
+                               totalScore += bestScore;
+                               disambig.put( node.getTermReference(), 
bestMeaning );
+                               sequence.add( node.getTermReference() );
+                       }
+               }
+               
+               if (disambig.size()>0) totalScore = totalScore / 
sequence.size(); //NOTE: treat unknown terms as having pop = 0
+               
+               Disambiguation<X, C> r = new Disambiguation<X, C>(disambig, 
sequence, totalScore, "score="+totalScore+"; pop="+totalPop);
+               return r;
+       }
+
+       public boolean exploresAllSequences() {
+               return false;
+       }
+
+}

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java  2010-09-17 19:05:58 UTC (rev 73241)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java  2010-09-17 19:27:21 UTC (rev 73242)
@@ -9,7 +9,6 @@
 import de.brightbyte.data.Functor;
 import de.brightbyte.data.Functor2;
 import de.brightbyte.data.measure.Measure;
-import de.brightbyte.data.measure.Measure.Comparator;
 import de.brightbyte.wikiword.model.PhraseNode;
 import de.brightbyte.wikiword.model.TermReference;
 import de.brightbyte.wikiword.model.WikiWordConcept;
@@ -17,16 +16,15 @@
 public class PopularityDisambiguator<T extends TermReference, C extends 
WikiWordConcept> extends AbstractDisambiguator<T, C> {
        
        protected Measure<? super C> popularityMeasure;
-       protected Comparator<? super C> popularityComparator;
        
        protected Functor.Double weightBooster = SquareBooster.instance; 
        protected Functor2.Double weigthCombiner = new ProductCombiner(); 
//NOTE: pop and weight are not in the same scale.
        
-       public PopularityDisambiguator(MeaningFetcher<C> meaningFetcher, int 
cacheCapacity) {
+       public PopularityDisambiguator(MeaningFetcher<? extends C> 
meaningFetcher, int cacheCapacity) {
                this(meaningFetcher, cacheCapacity, 
WikiWordConcept.theCardinality);
        }
        
-       public PopularityDisambiguator(MeaningFetcher<C> meaningFetcher, int 
cacheCapacity, Measure<? super C> popularityMeasure) {
+       public PopularityDisambiguator(MeaningFetcher<? extends C> 
meaningFetcher, int cacheCapacity, Measure<? super C> popularityMeasure) {
                super(meaningFetcher, cacheCapacity);
                
                this.setPopularityMeasure(popularityMeasure);
@@ -38,7 +36,6 @@
 
        public void setPopularityMeasure(Measure<? super C> popularityMeasure) {
                this.popularityMeasure = popularityMeasure;
-               this.popularityComparator = new 
Measure.Comparator<C>(popularityMeasure, true);
        }
 
        public void setWeightCombiner(Functor2.Double weightCombiner) {
@@ -66,7 +63,7 @@
                return disambiguate(sequences, root, meanings, context);
        }
        
-       public <X extends T>Disambiguation<X, C> 
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? 
extends C>> meanings, Collection<? extends C> context) {
+       protected <X extends T>Disambiguation<X, C> 
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? 
extends C>> meanings, Collection<? extends C> context) {
                Disambiguation<X, C> best = null;
                
                pruneMeaninglessSequences( sequences, meanings );
@@ -83,6 +80,25 @@
                return best;
        }
        
+       protected <X extends T> C getBestMeaning(X term, Map<X, List<? extends 
C>> meanings, Measure<? super C> measure) {
+               // Returns the meaning of term with the highest measure value, or null if term has no meanings.
+               List<? extends C> m = meanings.get(term);
+               if (m==null || m.size()==0) return null;
+               
+               C best = null;
+               double bestPop = 0;
+               
+               for (C c: m) {
+                       double pop = measure.measure(c);
+                       if ( best==null || pop>bestPop ) {
+                               bestPop = pop;
+                               best = c;
+                       }
+               }
+               
+               return best; // not m.get(0): the list is no longer sorted by popularity, so index 0 is arbitrary
+       }
+       
        public <X extends T>Disambiguation<X, C> disambiguate(List<X> sequence, 
Map<X, List<? extends C>> meanings, Collection<? extends C> context) {
                if (sequence.isEmpty() || meanings.isEmpty()) return new 
Disambiguator.Disambiguation<X, C>(Collections.<X, C>emptyMap(), 
Collections.<X>emptyList(), 0.0, "no terms or meanings");
 
@@ -91,12 +107,9 @@
                int totalPop = 0;
                
                for (X t: sequence) {
-                       List<? extends C> m = meanings.get(t);
-                       if (m==null || m.size()==0) continue;
+                       C c = getBestMeaning(t, meanings, popularityMeasure);
+                       if ( c==null ) continue;
                        
-                       if (m.size()>1) Collections.sort(m, 
popularityComparator);
-                       
-                       C c = m.get(0);
                        disambig.put(t, c);
 
                        double pop = popularityMeasure.measure(c);
@@ -113,4 +126,8 @@
                return r;
        }
 
+       public boolean exploresAllSequences() {
+               return true;
+       }
+
 }

Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
===================================================================
--- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java    2010-09-17 19:05:58 UTC (rev 73241)
+++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java    2010-09-17 19:27:21 UTC (rev 73242)
@@ -173,4 +173,8 @@
                this.window = window;
        }
 
+       public boolean exploresAllSequences() {
+               return true; //XXX: really true?... not *all* but still too many?
+       }
+
 }



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to