http://www.mediawiki.org/wiki/Special:Code/MediaWiki/73242
Revision: 73242
Author: daniel
Date: 2010-09-17 19:27:21 +0000 (Fri, 17 Sep 2010)
Log Message:
-----------
implement forward matching disambiguation
Modified Paths:
--------------
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
Added Paths:
-----------
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java
Modified:
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
===================================================================
---
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
2010-09-17 19:05:58 UTC (rev 73241)
+++
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
2010-09-17 19:27:21 UTC (rev 73242)
@@ -52,7 +52,7 @@
return features;
}
- public ConceptFeatures<C, Integer> getFeature(int concept) {
+ public ConceptFeatures<C, Integer> getConceptFeatures(int
concept) {
return getFeatures().get(concept);
}
@@ -252,7 +252,7 @@
return disambiguate(sequences, root, meanings, context);
}
- public <X extends T>CoherenceDisambiguation<X, C>
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<?
extends C>> meanings, Collection<? extends C> context) throws
PersistenceException {
+ protected <X extends T>CoherenceDisambiguation<X, C>
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<?
extends C>> meanings, Collection<? extends C> context) throws
PersistenceException {
LabeledMatrix<C, C> similarities = new MapLabeledMatrix<C,
C>(true);
FeatureFetcher<C, Integer> features = getFeatureCache(meanings,
context);
@@ -570,4 +570,8 @@
return value;
}
+ public boolean exploresAllSequences() {
+ return true; // NOTE(review): presumably true because disambiguate() is handed the full collection of candidate sequences -- confirm
+ }
+
}
Modified:
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java
===================================================================
---
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java
2010-09-17 19:05:58 UTC (rev 73241)
+++
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java
2010-09-17 19:27:21 UTC (rev 73242)
@@ -169,4 +169,6 @@
public <X extends T>Disambiguation<X, C> disambiguate(List<X> terms,
Collection<? extends C> context) throws PersistenceException;
public <X extends T>Disambiguation<X, C> disambiguate(PhraseNode<X>
root, Collection<? extends C> context) throws PersistenceException;
+ public boolean exploresAllSequences(); // NOTE(review): judging by the name and the implementations in this change, reports whether every candidate term sequence is considered -- confirm
+
}
\ No newline at end of file
Added:
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java
===================================================================
---
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java
(rev 0)
+++
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java
2010-09-17 19:27:21 UTC (rev 73242)
@@ -0,0 +1,96 @@
+package de.brightbyte.wikiword.disambig;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import de.brightbyte.data.Functors;
+import de.brightbyte.data.measure.Measure;
+import de.brightbyte.wikiword.model.PhraseNode;
+import de.brightbyte.wikiword.model.TermListNode;
+import de.brightbyte.wikiword.model.TermReference;
+import de.brightbyte.wikiword.model.WikiWordConcept;
+
+/**
+ * Popularity-based disambiguator that walks the phrase graph from the root
+ * forward. At each node it commits to the one successor whose best meaning
+ * yields the highest combined popularity/weight score, instead of scoring
+ * every complete sequence.
+ */
+public class ForwardPopularityDisambiguator<T extends TermReference, C extends WikiWordConcept> extends PopularityDisambiguator<T, C> {
+
+    public ForwardPopularityDisambiguator(MeaningFetcher<? extends C> meaningFetcher,
+                int cacheCapacity) {
+        super(meaningFetcher, cacheCapacity);
+    }
+
+    public ForwardPopularityDisambiguator(MeaningFetcher<? extends C> meaningFetcher,
+                int cacheCapacity, Measure<? super C> popularityMeasure) {
+        super(meaningFetcher, cacheCapacity, popularityMeasure);
+    }
+
+    /**
+     * Wraps the flat term list in a {@link TermListNode} and disambiguates it
+     * the same way as a phrase graph.
+     */
+    @Override
+    public <X extends T> Disambiguator.Disambiguation<X, C> disambiguate(List<X> sequence, Map<X, List<? extends C>> meanings, Collection<? extends C> context) {
+        PhraseNode<X> root = new TermListNode<X>( sequence, 0 );
+        return disambiguate(root, meanings, context);
+    }
+
+    /**
+     * Greedily follows the phrase graph starting at {@code root}.
+     *
+     * @param root     node whose successors are the first candidate terms
+     * @param meanings candidate meanings per term
+     * @param context  not used by this implementation
+     * @return the chosen term sequence with one meaning per chosen term; the
+     *         score is the mean combined score of the chosen terms
+     */
+    @Override
+    public <X extends T> Disambiguator.Disambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) {
+        Map<X, C> disambig = new HashMap<X, C>();
+        List<X> sequence = new ArrayList<X>();
+
+        double totalScore = 0;
+        double totalPop = 0;
+
+        PhraseNode<X> node = root;
+        while ( true ) {
+            Collection<? extends PhraseNode<X>> terms = node.getSuccessors();
+            if ( terms==null || terms.isEmpty() ) break;
+
+            // pick the combination of term/meaning with the best combined popularity-weight
+            double bestScore = 0;
+            double bestPop = 0;
+            C bestMeaning = null;
+            PhraseNode<X> next = null;
+
+            for (PhraseNode<X> n: terms) {
+                C m = getBestMeaning(n.getTermReference(), meanings, popularityMeasure);
+                if ( m==null ) continue;
+
+                double pop = popularityMeasure.measure(m);
+                double score = weigthCombiner.apply(pop, n.getTermReference().getWeight());
+
+                if ( bestMeaning == null || bestScore<score) {
+                    bestScore = score;
+                    bestPop = pop;
+                    bestMeaning = m;
+                    next = n;
+                }
+            }
+
+            if ( bestMeaning == null ) {
+                // if no term had a best meaning, pick the term with the smallest weight to skip, and carry on.
+                // Seeding with the first successor guarantees progress even if every
+                // weight is NaN or infinite (the comparison below is then always false,
+                // which would otherwise leave the walk stuck on the same node forever).
+                double minWeight = Double.POSITIVE_INFINITY;
+                for (PhraseNode<X> n: terms) {
+                    double w = n.getTermReference().getWeight();
+
+                    if ( next == null || minWeight>w ) {
+                        minWeight = w;
+                        next = n;
+                    }
+                }
+            } else {
+                totalPop += bestPop;
+                totalScore += bestScore;
+                disambig.put( next.getTermReference(), bestMeaning );
+                sequence.add( next.getTermReference() );
+            }
+
+            node = next;
+        }
+
+        // NOTE: skipped (unknown) terms are never added to the sequence, so the
+        // mean is taken over the resolved terms only
+        if (disambig.size()>0) totalScore = totalScore / sequence.size();
+
+        Disambiguation<X, C> r = new Disambiguation<X, C>(disambig, sequence, totalScore, "score="+totalScore+"; pop="+totalPop);
+        return r;
+    }
+
+    /** This implementation follows a single greedy path, so it reports false. */
+    @Override
+    public boolean exploresAllSequences() {
+        return false;
+    }
+
+}
Modified:
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
===================================================================
---
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
2010-09-17 19:05:58 UTC (rev 73241)
+++
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
2010-09-17 19:27:21 UTC (rev 73242)
@@ -9,7 +9,6 @@
import de.brightbyte.data.Functor;
import de.brightbyte.data.Functor2;
import de.brightbyte.data.measure.Measure;
-import de.brightbyte.data.measure.Measure.Comparator;
import de.brightbyte.wikiword.model.PhraseNode;
import de.brightbyte.wikiword.model.TermReference;
import de.brightbyte.wikiword.model.WikiWordConcept;
@@ -17,16 +16,15 @@
public class PopularityDisambiguator<T extends TermReference, C extends
WikiWordConcept> extends AbstractDisambiguator<T, C> {
protected Measure<? super C> popularityMeasure;
- protected Comparator<? super C> popularityComparator;
protected Functor.Double weightBooster = SquareBooster.instance;
protected Functor2.Double weigthCombiner = new ProductCombiner();
//NOTE: pop and weight are not in the same scale.
- public PopularityDisambiguator(MeaningFetcher<C> meaningFetcher, int
cacheCapacity) {
+ public PopularityDisambiguator(MeaningFetcher<? extends C>
meaningFetcher, int cacheCapacity) {
this(meaningFetcher, cacheCapacity,
WikiWordConcept.theCardinality);
}
- public PopularityDisambiguator(MeaningFetcher<C> meaningFetcher, int
cacheCapacity, Measure<? super C> popularityMeasure) {
+ public PopularityDisambiguator(MeaningFetcher<? extends C>
meaningFetcher, int cacheCapacity, Measure<? super C> popularityMeasure) {
super(meaningFetcher, cacheCapacity);
this.setPopularityMeasure(popularityMeasure);
@@ -38,7 +36,6 @@
public void setPopularityMeasure(Measure<? super C> popularityMeasure) {
this.popularityMeasure = popularityMeasure;
- this.popularityComparator = new
Measure.Comparator<C>(popularityMeasure, true);
}
public void setWeightCombiner(Functor2.Double weightCombiner) {
@@ -66,7 +63,7 @@
return disambiguate(sequences, root, meanings, context);
}
- public <X extends T>Disambiguation<X, C>
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<?
extends C>> meanings, Collection<? extends C> context) {
+ protected <X extends T>Disambiguation<X, C>
disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<?
extends C>> meanings, Collection<? extends C> context) {
Disambiguation<X, C> best = null;
pruneMeaninglessSequences( sequences, meanings );
@@ -83,6 +80,25 @@
return best;
}
+    /**
+     * Picks the meaning of the given term that scores highest under the given measure.
+     *
+     * @param term     the term to look up in {@code meanings}
+     * @param meanings candidate meanings per term
+     * @param measure  measure used to rank the candidates
+     * @return the highest-scoring candidate (the first one on ties), or null
+     *         if the term has no candidate meanings
+     */
+    protected <X extends T> C getBestMeaning(X term, Map<X, List<? extends C>> meanings, Measure<? super C> measure) {
+        List<? extends C> m = meanings.get(term);
+        if (m==null || m.size()==0) return null;
+
+        C best = null;
+        double bestPop = 0;
+
+        for (C c: m) {
+            double pop = measure.measure(c);
+            if ( best==null || pop>bestPop ) {
+                bestPop = pop;
+                best = c;
+            }
+        }
+
+        // return the winner of the scan above, not simply the first list entry:
+        // the list is no longer sorted by popularity before this point
+        return best;
+    }
+
public <X extends T>Disambiguation<X, C> disambiguate(List<X> sequence,
Map<X, List<? extends C>> meanings, Collection<? extends C> context) {
if (sequence.isEmpty() || meanings.isEmpty()) return new
Disambiguator.Disambiguation<X, C>(Collections.<X, C>emptyMap(),
Collections.<X>emptyList(), 0.0, "no terms or meanings");
@@ -91,12 +107,9 @@
int totalPop = 0;
for (X t: sequence) {
- List<? extends C> m = meanings.get(t);
- if (m==null || m.size()==0) continue;
+ C c = getBestMeaning(t, meanings, popularityMeasure);
+ if ( c==null ) continue;
- if (m.size()>1) Collections.sort(m,
popularityComparator);
-
- C c = m.get(0);
disambig.put(t, c);
double pop = popularityMeasure.measure(c);
@@ -113,4 +126,8 @@
return r;
}
+ public boolean exploresAllSequences() {
+ return true; // NOTE(review): presumably true because disambiguate() picks the best out of the full collection of candidate sequences -- confirm
+ }
+
}
Modified:
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
===================================================================
---
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
2010-09-17 19:05:58 UTC (rev 73241)
+++
trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
2010-09-17 19:27:21 UTC (rev 73242)
@@ -173,4 +173,8 @@
this.window = window;
}
+ public boolean exploresAllSequences() {
+ return true; //XXX: really true?... not *all*, but still too many?
+ }
+
}
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs