Author: j16sdiz
Date: 2008-12-22 11:31:37 +0000 (Mon, 22 Dec 2008)
New Revision: 24733
Added:
trunk/plugins/XMLLibrarian/LibrarianHandler.java
trunk/plugins/XMLLibrarian/URIWrapper.java
Modified:
trunk/plugins/XMLLibrarian/XMLLibrarian.java
Log:
Convert URIWrapper and LibrarianHandler to top-level classes
Added: trunk/plugins/XMLLibrarian/LibrarianHandler.java
===================================================================
--- trunk/plugins/XMLLibrarian/LibrarianHandler.java
(rev 0)
+++ trunk/plugins/XMLLibrarian/LibrarianHandler.java 2008-12-22 11:31:37 UTC
(rev 24733)
@@ -0,0 +1,173 @@
+package plugins.XMLLibrarian;
+
+import java.util.HashMap;
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import freenet.support.Logger;
+
+/**
+ * Required for using SAX parser on XML indices. Handles the prefix, subIndex,
+ * files, keywords, word and file elements, and appends every file listed
+ * under the searched word to the list given to the constructor.
+ *
+ * @author swati
+ */
+public class LibrarianHandler extends DefaultHandler {
+    /** the word being searched for */
+    private String word;
+    /** maximum number of md5 digits used for creating subindices */
+    private int prefix;
+    /** set to true at a keywords element and to false at a files element */
+    private boolean processingWord;
+    /** md5 prefix of the subIndex element that matched the searched word */
+    private String prefix_match;
+
+    // now we need to adapt this to read subindexing
+    /** true while the current word element equals the searched word */
+    private boolean found_match;
+    /** file id -> uri */
+    private HashMap<String, String> uris;
+    /** file id -> title */
+    private HashMap<String, String> titles;
+    /** receives one entry per file listed under the searched word */
+    private List<URIWrapper> fileuris;
+
+    /**
+     * @param word the word to look up in the index
+     * @param fileuris list that the matching files are appended to
+     * @throws Exception never thrown by this body; declared as before for callers
+     */
+    public LibrarianHandler(String word, List<URIWrapper> fileuris) throws Exception {
+        // word was previously never stored, so no lookup could ever match
+        this.word = word;
+        this.fileuris = fileuris;
+    }
+
+    @Override
+    public void setDocumentLocator(Locator value) {
+        // the document location is not used
+    }
+
+    @Override
+    public void endDocument() throws SAXException {
+    }
+
+    /** Resets the per-document state. */
+    @Override
+    public void startDocument() throws SAXException {
+        found_match = false;
+        uris = new HashMap<String, String>();
+        titles = new HashMap<String, String>();
+    }
+
+    @Override
+    public void startElement(String nameSpaceURI, String localName, String rawName, Attributes attrs)
+            throws SAXException {
+        if (rawName == null) {
+            rawName = localName;
+        }
+        String elt_name = rawName;
+        /*
+         * Gives the maximum number of digits of md5 used for creating subindices
+         */
+        if (elt_name.equals("prefix")) {
+            prefix = Integer.parseInt(attrs.getValue("value"));
+        }
+        if (elt_name.equals("subIndex")) {
+            try {
+                String md5 = XMLLibrarian.MD5(word);
+                // here we need to match and see if any of the subindices match
+                // the required substring of the word.
+                for (int i = 0; i < prefix; i++) {
+                    if ((md5.substring(0, prefix - i)).equals(attrs.getValue("key"))) {
+                        setPrefix_match(md5.substring(0, prefix - i));
+                        Logger.normal(this, "match found " + getPrefix_match());
+                        Logger.minor(this, "word searched = " + word + " prefix matcheed = " + getPrefix_match());
+                        break;
+                    }
+                }
+            } catch (Exception e) {
+                Logger.error(this, "MD5 of the word" + word + "could not be calculated " + e.toString(), e);
+            }
+        }
+
+        if (elt_name.equals("files")) processingWord = false;
+        if (elt_name.equals("keywords")) processingWord = true;
+        /*
+         * looks for the word in the given subindex file
+         * if the word is found then the parser fetches the corresponding fileElements
+         */
+        if (elt_name.equals("word")) {
+            try {
+                found_match = false;
+                String match = attrs.getValue("v");
+                if (match.equals(word)) {
+                    found_match = true;
+                    // only report a match when there actually is one
+                    Logger.minor(this, "word searched = " + word + " matched");
+                }
+            } catch (Exception e) {
+                Logger.error(this, "word key doesn't match" + e.toString(), e);
+            }
+        }
+
+        if (elt_name.equals("file")) {
+            if (processingWord == true && found_match == true) {
+                URIWrapper uri = new URIWrapper();
+                try {
+                    String id = attrs.getValue("id");
+                    uri.URI = uris.get(id);
+                    Logger.minor(this, "word searched = " + word + " file id = " + uri.URI);
+                    if (uri.URI == null) {
+                        // never hand a result without a URI to the caller
+                        Logger.error(this, "Index format may be outdated, no file entry for id " + id);
+                        return;
+                    }
+                    synchronized (this) {
+                        if (titles.containsKey(id)) {
+                            uri.descr = titles.get(id);
+                            if ((uri.URI).equals(uri.descr)) uri.descr = "not available";
+                        } else {
+                            uri.descr = "not available";
+                        }
+                        fileuris.add(uri);
+                    }
+                } catch (Exception e) {
+                    Logger.error(this, "Index format may be outdated " + e.toString(), e);
+                }
+            } else if (processingWord == false) {
+                try {
+                    String id = attrs.getValue("id");
+                    String key = attrs.getValue("key");
+                    synchronized (this) {
+                        // a title is only read when at least three attributes are present
+                        if (attrs.getLength() >= 3) {
+                            try {
+                                titles.put(id, attrs.getValue("title"));
+                            } catch (Exception e) {
+                                Logger.error(this, "Index Format not compatible " + e.toString(), e);
+                            }
+                        }
+                        uris.put(id, key);
+                    }
+                } catch (Exception e) {
+                    Logger.error(this, "File id and key could not be retrieved. May be due to format clash", e);
+                }
+            }
+        }
+    }
+
+    public void setPrefix_match(String prefix_match) {
+        this.prefix_match = prefix_match;
+    }
+
+    public String getPrefix_match() {
+        return prefix_match;
+    }
+}
\ No newline at end of file
Added: trunk/plugins/XMLLibrarian/URIWrapper.java
===================================================================
--- trunk/plugins/XMLLibrarian/URIWrapper.java (rev 0)
+++ trunk/plugins/XMLLibrarian/URIWrapper.java 2008-12-22 11:31:37 UTC (rev
24733)
@@ -0,0 +1,28 @@
+package plugins.XMLLibrarian;
+
+/**
+ * Pairs the URI of an indexed file with its description. Ordering and
+ * equality are based on the URI only.
+ */
+class URIWrapper implements Comparable<URIWrapper> {
+    public String URI;
+    public String descr;
+
+    public int compareTo(URIWrapper o) {
+        return URI.compareTo(o.URI);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == null || o.getClass() != getClass())
+            return false;
+        String other = ((URIWrapper) o).URI;
+        return URI == null ? other == null : URI.equals(other);
+    }
+
+    /** Consistent with equals: derived from the URI only. */
+    @Override
+    public int hashCode() {
+        return URI == null ? 0 : URI.hashCode();
+    }
+}
\ No newline at end of file
Modified: trunk/plugins/XMLLibrarian/XMLLibrarian.java
===================================================================
--- trunk/plugins/XMLLibrarian/XMLLibrarian.java 2008-12-22 11:31:16 UTC
(rev 24732)
+++ trunk/plugins/XMLLibrarian/XMLLibrarian.java 2008-12-22 11:31:37 UTC
(rev 24733)
@@ -13,7 +13,6 @@
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
-import java.util.List;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilder;
@@ -31,10 +30,6 @@
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
-import org.xml.sax.Attributes;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
import freenet.client.FetchException;
import freenet.client.FetchResult;
@@ -754,167 +749,6 @@
this.test = true;
}
- private static class URIWrapper implements Comparable<URIWrapper> {
- public String URI;
- public String descr;
-
- public int compareTo(URIWrapper o) {
- return URI.compareTo(o.URI);
- }
-
- public boolean equals(Object o) {
- if (o == null || o.getClass() != getClass())
- return false;
- return URI.equals(((URIWrapper) o).URI);
- }
- }
-
- /**
- * Required for using SAX parser on XML indices
- * @author swati
- *
- */
- public static class LibrarianHandler extends DefaultHandler {
- private String word;
- private int prefix;
- private boolean processingWord;
- private String prefix_match;
-
- // now we need to adapt this to read subindexing
- private boolean found_match ;
- /** file id -> uri */
- private HashMap<String, String> uris;
- /** file id -> title */
- private HashMap<String, String> titles;
- private List<URIWrapper> fileuris;
-
- public LibrarianHandler(String word, List<URIWrapper> fileuris)
throws Exception {
- this.fileuris = fileuris;
- }
- public void setDocumentLocator(Locator value) {
-
- }
- public void endDocument() throws SAXException{}
-
- public void startDocument () throws SAXException
- {
- found_match = false;
- uris = new HashMap<String, String>();
- titles = new HashMap<String, String>();
- }
- public void startElement(String nameSpaceURI, String localName,
String rawName, Attributes attrs) throws SAXException {
- if (rawName == null) {
- rawName = localName;
- }
- String elt_name = rawName;
- /*
- * Gives the maximum number of digits of md5 used for
creating subindices
- */
- if(elt_name.equals("prefix")){
- prefix =
Integer.parseInt(attrs.getValue("value"));
- }
- if(elt_name.equals("subIndex")){
- try{
- String md5 = MD5(word);
- //here we need to match and see if any
of the subindices match the required substring of the word.
- for(int i=0;i<prefix;i++){
-
if((md5.substring(0,prefix-i)).equals(attrs.getValue("key"))){
-
setPrefix_match(md5.substring(0, prefix - i));
- Logger.normal(this,
"match found " + getPrefix_match());
- Logger.minor(this,
"word searched = " + word + " prefix matcheed = " + getPrefix_match());
- break;
- }
- }
- }
- catch(Exception e){Logger.error(this, "MD5 of
the word"+word+"could not be calculated "+e.toString(), e);}
- }
-
- if(elt_name.equals("files")) processingWord = false;
- if(elt_name.equals("keywords")) processingWord = true;
- /*
- * looks for the word in the given subindex file
- * if the word is found then the parser fetches the
corresponding fileElements
- */
- if(elt_name.equals("word")){
- try{
- found_match = false;
- String match = attrs.getValue("v");
- if(match.equals(word)) found_match =
true;
-
//if((attrs.getValue("v")).equals(word)) found_match = true;
- Logger.minor(this, "word searched =
"+word+" matched");
- }catch(Exception e){Logger.error(this, "word
key doesn't match"+e.toString(), e); }
- }
-
- if(elt_name.equals("file")){
-// try{
-// FileWriter outp = new
FileWriter("logfile",true);
-// outp.write("word searched = "+word+"
found_match = "+found_match+" processingWord "+processingWord+" \n");
-// outp.close();
-// }
-// catch(Exception e){
-
-// }
- if(processingWord == true && found_match ==
true){
- URIWrapper uri = new URIWrapper();
- try{
- uri.URI =
uris.get(attrs.getValue("id"));
- Logger.minor(this, "word
searched = "+word+" file id = "+uri.URI);
- //uri.descr = "not available";
- synchronized(this){
-
if(titles.containsKey(attrs.getValue("id")))
- {
- uri.descr =
titles.get(attrs.getValue("id"));
- if
((uri.URI).equals(uri.descr)) uri.descr = "not available";
- }
- else uri.descr = "not available";
-
- fileuris.add(uri);
- }
- }
- catch(Exception e){
- Logger.error(this, "Index
format may be outdated "+e.toString(), e);
- }
-
- }
- else if(processingWord == false){
- try{
- String id =
attrs.getValue("id");
- String key =
attrs.getValue("key");
- int l = attrs.getLength();
- String title;
- synchronized(this){
- if (l>=3 )
- {
- try{
- title =
attrs.getValue("title");
-//
FileWriter outp = new FileWriter("logfile",true);
-//
outp.write("found title "+title+" == \n");
-//
outp.close();
-
titles.put(id,title);
- }
- catch(Exception
e){
-
Logger.error(this, "Index Format not compatible "+e.toString(), e);
- }
- }
-
- uris.put(id,key);
- }
- //String[] words = (String[])
uris.values().toArray(new String[uris.size()]);
- }
- catch(Exception
e){Logger.error(this,"File id and key could not be retrieved. May be due to
format clash",e);}
- }
- }
- }
- public void setPrefix_match(String prefix_match) {
- this.prefix_match = prefix_match;
- }
-
- public String getPrefix_match() {
- return prefix_match;
- }
-
- }
-
private static String convertToHex(byte[] data) {
StringBuilder buf = new StringBuilder();
for (int i = 0; i < data.length; i++) {
_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs