On Thu, Dec 11, 2008 at 11:37 PM, Matthew Toseland
<toad at amphibian.dyndns.org> wrote:
> On Thursday 11 December 2008 07:56, j16sdiz at freenetproject.org wrote:
>> Author: j16sdiz
>> Date: 2008-12-11 07:56:25 +0000 (Thu, 11 Dec 2008)
>> New Revision: 24197
>>
>> Modified:
>> trunk/plugins/XMLSpider/XMLSpider.java
>> Log:
>> use Long for position
>
> Any particular reason?
Integer has a limit of about 2G (2^31 - 1) positions.
A non-dictionary-based Chinese tokenizer can generate more tokens than
the length of the document, and I am planning to write one after the db4o
and some UI changes...
Hmmm... maybe we should drop the document if it grows that large...
>> Modified: trunk/plugins/XMLSpider/XMLSpider.java
>> ===================================================================
>> --- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-11 07:56:15 UTC (rev
> 24196)
>> +++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-11 07:56:25 UTC (rev
> 24197)
>> @@ -182,9 +182,9 @@
>> private static final String indexOwnerEmail = null;
>>
>> // private final HashMap lastPositionByURI = new HashMap(); /* String
> (URI) -> Integer */ /* Use to determine word position on each uri */
>> - private final HashMap<Long, Integer> lastPositionById = new
>> HashMap<Long,
> Integer>();
>> + private final HashMap<Long, Long> lastPositionById = new HashMap<Long,
> Long>();
>> // private final HashMap positionsByWordByURI = new HashMap(); /* String
> (URI) -> HashMap (String (word) -> Integer[] (Positions)) */
>> - private final HashMap<Long, HashMap<String, Integer[]>>
> positionsByWordById = new HashMap<Long, HashMap<String, Integer[]>>();
>> + private final HashMap<Long, HashMap<String, Long[]>>
>> positionsByWordById =
> new HashMap<Long, HashMap<String, Long[]>>();
>> // Can have many; this limit only exists to save memory.
>> private static final int maxParallelRequests = 100;
>> private int maxShownURIs = 15;
>> @@ -739,8 +739,8 @@
>>
>> /* Position by position */
>>
>> - HashMap<String, Integer[]>
>> positionsForGivenWord =
> positionsByWordById.get(x);
>> - Integer[] positions =
>> (Integer[])positionsForGivenWord.get(str);
>> + HashMap<String, Long[]> positionsForGivenWord =
> positionsByWordById.get(x);
>> + Long[] positions =
>> positionsForGivenWord.get(str);
>> StringBuilder positionList = new
>> StringBuilder();
>>
>> for(int k=0; k < positions.length ; k++) {
>> @@ -1257,11 +1257,11 @@
>> * FIXME - replace with a real tokenizor
>> */
>> String[] words = s.split("[^\\p{L}\\{N}]");
>> - Integer lastPosition = null;
>> + Long lastPosition = null;
>> lastPosition = lastPositionById.get(page.id);
>>
>> if(lastPosition == null)
>> - lastPosition = 1;
>> + lastPosition = 1L;
>> for (int i = 0; i < words.length; i++) {
>> String word = words[i];
>> if ((word == null) || (word.length() == 0))
>> @@ -1284,7 +1284,7 @@
>>
>> }
>>
>> - private void addWord(String word, int position, Long id)
>> throws Exception
> {
>> + private void addWord(String word, long position, Long id)
>> throws
> Exception {
>> synchronized(XMLSpider.this) {
>> if(word.length() < 3)
>> return;
>> @@ -1293,7 +1293,7 @@
>> idsWithWords.add(id);
>>
>> /* Word position indexation */
>> - HashMap<String, Integer[]> wordPositionsForOneUri =
> positionsByWordById.get(id); /*
>> + HashMap<String, Long[]> wordPositionsForOneUri =
> positionsByWordById.get(id); /*
>>
>>
>> * For
>>
>>
>> * a
>>
>>
>> * given
>> @@ -1310,18 +1310,18 @@
>>
>>
>> * position
>>
>>
>> */
>> if(wordPositionsForOneUri == null) {
>> - wordPositionsForOneUri = new HashMap<String,
>> Integer[]>();
>> - wordPositionsForOneUri.put(word, new Integer[]
>> { position });
>> + wordPositionsForOneUri = new HashMap<String,
>> Long[]>();
>> + wordPositionsForOneUri.put(word, new Long[] {
>> position });
>> positionsByWordById.put(id,
>> wordPositionsForOneUri);
>> }
>> else {
>> - Integer[] positions =
>> wordPositionsForOneUri.get(word);
>> + Long[] positions =
>> wordPositionsForOneUri.get(word);
>> if(positions == null) {
>> - positions = new Integer[] { position };
>> + positions = new Long[] { position };
>> wordPositionsForOneUri.put(word,
>> positions);
>> }
>> else {
>> - Integer[] newPositions = new
>> Integer[positions.length + 1];
>> + Long[] newPositions = new
>> Long[positions.length + 1];
>> System.arraycopy(positions, 0,
>> newPositions, 0, positions.length);
>> newPositions[positions.length] =
>> position;
>> wordPositionsForOneUri.put(word,
>> newPositions);
>>
>> _______________________________________________
>> cvs mailing list
>> cvs at freenetproject.org
>> http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs
>>
>>
>
> _______________________________________________
> Devl mailing list
> Devl at freenetproject.org
> http://emu.freenetproject.org/cgi-bin/mailman/listinfo/devl
>