On Thu, Dec 11, 2008 at 11:37 PM, Matthew Toseland
<toad at amphibian.dyndns.org> wrote:
> On Thursday 11 December 2008 07:56, j16sdiz at freenetproject.org wrote:
>> Author: j16sdiz
>> Date: 2008-12-11 07:56:25 +0000 (Thu, 11 Dec 2008)
>> New Revision: 24197
>>
>> Modified:
>>    trunk/plugins/XMLSpider/XMLSpider.java
>> Log:
>> use Long for position
>
> Any particular reason?

Integer has a 2G limit (maximum value 2^31 - 1).

A non-dictionary-based Chinese tokenizer can generate more tokens than
the length of the document, and I am planning to write one after the db4o
and some UI changes...

hmmm.... maybe we should drop that document if it grows that large....


>> Modified: trunk/plugins/XMLSpider/XMLSpider.java
>> ===================================================================
>> --- trunk/plugins/XMLSpider/XMLSpider.java    2008-12-11 07:56:15 UTC (rev
> 24196)
>> +++ trunk/plugins/XMLSpider/XMLSpider.java    2008-12-11 07:56:25 UTC (rev
> 24197)
>> @@ -182,9 +182,9 @@
>>       private static final String indexOwnerEmail = null;
>>
>>  //   private final HashMap lastPositionByURI = new HashMap(); /* String
> (URI) -> Integer */ /* Use to determine word position on each uri */
>> -     private final HashMap<Long, Integer> lastPositionById = new 
>> HashMap<Long,
> Integer>();
>> +     private final HashMap<Long, Long> lastPositionById = new HashMap<Long,
> Long>();
>>  //   private final HashMap positionsByWordByURI = new HashMap(); /* String
> (URI) -> HashMap (String (word) -> Integer[] (Positions)) */
>> -     private final HashMap<Long, HashMap<String, Integer[]>>
> positionsByWordById = new HashMap<Long, HashMap<String, Integer[]>>();
>> +     private final HashMap<Long, HashMap<String, Long[]>> 
>> positionsByWordById =
> new HashMap<Long, HashMap<String, Long[]>>();
>>       // Can have many; this limit only exists to save memory.
>>       private static final int maxParallelRequests = 100;
>>       private int maxShownURIs = 15;
>> @@ -739,8 +739,8 @@
>>
>>                               /* Position by position */
>>
>> -                             HashMap<String, Integer[]> 
>> positionsForGivenWord =
> positionsByWordById.get(x);
>> -                             Integer[] positions = 
>> (Integer[])positionsForGivenWord.get(str);
>> +                             HashMap<String, Long[]> positionsForGivenWord =
> positionsByWordById.get(x);
>> +                             Long[] positions = 
>> positionsForGivenWord.get(str);
>>                               StringBuilder positionList = new 
>> StringBuilder();
>>
>>                               for(int k=0; k < positions.length ; k++) {
>> @@ -1257,11 +1257,11 @@
>>                        * FIXME - replace with a real tokenizor
>>                        */
>>                       String[] words = s.split("[^\\p{L}\\{N}]");
>> -                     Integer lastPosition = null;
>> +                     Long lastPosition = null;
>>                       lastPosition = lastPositionById.get(page.id);
>>
>>                       if(lastPosition == null)
>> -                             lastPosition = 1;
>> +                             lastPosition = 1L;
>>                       for (int i = 0; i < words.length; i++) {
>>                               String word = words[i];
>>                               if ((word == null) || (word.length() == 0))
>> @@ -1284,7 +1284,7 @@
>>
>>               }
>>
>> -             private void addWord(String word, int position, Long id) 
>> throws Exception
> {
>> +             private void addWord(String word, long position, Long id) 
>> throws
> Exception {
>>                       synchronized(XMLSpider.this) {
>>                       if(word.length() < 3)
>>                               return;
>> @@ -1293,7 +1293,7 @@
>>                       idsWithWords.add(id);
>>
>>                       /* Word position indexation */
>> -                     HashMap<String, Integer[]> wordPositionsForOneUri =
> positionsByWordById.get(id); /*
>> +                     HashMap<String, Long[]> wordPositionsForOneUri =
> positionsByWordById.get(id); /*
>>                                                                              
>>                                                                              
>>                                      * For
>>                                                                              
>>                                                                              
>>                                      * a
>>                                                                              
>>                                                                              
>>                                      * given
>> @@ -1310,18 +1310,18 @@
>>                                                                              
>>                                                                              
>>                                      * position
>>                                                                              
>>                                                                              
>>                                      */
>>                       if(wordPositionsForOneUri == null) {
>> -                             wordPositionsForOneUri = new HashMap<String, 
>> Integer[]>();
>> -                             wordPositionsForOneUri.put(word, new Integer[] 
>> { position });
>> +                             wordPositionsForOneUri = new HashMap<String, 
>> Long[]>();
>> +                             wordPositionsForOneUri.put(word, new Long[] { 
>> position });
>>                               positionsByWordById.put(id, 
>> wordPositionsForOneUri);
>>                       }
>>                       else {
>> -                             Integer[] positions = 
>> wordPositionsForOneUri.get(word);
>> +                             Long[] positions = 
>> wordPositionsForOneUri.get(word);
>>                               if(positions == null) {
>> -                                     positions = new Integer[] { position };
>> +                                     positions = new Long[] { position };
>>                                       wordPositionsForOneUri.put(word, 
>> positions);
>>                               }
>>                               else {
>> -                                     Integer[] newPositions = new 
>> Integer[positions.length + 1];
>> +                                     Long[] newPositions = new 
>> Long[positions.length + 1];
>>                                       System.arraycopy(positions, 0, 
>> newPositions, 0, positions.length);
>>                                       newPositions[positions.length] = 
>> position;
>>                                       wordPositionsForOneUri.put(word, 
>> newPositions);
>>
>> _______________________________________________
>> cvs mailing list
>> cvs at freenetproject.org
>> http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs
>>
>>
>
> _______________________________________________
> Devl mailing list
> Devl at freenetproject.org
> http://emu.freenetproject.org/cgi-bin/mailman/listinfo/devl
>

Reply via email to