PeterOeClausen commented on issue #784:
URL: https://github.com/apache/lucenenet/issues/784#issuecomment-1359280385

   Created a Console Application, installed Bogus and Lucene.
   I see very different results running `writer.AddDocument(document)` versus 
`writer.AddDocuments(documents)`:
   
   ```cs
   using Bogus;
   using Lucene.Net.Analysis.Core;
   using Lucene.Net.Documents;
   using Lucene.Net.Index;
   using Lucene.Net.Store;
   using Lucene.Net.Util;
   using static ConsoleAppForPrototyping.Program;
   
   namespace ConsoleAppForPrototyping {
       /// <summary>
       /// Console prototype that builds a Lucene.NET index from one million
       /// Bogus-generated rows, adding each document to the writer individually
       /// via <c>IndexWriter.AddDocument</c>.
       /// </summary>
       public class Program
       {
           /// <summary>
           /// Builds the index, reports completion, then blocks indefinitely so
           /// the process stays alive.
           /// </summary>
           /// <param name="args">Command-line arguments (unused).</param>
           public static void Main(string[] args)
           {
               Console.WriteLine("Hello, World!");
               var p = new Program();
               p.Run();
               Console.WriteLine("Done creating index!");
   
               // Keep the process alive without spinning: an empty
               // `while (true) { }` loop would pin a CPU core at 100%.
               System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
           }
   
           /// <summary>
           /// Fake data row; every property is filled in by the Faker rules
           /// declared in <see cref="Run"/>.
           /// </summary>
           public class Row
           {
               public Guid Id { get; set; }
               public string Name { get; set; }
               public DateTime Date { get; set; }
           }
   
           /// <summary>
           /// Creates (overwriting any existing) index under
           /// <c>../../../App_Data</c> and adds 1,000,000 generated documents
           /// one at a time, then commits.
           /// </summary>
           public void Run()
           {
               var rowFaker = new Faker<Row>()
                   .StrictMode(true)
                   .RuleFor(o => o.Id, f => Guid.NewGuid())
                   .RuleFor(o => o.Name, f => f.Name.FullName())
                   .RuleFor(o => o.Date, f => f.Date.Future());
   
               // Index configuration. The using declarations dispose the
               // writer, directory and analyzer (in that order) when this
               // method returns, so no explicit Dispose() calls are needed.
               var luceneVersion = LuceneVersion.LUCENE_48;
               using var analyzer = new KeywordAnalyzer();
               using var directory = FSDirectory.Open("../../../App_Data");
               var config = new IndexWriterConfig(luceneVersion, analyzer)
               {
                   OpenMode = OpenMode.CREATE,
               };
               using var writer = new IndexWriter(directory, config);
   
               // Generate and index the rows one document at a time.
               for (int i = 0; i < 1000000; i++)
               {
                   // Progress output every 100 documents.
                   if (i % 100 == 0)
                   {
                       Console.WriteLine(i);
                   }
                   var row = rowFaker.Generate();
   
                   // NOTE: row.Date.ToString() formats with the current culture.
                   var document = new Document();
                   document.Add(new StringField("id", row.Id.ToString(),
                       Field.Store.YES));
                   document.Add(new StringField("name", row.Name,
                       Field.Store.YES));
                   document.Add(new StringField("date", row.Date.ToString(),
                       Field.Store.YES));
                   writer.AddDocument(document);
               }
   
               writer.Commit();
           }
       }
   }
   ```
   
![image](https://user-images.githubusercontent.com/6975621/208662017-302591ea-210e-4aa2-8e56-bdfec38ac961.png)
   
   Memory is allocated and released again with `writer.AddDocument(document)`, 
ending up at about 56 MB.
   
   **Versus**:
   ```cs
   using Bogus;
   using Lucene.Net.Analysis.Core;
   using Lucene.Net.Documents;
   using Lucene.Net.Index;
   using Lucene.Net.Store;
   using Lucene.Net.Util;
   using static ConsoleAppForPrototyping.Program;
   
   namespace ConsoleAppForPrototyping {
       /// <summary>
       /// Console prototype that builds a Lucene.NET index from one million
       /// Bogus-generated rows, buffering every document in a list and adding
       /// them with a single <c>IndexWriter.AddDocuments</c> call.
       /// </summary>
       public class Program
       {
           /// <summary>
           /// Builds the index, reports completion, then blocks indefinitely so
           /// the process stays alive.
           /// </summary>
           /// <param name="args">Command-line arguments (unused).</param>
           public static void Main(string[] args)
           {
               Console.WriteLine("Hello, World!");
               var p = new Program();
               p.Run();
               Console.WriteLine("Done creating index!");
   
               // Keep the process alive without spinning: an empty
               // `while (true) { }` loop would pin a CPU core at 100%.
               System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
           }
   
           /// <summary>
           /// Fake data row; every property is filled in by the Faker rules
           /// declared in <see cref="Run"/>.
           /// </summary>
           public class Row
           {
               public Guid Id { get; set; }
               public string Name { get; set; }
               public DateTime Date { get; set; }
           }
   
           /// <summary>
           /// Creates (overwriting any existing) index under
           /// <c>../../../App_Data</c>, collects 1,000,000 generated documents
           /// in memory, adds them in one batch, then commits.
           /// </summary>
           public void Run()
           {
               var rowFaker = new Faker<Row>()
                   .StrictMode(true)
                   .RuleFor(o => o.Id, f => Guid.NewGuid())
                   .RuleFor(o => o.Name, f => f.Name.FullName())
                   .RuleFor(o => o.Date, f => f.Date.Future());
   
               // Index configuration. The using declarations dispose the
               // writer, directory and analyzer (in that order) when this
               // method returns, so no explicit Dispose() calls are needed.
               var luceneVersion = LuceneVersion.LUCENE_48;
               using var analyzer = new KeywordAnalyzer();
               using var directory = FSDirectory.Open("../../../App_Data");
               var config = new IndexWriterConfig(luceneVersion, analyzer)
               {
                   // Use OpenMode.CREATE to overwrite, or OpenMode.APPEND to
                   // just open.
                   OpenMode = OpenMode.CREATE,
               };
               using var writer = new IndexWriter(directory, config);
   
               // Every document is buffered in this list before the single
               // AddDocuments call below, so all of them are held in memory
               // at the same time.
               var documentList = new List<Document>();
               for (int i = 0; i < 1000000; i++)
               {
                   // Progress output every 100 documents.
                   if (i % 100 == 0)
                   {
                       Console.WriteLine(i);
                   }
                   var row = rowFaker.Generate();
   
                   // NOTE: row.Date.ToString() formats with the current culture.
                   var document = new Document();
                   document.Add(new StringField("id", row.Id.ToString(),
                       Field.Store.YES));
                   document.Add(new StringField("name", row.Name,
                       Field.Store.YES));
                   document.Add(new StringField("date", row.Date.ToString(),
                       Field.Store.YES));
                   documentList.Add(document);
               }
               writer.AddDocuments(documentList);
   
               writer.Commit();
           }
       }
   }
   ```
   
   
![image](https://user-images.githubusercontent.com/6975621/208663974-acd01214-3858-4451-bc49-bc2fcc1aa5a3.png)
   First it consumes up to 6 GB.
   
   And after 4.5 minutes it runs garbage collection and ends up at 609 MB.
   
![image](https://user-images.githubusercontent.com/6975621/208664233-dbe00475-4172-4943-9b23-f9a9f4261d63.png)
   
   **Two things I found:**
   
   1) It seems to me that `writer.AddDocument(document);` handles memory better 
in this scenario
   2) Garbage collection does pick it up, though it may take some time (in 
the second scenario, 4.5 minutes).
   
   So I think I'll experiment further with our project and use 
`.AddDocument(document);` instead of `.AddDocuments(documents);`.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@lucenenet.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to