Each shard has around 4.2 million documents, which take up around 40GB on disk.
Two nodes have 3 shard replicas each and the third has 2 shard replicas.

The text of the exception is: java.lang.OutOfMemoryError: Java heap space
The heap dump is a full 24GB, indicating that the entire heap space was
being used.

Here is the solrconfig as output by the config request handler:

{
  "responseHeader":{
    "status":0,
    "QTime":0},
  "config":{
    "znodeVersion":0,
    "luceneMatchVersion":"org.apache.lucene.util.Version:6.5.1",
    "updateHandler":{
      "indexWriter":{"closeWaitsForMerges":true},
      "commitWithin":{"softCommit":true},
      "autoCommit":{
        "maxDocs":50000,
        "maxTime":300000,
        "openSearcher":false},
      "autoSoftCommit":{
        "maxDocs":-1,
        "maxTime":30000}},
    "query":{
      "useFilterForSortedQuery":false,
      "queryResultWindowSize":1,
      "queryResultMaxDocsCached":2147483647,
      "enableLazyFieldLoading":false,
      "maxBooleanClauses":1024,
      "":{
        "size":"10000",
        "showItems":"-1",
        "initialSize":"10",
        "name":"fieldValueCache"}},
    "jmx":{
      "agentId":null,
      "serviceUrl":null,
      "rootName":null},
    "requestHandler":{
      "/select":{
        "name":"/select",
        "defaults":{
          "rows":10,
          "echoParams":"explicit"},
        "class":"solr.SearchHandler"},
      "/update":{
        "useParams":"_UPDATE",
        "class":"solr.UpdateRequestHandler",
        "name":"/update"},
      "/update/json":{
        "useParams":"_UPDATE_JSON",
        "class":"solr.UpdateRequestHandler",
        "invariants":{"update.contentType":"application/json"},
        "name":"/update/json"},
      "/update/csv":{
        "useParams":"_UPDATE_CSV",
        "class":"solr.UpdateRequestHandler",
        "invariants":{"update.contentType":"application/csv"},
        "name":"/update/csv"},
      "/update/json/docs":{
        "useParams":"_UPDATE_JSON_DOCS",
        "class":"solr.UpdateRequestHandler",
        "invariants":{
          "update.contentType":"application/json",
          "json.command":"false"},
        "name":"/update/json/docs"},
      "update":{
        "class":"solr.UpdateRequestHandlerApi",
        "useParams":"_UPDATE_JSON_DOCS",
        "name":"update"},
      "/config":{
        "useParams":"_CONFIG",
        "class":"solr.SolrConfigHandler",
        "name":"/config"},
      "/schema":{
        "class":"solr.SchemaHandler",
        "useParams":"_SCHEMA",
        "name":"/schema"},
      "/replication":{
        "class":"solr.ReplicationHandler",
        "useParams":"_REPLICATION",
        "name":"/replication"},
      "/get":{
        "class":"solr.RealTimeGetHandler",
        "useParams":"_GET",
        "defaults":{
          "omitHeader":true,
          "wt":"json",
          "indent":true},
        "name":"/get"},
      "/admin/ping":{
        "class":"solr.PingRequestHandler",
        "useParams":"_ADMIN_PING",
        "invariants":{
          "echoParams":"all",
          "q":"{!lucene}*:*"},
        "name":"/admin/ping"},
      "/admin/segments":{
        "class":"solr.SegmentsInfoRequestHandler",
        "useParams":"_ADMIN_SEGMENTS",
        "name":"/admin/segments"},
      "/admin/luke":{
        "class":"solr.LukeRequestHandler",
        "useParams":"_ADMIN_LUKE",
        "name":"/admin/luke"},
      "/admin/system":{
        "class":"solr.SystemInfoHandler",
        "useParams":"_ADMIN_SYSTEM",
        "name":"/admin/system"},
      "/admin/mbeans":{
        "class":"solr.SolrInfoMBeanHandler",
        "useParams":"_ADMIN_MBEANS",
        "name":"/admin/mbeans"},
      "/admin/plugins":{
        "class":"solr.PluginInfoHandler",
        "name":"/admin/plugins"},
      "/admin/threads":{
        "class":"solr.ThreadDumpHandler",
        "useParams":"_ADMIN_THREADS",
        "name":"/admin/threads"},
      "/admin/properties":{
        "class":"solr.PropertiesRequestHandler",
        "useParams":"_ADMIN_PROPERTIES",
        "name":"/admin/properties"},
      "/admin/logging":{
        "class":"solr.LoggingHandler",
        "useParams":"_ADMIN_LOGGING",
        "name":"/admin/logging"},
      "/admin/file":{
        "class":"solr.ShowFileRequestHandler",
        "useParams":"_ADMIN_FILE",
        "name":"/admin/file"},
      "/export":{
        "class":"solr.ExportHandler",
        "useParams":"_EXPORT",
        "components":["query"],
        "defaults":{"wt":"json"},
        "invariants":{
          "rq":"{!xport}",
          "distrib":false},
        "name":"/export"},
      "/graph":{
        "class":"solr.GraphHandler",
        "useParams":"_ADMIN_GRAPH",
        "invariants":{
          "wt":"graphml",
          "distrib":false},
        "name":"/graph"},
      "/stream":{
        "class":"solr.StreamHandler",
        "useParams":"_STREAM",
        "defaults":{"wt":"json"},
        "invariants":{"distrib":false},
        "name":"/stream"},
      "/sql":{
        "class":"solr.SQLHandler",
        "useParams":"_SQL",
        "defaults":{"wt":"json"},
        "invariants":{"distrib":false},
        "name":"/sql"},
      "/terms":{
        "class":"solr.SearchHandler",
        "useParams":"_TERMS",
        "components":["terms"],
        "name":"/terms"},
      "/analysis/document":{
        "class":"solr.DocumentAnalysisRequestHandler",
        "startup":"lazy",
        "useParams":"_ANALYSIS_DOCUMENT",
        "name":"/analysis/document"},
      "/analysis/field":{
        "class":"solr.FieldAnalysisRequestHandler",
        "startup":"lazy",
        "useParams":"_ANALYSIS_FIELD",
        "name":"/analysis/field"},
      "/debug/dump":{
        "class":"solr.DumpRequestHandler",
        "useParams":"_DEBUG_DUMP",
        "defaults":{
          "echoParams":"explicit",
          "echoHandler":true},
        "name":"/debug/dump"}},
    "updateRequestProcessorChain":[{
        "default":"true",
        "name":"customupdatechain",

"":[{"class":"org.apache.solr.update.processor.CustomDedupProcessorFactory"},
          {"class":"solr.LogUpdateProcessorFactory"},
          {"class":"solr.RunUpdateProcessorFactory"}]}],
    "updateHandlerupdateLog":{
      "dir":"",
      "numVersionBuckets":65536},
    "requestDispatcher":{
      "handleSelect":true,
      "httpCaching":{
        "never304":false,
        "etagSeed":"Solr",
        "lastModFrom":"opentime",
        "cacheControl":null},
      "requestParsers":{
        "multipartUploadLimitKB":2048,
        "formUploadLimitKB":2048,
        "addHttpRequestToContext":false}},
    "indexConfig":{
      "useCompoundFile":false,
      "maxBufferedDocs":-1,
      "maxMergeDocs":-1,
      "mergeFactor":-1,
      "ramBufferSizeMB":100.0,
      "writeLockTimeout":-1,
      "lockType":"native",
      "infoStreamEnabled":false,
      "metrics":{}},
    "peerSync":{"useRangeVersions":true}}}



On Mon, Oct 16, 2017 at 3:38 PM Shawn Heisey <apa...@elyograg.org> wrote:

> On 10/16/2017 3:19 PM, Randy Fradin wrote:
> > We are seeing a lot of full GC events and eventual OOM errors in Solr
> > during indexing. This is Solr 6.5.1 running in cloud mode with a 24G
> > heap.
> > At these times indexing is the only activity taking place. The collection
> > has 4 shards and 2 replicas across 3 nodes. Each document is ~10KB (a few
> > hundred fields each), and indexing is using the normal update handler, 1
> > document per request, up to 240 requests at a time.
> >
> > The heap dump taken automatically on OOM shows 18.3GB of heap taken by 3
> > instances of DocumentsWriter. Within those instances, all of the heap is
> > retained by the blockedFlushes LinkedList inside the flushControl object.
> > Each node in the LinkedList appears to be retaining around 55MB.
> >
> > Clearly something to do with flushing is at play here but I'm at a loss
> > what tuning parameters I should be looking at. I would expect things to
> > start blocking if I fall too far behind on flushing but apparently that's
> > not happening. The ramBufferSizeMB is set to the default 100. My heap
> > size is already absurdly more than I thought we would need for this
> > volume.
>
> One of the first things we need to find out is about your index size.
>
> In each of your shards, how many documents are there?  How much disk
> space does one shard replica take up?  How many shard replica cores does
> each node have on it in total?
>
> I would also like to get a look at your full solrconfig.xml file.  The
> schema may be helpful at a later date, along with an example of a
> document that you're indexing.  With ramBufferSizeMB at the default,
> having a ton of memory used up by a class used for indexing seems very odd.
>
> Do you have the text of the OOM exception? Is it saying out of heap
> space, or some other problem?
>
> Thanks,
> Shawn
>
>

Reply via email to