Hi Upayavira,
Here are a couple examples with debugQuery set.
I've misled Mikhail: the query times do get longer as the list of ids 
gets bigger.

Can you see a reason why, when indexB has only 6 ids in its list, it still 
takes 46 seconds?

Ids=6
{
  "responseHeader": {
    "status": 0,
    "QTime": 46849,
    "params": {
      "debugQuery": "true",
      "indent": "true",
      "start": "0",
      "q": "{!join from=sedolKey to=sedolKey fromIndex=indexB}universe:55AL86",
      "_": "1441810031117",
      "wt": "json"
    }
  },
  "response": {
    "numFound": 0,
    "start": 0,
    "docs": []
  },
  "debug": {
    "join": {
      "{!join from=sedolKey to=sedolKey fromIndex=indexB}universe:55AL86": {
        "time": 46848,
        "fromSetSize": 6,
        "toSetSize": 0,
        "fromTermCount": 11837043,
        "fromTermTotalDf": 11837043,
        "fromTermDirectCount": 11837043,
        "fromTermHits": 6,
        "fromTermHitsTotalDf": 6,
        "toTermHits": 0,
        "toTermHitsTotalDf": 0,
        "toTermDirectCount": 0,
        "smallSetsDeferred": 0,
        "toSetDocsAdded": 0
      }
    },
    "rawquerystring": "{!join from=longValue to=longValue fromIndex=indexB}universe:55AL86",
    "querystring": "{!join from=longValue to=longValue fromIndex=indexB}universe:55AL86",
    "parsedquery": "JoinQuery({!join from=longValue to=longValue fromIndex=indexB}universe:55AL86)",
    "parsedquery_toString": "{!join from=longValue to=longValue fromIndex=indexB}universe:55AL86",
    "explain": {},
    "QParser": "",
    "timing": {
      "time": 46849,
      "prepare": {
        "time": 0,
        "query": {
          "time": 0
        },
        "facet": {
          "time": 0
        },
        "mlt": {
          "time": 0
        },
        "highlight": {
          "time": 0
        },
        "stats": {
          "time": 0
        },
        "expand": {
          "time": 0
        },
        "debug": {
          "time": 0
        }
      },
      "process": {
        "time": 46848,
        "query": {
          "time": 46848
        },
        "facet": {
          "time": 0
        },
        "mlt": {
          "time": 0
        },
        "highlight": {
          "time": 0
        },
        "stats": {
          "time": 0
        },
        "expand": {
          "time": 0
        },
        "debug": {
          "time": 0
        }
      }
    }
  }
}

###########################################
Ids=298
{
  "responseHeader": {
    "status": 0,
    "QTime": 51570,
    "params": {
      "debugQuery": "true",
      "indent": "true",
      "q": "{!join from=longValue to=longValue fromIndex=indexB}universe:16XO52",
      "_": "1441810442921",
      "wt": "json"
    }
  },
  "response": {
    "numFound": 0,
    "start": 0,
    "docs": []
  },
  "debug": {
    "join": {
      "{!join from=longValue to=longValue fromIndex=indexB}universe:16XO52": {
        "time": 51570,
        "fromSetSize": 298,
        "toSetSize": 0,
        "fromTermCount": 11837043,
        "fromTermTotalDf": 11837043,
        "fromTermDirectCount": 11837043,
        "fromTermHits": 298,
        "fromTermHitsTotalDf": 298,
        "toTermHits": 0,
        "toTermHitsTotalDf": 0,
        "toTermDirectCount": 0,
        "smallSetsDeferred": 0,
        "toSetDocsAdded": 0
      }
    },
    "rawquerystring": "{!join from=longValue to=longValue fromIndex=indexB}universe:16XO52",
    "querystring": "{!join from=longValue to=longValue fromIndex=indexB}universe:16XO52",
    "parsedquery": "JoinQuery({!join from=longValue to=longValue fromIndex=indexB}universe:16XO52)",
    "parsedquery_toString": "{!join from=longValue to=longValue fromIndex=indexB}universe:16XO52",
    "explain": {},
    "QParser": "",
    "timing": {
      "time": 51570,
      "prepare": {
        "time": 0,
        "query": {
          "time": 0
        },
        "facet": {
          "time": 0
        },
        "mlt": {
          "time": 0
        },
        "highlight": {
          "time": 0
        },
        "stats": {
          "time": 0
        },
        "expand": {
          "time": 0
        },
        "debug": {
          "time": 0
        }
      },
      "process": {
        "time": 51570,
        "query": {
          "time": 51570
        },
        "facet": {
          "time": 0
        },
        "mlt": {
          "time": 0
        },
        "highlight": {
          "time": 0
        },
        "stats": {
          "time": 0
        },
        "expand": {
          "time": 0
        },
        "debug": {
          "time": 0
        }
      }
    }
  }
}

###################################################################
Ids=424088 (only the "debug" section of the response is shown)
"debug": {
    "join": {
      "{!join from=longValue to=longValue fromIndex=indexB}universe:LARGE": {
        "time": 44386,
        "fromSetSize": 424088,
        "toSetSize": 892314,
        "fromTermCount": 11837043,
        "fromTermTotalDf": 11837043,
        "fromTermDirectCount": 11837043,
        "fromTermHits": 420365,
        "fromTermHitsTotalDf": 420365,
        "toTermHits": 57074,
        "toTermHitsTotalDf": 944597,
        "toTermDirectCount": 55722,
        "smallSetsDeferred": 1352,
        "toSetDocsAdded": 892314
      }
    },
    "rawquerystring": "{!join from=longValue to=longValue fromIndex=indexB}universe:LARGE",
    "querystring": "{!join from=longValue to=longValue fromIndex=indexB}universe:LARGE",
    "parsedquery": "JoinQuery({!join from=longValue to=longValue fromIndex=indexB}universe:LARGE)",
    "parsedquery_toString": "{!join from=longValue to=longValue fromIndex=indexB}universe:LARGE",
    "explain": {
      "2000000076769983": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@483489aa , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076769984": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@118f9aef , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076769985": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@7a5d18ac , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076769986": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@6d601adc , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076769987": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@2e262f31 , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076769988": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@4b200302 , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076769989": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@569910d2 , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076770006": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@635a3843 , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076770007": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@5b438a92 , product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2000000076770029": "\n1.0 = (MATCH) org.apache.solr.search.JoinQuery$JoinQueryWeight@31d9c4c , product of:\n  1.0 = boost\n  1.0 = queryNorm\n"
    },
    "QParser": "",
    "timing": {
      "time": 480859,
      "prepare": {
        "time": 0,
        "query": {
          "time": 0
        },
        "facet": {
          "time": 0
        },
        "mlt": {
          "time": 0
        },
        "highlight": {
          "time": 0
        },
        "stats": {
          "time": 0
        },
        "expand": {
          "time": 0
        },
        "debug": {
          "time": 0
        }
      },
      "process": {
        "time": 480859,
        "query": {
          "time": 43737
        },
        "facet": {
          "time": 0
        },
        "mlt": {
          "time": 0
        },
        "highlight": {
          "time": 0
        },
        "stats": {
          "time": 0
        },
        "expand": {
          "time": 0
        },
        "debug": {
          "time": 437122
        }
      }
    }
  }
}


Thanks


Russ.
-----Original Message-----
From: Upayavira [mailto:u...@odoko.co.uk] 
Sent: 09 September 2015 13:02
To: solr-user@lucene.apache.org
Subject: Re: Solr Join between two indexes taking too long.

To explain what a join does:

It goes over to the joined index, and executes a query. This results in a list 
of "ids" that will be used to do a search on the main index. The more of these 
ids there are, the worse performance will be. Thus, if you have 100k documents 
that match in the join core, you will be doing a 100k term search, which will 
invariably be painful, because the more terms you include in the search, the 
slower it will be.

How many matching docs do you have on the other side of your query?

Upayavira

On Tue, Sep 8, 2015, at 02:09 PM, Russell Taylor wrote:
> Hi,
>  I hope somebody can help.
> 
> We have two indexes, one which holds the descriptive data and the 
> other one which holds lists of docs which are of a certain type 
> (called universes in our world). They need to be joined together to 
> show a list of data from indexA where a filtered indexB (by 
> universe:value) has matching longs (The join field).
> 
> At the moment the query is taking 55 seconds we need to get it under a 
> second, any help most appreciated.
> 
> INDEXES:
> 
> Index a (primary index)
> 31 million docs with a converted alphanumeric to a long value with a 
> possible 10 million unique values.
> 
> Index B (the joined index)
> 250 million documents with a converted alphanumeric to a long value 
> with a possible 10 million unique values.
> IndexB is filtered by universe which could be between 1 and 500,000 docs.
> 
> QUERY:
> http://127.0.0.1:8080/solr/indexA/select?q={!join+from=longValue+to=longValue+fromIndex=IndexB}universe:universeValue
> 
> Qtime is 55 seconds for either a universe of 5 docs or 500,000 docs.
> 
> 
> 
> Thanks
> 
> 
> Russ.
> 
> 
> *******************************************************
> This message (including any files transmitted with it) may contain 
> confidential and/or proprietary information, is the property of 
> Interactive Data Corporation and/or its subsidiaries, and is directed 
> only to the addressee(s). If you are not the designated recipient or 
> have reason to believe you received this message in error, please 
> delete this message from your system and notify the sender 
> immediately. An unintended recipient's disclosure, copying, 
> distribution, or use of this message or any attachments is prohibited and may 
> be unlawful.
> *******************************************************


*******************************************************
This message (including any files transmitted with it) may contain confidential 
and/or proprietary information, is the property of Interactive Data Corporation 
and/or its subsidiaries, and is directed only to the addressee(s). If you are 
not the designated recipient or have reason to believe you received this 
message in error, please delete this message from your system and notify the 
sender immediately. An unintended recipient's disclosure, copying, 
distribution, or use of this message or any attachments is prohibited and may 
be unlawful. 
*******************************************************

Reply via email to