Repository: incubator-s2graph Updated Branches: refs/heads/master 0322060e2 -> cf1bc9150
[S2GRAPH-20] bug fix in query option "sample" bug fix in query option "sample". JIRA: [S2GRAPH-20] https://issues.apache.org/jira/browse/S2GRAPH-20 Pull Request: Closes #12 Authors: Hyunsung Jo: [email protected] Project: http://git-wip-us.apache.org/repos/asf/incubator-s2graph/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-s2graph/commit/cf1bc915 Tree: http://git-wip-us.apache.org/repos/asf/incubator-s2graph/tree/cf1bc915 Diff: http://git-wip-us.apache.org/repos/asf/incubator-s2graph/diff/cf1bc915 Branch: refs/heads/master Commit: cf1bc915087b7392a44b6b6a758912f0f44cf54a Parents: 0322060 Author: DO YUNG YOON <[email protected]> Authored: Tue Feb 23 16:00:38 2016 +0900 Committer: DO YUNG YOON <[email protected]> Committed: Tue Feb 23 16:00:38 2016 +0900 ---------------------------------------------------------------------- CHANGES | 3 ++ .../storage/hbase/AsynchbaseQueryBuilder.scala | 40 +++++++++++--------- .../s2graph/core/Integrate/QueryTest.scala | 28 +++++++++++--- 3 files changed, 47 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-s2graph/blob/cf1bc915/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index e477a71..347f9b6 100644 --- a/CHANGES +++ b/CHANGES @@ -19,6 +19,9 @@ Release 0.12.1 - unreleased S2GRAPH-18: Query Option "interval" is Broken. (Contributed by Hyunsung Jo<[email protected]>, committed by daewon) + S2GRAPH-20: bug fix in query option "sample". + (Contributed by Hyunsung Jo<[email protected]>, committed by DOYUNG YOON) + TASKS S2GRAPH-2: Update document and quick start environment to have recent set-up command changes. http://git-wip-us.apache.org/repos/asf/incubator-s2graph/blob/cf1bc915/s2core/src/main/scala/com/kakao/s2graph/core/storage/hbase/AsynchbaseQueryBuilder.scala ---------------------------------------------------------------------- diff --git a/s2core/src/main/scala/com/kakao/s2graph/core/storage/hbase/AsynchbaseQueryBuilder.scala b/s2core/src/main/scala/com/kakao/s2graph/core/storage/hbase/AsynchbaseQueryBuilder.scala index 717a5ac..13cfc0d 100644 --- a/s2core/src/main/scala/com/kakao/s2graph/core/storage/hbase/AsynchbaseQueryBuilder.scala +++ b/s2core/src/main/scala/com/kakao/s2graph/core/storage/hbase/AsynchbaseQueryBuilder.scala @@ -92,10 +92,10 @@ class AsynchbaseQueryBuilder(storage: AsynchbaseStorage)(implicit ec: ExecutionC val maxSize = storage.config.getInt("future.cache.max.size") val futureCacheTTL = storage.config.getInt("future.cache.expire.after.access") val futureCache = CacheBuilder.newBuilder() - .initialCapacity(maxSize) - .concurrencyLevel(Runtime.getRuntime.availableProcessors()) - .expireAfterAccess(futureCacheTTL, TimeUnit.MILLISECONDS) - .maximumSize(maxSize).build[java.lang.Long, (Long, Deferred[QueryRequestWithResult])]() + .initialCapacity(maxSize) + .concurrencyLevel(Runtime.getRuntime.availableProcessors()) + .expireAfterAccess(futureCacheTTL, TimeUnit.MILLISECONDS) + .maximumSize(maxSize).build[java.lang.Long, (Long, Deferred[QueryRequestWithResult])]() override def fetch(queryRequest: QueryRequest, prevStepScore: Double, @@ -103,30 +103,33 @@ class AsynchbaseQueryBuilder(storage: AsynchbaseStorage)(implicit ec: ExecutionC parentEdges: Seq[EdgeWithScore]): Deferred[QueryRequestWithResult] = { @tailrec def randomInt(sampleNumber: Int, range: Int, set: Set[Int] = Set.empty[Int]): Set[Int] = { - if (set.size == sampleNumber) set + if (range < sampleNumber || set.size == sampleNumber) set else randomInt(sampleNumber, range, set + Random.nextInt(range)) } def sample(edges: Seq[EdgeWithScore], n: Int): Seq[EdgeWithScore] = { - val plainEdges = if (queryRequest.queryParam.offset == 0) { - edges.tail - } else edges - - val randoms = randomInt(n, plainEdges.size) - var samples = List.empty[EdgeWithScore] - var idx = 0 - plainEdges.foreach { e => - if (randoms.contains(idx)) samples = e :: samples - idx += 1 + if (edges.size <= n) { + edges + } else { + val plainEdges = if (queryRequest.queryParam.offset == 0) { + edges.tail + } else edges + + val randoms = randomInt(n, plainEdges.size) + var samples = List.empty[EdgeWithScore] + var idx = 0 + plainEdges.foreach { e => + if (randoms.contains(idx)) samples = e :: samples + idx += 1 + } + samples.toSeq } - - samples.toSeq } def fetchInner(request: GetRequest) = { storage.client.get(request) withCallback { kvs => val edgeWithScores = storage.toEdges(kvs.toSeq, queryRequest.queryParam, prevStepScore, isInnerCall, parentEdges) - val resultEdgesWithScores = if (queryRequest.queryParam.sample >= 0 ) { + val resultEdgesWithScores = if (queryRequest.queryParam.sample >= 0) { sample(edgeWithScores, queryRequest.queryParam.sample) } else edgeWithScores QueryRequestWithResult(queryRequest, QueryResult(resultEdgesWithScores)) @@ -135,6 +138,7 @@ class AsynchbaseQueryBuilder(storage: AsynchbaseStorage)(implicit ec: ExecutionC QueryRequestWithResult(queryRequest, QueryResult(isFailure = true)) } } + def checkAndExpire(request: GetRequest, cacheKey: Long, cacheTTL: Long, http://git-wip-us.apache.org/repos/asf/incubator-s2graph/blob/cf1bc915/s2core/src/test/scala/com/kakao/s2graph/core/Integrate/QueryTest.scala ---------------------------------------------------------------------- diff --git a/s2core/src/test/scala/com/kakao/s2graph/core/Integrate/QueryTest.scala b/s2core/src/test/scala/com/kakao/s2graph/core/Integrate/QueryTest.scala index 5b0dfbd..633476e 100644 --- a/s2core/src/test/scala/com/kakao/s2graph/core/Integrate/QueryTest.scala +++ b/s2core/src/test/scala/com/kakao/s2graph/core/Integrate/QueryTest.scala @@ -469,14 +469,30 @@ class QueryTest extends IntegrateCommon with BeforeAndAfterEach { insertEdgesSync(bulkEdges: _*) - val result1 = getEdgesSync(queryWithSampling(testId, sampleSize)) - (result1 \ "results").as[List[JsValue]].size should be(math.min(sampleSize, bulkEdges.size)) + var result = getEdgesSync(queryWithSampling(testId, sampleSize)) + println(Json.toJson(result)) + (result \ "results").as[List[JsValue]].size should be(scala.math.min(sampleSize, bulkEdges.size)) - val result2 = getEdgesSync(twoStepQueryWithSampling(testId, sampleSize)) - (result2 \ "results").as[List[JsValue]].size should be(math.min(sampleSize * sampleSize, bulkEdges.size * bulkEdges.size)) + result = getEdgesSync(twoStepQueryWithSampling(testId, sampleSize)) + println(Json.toJson(result)) + (result \ "results").as[List[JsValue]].size should be(scala.math.min(sampleSize * sampleSize, bulkEdges.size * bulkEdges.size)) + + result = getEdgesSync(twoQueryWithSampling(testId, sampleSize)) + println(Json.toJson(result)) + (result \ "results").as[List[JsValue]].size should be(sampleSize + 3) // edges in testLabelName2 = 3 + + result = getEdgesSync(queryWithSampling(testId, 0)) + println(Json.toJson(result)) + (result \ "results").as[List[JsValue]].size should be(0) // edges in testLabelName2 = 3 + + result = getEdgesSync(queryWithSampling(testId, 10)) + println(Json.toJson(result)) + (result \ "results").as[List[JsValue]].size should be(3) // edges in testLabelName2 = 3 + + result = getEdgesSync(queryWithSampling(testId, -1)) + println(Json.toJson(result)) + (result \ "results").as[List[JsValue]].size should be(3) // edges in testLabelName2 = 3 - val result3 = getEdgesSync(twoQueryWithSampling(testId, sampleSize)) - (result3 \ "results").as[List[JsValue]].size should be(sampleSize + 3) // edges in testLabelName2 = 3 } def querySingle(id: Int, offset: Int = 0, limit: Int = 100) = Json.parse(
