[ https://issues.apache.org/jira/browse/BAHIR-110?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16080435#comment-16080435 ]
ASF GitHub Bot commented on BAHIR-110: -------------------------------------- Github user ricellis commented on a diff in the pull request: https://github.com/apache/bahir/pull/45#discussion_r126428690 --- Diff: sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala --- @@ -30,81 +28,83 @@ import org.apache.bahir.cloudant.common._ */ class CloudantConfig(val protocol: String, val host: String, - val dbName: String, val indexName: String = null, val viewName: String = null) + val dbName: String, val indexName: String, val viewName: String) (implicit val username: String, val password: String, val partitions: Int, val maxInPartition: Int, val minInPartition: Int, val requestTimeout: Long, val bulkSize: Int, val schemaSampleSize: Int, - val createDBOnSave: Boolean, val selector: String, val useQuery: Boolean = false, - val queryLimit: Int) - extends Serializable{ + val createDBOnSave: Boolean, val apiReceiver: String, + val useQuery: Boolean = false, val queryLimit: Int) + extends Serializable { - private lazy val dbUrl = {protocol + "://" + host + "/" + dbName} + lazy val dbUrl: String = {protocol + "://" + host + "/" + dbName} val pkField = "_id" - val defaultIndex = "_all_docs" // "_changes" does not work for partition + val defaultIndex: String = apiReceiver val default_filter: String = "*:*" - def getContinuousChangesUrl(): String = { - var url = dbUrl + "/_changes?include_docs=true&feed=continuous&heartbeat=3000" - if (selector != null) { - url = url + "&filter=_selector" - } - url - } - - def getSelector() : String = { - selector - } - - def getDbUrl(): String = { + def getDbUrl: String = { dbUrl } - def getSchemaSampleSize(): Int = { + def getSchemaSampleSize: Int = { schemaSampleSize } - def getCreateDBonSave(): Boolean = { + def getCreateDBonSave: Boolean = { createDBOnSave } - def getTotalUrl(url: String): String = { - if (url.contains('?')) { - url + "&limit=1" - } else { - url + "?limit=1" - } - } - - def getDbname(): String = { - 
dbName - } - - def queryEnabled(): Boolean = {useQuery && indexName==null && viewName==null} - - def allowPartition(queryUsed: Boolean): Boolean = {indexName==null && !queryUsed} - - def getAllDocsUrl(limit: Int, excludeDDoc: Boolean = false): String = { + def getLastNum(result: JsValue): JsValue = (result \ "last_seq").get + /* Url containing limit for docs in a Cloudant database. + * If a view is not defined, use the _all_docs endpoint. + * @return url with one doc limit for retrieving total doc count + */ + def getUrl(limit: Int, excludeDDoc: Boolean = false): String = { if (viewName == null) { - val baseUrl = ( - if ( excludeDDoc) dbUrl + "/_all_docs?startkey=%22_design0/%22&include_docs=true" - else dbUrl + "/_all_docs?include_docs=true" - ) - if (limit == JsonStoreConfigManager.ALL_DOCS_LIMIT) { + val baseUrl = { + if (excludeDDoc) { + dbUrl + "/_all_docs?startkey=%22_design0/%22&include_docs=true" --- End diff -- See https://github.com/cloudant/java-cloudant/issues/344#issuecomment-276938689 This `startkey` makes the assumption that no document IDs will start with upper case letters. Possibly one for another issue. > Replace use of _all_docs API with _changes API in all receivers > --------------------------------------------------------------- > > Key: BAHIR-110 > URL: https://issues.apache.org/jira/browse/BAHIR-110 > Project: Bahir > Issue Type: Improvement > Reporter: Esteban Laver > Original Estimate: 216h > Remaining Estimate: 216h > > Today we use the _changes API for Spark streaming receiver and _all_docs API > for non-streaming receiver. _all_docs API supports parallel reads (using > offset and range) but performance of _changes API is still better in most > cases (even with single threaded support). 
> With this ticket we want to: > a) re-implement all receivers using the _changes API > b) compare performance between the two implementations based on _changes and > _all_docs > Based on the results in b) we could decide to either > - replace the _all_docs implementation with a _changes-based implementation OR > - allow customers to pick one (with solid documentation about the pros and > cons) -- This message was sent by Atlassian JIRA (v6.4.14#64029)