[ 
https://issues.apache.org/jira/browse/BAHIR-110?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16080435#comment-16080435
 ] 

ASF GitHub Bot commented on BAHIR-110:
--------------------------------------

Github user ricellis commented on a diff in the pull request:

    https://github.com/apache/bahir/pull/45#discussion_r126428690
  
    --- Diff: 
sql-cloudant/src/main/scala/org/apache/bahir/cloudant/CloudantConfig.scala ---
    @@ -30,81 +28,83 @@ import org.apache.bahir.cloudant.common._
     */
     
     class CloudantConfig(val protocol: String, val host: String,
    -    val dbName: String, val indexName: String = null, val viewName: String 
= null)
    +    val dbName: String, val indexName: String, val viewName: String)
         (implicit val username: String, val password: String,
         val partitions: Int, val maxInPartition: Int, val minInPartition: Int,
         val requestTimeout: Long, val bulkSize: Int, val schemaSampleSize: Int,
    -    val createDBOnSave: Boolean, val selector: String, val useQuery: 
Boolean = false,
    -    val queryLimit: Int)
    -    extends Serializable{
    +    val createDBOnSave: Boolean, val apiReceiver: String,
    +    val useQuery: Boolean = false, val queryLimit: Int)
    +    extends Serializable {
     
    -  private lazy val dbUrl = {protocol + "://" + host + "/" + dbName}
    +  lazy val dbUrl: String = {protocol + "://" + host + "/" + dbName}
     
       val pkField = "_id"
    -  val defaultIndex = "_all_docs" // "_changes" does not work for partition
    +  val defaultIndex: String = apiReceiver
       val default_filter: String = "*:*"
     
    -  def getContinuousChangesUrl(): String = {
    -    var url = dbUrl + 
"/_changes?include_docs=true&feed=continuous&heartbeat=3000"
    -    if (selector != null) {
    -      url = url + "&filter=_selector"
    -    }
    -    url
    -  }
    -
    -  def getSelector() : String = {
    -    selector
    -  }
    -
    -  def getDbUrl(): String = {
    +  def getDbUrl: String = {
         dbUrl
       }
     
    -  def getSchemaSampleSize(): Int = {
    +  def getSchemaSampleSize: Int = {
         schemaSampleSize
       }
     
    -  def getCreateDBonSave(): Boolean = {
    +  def getCreateDBonSave: Boolean = {
         createDBOnSave
       }
     
    -  def getTotalUrl(url: String): String = {
    -    if (url.contains('?')) {
    -      url + "&limit=1"
    -    } else {
    -      url + "?limit=1"
    -    }
    -  }
    -
    -  def getDbname(): String = {
    -    dbName
    -  }
    -
    -  def queryEnabled(): Boolean = {useQuery && indexName==null && 
viewName==null}
    -
    -  def allowPartition(queryUsed: Boolean): Boolean = {indexName==null && 
!queryUsed}
    -
    -  def getAllDocsUrl(limit: Int, excludeDDoc: Boolean = false): String = {
    +  def getLastNum(result: JsValue): JsValue = (result \ "last_seq").get
     
    +  /* Url containing limit for docs in a Cloudant database.
    +  * If a view is not defined, use the _all_docs endpoint.
    +  * @return url with one doc limit for retrieving total doc count
    +  */
    +  def getUrl(limit: Int, excludeDDoc: Boolean = false): String = {
         if (viewName == null) {
    -      val baseUrl = (
    -          if ( excludeDDoc) dbUrl + 
"/_all_docs?startkey=%22_design0/%22&include_docs=true"
    -          else dbUrl + "/_all_docs?include_docs=true"
    -          )
    -      if (limit == JsonStoreConfigManager.ALL_DOCS_LIMIT) {
    +      val baseUrl = {
    +        if (excludeDDoc) {
    +          dbUrl + "/_all_docs?startkey=%22_design0/%22&include_docs=true"
    --- End diff --
    
    See https://github.com/cloudant/java-cloudant/issues/344#issuecomment-276938689
    This `startkey` assumes that no document IDs start with upper-case letters.
    Possibly one to address in a separate issue.


> Replace use of _all_docs API with _changes API in all receivers
> ---------------------------------------------------------------
>
>                 Key: BAHIR-110
>                 URL: https://issues.apache.org/jira/browse/BAHIR-110
>             Project: Bahir
>          Issue Type: Improvement
>            Reporter: Esteban Laver
>   Original Estimate: 216h
>  Remaining Estimate: 216h
>
> Today we use the _changes API for the Spark streaming receiver and the
> _all_docs API for the non-streaming receiver. The _all_docs API supports
> parallel reads (using offset and range), but the performance of the _changes
> API is still better in most cases (even with single-threaded support).
> With this ticket we want to:
> a) re-implement all receivers using _changes API
> b) compare performance between the two implementations based on _changes and 
> _all_docs
> Based on the results in b) we could decide to either
> - replace _all_docs implementation with _changes based implementation OR
> - allow customers to pick one (with solid documentation of the pros and
> cons)



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to