jiangpengcheng commented on a change in pull request #4963:
URL: https://github.com/apache/openwhisk/pull/4963#discussion_r614506639



##########
File path: 
common/scala/src/main/scala/org/apache/openwhisk/core/database/mongodb/MongoDBArtifactStore.scala
##########
@@ -0,0 +1,661 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.database.mongodb
+
+import java.security.MessageDigest
+
+import akka.actor.ActorSystem
+import akka.event.Logging.ErrorLevel
+import akka.http.scaladsl.model._
+import akka.stream.ActorMaterializer
+import akka.stream.scaladsl._
+import akka.util.ByteString
+import com.mongodb.client.gridfs.model.GridFSUploadOptions
+import org.apache.openwhisk.common.{Logging, LoggingMarkers, TransactionId}
+import org.apache.openwhisk.core.database._
+import org.apache.openwhisk.core.database.StoreUtils._
+import org.apache.openwhisk.core.entity.Attachments.Attached
+import org.apache.openwhisk.core.entity.{DocId, DocInfo, DocRevision, 
DocumentReader, UUID}
+import org.apache.openwhisk.http.Messages
+import org.bson.json.{JsonMode, JsonWriterSettings}
+import org.mongodb.scala.bson.BsonString
+import org.mongodb.scala.bson.collection.immutable.Document
+import org.mongodb.scala.gridfs.{GridFSBucket, GridFSFile, 
MongoGridFSException}
+import org.mongodb.scala.model._
+import org.mongodb.scala.{MongoClient, MongoCollection, MongoException}
+import spray.json._
+
+import scala.concurrent.Future
+import scala.util.Try
+
+/** Constants shared by the MongoDB backed artifact store. */
+object MongoDBArtifactStore {
+
+  /** Name of the extra field under which `put` saves the handler-computed fields; `query` removes it from results. */
+  val _computed: String = "_computed"
+}
+
+/**
+ * Basic client to put and delete artifacts in a data store.
+ *
+ * @param client the mongodb client to access database
+ * @param dbName the name of the database to operate on
+ * @param collName the name of the collection to operate on
+ * @param documentHandler helper that emulates CouchDB design documents (computed fields, view result transformation)
+ * @param viewMapper helper that emulates CouchDB views by mapping view queries to MongoDB filters and sort orders
+ */
+class MongoDBArtifactStore[DocumentAbstraction <: DocumentSerializer](client: 
MongoClient,
+                                                                      dbName: 
String,
+                                                                      
collName: String,
+                                                                      
documentHandler: DocumentHandler,
+                                                                      
viewMapper: MongoDBViewMapper,
+                                                                      val 
inliningConfig: InliningConfig,
+                                                                      val 
attachmentStore: Option[AttachmentStore])(
+  implicit system: ActorSystem,
+  val logging: Logging,
+  jsonFormat: RootJsonFormat[DocumentAbstraction],
+  val materializer: ActorMaterializer,
+  docReader: DocumentReader)
+    extends ArtifactStore[DocumentAbstraction]
+    with DocumentProvider
+    with DefaultJsonProtocol
+    with AttachmentSupport[DocumentAbstraction] {
+
+  import MongoDBArtifactStore._
+
+  protected[core] implicit val executionContext = system.dispatcher
+
+  private val mongodbScheme = "mongodb"
+  val attachmentScheme: String = 
attachmentStore.map(_.scheme).getOrElse(mongodbScheme)
+
+  private val database = client.getDatabase(dbName)
+  private val collection = getCollectionAndCreateIndexes
+  private val gridFSBucket = GridFSBucket(database, collName)
+
+  private val jsonWriteSettings = 
JsonWriterSettings.builder().outputMode(JsonMode.RELAXED).build
+
+  // MongoDB doesn't support `$` as the first character of a field name, so the two fields below need to be
+  // encoded before they are stored
+  private val fieldsNeedEncode = Seq("annotations", "parameters")
+
+  /**
+   * Stores the given document under a newly calculated revision.
+   *
+   * The write is a single `findOneAndReplace` with `upsert(true)`. The filter is chosen so that a revision
+   * mismatch ends in a duplicate key error (code 11000), which is reported as a conflict.
+   *
+   * @param d the document to store; its record must contain a non-empty `_id`
+   * @return the id and new revision of the stored document; the future fails with a
+   *         `DocumentConflictException` on conflict and with a generic `Exception` on other MongoDB errors
+   */
+  override protected[database] def put(d: DocumentAbstraction)(implicit transid: TransactionId): Future[DocInfo] = {
+    val asJson = d.toDocumentRecord
+
+    val id: String = asJson.fields.getOrElse("_id", JsString.empty).convertTo[String].trim
+    require(id.nonEmpty, "document id must be defined")
+
+    val (oldRev, rev) = revisionCalculate(asJson)
+    val docinfoStr = s"id: $id, rev: $rev"
+    val start =
+      transid.started(this, LoggingMarkers.DATABASE_SAVE, s"[PUT] '$collName' saving document: '$docinfoStr'")
+
+    val encodedData = encodeFields(fieldsNeedEncode, asJson)
+
+    // persist the computed fields and the new revision next to the (encoded) document fields
+    val data = JsObject(
+      encodedData.fields + (_computed -> documentHandler.computedFields(asJson)) + ("_rev" -> rev.toJson))
+
+    val filters =
+      if (rev.startsWith("1-")) {
+        // for a new document, no document should match and a new one is inserted
+        // if there is a matched document, i.e. one without a _rev field, it will be replaced
+        // if there is a document with the same id that has a _rev field, an E11000 (conflict) error is returned
+        Filters.and(Filters.eq("_id", id), Filters.not(Filters.exists("_rev")))
+      } else {
+        // for an existing document, the matched document is found and replaced
+        // if no document matches, the upsert tries to insert and mongodb returns an E11000 error
+        Filters.and(Filters.eq("_id", id), Filters.eq("_rev", oldRev))
+      }
+
+    val f =
+      collection
+        .findOneAndReplace(
+          filters,
+          Document(data.compactPrint),
+          FindOneAndReplaceOptions().upsert(true).returnDocument(ReturnDocument.AFTER))
+        .toFuture()
+        .map { doc =>
+          transid.finished(this, start, s"[PUT] '$collName' completed document: '$docinfoStr', document: '$doc'")
+          DocInfo(DocId(id), DocRevision(rev))
+        }
+        .recover {
+          case t: MongoException if t.getCode == 11000 =>
+            transid.finished(this, start, s"[PUT] '$dbName', document: '$docinfoStr'; conflict.")
+            throw DocumentConflictException("conflict on 'put'")
+          case t: MongoException =>
+            transid.failed(
+              this,
+              start,
+              s"[PUT] '$dbName' failed to put document: '$docinfoStr'; return error code: '${t.getCode}'",
+              ErrorLevel)
+            // keep the driver exception as the cause so that its stack trace is not lost
+            throw new Exception("Unexpected mongodb server error: " + t.getMessage, t)
+        }
+
+    reportFailure(
+      f,
+      failure =>
+        transid
+          .failed(this, start, s"[PUT] '$collName' internal error, failure: '${failure.getMessage}'", ErrorLevel))
+  }
+
+  /**
+   * Deletes the document that matches both the id and the revision of `doc`.
+   *
+   * @param doc id and revision of the document to delete; both must be defined
+   * @return a future of `true` when the document was deleted; the future fails with a
+   *         `DocumentConflictException` when a document with that id exists under another revision, and with a
+   *         `NoDocumentException` when no document with that id exists
+   */
+  override protected[database] def del(doc: DocInfo)(implicit transid: TransactionId): Future[Boolean] = {
+    require(doc != null && doc.rev.asString != null, "doc revision required for delete")
+
+    val start =
+      transid.started(this, LoggingMarkers.DATABASE_DELETE, s"[DEL] '$collName' deleting document: '$doc'")
+
+    val f = collection
+      .deleteOne(Filters.and(Filters.eq("_id", doc.id.id), Filters.eq("_rev", doc.rev.rev)))
+      .toFuture()
+      .flatMap { result =>
+        if (result.getDeletedCount == 1) { // the result can only be 1 or 0
+          transid.finished(this, start, s"[DEL] '$collName' completed document: '$doc'")
+          // the value is already known, no need to schedule a task for it
+          Future.successful(true)
+        } else {
+          // nothing matched id + revision: look up by id alone to tell a conflict from a missing document
+          collection.find(Filters.eq("_id", doc.id.id)).toFuture.map { sameId =>
+            if (sameId.size == 1) {
+              // a document with this _id exists, so the revision did not match: conflict
+              transid.finished(this, start, s"[DEL] '$collName', document: '$doc'; conflict.")
+              throw DocumentConflictException("conflict on 'delete'")
+            } else {
+              // no document with this _id: not found
+              transid.finished(this, start, s"[DEL] '$collName', document: '$doc'; not found.")
+              throw NoDocumentException(s"$doc not found on 'delete'")
+            }
+          }
+        }
+      }
+      .recover {
+        case t: MongoException =>
+          transid.failed(
+            this,
+            start,
+            s"[DEL] '$collName' failed to delete document: '$doc'; error code: '${t.getCode}'",
+            ErrorLevel)
+          // keep the driver exception as the cause so that its stack trace is not lost
+          throw new Exception("Unexpected mongodb server error: " + t.getMessage, t)
+      }
+
+    reportFailure(
+      f,
+      failure =>
+        transid.failed(
+          this,
+          start,
+          s"[DEL] '$collName' internal error, doc: '$doc', failure: '${failure.getMessage}'",
+          ErrorLevel))
+  }
+
+  /**
+   * Fetches the document with the id of `doc` and deserializes it to `A`.
+   *
+   * @param doc id (and revision) of the document to read; must not be null
+   * @param attachmentHandler optional function applied, via `processAttachments`, to the deserialized document
+   * @return the deserialized document; the future fails with a `NoDocumentException` when no document has that
+   *         id and with `DocumentUnreadable` when the stored JSON cannot be deserialized
+   */
+  override protected[database] def get[A <: DocumentAbstraction](doc: DocInfo,
+                                                                 attachmentHandler: Option[(A, Attached) => A] = None)(
+    implicit transid: TransactionId,
+    ma: Manifest[A]): Future[A] = {
+
+    // validate before emitting the start marker so that a rejected call leaves no dangling marker
+    require(doc != null, "doc undefined")
+
+    val start = transid.started(this, LoggingMarkers.DATABASE_GET, s"[GET] '$dbName' finding document: '$doc'")
+
+    val f = collection
+      .find(Filters.eq("_id", doc.id.id)) // method deserialize will check whether the _rev matched
+      .toFuture()
+      .map { result =>
+        result.headOption match {
+          case None =>
+            transid.finished(this, start, s"[GET] '$collName', document: '$doc'; not found.")
+            throw NoDocumentException("not found on 'get'")
+          case Some(found) =>
+            transid.finished(this, start, s"[GET] '$collName' completed: found document '$doc'")
+            val response = found.toJson(jsonWriteSettings).parseJson.asJsObject
+            val decodeData = decodeFields(fieldsNeedEncode, response)
+
+            val deserializedDoc = deserialize[A, DocumentAbstraction](doc, decodeData)
+            attachmentHandler
+              .map(processAttachments(deserializedDoc, decodeData, doc.id.id, _))
+              .getOrElse(deserializedDoc)
+        }
+      }
+      .recover {
+        case t: MongoException =>
+          // a server error is a failure, so log it like the other operations of this store do
+          transid.failed(
+            this,
+            start,
+            s"[GET] '$collName' failed to get document: '$doc'; error code: '${t.getCode}'",
+            ErrorLevel)
+          // keep the driver exception as the cause so that its stack trace is not lost
+          throw new Exception("Unexpected mongodb server error: " + t.getMessage, t)
+        case _: DeserializationException => throw DocumentUnreadable(Messages.corruptedEntity)
+      }
+
+    reportFailure(
+      f,
+      failure =>
+        transid.failed(
+          this,
+          start,
+          s"[GET] '$collName' internal error, doc: '$doc', failure: '${failure.getMessage}'",
+          ErrorLevel))
+  }
+
+  /**
+   * Fetches the raw JSON of the document with the given id.
+   *
+   * @param id the id of the document to read
+   * @return `Some` decoded JSON object when a document was found, `None` otherwise; the future fails with a
+   *         generic `Exception` on MongoDB errors
+   */
+  override protected[database] def get(id: DocId)(implicit transid: TransactionId): Future[Option[JsObject]] = {
+    val start = transid.started(this, LoggingMarkers.DATABASE_GET, s"[GET] '$collName' finding document: '$id'")
+    val f = collection
+      .find(Filters.equal("_id", id.id))
+      .head()
+      .map {
+        case d: Document =>
+          transid.finished(this, start, s"[GET] '$dbName' completed: found document '$id'")
+          Some(decodeFields(fieldsNeedEncode, d.toJson(jsonWriteSettings).parseJson.asJsObject))
+        case null =>
+          // a null head is treated as "no document matched"
+          transid.finished(this, start, s"[GET] '$dbName', document: '$id'; not found.")
+          None
+      }
+      .recover {
+        case t: MongoException =>
+          transid.failed(
+            this,
+            start,
+            s"[GET] '$collName' failed to get document: '$id'; error code: '${t.getCode}'",
+            ErrorLevel)
+          // keep the driver exception as the cause so that its stack trace is not lost
+          throw new Exception("Unexpected mongodb server error: " + t.getMessage, t)
+      }
+
+    reportFailure(
+      f,
+      failure =>
+        transid.failed(
+          this,
+          start,
+          s"[GET] '$collName' internal error, doc: '$id', failure: '${failure.getMessage}'",
+          ErrorLevel))
+  }
+
+  /**
+   * Runs a view style query against the collection.
+   *
+   * @param table the view to query, in the form `ddoc/viewName`
+   * @param startKey start key handed to the view mapper and the document handler
+   * @param endKey end key handed to the view mapper and the document handler
+   * @param skip number of leading results to skip; must be non negative
+   * @param limit maximum number of results; must be non negative
+   * @param includeDocs whether results should include the documents; cannot be combined with `reduce`
+   * @param descending sort direction handed to the view mapper
+   * @param reduce not supported, must be `false`
+   * @param stale not used by this implementation
+   * @return the view results produced by the document handler
+   */
+  override protected[core] def query(table: String,
+                                     startKey: List[Any],
+                                     endKey: List[Any],
+                                     skip: Int,
+                                     limit: Int,
+                                     includeDocs: Boolean,
+                                     descending: Boolean,
+                                     reduce: Boolean,
+                                     stale: StaleParameter)(implicit transid: TransactionId): Future[List[JsObject]] = {
+    require(!(reduce && includeDocs), "reduce and includeDocs cannot both be true")
+    require(!reduce, "Reduce scenario not supported") //TODO Investigate reduce
+    require(skip >= 0, "skip should be non negative")
+    require(limit >= 0, "limit should be non negative")
+
+    val Array(ddoc, viewName) = table.split("/")
+
+    val find = collection
+      .find(viewMapper.filter(ddoc, viewName, startKey, endKey))
+
+    // NOTE(review): the results of sort/skip/limit are discarded, so this relies on the driver's find builder
+    // being updated in place -- confirm against the driver version in use
+    viewMapper.sort(ddoc, viewName, descending).foreach(find.sort)
+
+    find.skip(skip).limit(limit)
+
+    val realIncludeDocs = includeDocs | documentHandler.shouldAlwaysIncludeDocs(ddoc, viewName)
+    val start = transid.started(this, LoggingMarkers.DATABASE_QUERY, s"[QUERY] '$collName' searching '$table'")
+
+    val f = find
+      .toFuture()
+      .map { docs =>
+        transid.finished(this, start, s"[QUERY] '$dbName' completed: matched ${docs.size}")
+        docs.map { doc =>
+          val js = decodeFields(fieldsNeedEncode, doc.toJson(jsonWriteSettings).parseJson.convertTo[JsObject])
+          // the computed fields are internal to this store and are not handed to the document handler
+          documentHandler.transformViewResult(
+            ddoc,
+            viewName,
+            startKey,
+            endKey,
+            realIncludeDocs,
+            JsObject(js.fields - _computed),
+            MongoDBArtifactStore.this)
+        }
+      }
+      .flatMap(Future.sequence(_))
+      .map(_.flatten.toList)
+      .recover {
+        case t: MongoException =>
+          transid.failed(this, start, s"[QUERY] '$collName' failed; error code: '${t.getCode}'", ErrorLevel)
+          // keep the driver exception as the cause so that its stack trace is not lost
+          throw new Exception("Unexpected mongodb server error: " + t.getMessage, t)
+      }
+
+    reportFailure(
+      f,
+      failure =>
+        transid
+          .failed(this, start, s"[QUERY] '$collName' internal error, failure: '${failure.getMessage}'", ErrorLevel))
+  }
+
+  /**
+   * Counts the documents matched by a view style query.
+   *
+   * @param table the view to count on, in the form `ddoc/viewName`
+   * @param startKey start key handed to the view mapper
+   * @param endKey end key handed to the view mapper
+   * @param skip number of matching documents to skip before counting; must be non negative
+   * @param stale not used by this implementation
+   * @return the number of documents reported by MongoDB
+   */
+  protected[core] def count(table: String, startKey: List[Any], endKey: List[Any], skip: Int, stale: StaleParameter)(
+    implicit transid: TransactionId): Future[Long] = {
+    require(skip >= 0, "skip should be non negative")
+
+    val Array(ddoc, viewName) = table.split("/")
+    val start = transid.started(this, LoggingMarkers.DATABASE_QUERY, s"[COUNT] '$dbName' searching '$table'")
+
+    val query = viewMapper.filter(ddoc, viewName, startKey, endKey)
+
+    val option = CountOptions().skip(skip)
+    val f =
+      collection
+        .countDocuments(query, option)
+        .toFuture()
+        .map { result =>
+          transid.finished(this, start, s"[COUNT] '$collName' completed: count $result")
+          result
+        }
+        .recover {
+          case t: MongoException =>
+            transid.failed(this, start, s"[COUNT] '$collName' failed; error code: '${t.getCode}'", ErrorLevel)
+            // keep the driver exception as the cause so that its stack trace is not lost
+            throw new Exception("Unexpected mongodb server error: " + t.getMessage, t)
+        }
+
+    reportFailure(
+      f,
+      failure =>
+        transid
+          .failed(this, start, s"[COUNT] '$dbName' internal error, failure: '${failure.getMessage}'", ErrorLevel))
+  }
+
+  /**
+   * Saves a document together with an attachment. The attachment goes to the configured external attachment
+   * store when there is one, and to MongoDB otherwise.
+   */
+  override protected[database] def putAndAttach[A <: DocumentAbstraction](
+    doc: A,
+    update: (A, Attached) => A,
+    contentType: ContentType,
+    docStream: Source[ByteString, _],
+    oldAttachment: Option[Attached])(implicit transid: TransactionId): Future[(DocInfo, Attached)] =
+    attachmentStore
+      .map(as => attachToExternalStore(doc, update, contentType, docStream, oldAttachment, as))
+      .getOrElse(attachToMongo(doc, update, contentType, docStream, oldAttachment))
+
+  /**
+   * Stores the attachment (inlined or uploaded through `attach`), saves the updated document, and finally
+   * removes the previous attachment when that one was stored under the mongodb scheme.
+   */
+  private def attachToMongo[A <: DocumentAbstraction](
+    doc: A,
+    update: (A, Attached) => A,
+    contentType: ContentType,
+    docStream: Source[ByteString, _],
+    oldAttachment: Option[Attached])(implicit transid: TransactionId): Future[(DocInfo, Attached)] = {
+
+    // Deletes the superseded attachment from GridFS; attachments of any other scheme are left alone.
+    def dropOldAttachment(savedDoc: DocInfo): Future[Boolean] =
+      oldAttachment match {
+        case Some(old) =>
+          val oldUri = Uri(old.attachmentName)
+          if (oldUri.scheme != mongodbScheme) {
+            Future.successful(true)
+          } else {
+            val name = oldUri.path.toString
+            gridFSBucket.delete(BsonString(s"${savedDoc.id.id}/$name")).toFuture.map(_ => true)
+          }
+        case None => Future.successful(true)
+      }
+
+    for {
+      bytesOrSource <- inlineOrAttach(docStream)
+      uri = uriOf(bytesOrSource, UUID().asString)
+      attached <- bytesOrSource match {
+        case Left(bytes) =>
+          // small enough to be inlined: nothing to upload
+          Future.successful(Attached(uri.toString, contentType, Some(bytes.size), Some(digest(bytes))))
+        case Right(source) =>
+          attach(doc, uri.path.toString, contentType, source).map { uploaded =>
+            Attached(uri.toString, contentType, Some(uploaded.length), Some(uploaded.digest))
+          }
+      }
+      docInfo <- put(update(doc, attached))
+      _ <- dropOldAttachment(docInfo)
+    } yield (docInfo, attached)
+  }
+
+  /**
+   * Uploads the attachment stream to GridFS under the file id `<document id>/<name>`.
+   *
+   * @param d the document the attachment belongs to; its record must contain a non-empty `_id`
+   * @param name the attachment name
+   * @param contentType content type recorded in the metadata of the uploaded file
+   * @param docStream the attachment bytes
+   * @return digest and length of the uploaded bytes
+   */
+  private def attach(d: DocumentAbstraction, name: String, contentType: ContentType, docStream: Source[ByteString, _])(
+    implicit transid: TransactionId): Future[AttachResult] = {
+
+    logging.info(this, s"Uploading attach $name")
+    val asJson = d.toDocumentRecord
+    val id: String = asJson.fields("_id").convertTo[String].trim
+    require(id.nonEmpty, "document id must be defined")
+
+    val start = transid.started(
+      this,
+      LoggingMarkers.DATABASE_ATT_SAVE,
+      s"[ATT_PUT] '$collName' uploading attachment '$name' of document 'id: $id'")
+
+    val document: org.bson.Document = new org.bson.Document("contentType", contentType.toString)
+    //add the document id to the metadata
+    document.append("belongsTo", id)
+
+    val option = new GridFSUploadOptions().metadata(document)
+
+    val uploadStream = gridFSBucket.openUploadStream(BsonString(s"$id/$name"), name, option)
+    val sink = MongoDBAsyncStreamSink(uploadStream)
+
+    val f = docStream
+      .runWith(combinedSink(sink))
+      .map { r =>
+        transid
+          .finished(this, start, s"[ATT_PUT] '$collName' completed uploading attachment '$name' of document '$id'")
+        AttachResult(r.digest, r.length)
+      }
+      .recover {
+        case t: MongoException =>
+          transid.failed(
+            this,
+            start,
+            s"[ATT_PUT] '$collName' failed to upload attachment '$name' of document '$id'; error code '${t.getCode}'",
+            ErrorLevel)
+          // keep the driver exception as the cause so that its stack trace is not lost
+          throw new Exception("Unexpected mongodb server error: " + t.getMessage, t)
+      }
+
+    reportFailure(
+      f,
+      failure =>
+        transid.failed(
+          this,
+          start,
+          s"[ATT_PUT] '$collName' internal error, name: '$name', doc: '$id', failure: '${failure.getMessage}'",
+          ErrorLevel))
+  }
+
+  /**
+   * Streams an attachment into `sink`, picking the source from the scheme of the attachment name: memory,
+   * MongoDB (also used for relative names), or the external attachment store with a matching scheme.
+   * Throws an `IllegalArgumentException` for any other scheme.
+   */
+  override protected[core] def readAttachment[T](doc: DocInfo, attached: Attached, sink: Sink[ByteString, Future[T]])(
+    implicit transid: TransactionId): Future[T] = {
+
+    val attachmentUri = Uri(attached.attachmentName)
+
+    attachmentUri.scheme match {
+      case AttachmentSupport.MemScheme =>
+        memorySource(attachmentUri).runWith(sink)
+      case scheme if scheme == mongodbScheme || attachmentUri.isRelative =>
+        // relative names come from the earlier naming approach (e.g. 'jarfile'),
+        // as opposed to the current '<scheme>:<name>' form
+        readAttachmentFromMongo(doc, attachmentUri, sink)
+      case scheme =>
+        attachmentStore.filter(_.scheme == scheme) match {
+          case Some(as) =>
+            as.readAttachment(doc.id, attachmentUri.path.toString, sink)
+          case None =>
+            throw new IllegalArgumentException(s"Unknown attachment scheme in attachment uri $attachmentUri")
+        }
+    }
+  }
+
+  /**
+   * Streams the GridFS file `<document id>/<attachment name>` into `sink`.
+   *
+   * @param doc the owning document; id and revision must be defined
+   * @param attachmentUri uri of the attachment; its path is used as the attachment name
+   * @param sink the sink that consumes the attachment bytes
+   * @return the materialized value of `sink`; the future fails with a `NoDocumentException` when the file does
+   *         not exist
+   */
+  private def readAttachmentFromMongo[T](doc: DocInfo, attachmentUri: Uri, sink: Sink[ByteString, Future[T]])(
+    implicit transid: TransactionId): Future[T] = {
+
+    // validate before the start marker is emitted and before any stream is opened
+    require(doc != null, "doc undefined")
+    require(doc.rev.rev != null, "doc revision must be specified")
+
+    val attachmentName = attachmentUri.path.toString
+    val start = transid.started(
+      this,
+      LoggingMarkers.DATABASE_ATT_GET,
+      s"[ATT_GET] '$dbName' finding attachment '$attachmentName' of document '$doc'")
+
+    val downloadStream = gridFSBucket.openDownloadStream(BsonString(s"${doc.id.id}/$attachmentName"))
+
+    // `file` is only the proof that the lookup succeeded; the bytes are read from `downloadStream`
+    def readStream(file: GridFSFile) = {
+      val source = MongoDBAsyncStreamSource(downloadStream)
+      source
+        .runWith(sink)
+        .map { result =>
+          transid
+            .finished(
+              this,
+              start,
+              s"[ATT_GET] '$collName' completed: found attachment '$attachmentName' of document '$doc'")
+          result
+        }
+    }
+
+    // looks up the file first, so that a missing attachment is reported as NoDocumentException
+    def getGridFSFile = {
+      downloadStream
+        .gridFSFile()
+        .head()
+        .transform(
+          identity, {
+            case ex: MongoGridFSException if ex.getMessage.contains("File not found") =>
+              transid.finished(
+                this,
+                start,
+                s"[ATT_GET] '$collName', retrieving attachment '$attachmentName' of document '$doc'; not found.")
+              NoDocumentException("Not found on 'readAttachment'.")
+            case ex: MongoGridFSException =>
+              transid.failed(
+                this,
+                start,
+                s"[ATT_GET] '$collName' failed to get attachment '$attachmentName' of document '$doc'; error code: '${ex.getCode}'",
+                ErrorLevel)
+              // keep the driver exception as the cause so that its stack trace is not lost
+              new Exception("Unexpected mongodb server error: " + ex.getMessage, ex)
+            case t => t
+          })
+    }
+
+    val f = for {
+      file <- getGridFSFile
+      result <- readStream(file)
+    } yield result
+
+    reportFailure(
+      f,
+      failure =>
+        transid.failed(
+          this,
+          start,
+          s"[ATT_GET] '$dbName' internal error, name: '$attachmentName', doc: '$doc', failure: '${failure.getMessage}'",
+          ErrorLevel))
+
+  }
+
+  /**
+   * Deletes the attachments of `doc` from the external attachment store when one is configured.
+   * Without an external store nothing is deleted here and `true` is returned.
+   */
+  override protected[core] def deleteAttachments[T](doc: DocInfo)(implicit transid: TransactionId): Future[Boolean] =
+    attachmentStore match {
+      case Some(as) => as.deleteAttachments(doc.id)
+      // For MongoDB it is expected that the entire document is deleted.
+      case None => Future.successful(true)
+    }
+
+  /** Shuts down the external attachment store, if one is configured. */
+  override def shutdown(): Unit = {
+    // MongoClient maintains the connection pool internally, we don't need to manage it
+    attachmentStore match {
+      case Some(as) => as.shutdown()
+      case None     => ()
+    }
+  }
+
+  /**
+   * Registers `onFailure` to run when `f` fails with anything other than an `ArtifactStoreException`,
+   * and returns `f` itself unchanged.
+   */
+  private def reportFailure[T, U](f: Future[T], onFailure: Throwable => U): Future[T] = {
+    f.onComplete { outcome =>
+      // `outcome.failed` only holds a value when the future failed
+      outcome.failed.foreach {
+        case _: ArtifactStoreException => // These failures are intentional and shouldn't trigger the catcher.
+        case unexpected                => onFailure(unexpected)
+      }
+    }
+    f
+  }
+
+  // calculate the revision manually, to be compatible with couchdb's _rev 
field
+  private def revisionCalculate(doc: JsObject): (String, String) = {
+    val md: MessageDigest = MessageDigest.getInstance("MD5")

Review comment:
       great, I will update




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to