This is an automated email from the ASF dual-hosted git repository.

cbickel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git


The following commit(s) were added to refs/heads/master by this push:
     new fbc0091  Cleanup script for unused entities in the whisks database. 
(#3382)
fbc0091 is described below

commit fbc009170295a23473b1ee390985d4b4dda4aad0
Author: Steffen Rost <lue-...@freenet.de>
AuthorDate: Fri Mar 9 16:08:11 2018 +0100

    Cleanup script for unused entities in the whisks database. (#3382)
    
    python script to cleanup whisk db if namespace does not exist anymore
    
    Co-authored-by: Christian Bickel <git...@cbickel.de>
---
 .../database/test/CleanUpWhisksDbSkriptTests.scala | 279 +++++++++++++++++++++
 tools/db/cleanUpWhisks.py                          | 150 +++++++++++
 2 files changed, 429 insertions(+)

diff --git 
a/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala
 
b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala
new file mode 100644
index 0000000..1df1a48
--- /dev/null
+++ 
b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package whisk.core.database.test
+
+import java.io.File
+import java.time.Instant
+import java.time.temporal.ChronoUnit
+
+import common.{StreamLogging, TestUtils, WhiskProperties, WskActorSystem}
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+import org.scalatest.{FlatSpec, Matchers}
+import pureconfig.loadConfigOrThrow
+import spray.json._
+import spray.json.DefaultJsonProtocol._
+import whisk.core.database.CouchDbConfig
+//import whisk.core.{ConfigKeys, WhiskConfig}
+import whisk.core.ConfigKeys
+import whisk.core.entity._
+
+@RunWith(classOf[JUnitRunner])
+class CleanUpWhisksDbSkriptTests
+    extends FlatSpec
+    with Matchers
+    with DatabaseScriptTestUtils
+    with WskActorSystem
+    with StreamLogging {
+
+  val cleanupScript = 
WhiskProperties.getFileRelativeToWhiskHome("tools/db/cleanUpWhisks.py").getAbsolutePath
+  val dbConfig = loadConfigOrThrow[CouchDbConfig](ConfigKeys.couchdb)
+  val authDBName = dbConfig.databaseFor[WhiskAuth]
+
+  /**
+   * Runs the cleanUpWhisks.py script against the given databases and groups the document ids
+   * it reports on stdout by the action the script printed for them.
+   *
+   * The script is always invoked with `--days 1` and `--docsPerRequest 1`.
+   *
+   * @param dbUrl URL of the database server, passed as `--dbUrl`
+   * @param whisksDbName name of the whisks database, passed as `--dbNameWhisks`
+   * @param subjectsDbName name of the subjects database, passed as `--dbNameSubjects`
+   * @return the ids reported as (marked, deleted, skipped, kept), in that order
+   */
+  def runScript(dbUrl: String,
+                whisksDbName: String,
+                subjectsDbName: String): (List[String], List[String], List[String], List[String]) = {
+    println(s"Running script: $dbUrl, $whisksDbName, $subjectsDbName")
+
+    val cmd =
+      Seq(
+        python,
+        cleanupScript,
+        "--dbUrl",
+        dbUrl,
+        "--dbNameWhisks",
+        whisksDbName,
+        "--dbNameSubjects",
+        subjectsDbName,
+        "--days",
+        "1",
+        "--docsPerRequest",
+        "1")
+
+    // NOTE(review): the leading 0 is presumably the expected exit code - confirm against TestUtils.runCmd
+    val rr = TestUtils.runCmd(0, new File("."), cmd: _*)
+
+    val Seq(marked, deleted, skipped, kept) =
+      Seq("marking: ", "deleting: ", "skipping: ", "keeping: ").map { linePrefix =>
+        rr.stdout.lines.collect {
+          // stripPrefix removes only the leading marker; replace() would also remove the same
+          // text if it happened to occur inside the document id
+          case line if line.startsWith(linePrefix) => line.stripPrefix(linePrefix)
+        }.toList
+      }
+
+    println(s"marked:  $marked")
+    println(s"deleted: $deleted")
+    println(s"skipped: $skipped")
+    println(s"kept:    $kept")
+
+    (marked, deleted, skipped, kept)
+  }
+
+  behavior of "Cleanup whisksDb script"
+
+  it should "mark documents for deletion if namespace does not exist" in {
+    // Create a scratch whisks db used only by this test
+    val dbName = dbPrefix + "cleanup_whisks_test_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    // try/finally so the scratch database is removed even if an assertion below fails
+    try {
+      // Create documents/actions whose namespace is not expected to exist in the subjects db
+      val documents = Map(
+        "whisksCleanTests/utils/actionName1" -> JsObject("namespace" -> JsString("whisksCleanTests/utils")),
+        "whisksCleanTests/utils/actionName2" -> JsObject("namespace" -> JsString("whisksCleanTests")),
+        "whisksCleanTests/actionName3" -> JsObject("namespace" -> JsString("whisksCleanTests")))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (marked, _, _, _) = runScript(dbUrl, dbName, authDBName)
+      println(s"marked: $marked")
+
+      // Check that the script reported every document as marked ...
+      val ids = documents.keys
+      println(s"ids: $ids")
+      marked should contain allElementsOf ids
+
+      // ... and that every stored document now carries the markedForDeletion field
+      val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue
+      databaseResponse should be('right)
+      val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]]
+
+      databaseDocuments.foreach { doc =>
+        doc.fields("doc").asJsObject.fields.keys should contain("markedForDeletion")
+      }
+    } finally {
+      // Delete database
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "delete marked for deletion documents if namespace does not exists" in {
+    // Create a scratch whisks db used only by this test
+    val dbName = dbPrefix + "cleanup_whisks_test_delete_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    // try/finally so the scratch database is removed even if an assertion below fails
+    try {
+      // Marked 8 days ago, i.e. older than the 1 day threshold runScript passes via --days
+      val markedAt = JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)
+
+      // Create documents/actions with a non-existing namespace and a markedForDeletion field
+      val documents = Map(
+        "whisksCleanTests/utils/actionName1" -> JsObject(
+          "namespace" -> JsString("whisksCleanTests/utils"),
+          "markedForDeletion" -> markedAt),
+        "whisksCleanTests/utils/actionName2" -> JsObject(
+          "namespace" -> JsString("whisksCleanTests"),
+          "markedForDeletion" -> markedAt),
+        "whisksCleanTests/actionName3" -> JsObject(
+          "namespace" -> JsString("whisksCleanTests"),
+          "markedForDeletion" -> markedAt))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (marked, deleted, _, _) = runScript(dbUrl, dbName, authDBName)
+      println(s"marked: $marked")
+      println(s"deleted: $deleted")
+
+      // Check that the script reported the already marked documents as deleted, not as marked
+      val ids = documents.keys
+      println(s"ids: $ids")
+      marked shouldBe empty
+      deleted should contain allElementsOf ids
+
+      // ... and that nothing is left in the database
+      val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue
+      databaseResponse should be('right)
+
+      val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]]
+      databaseDocuments shouldBe empty
+    } finally {
+      // Delete database
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "not mark documents for deletion if namespace does exist" in {
+    // Create a scratch whisks db used only by this test
+    val dbName = dbPrefix + "cleanup_whisks_test_not_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    // try/finally so the scratch database is removed even if an assertion below fails
+    try {
+      // Create documents/actions in the whisk.system namespace
+      // NOTE(review): relies on whisk.system being present in the subjects db - confirm for the test setup
+      val documents = Map(
+        "whisk.system/utils" -> JsObject("namespace" -> JsString("whisk.system")),
+        "whisk.system/samples/helloWorld" -> JsObject("namespace" -> JsString("whisk.system/samples")),
+        "whisk.system/utils/namespace" -> JsObject("namespace" -> JsString("whisk.system/utils")))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (_, _, _, kept) = runScript(dbUrl, dbName, authDBName)
+      println(s"kept: $kept")
+
+      // Check that the script reported every document as kept ...
+      val ids = documents.keys
+      println(s"ids: $ids")
+      kept should contain allElementsOf ids
+
+      // ... and that all of them are still stored in the database
+      val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue
+      databaseResponse should be('right)
+
+      val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]]
+      val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String])
+      databaseDocumentIDs should contain allElementsOf ids
+    } finally {
+      // Delete database
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "skip design documents" in {
+    // Create a scratch whisks db used only by this test
+    val dbName = dbPrefix + "cleanup_whisks_test_skip_design_documents"
+    val client = createDatabase(dbName, None)
+
+    // try/finally so the scratch database is removed even if an assertion below fails
+    try {
+      // Create design documents (ids starting with "_design/")
+      val documents = Map(
+        "_design/all-whisks.v2.1.0" -> JsObject("language" -> JsString("javascript")),
+        "_design/snapshotFilters" -> JsObject("language" -> JsString("javascript")),
+        "_design/whisks.v2.1.0" -> JsObject("language" -> JsString("javascript")))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (_, _, skipped, _) = runScript(dbUrl, dbName, authDBName)
+      println(s"skipped: $skipped")
+
+      // Check that the script reported every design document as skipped ...
+      val ids = documents.keys
+      println(s"ids: $ids")
+      skipped should contain allElementsOf ids
+
+      // ... and that all of them are still stored in the database
+      val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue
+      databaseResponse should be('right)
+
+      val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]]
+
+      val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String])
+      databaseDocumentIDs should contain allElementsOf ids
+    } finally {
+      // Delete database
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "not delete marked for deletion documents if namespace does exists" in {
+    // Create a scratch whisks db used only by this test
+    val dbName = dbPrefix + "cleanup_whisks_test_not_delete_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    // try/finally so the scratch database is removed even if an assertion below fails
+    try {
+      // Marked 8 days ago, i.e. older than the 1 day threshold runScript passes via --days
+      val markedAt = JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)
+
+      // Create documents/actions in the whisk.system namespace that carry a markedForDeletion field
+      val documents = Map(
+        "whisk.system/utils" -> JsObject(
+          "namespace" -> JsString("whisk.system"),
+          "markedForDeletion" -> markedAt),
+        "whisk.system/samples/helloWorld" -> JsObject(
+          "namespace" -> JsString("whisk.system/samples"),
+          "markedForDeletion" -> markedAt),
+        "whisk.system/utils/namespace" -> JsObject(
+          "namespace" -> JsString("whisk.system/utils"),
+          "markedForDeletion" -> markedAt))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (_, _, _, kept) = runScript(dbUrl, dbName, authDBName)
+      println(s"kept: $kept")
+
+      // Check that the script reported every document as kept ...
+      val ids = documents.keys
+      println(s"ids: $ids")
+      kept should contain allElementsOf ids
+
+      // ... and that all of them are still stored in the database
+      val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue
+      databaseResponse should be('right)
+
+      val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]]
+
+      val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String])
+      databaseDocumentIDs should contain allElementsOf ids
+    } finally {
+      // Delete database
+      client.deleteDb().futureValue
+    }
+  }
+}
diff --git a/tools/db/cleanUpWhisks.py b/tools/db/cleanUpWhisks.py
new file mode 100755
index 0000000..83b47bb
--- /dev/null
+++ b/tools/db/cleanUpWhisks.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+"""Python script to delete whisks entries having none existent ns.
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+"""
+
+import argparse
+import time
+import couchdb.client
+
+skipWhisks = 0
+
+try:
+    long        # Python 2
+except NameError:
+    long = int  # Python 3
+
+HOUR = 1000 * 60 * 60
+DAY = HOUR * 24
+
+#
+# simple ring buffer like list
+#
+class SimpleRingBuffer:
+    def __init__(self, size):
+        self.index = -1
+        self.data = []
+        self.maxsize = size * 2
+
+    def append(self, ns, bool):
+
+        self.index=(self.index+2)%self.maxsize
+
+        if len(self.data) < self.maxsize:
+            self.data.append(ns)
+            self.data.append(bool)
+        else:
+            self.data[self.index-1]=ns
+            self.data[(self.index)]=bool
+
+    def getself(self):
+        return self.data
+
+    def get(self, ns):
+        if ns in self.data:
+            return self.data[self.data.index(ns)+1]
+        else:
+            return None
+
+#
+# mark whisks entry for deletion of delete if already marked
+#
+def deleteWhisk(dbWhisks, wdoc):
+
+    global skipWhisks
+
+    wdocd = dbWhisks[wdoc['id']]
+    if not 'markedForDeletion' in wdocd:
+        print('marking: {0}'.format(wdoc['id']))
+        dts = int(time.time() * 1000)
+        wdocd['markedForDeletion'] = dts
+        dbWhisks.save(wdocd)
+    else:
+        dts = wdocd['markedForDeletion']
+        now = int(time.time() * 1000)
+        elapsedh = int((now - dts) / HOUR)
+        elapsedd = int((now - dts) / DAY)
+
+        if elapsedd >= args.days:
+            print('deleting: {0}'.format(wdoc['id']))
+            dbWhisks.delete(wdocd)
+            skipWhisks-=1
+        else:
+            print('marked: {0}, elapsed hours: {1}, elapsed days: 
{2}'.format(wdoc['id'], elapsedh, elapsedd))
+
+
+#
+# check subjects db for existence of ns
+#
+def checkNamespace(dbSubjects, namespace):
+
+    while True:
+
+        allNamespaces = dbSubjects.view('subjects/identities', 
startkey=[namespace], endkey=[namespace])
+
+        if allNamespaces:
+            return True
+        else:
+            return False
+
+
+#
+# check whisks db for entries having none existent ns
+#
+def checkWhisks(args):
+
+    dbWhisks = couchdb.client.Server(args.dbUrl)[args.dbNameWhisks]
+    dbSubjects = couchdb.client.Server(args.dbUrl)[args.dbNameSubjects]
+
+    rb = SimpleRingBuffer(args.bufferLen)
+
+    global skipWhisks
+    while True:
+        allWhisks = dbWhisks.view('_all_docs', limit=args.docsPerRequest, 
skip=skipWhisks)
+        skipWhisks += args.docsPerRequest
+        if allWhisks:
+            for wdoc in allWhisks:
+                if wdoc['id'].startswith('_design/'):
+                    print('skipping: {0}'.format(wdoc['id']))
+                    continue
+                namespace = wdoc['id'][0:wdoc['id'].find('/')]
+
+                exists = rb.get(namespace)
+                if exists == None:
+                    exists = checkNamespace(dbSubjects, namespace)
+                    rb.append(namespace, exists)
+
+                if exists:
+                    print('keeping: {0}'.format(wdoc['id']))
+                else:
+                    deleteWhisk(dbWhisks, wdoc)
+        else:
+            return
+
+
+parser = argparse.ArgumentParser(description="Utility to mark/delete whisks 
entries where the ns does not exist in the subjects database.")
+parser.add_argument("--dbUrl", required=True, help="Server URL of the 
database, that has to be cleaned of old activations. E.g. 
'https://xxx:y...@domain.couch.com:443'")
+parser.add_argument("--dbNameWhisks", required=True, help="Name of the Whisks 
Database of the whisks entries to be marked for deletion or deleted if already 
marked.")
+parser.add_argument("--dbNameSubjects", required=True, help="Name of the 
Subjects Database.")
+parser.add_argument("--days", required=True, type=int, default=7, help="How 
many days whisks keep entries marked for deletion before deleting them.")
+parser.add_argument("--docsPerRequest", type=int, default=200, help="Number of 
documents handled on each CouchDb Request. Default is 200.")
+parser.add_argument("--bufferLen", type=int, default=100, help="Maximum buffer 
length to cache already checked ns. Default is 100.")
+args = parser.parse_args()
+
+checkWhisks(args)

-- 
To stop receiving notification emails like this one, please contact
cbic...@apache.org.

Reply via email to