This is an automated email from the ASF dual-hosted git repository. cbickel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git
The following commit(s) were added to refs/heads/master by this push: new fbc0091 Cleanup script for unused entities in the whisks database. (#3382) fbc0091 is described below commit fbc009170295a23473b1ee390985d4b4dda4aad0 Author: Steffen Rost <lue-...@freenet.de> AuthorDate: Fri Mar 9 16:08:11 2018 +0100 Cleanup script for unused entities in the whisks database. (#3382) python script to cleanup whisk db if namespace does not exist anymore Co-authored-by: Christian Bickel <git...@cbickel.de> --- .../database/test/CleanUpWhisksDbSkriptTests.scala | 279 +++++++++++++++++++++ tools/db/cleanUpWhisks.py | 150 +++++++++++ 2 files changed, 429 insertions(+) diff --git a/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala new file mode 100644 index 0000000..1df1a48 --- /dev/null +++ b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package whisk.core.database.test + +import java.io.File +import java.time.Instant +import java.time.temporal.ChronoUnit + +import common.{StreamLogging, TestUtils, WhiskProperties, WskActorSystem} +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner +import org.scalatest.{FlatSpec, Matchers} +import pureconfig.loadConfigOrThrow +import spray.json._ +import spray.json.DefaultJsonProtocol._ +import whisk.core.database.CouchDbConfig +//import whisk.core.{ConfigKeys, WhiskConfig} +import whisk.core.ConfigKeys +import whisk.core.entity._ + +@RunWith(classOf[JUnitRunner]) +class CleanUpWhisksDbSkriptTests + extends FlatSpec + with Matchers + with DatabaseScriptTestUtils + with WskActorSystem + with StreamLogging { + + val cleanupScript = WhiskProperties.getFileRelativeToWhiskHome("tools/db/cleanUpWhisks.py").getAbsolutePath + val dbConfig = loadConfigOrThrow[CouchDbConfig](ConfigKeys.couchdb) + val authDBName = dbConfig.databaseFor[WhiskAuth] + + def runScript(dbUrl: String, whisksDbName: String, subjectsDbName: String) = { + println(s"Running script: $dbUrl, $whisksDbName, $subjectsDbName") + + val cmd = + Seq( + python, + cleanupScript, + "--dbUrl", + dbUrl, + "--dbNameWhisks", + whisksDbName, + "--dbNameSubjects", + subjectsDbName, + "--days", + "1", + "--docsPerRequest", + "1") + + val rr = TestUtils.runCmd(0, new File("."), cmd: _*) + + val Seq(marked, deleted, skipped, kept) = + Seq("marking: ", "deleting: ", "skipping: ", "keeping: ").map { linePrefix => + rr.stdout.lines.collect { + case line if line.startsWith(linePrefix) => line.replace(linePrefix, "") + }.toList + } + + println(s"marked: $marked") + println(s"deleted: $deleted") + println(s"skipped: $skipped") + println(s"kept: $kept") + + (marked, deleted, skipped, kept) + } + + behavior of "Cleanup whisksDb script" + + it should "mark documents for deletion if namespace does not exist" in { + // Create whisks db + val dbName = dbPrefix + "cleanup_whisks_test_mark_for_deletion" + 
val client = createDatabase(dbName, None) + + // Create document/action with random namespace + val documents = Map( + "whisksCleanTests/utils/actionName1" -> JsObject("namespace" -> JsString("whisksCleanTests/utils")), + "whisksCleanTests/utils/actionName2" -> JsObject("namespace" -> JsString("whisksCleanTests")), + "whisksCleanTests/actionName3" -> JsObject("namespace" -> JsString("whisksCleanTests"))) + + documents.foreach { + case (id, document) => + client.putDoc(id, document).futureValue + } + + // execute script + val (marked, _, _, _) = runScript(dbUrl, dbName, authDBName) + println(s"marked: $marked") + + // Check, that script marked document to be deleted: output + document from DB + val ids = documents.keys + println(s"ids: $ids") + marked should contain allElementsOf ids + + val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue + databaseResponse should be('right) + val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]] + + databaseDocuments.foreach { doc => + doc.fields("doc").asJsObject.fields.keys should contain("markedForDeletion") + } + + // Delete database + client.deleteDb().futureValue + } + + it should "delete marked for deletion documents if namespace does not exists" in { + // Create whisks db + val dbName = dbPrefix + "cleanup_whisks_test_delete_mark_for_deletion" + val client = createDatabase(dbName, None) + + // Create document/action with random namespace and markedForDeletion field + val documents = Map( + "whisksCleanTests/utils/actionName1" -> JsObject( + "namespace" -> JsString("whisksCleanTests/utils"), + "markedForDeletion" -> JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)), + "whisksCleanTests/utils/actionName2" -> JsObject( + "namespace" -> JsString("whisksCleanTests"), + "markedForDeletion" -> JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)), + "whisksCleanTests/actionName3" -> JsObject( + "namespace" -> JsString("whisksCleanTests"), + 
"markedForDeletion" -> JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli))) + + documents.foreach { + case (id, document) => + client.putDoc(id, document).futureValue + } + + // execute script + val (marked, deleted, _, _) = runScript(dbUrl, dbName, authDBName) + println(s"marked: $marked") + println(s"deleted: $deleted") + + // Check, that script deleted already marked documents from DB + val ids = documents.keys + println(s"ids: $ids") + marked shouldBe empty + + val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue + databaseResponse should be('right) + + val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]] + databaseDocuments shouldBe empty + + // Delete database + client.deleteDb().futureValue + } + + it should "not mark documents for deletion if namespace does exist" in { + // Create whisks db + val dbName = dbPrefix + "cleanup_whisks_test_not_mark_for_deletion" + val client = createDatabase(dbName, None) + + // Create document/action with whisk-system namespace + val documents = Map( + "whisk.system/utils" -> JsObject("namespace" -> JsString("whisk.system")), + "whisk.system/samples/helloWorld" -> JsObject("namespace" -> JsString("whisk.system/samples")), + "whisk.system/utils/namespace" -> JsObject("namespace" -> JsString("whisk.system/utils"))) + + documents.foreach { + case (id, document) => + client.putDoc(id, document).futureValue + } + + // execute script + val (_, _, _, kept) = runScript(dbUrl, dbName, authDBName) + println(s"kept: $kept") + + // Check, that script did not mark documents for deletion + val ids = documents.keys + println(s"ids: $ids") + kept should contain allElementsOf ids + + val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue + databaseResponse should be('right) + + val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]] + val databaseDocumentIDs = 
databaseDocuments.map(_.fields("id").convertTo[String]) + databaseDocumentIDs should contain allElementsOf ids + + // Delete database + client.deleteDb().futureValue + } + + it should "skip design documents" in { + // Create whisks db + val dbName = dbPrefix + "cleanup_whisks_test_skip_design_documents" + val client = createDatabase(dbName, None) + + // Create design documents + val documents = Map( + "_design/all-whisks.v2.1.0" -> JsObject("language" -> JsString("javascript")), + "_design/snapshotFilters" -> JsObject("language" -> JsString("javascript")), + "_design/whisks.v2.1.0" -> JsObject("language" -> JsString("javascript"))) + + documents.foreach { + case (id, document) => + client.putDoc(id, document).futureValue + } + + // execute script + val (_, _, skipped, _) = runScript(dbUrl, dbName, authDBName) + println(s"skipped: $skipped") + + // Check, that script skipped design documents + val ids = documents.keys + println(s"ids: $ids") + skipped should contain allElementsOf ids + + val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue + databaseResponse should be('right) + + val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]] + + val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String]) + databaseDocumentIDs should contain allElementsOf ids + + // Delete database + client.deleteDb().futureValue + } + + it should "not delete marked for deletion documents if namespace does exists" in { + // Create whisks db + val dbName = dbPrefix + "cleanup_whisks_test_not_delete_mark_for_deletion" + val client = createDatabase(dbName, None) + + // Create document/action with whisk-system namespace and markedForDeletion field + val documents = Map( + "whisk.system/utils" -> JsObject( + "namespace" -> JsString("whisk.system"), + "markedForDeletion" -> JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)), + "whisk.system/samples/helloWorld" -> JsObject( + "namespace" -> 
JsString("whisk.system/samples"), + "markedForDeletion" -> JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)), + "whisk.system/utils/namespace" -> JsObject( + "namespace" -> JsString("whisk.system/utils"), + "markedForDeletion" -> JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli))) + + documents.foreach { + case (id, document) => + client.putDoc(id, document).futureValue + } + + // execute script + val (_, _, _, kept) = runScript(dbUrl, dbName, authDBName) + println(s"kept: $kept") + + // Check, that script kept documents in DB + val ids = documents.keys + println(s"ids: $ids") + kept should contain allElementsOf ids + + val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue + databaseResponse should be('right) + + val databaseDocuments = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]] + + val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String]) + databaseDocumentIDs should contain allElementsOf ids + + // Delete database + client.deleteDb().futureValue + } +} diff --git a/tools/db/cleanUpWhisks.py b/tools/db/cleanUpWhisks.py new file mode 100755 index 0000000..83b47bb --- /dev/null +++ b/tools/db/cleanUpWhisks.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +"""Python script to delete whisks entries having none existent ns. + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +""" + +import argparse +import time +import couchdb.client + +skipWhisks = 0 + +try: + long # Python 2 +except NameError: + long = int # Python 3 + +HOUR = 1000 * 60 * 60 +DAY = HOUR * 24 + +# +# simple ring buffer like list +# +class SimpleRingBuffer: + def __init__(self, size): + self.index = -1 + self.data = [] + self.maxsize = size * 2 + + def append(self, ns, bool): + + self.index=(self.index+2)%self.maxsize + + if len(self.data) < self.maxsize: + self.data.append(ns) + self.data.append(bool) + else: + self.data[self.index-1]=ns + self.data[(self.index)]=bool + + def getself(self): + return self.data + + def get(self, ns): + if ns in self.data: + return self.data[self.data.index(ns)+1] + else: + return None + +# +# mark whisks entry for deletion or delete if already marked +# +def deleteWhisk(dbWhisks, wdoc): + + global skipWhisks + + wdocd = dbWhisks[wdoc['id']] + if not 'markedForDeletion' in wdocd: + print('marking: {0}'.format(wdoc['id'])) + dts = int(time.time() * 1000) + wdocd['markedForDeletion'] = dts + dbWhisks.save(wdocd) + else: + dts = wdocd['markedForDeletion'] + now = int(time.time() * 1000) + elapsedh = int((now - dts) / HOUR) + elapsedd = int((now - dts) / DAY) + + if elapsedd >= args.days: + print('deleting: {0}'.format(wdoc['id'])) + dbWhisks.delete(wdocd) + skipWhisks-=1 + else: + print('marked: {0}, elapsed hours: {1}, elapsed days: {2}'.format(wdoc['id'], elapsedh, elapsedd)) + + +# +# check subjects db for existence of ns +# +def checkNamespace(dbSubjects, namespace): + + while True: + + allNamespaces = 
dbSubjects.view('subjects/identities', startkey=[namespace], endkey=[namespace]) + + if allNamespaces: + return True + else: + return False + + +# +# check whisks db for entries having non-existent ns +# +def checkWhisks(args): + + dbWhisks = couchdb.client.Server(args.dbUrl)[args.dbNameWhisks] + dbSubjects = couchdb.client.Server(args.dbUrl)[args.dbNameSubjects] + + rb = SimpleRingBuffer(args.bufferLen) + + global skipWhisks + while True: + allWhisks = dbWhisks.view('_all_docs', limit=args.docsPerRequest, skip=skipWhisks) + skipWhisks += args.docsPerRequest + if allWhisks: + for wdoc in allWhisks: + if wdoc['id'].startswith('_design/'): + print('skipping: {0}'.format(wdoc['id'])) + continue + namespace = wdoc['id'][0:wdoc['id'].find('/')] + + exists = rb.get(namespace) + if exists == None: + exists = checkNamespace(dbSubjects, namespace) + rb.append(namespace, exists) + + if exists: + print('keeping: {0}'.format(wdoc['id'])) + else: + deleteWhisk(dbWhisks, wdoc) + else: + return + + +parser = argparse.ArgumentParser(description="Utility to mark/delete whisks entries where the ns does not exist in the subjects database.") +parser.add_argument("--dbUrl", required=True, help="Server URL of the database, that has to be cleaned of old activations. E.g. 'https://xxx:y...@domain.couch.com:443'") +parser.add_argument("--dbNameWhisks", required=True, help="Name of the Whisks Database of the whisks entries to be marked for deletion or deleted if already marked.") +parser.add_argument("--dbNameSubjects", required=True, help="Name of the Subjects Database.") +parser.add_argument("--days", required=True, type=int, default=7, help="How many days whisks keep entries marked for deletion before deleting them.") +parser.add_argument("--docsPerRequest", type=int, default=200, help="Number of documents handled on each CouchDb Request. Default is 200.") +parser.add_argument("--bufferLen", type=int, default=100, help="Maximum buffer length to cache already checked ns. 
Default is 100.") +args = parser.parse_args() + +checkWhisks(args) -- To stop receiving notification emails like this one, please contact cbic...@apache.org.