This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new fbace5e03 KUDU-3326 Add soft-delete to Python client
fbace5e03 is described below

commit fbace5e035e55379030d36710b2edef691f12983
Author: Marton Greber <[email protected]>
AuthorDate: Thu May 18 18:14:05 2023 +0000

    KUDU-3326 Add soft-delete to Python client
    
    This is a follow-up change to 7b6b6b636818d3e22a3939fde77689dce84e88b2.
    
    Change-Id: Ia936c43dc52888eb4dc5827e944f18e1ff89940c
    Reviewed-on: http://gerrit.cloudera.org:8080/19901
    Tested-by: Kudu Jenkins
    Reviewed-by: Yingchun Lai <[email protected]>
---
 python/kudu/client.pyx           | 100 +++++++++++++++++++-
 python/kudu/libkudu_client.pxd   |   6 ++
 python/kudu/tests/common.py      |   6 +-
 python/kudu/tests/test_client.py | 192 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 299 insertions(+), 5 deletions(-)

diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index edf56e3c7..fdebff0c5 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -497,7 +497,23 @@ cdef class Client:
 
     def delete_table(self, table_name):
         """
-        Delete a Kudu table. Raises KuduNotFound if the table does not exist.
+        Delete/drop a Kudu table without reserving. Raises KuduNotFound
+        if the table does not exist.
+
+        Notes
+        -----
+        The deleted table may turn to soft-deleted status with the flag
+        default_deleted_table_reserve_seconds set to nonzero on the master side.
+
+        The delete operation or drop operation means that the service will directly
+        delete the table after receiving the instruction. Which means that once we
+        delete the table by mistake, we have no way to recall the deleted data.
+        We have added a new API @soft_delete_table to allow the deleted data to be
+        reserved for a period of time, which means that the wrongly deleted data may
+        be recalled. In order to be compatible with the previous versions, this interface
+        will continue to directly delete tables without reserving the table.
+
+        Refer to soft_delete_table for detailed usage examples.
 
         Parameters
         ----------
@@ -505,6 +521,53 @@ cdef class Client:
         """
         check_status(self.cp.DeleteTable(tobytes(table_name)))
 
+    def soft_delete_table(self, table_name, reserve_seconds=None):
+        """
+        Soft delete/drop a table.
+
+        Notes
+        -----
+        Usage Example1:
+        Equal to delete_table(table_name) and the table will not be reserved.
+
+        client.soft_delete_table(table_name)
+
+        Usage Example2:
+        The table will be reserved for 600s after delete operation.
+        We can recall the table in time after the delete.
+
+        client.soft_delete_table(table_name, 600)
+        client.recall_table(table_id)
+
+        Parameters
+        ----------
+        table_name : string
+          Name of the table to drop.
+        reserve_seconds : int
+          Reserve seconds after being deleted.
+        """
+        if reserve_seconds is not None:
+            check_status(self.cp.SoftDeleteTable(tobytes(table_name), reserve_seconds))
+        else:
+            check_status(self.cp.SoftDeleteTable(tobytes(table_name)))
+
+    def recall_table(self, table_id, new_table_name=None):
+        """
+        Recall a deleted but still reserved table.
+
+        Parameters
+        ----------
+        table_id : string
+          ID of the table to recall.
+        new_table_name : string
+          New table name for the recalled table. The recalled table will use the original
+          table name if the parameter is empty string (i.e. "").
+        """
+        if new_table_name is not None:
+            check_status(self.cp.RecallTable(tobytes(table_id), tobytes(new_table_name)))
+        else:
+            check_status(self.cp.RecallTable(tobytes(table_id)))
+
     def table_exists(self, table_name):
         """Return True if the indicated table exists in the Kudu cluster.
 
@@ -563,8 +626,8 @@ cdef class Client:
 
     def list_tables(self, match_substring=None):
         """
-        Retrieve a list of table names in the Kudu cluster with an optional
-        substring filter.
+        Retrieve a list of non-soft-deleted table names in the Kudu cluster with
+        an optional substring filter.
 
         Parameters
         ----------
@@ -592,6 +655,37 @@ cdef class Client:
             result.append(frombytes(tables[i]))
         return result
 
+    def list_soft_deleted_tables(self, match_substring=None):
+        """
+        Retrieve a list of soft-deleted table names in the Kudu cluster with
+        an optional substring filter.
+
+        Parameters
+        ----------
+        match_substring : string, optional
+          If passed, the string must be exactly contained in the table names
+
+        Returns
+        -------
+        tables : list[string]
+          Table names returned from Kudu
+        """
+        cdef:
+            vector[string] tables
+            string c_match
+            size_t i
+
+        if match_substring is not None:
+            c_match = tobytes(match_substring)
+            check_status(self.cp.ListSoftDeletedTables(&tables, c_match))
+        else:
+            check_status(self.cp.ListSoftDeletedTables(&tables))
+
+        result = []
+        for i in range(tables.size()):
+            result.append(frombytes(tables[i]))
+        return result
+
     def list_tablet_servers(self):
         """
         Retrieve a list of tablet servers currently running in the Kudu cluster
diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd
index 9cd375f4d..a17259c23 100644
--- a/python/kudu/libkudu_client.pxd
+++ b/python/kudu/libkudu_client.pxd
@@ -558,6 +558,10 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil:
     cdef cppclass KuduClient:
 
         Status DeleteTable(const string& table_name)
+        Status SoftDeleteTable(const string& table_name)
+        Status SoftDeleteTable(const string& table_name, uint32_t reserve_seconds)
+        Status RecallTable(const string& table_id)
+        Status RecallTable(const string& table_id, const string& new_table_name)
         Status OpenTable(const string& table_name,
                          shared_ptr[KuduTable]* table)
         Status GetTableSchema(const string& table_name, KuduSchema* schema)
@@ -570,6 +574,8 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil:
 
         Status ListTables(vector[string]* tables)
         Status ListTables(vector[string]* tables, const string& filter)
+        Status ListSoftDeletedTables(vector[string]* tables)
+        Status ListSoftDeletedTables(vector[string]* tables, const string& filter)
 
         Status ListTabletServers(vector[KuduTabletServer*]* tablet_servers)
 
diff --git a/python/kudu/tests/common.py b/python/kudu/tests/common.py
index 85b4fefad..c86aac4a8 100644
--- a/python/kudu/tests/common.py
+++ b/python/kudu/tests/common.py
@@ -91,7 +91,11 @@ class KuduTestBase(object):
                    "extraMasterFlags" : [
                        "--default_num_replicas=1",
                        "--ipki_ca_key_size=2048",
-                       "--ipki_server_key_size=2048" ],
+                       "--ipki_server_key_size=2048",
+                       # TODO: once setting flags per unittest is implemented,
+                       # remove this line here and add it to the test:
+                       # 'test_soft_delete_and_recall_table_after_reserve_time'
+                       "--check_expired_table_interval_seconds=2" ],
                    "extraTserverFlags" : [ "--ipki_server_key_size=2048" ],
                    "mini_oidc_options" :
                    { "expiration_time" : "300000",
diff --git a/python/kudu/tests/test_client.py b/python/kudu/tests/test_client.py
index d3bb965e0..0ce6d9bb4 100755
--- a/python/kudu/tests/test_client.py
+++ b/python/kudu/tests/test_client.py
@@ -24,11 +24,13 @@ from kudu.client import (Partitioning,
                          ENCRYPTION_REQUIRED,
                          ENCRYPTION_REQUIRED_REMOTE)
 from kudu.errors import (KuduInvalidArgument,
-                         KuduBadStatus)
+                         KuduBadStatus,
+                         KuduNotFound)
 from kudu.schema import (Schema,
                          KuduValue)
 import kudu
 import datetime
+import time
 from pytz import utc
 try:
     from urllib.error import HTTPError
@@ -97,6 +99,13 @@ class TestClient(KuduTestBase, CompatUnitTest):
         for name in to_create:
             self.client.delete_table(name)
 
+        self.client.create_table('foo4', schema, partitioning)
+        assert len(self.client.list_soft_deleted_tables()) == 0
+        self.client.soft_delete_table('foo4', 1000)
+        assert len(self.client.list_soft_deleted_tables()) == 1
+        # Force delete the table
+        self.client.soft_delete_table('foo4')
+
     def test_is_multimaster(self):
         assert self.client.is_multimaster
 
@@ -959,3 +968,184 @@ class TestMonoDelta(CompatUnitTest):
 
         delta = kudu.timedelta(nanos=3500)
         assert delta.to_nanos() == 3500
+
+class TestSoftDelete(KuduTestBase, CompatUnitTest):
+
+    def setUp(self):
+        pass
+
+    def create_test_table(self, table_name, nrows):
+        self.client.create_table(table_name, self.schema, self.partitioning)
+        table = self.client.table(table_name)
+        session = self.client.new_session()
+
+        # Insert a few rows, and scan them back. This is to populate the MetaCache.
+        for i in range(nrows):
+            op = table.new_insert((i, 2, 'hello'))
+            session.apply(op)
+        session.flush()
+
+        scanner = table.scanner().open()
+        tuples = scanner.read_all_tuples()
+        assert len(tuples) == nrows
+
+    def test_soft_deleted_table_alter_operations(self):
+        try:
+            table_name = 'test_soft_deleted_table_alter_operations'
+            self.create_test_table(table_name, 10)
+            table = self.client.table(table_name)
+
+            # Soft-delete the table.
+            assert len(self.client.list_tables()) == 2
+            assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name])
+            assert len(self.client.list_soft_deleted_tables()) == 0
+            self.client.soft_delete_table(table_name, 600)
+
+            # Soft-deleted table is still visible
+            assert self.client.table_exists(table_name) == True
+
+            # The table has been moved into the soft_deleted list.
+            assert len(self.client.list_tables()) == 1
+            assert self.client.list_tables() == [self.ex_table]
+            assert len(self.client.list_soft_deleted_tables()) == 1
+            assert self.client.list_soft_deleted_tables() == [table_name]
+
+            # Altering a soft-deleted table is not allowed.
+            # Not allowed to rename.
+            alterer = self.client.new_table_alterer(table)
+            alterer.rename("new_table_name")
+            error_msg = 'soft_deleted table {0} should not be altered'.format(table_name)
+            with self.assertRaisesRegex(KuduInvalidArgument, error_msg):
+                alterer.alter()
+
+            # Not allowed to add column.
+            alterer = self.client.new_table_alterer(table)
+            alterer.add_column('new_column', type_='int64', default=0)
+            error_msg = 'soft_deleted table {0} should not be altered'.format(table_name)
+            with self.assertRaisesRegex(KuduInvalidArgument, error_msg):
+                alterer.alter()
+
+            # Not allowed to delete the soft-deleted table with new reserve_seconds value.
+            error_msg = 'soft_deleted table {0} should not be soft deleted again'.format(table_name)
+            with self.assertRaisesRegex(KuduInvalidArgument, error_msg):
+                self.client.soft_delete_table(table_name, 600)
+
+            # Not allowed to set extra configs.
+            # TODO: Once the Python client supports changing extra configs through table alterer,
+            # check that this can't be performed for soft-deleted table.
+
+            # It is not allowed to create a new table with the same name.
+            error_msg = 'table {0} already exists with id {1}'.format(table_name, table.id)
+            with self.assertRaisesRegex(KuduBadStatus, error_msg):
+                self.client.create_table(table_name, self.schema, self.partitioning)
+        finally:
+            try:
+                # Force delete the soft-deleted table.
+                self.client.delete_table(table_name)
+            except:
+                pass
+
+    def test_soft_delete_and_recall_table_positive(self):
+        try:
+            # Create and open the table before soft-deleting it.
+            table_name = "test_soft_delete_and_recall_table_positive"
+            nrows = 10
+            self.create_test_table(table_name, nrows)
+            table = self.client.table(table_name)
+            session = self.client.new_session()
+
+            # Remove the table. Perform sanity checks.
+            self.client.soft_delete_table(table_name, 600)
+            assert len(self.client.list_tables()) == 1
+            assert self.client.list_tables() == [self.ex_table]
+            assert len(self.client.list_soft_deleted_tables()) == 1
+            assert self.client.list_soft_deleted_tables() == [table_name]
+
+            # Read and write are allowed for soft-deleted table.
+            for i in range(nrows):
+                op = table.new_insert((i+nrows, 2, 'hello'))
+                session.apply(op)
+            session.flush()
+
+            scanner = table.scanner().open()
+            tuples = scanner.read_all_tuples()
+            assert len(tuples) == 2*nrows
+
+            # Recall and reopen table. Perform sanity checks.
+            self.client.recall_table(table.id)
+            assert len(self.client.list_tables()) == 2
+            assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name])
+            assert len(self.client.list_soft_deleted_tables()) == 0
+
+            # Check the data in the table.
+            scanner = table.scanner().open()
+            tuples = scanner.read_all_tuples()
+            assert len(tuples) == 2*nrows
+        finally:
+            try:
+                # Force delete the soft-deleted table.
+                self.client.delete_table(table_name)
+            except:
+                pass
+
+    def test_soft_delete_and_recall_table_with_new_name_positive(self):
+        try:
+            # Create and open the table before soft-deleting it.
+            table_name = "test_soft_delete_and_recall_table_with_new_name_positive"
+            nrows = 10
+            self.create_test_table(table_name, nrows)
+            table = self.client.table(table_name)
+
+            # Remove the table. Perform sanity checks.
+            self.client.soft_delete_table(table_name, 600)
+            assert len(self.client.list_tables()) == 1
+            assert self.client.list_tables() == [self.ex_table]
+            assert len(self.client.list_soft_deleted_tables()) == 1
+            assert self.client.list_soft_deleted_tables() == [table_name]
+
+            # Recall and reopen table. Perform sanity checks.
+            table_name = "new_table_name"
+            self.client.recall_table(table.id, table_name)
+            assert len(self.client.list_tables()) == 2
+            assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name])
+            assert len(self.client.list_soft_deleted_tables()) == 0
+
+            # Re-open the table. Check the data in the table.
+            table = self.client.table(table_name)
+            scanner = table.scanner().open()
+            tuples = scanner.read_all_tuples()
+            assert len(tuples) == nrows
+            assert table.name == table_name
+        finally:
+            try:
+                # Force delete the soft-deleted table.
+                self.client.delete_table(table_name)
+            except:
+                pass
+
+    def test_soft_delete_and_recall_table_after_reserve_time(self):
+        # Create and open the table before soft-deleting it.
+        table_name = "test_soft_delete_and_recall_table_after_reserve_time"
+        self.create_test_table(table_name, 10)
+
+        # Remove the table. Wait until the table is removed completely.
+        self.client.soft_delete_table(table_name, 1)
+        error_msg = 'the table does not exist'
+        table_exists = True
+        while table_exists:
+            try:
+                table = self.client.table(table_name)
+            except KuduNotFound as kudu_error:
+                assert error_msg in str(kudu_error)
+                table_exists = False
+            time.sleep(1)
+
+        # Try to recall the table.
+        error_msg = 'soft-deleted state false, expired state false, can\'t recall'
+        with self.assertRaisesRegex(KuduNotFound, error_msg):
+            self.client.recall_table(table.id)
+
+        # Perform sanity checks to validate that the table is removed entirely.
+        assert len(self.client.list_tables()) == 1
+        assert self.client.list_tables() == [self.ex_table]
+        assert len(self.client.list_soft_deleted_tables()) == 0

Reply via email to