This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new fbace5e03 KUDU-3326 Add soft-delete to Python client
fbace5e03 is described below
commit fbace5e035e55379030d36710b2edef691f12983
Author: Marton Greber <[email protected]>
AuthorDate: Thu May 18 18:14:05 2023 +0000
KUDU-3326 Add soft-delete to Python client
This is a follow-up change to 7b6b6b636818d3e22a3939fde77689dce84e88b2.
Change-Id: Ia936c43dc52888eb4dc5827e944f18e1ff89940c
Reviewed-on: http://gerrit.cloudera.org:8080/19901
Tested-by: Kudu Jenkins
Reviewed-by: Yingchun Lai <[email protected]>
---
python/kudu/client.pyx | 100 +++++++++++++++++++-
python/kudu/libkudu_client.pxd | 6 ++
python/kudu/tests/common.py | 6 +-
python/kudu/tests/test_client.py | 192 ++++++++++++++++++++++++++++++++++++++-
4 files changed, 299 insertions(+), 5 deletions(-)
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index edf56e3c7..fdebff0c5 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -497,7 +497,23 @@ cdef class Client:
def delete_table(self, table_name):
"""
- Delete a Kudu table. Raises KuduNotFound if the table does not exist.
+ Delete/drop a Kudu table without reserving. Raises KuduNotFound
+ if the table does not exist.
+
+ Notes
+ -----
+ The deleted table may turn to soft-deleted status with the flag
+ default_deleted_table_reserve_seconds set to nonzero on the master side.
+
+ The delete operation or drop operation means that the service will directly
+ delete the table after receiving the instruction. Which means that once we
+ delete the table by mistake, we have no way to recall the deleted data.
+ We have added a new API @soft_delete_table to allow the deleted data to be
+ reserved for a period of time, which means that the wrongly deleted data may
+ be recalled. In order to be compatible with the previous versions, this interface
+ will continue to directly delete tables without reserving the table.
+
+ Refer to soft_delete_table for detailed usage examples.
Parameters
----------
@@ -505,6 +521,53 @@ cdef class Client:
"""
check_status(self.cp.DeleteTable(tobytes(table_name)))
+ def soft_delete_table(self, table_name, reserve_seconds=None):
+ """
+ Soft delete/drop a table.
+
+ Notes
+ -----
+ Usage Example1:
+ Equal to delete_table(table_name) and the table will not be reserved.
+
+ client.soft_delete_table(table_name)
+
+ Usage Example2:
+ The table will be reserved for 600s after delete operation.
+ We can recall the table in time after the delete.
+
+ client.soft_delete_table(table_name, 600)
+ client.recall_table(table_id)
+
+ Parameters
+ ----------
+ table_name : string
+ Name of the table to drop.
+ reserve_seconds : int
+ Reserve seconds after being deleted.
+ """
+ if reserve_seconds is not None:
+ check_status(self.cp.SoftDeleteTable(tobytes(table_name), reserve_seconds))
+ else:
+ check_status(self.cp.SoftDeleteTable(tobytes(table_name)))
+
+ def recall_table(self, table_id, new_table_name=None):
+ """
+ Recall a deleted but still reserved table.
+
+ Parameters
+ ----------
+ table_id : string
+ ID of the table to recall.
+ new_table_name : string
+ New table name for the recalled table. The recalled table will use the original
+ table name if the parameter is empty string (i.e. "").
+ """
+ if new_table_name is not None:
+ check_status(self.cp.RecallTable(tobytes(table_id), tobytes(new_table_name)))
+ else:
+ check_status(self.cp.RecallTable(tobytes(table_id)))
+
def table_exists(self, table_name):
"""Return True if the indicated table exists in the Kudu cluster.
@@ -563,8 +626,8 @@ cdef class Client:
def list_tables(self, match_substring=None):
"""
- Retrieve a list of table names in the Kudu cluster with an optional
- substring filter.
+ Retrieve a list of non-soft-deleted table names in the Kudu cluster with
+ an optional substring filter.
Parameters
----------
@@ -592,6 +655,37 @@ cdef class Client:
result.append(frombytes(tables[i]))
return result
+ def list_soft_deleted_tables(self, match_substring=None):
+ """
+ Retrieve a list of soft-deleted table names in the Kudu cluster with
+ an optional substring filter.
+
+ Parameters
+ ----------
+ match_substring : string, optional
+ If passed, the string must be exactly contained in the table names
+
+ Returns
+ -------
+ tables : list[string]
+ Table names returned from Kudu
+ """
+ cdef:
+ vector[string] tables
+ string c_match
+ size_t i
+
+ if match_substring is not None:
+ c_match = tobytes(match_substring)
+ check_status(self.cp.ListSoftDeletedTables(&tables, c_match))
+ else:
+ check_status(self.cp.ListSoftDeletedTables(&tables))
+
+ result = []
+ for i in range(tables.size()):
+ result.append(frombytes(tables[i]))
+ return result
+
def list_tablet_servers(self):
"""
Retrieve a list of tablet servers currently running in the Kudu cluster
diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd
index 9cd375f4d..a17259c23 100644
--- a/python/kudu/libkudu_client.pxd
+++ b/python/kudu/libkudu_client.pxd
@@ -558,6 +558,10 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil:
cdef cppclass KuduClient:
Status DeleteTable(const string& table_name)
+ Status SoftDeleteTable(const string& table_name)
+ Status SoftDeleteTable(const string& table_name, uint32_t reserve_seconds)
+ Status RecallTable(const string& table_id)
+ Status RecallTable(const string& table_id, const string& new_table_name)
Status OpenTable(const string& table_name,
shared_ptr[KuduTable]* table)
Status GetTableSchema(const string& table_name, KuduSchema* schema)
@@ -570,6 +574,8 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil:
Status ListTables(vector[string]* tables)
Status ListTables(vector[string]* tables, const string& filter)
+ Status ListSoftDeletedTables(vector[string]* tables)
+ Status ListSoftDeletedTables(vector[string]* tables, const string& filter)
Status ListTabletServers(vector[KuduTabletServer*]* tablet_servers)
diff --git a/python/kudu/tests/common.py b/python/kudu/tests/common.py
index 85b4fefad..c86aac4a8 100644
--- a/python/kudu/tests/common.py
+++ b/python/kudu/tests/common.py
@@ -91,7 +91,11 @@ class KuduTestBase(object):
"extraMasterFlags" : [
"--default_num_replicas=1",
"--ipki_ca_key_size=2048",
- "--ipki_server_key_size=2048" ],
+ "--ipki_server_key_size=2048",
+ # TODO: once setting flags per unittest is implemented,
+ # remove this line here and add it to the test:
+ # 'test_soft_delete_and_recall_table_after_reserve_time'
+ "--check_expired_table_interval_seconds=2" ],
"extraTserverFlags" : [ "--ipki_server_key_size=2048" ],
"mini_oidc_options" :
{ "expiration_time" : "300000",
diff --git a/python/kudu/tests/test_client.py b/python/kudu/tests/test_client.py
index d3bb965e0..0ce6d9bb4 100755
--- a/python/kudu/tests/test_client.py
+++ b/python/kudu/tests/test_client.py
@@ -24,11 +24,13 @@ from kudu.client import (Partitioning,
ENCRYPTION_REQUIRED,
ENCRYPTION_REQUIRED_REMOTE)
from kudu.errors import (KuduInvalidArgument,
- KuduBadStatus)
+ KuduBadStatus,
+ KuduNotFound)
from kudu.schema import (Schema,
KuduValue)
import kudu
import datetime
+import time
from pytz import utc
try:
from urllib.error import HTTPError
@@ -97,6 +99,13 @@ class TestClient(KuduTestBase, CompatUnitTest):
for name in to_create:
self.client.delete_table(name)
+ self.client.create_table('foo4', schema, partitioning)
+ assert len(self.client.list_soft_deleted_tables()) == 0
+ self.client.soft_delete_table('foo4', 1000)
+ assert len(self.client.list_soft_deleted_tables()) == 1
+ # Force delete the table
+ self.client.soft_delete_table('foo4')
+
def test_is_multimaster(self):
assert self.client.is_multimaster
@@ -959,3 +968,184 @@ class TestMonoDelta(CompatUnitTest):
delta = kudu.timedelta(nanos=3500)
assert delta.to_nanos() == 3500
+
+class TestSoftDelete(KuduTestBase, CompatUnitTest):
+
+ def setUp(self):
+ pass
+
+ def create_test_table(self, table_name, nrows):
+ self.client.create_table(table_name, self.schema, self.partitioning)
+ table = self.client.table(table_name)
+ session = self.client.new_session()
+
+ # Insert a few rows, and scan them back. This is to populate the MetaCache.
+ for i in range(nrows):
+ op = table.new_insert((i, 2, 'hello'))
+ session.apply(op)
+ session.flush()
+
+ scanner = table.scanner().open()
+ tuples = scanner.read_all_tuples()
+ assert len(tuples) == nrows
+
+ def test_soft_deleted_table_alter_operations(self):
+ try:
+ table_name = 'test_soft_deleted_table_alter_operations'
+ self.create_test_table(table_name, 10)
+ table = self.client.table(table_name)
+
+ # Soft-delete the table.
+ assert len(self.client.list_tables()) == 2
+ assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name])
+ assert len(self.client.list_soft_deleted_tables()) == 0
+ self.client.soft_delete_table(table_name, 600)
+
+ # Soft-deleted table is still visible
+ assert self.client.table_exists(table_name) == True
+
+ # The table has been moved into the soft_deleted list.
+ assert len(self.client.list_tables()) == 1
+ assert self.client.list_tables() == [self.ex_table]
+ assert len(self.client.list_soft_deleted_tables()) == 1
+ assert self.client.list_soft_deleted_tables() == [table_name]
+
+ # Altering a soft-deleted table is not allowed.
+ # Not allowed to rename.
+ alterer = self.client.new_table_alterer(table)
+ alterer.rename("new_table_name")
+ error_msg = 'soft_deleted table {0} should not be altered'.format(table_name)
+ with self.assertRaisesRegex(KuduInvalidArgument, error_msg):
+ alterer.alter()
+
+ # Not allowed to add column.
+ alterer = self.client.new_table_alterer(table)
+ alterer.add_column('new_column', type_='int64', default=0)
+ error_msg = 'soft_deleted table {0} should not be altered'.format(table_name)
+ with self.assertRaisesRegex(KuduInvalidArgument, error_msg):
+ alterer.alter()
+
+ # Not allowed to delete the soft-deleted table with new reserve_seconds value.
+ error_msg = 'soft_deleted table {0} should not be soft deleted again'.format(table_name)
+ with self.assertRaisesRegex(KuduInvalidArgument, error_msg):
+ self.client.soft_delete_table(table_name, 600)
+
+ # Not allowed to set extra configs.
+ # TODO: Once the Python client supports changing extra configs through table alterer,
+ # check that this can't be performed for soft-deleted table.
+
+ # It is not allowed to create a new table with the same name.
+ error_msg = 'table {0} already exists with id {1}'.format(table_name, table.id)
+ with self.assertRaisesRegex(KuduBadStatus, error_msg):
+ self.client.create_table(table_name, self.schema, self.partitioning)
+ finally:
+ try:
+ # Force delete the soft-deleted table.
+ self.client.delete_table(table_name)
+ except:
+ pass
+
+ def test_soft_delete_and_recall_table_positive(self):
+ try:
+ # Create and open the table before soft-deleting it.
+ table_name = "test_soft_delete_and_recall_table_positive"
+ nrows = 10
+ self.create_test_table(table_name, nrows)
+ table = self.client.table(table_name)
+ session = self.client.new_session()
+
+ # Remove the table. Perform sanity checks.
+ self.client.soft_delete_table(table_name, 600)
+ assert len(self.client.list_tables()) == 1
+ assert self.client.list_tables() == [self.ex_table]
+ assert len(self.client.list_soft_deleted_tables()) == 1
+ assert self.client.list_soft_deleted_tables() == [table_name]
+
+ # Read and write are allowed for soft-deleted table.
+ for i in range(nrows):
+ op = table.new_insert((i+nrows, 2, 'hello'))
+ session.apply(op)
+ session.flush()
+
+ scanner = table.scanner().open()
+ tuples = scanner.read_all_tuples()
+ assert len(tuples) == 2*nrows
+
+ # Recall and reopen table. Perform sanity checks.
+ self.client.recall_table(table.id)
+ assert len(self.client.list_tables()) == 2
+ assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name])
+ assert len(self.client.list_soft_deleted_tables()) == 0
+
+ # Check the data in the table.
+ scanner = table.scanner().open()
+ tuples = scanner.read_all_tuples()
+ assert len(tuples) == 2*nrows
+ finally:
+ try:
+ # Force delete the soft-deleted table.
+ self.client.delete_table(table_name)
+ except:
+ pass
+
+ def test_soft_delete_and_recall_table_with_new_name_positive(self):
+ try:
+ # Create and open the table before soft-deleting it.
+ table_name = "test_soft_delete_and_recall_table_with_new_name_positive"
+ nrows = 10
+ self.create_test_table(table_name, nrows)
+ table = self.client.table(table_name)
+
+ # Remove the table. Perform sanity checks.
+ self.client.soft_delete_table(table_name, 600)
+ assert len(self.client.list_tables()) == 1
+ assert self.client.list_tables() == [self.ex_table]
+ assert len(self.client.list_soft_deleted_tables()) == 1
+ assert self.client.list_soft_deleted_tables() == [table_name]
+
+ # Recall and reopen table. Perform sanity checks.
+ table_name = "new_table_name"
+ self.client.recall_table(table.id, table_name)
+ assert len(self.client.list_tables()) == 2
+ assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name])
+ assert len(self.client.list_soft_deleted_tables()) == 0
+
+ # Re-open the table. Check the data in the table.
+ table = self.client.table(table_name)
+ scanner = table.scanner().open()
+ tuples = scanner.read_all_tuples()
+ assert len(tuples) == nrows
+ assert table.name == table_name
+ finally:
+ try:
+ # Force delete the soft-deleted table.
+ self.client.delete_table(table_name)
+ except:
+ pass
+
+ def test_soft_delete_and_recall_table_after_reserve_time(self):
+ # Create and open the table before soft-deleting it.
+ table_name = "test_soft_delete_and_recall_table_after_reserve_time"
+ self.create_test_table(table_name, 10)
+
+ # Remove the table. Wait until the table is removed completely.
+ self.client.soft_delete_table(table_name, 1)
+ error_msg = 'the table does not exist'
+ table_exists = True
+ while table_exists:
+ try:
+ table = self.client.table(table_name)
+ except KuduNotFound as kudu_error:
+ assert error_msg in str(kudu_error)
+ table_exists = False
+ time.sleep(1)
+
+ # Try to recall the table.
+ error_msg = 'soft-deleted state false, expired state false, can\'t recall'
+ with self.assertRaisesRegex(KuduNotFound, error_msg):
+ self.client.recall_table(table.id)
+
+ # Perform sanity checks to validate that the table is removed entirely.
+ assert len(self.client.list_tables()) == 1
+ assert self.client.list_tables() == [self.ex_table]
+ assert len(self.client.list_soft_deleted_tables()) == 0