This is an automated email from the ASF dual-hosted git repository. laiyingchun pushed a commit to branch branch-1.17.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit f9ed93de5e47adeb8624355bff352a8c885c68cc Author: Marton Greber <[email protected]> AuthorDate: Thu May 18 18:14:05 2023 +0000 KUDU-3326 Add soft-delete to Python client This is a follow-up change to 7b6b6b636818d3e22a3939fde77689dce84e88b2. Change-Id: Ia936c43dc52888eb4dc5827e944f18e1ff89940c Reviewed-on: http://gerrit.cloudera.org:8080/19901 Tested-by: Kudu Jenkins Reviewed-by: Yingchun Lai <[email protected]> (cherry picked from commit fbace5e035e55379030d36710b2edef691f12983) Reviewed-on: http://gerrit.cloudera.org:8080/20225 Reviewed-by: Marton Greber <[email protected]> Reviewed-by: Yifan Zhang <[email protected]> Tested-by: Yingchun Lai <[email protected]> --- python/kudu/client.pyx | 100 +++++++++++++++++++- python/kudu/libkudu_client.pxd | 6 ++ python/kudu/tests/common.py | 6 +- python/kudu/tests/test_client.py | 192 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 299 insertions(+), 5 deletions(-) diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx index edf56e3c7..fdebff0c5 100644 --- a/python/kudu/client.pyx +++ b/python/kudu/client.pyx @@ -497,7 +497,23 @@ cdef class Client: def delete_table(self, table_name): """ - Delete a Kudu table. Raises KuduNotFound if the table does not exist. + Delete/drop a Kudu table without reserving. Raises KuduNotFound + if the table does not exist. + + Notes + ----- + The deleted table may turn to soft-deleted status with the flag + default_deleted_table_reserve_seconds set to nonzero on the master side. + + The delete operation or drop operation means that the service will directly + delete the table after receiving the instruction. Which means that once we + delete the table by mistake, we have no way to recall the deleted data. + We have added a new API @soft_delete_table to allow the deleted data to be + reserved for a period of time, which means that the wrongly deleted data may + be recalled. In order to be compatible with the previous versions, this interface + will continue to directly delete tables without reserving the table. + + Refer to soft_delete_table for detailed usage examples. Parameters ---------- @@ -505,6 +521,53 @@ cdef class Client: """ check_status(self.cp.DeleteTable(tobytes(table_name))) + def soft_delete_table(self, table_name, reserve_seconds=None): + """ + Soft delete/drop a table. + + Notes + ----- + Usage Example1: + Equal to delete_table(table_name) and the table will not be reserved. + + client.soft_delete_table(table_name) + + Usage Example2: + The table will be reserved for 600s after delete operation. + We can recall the table in time after the delete. + + client.soft_delete_table(table_name, 600) + client.recall_table(table_id) + + Parameters + ---------- + table_name : string + Name of the table to drop. + reserve_seconds : int + Reserve seconds after being deleted. + """ + if reserve_seconds is not None: + check_status(self.cp.SoftDeleteTable(tobytes(table_name), reserve_seconds)) + else: + check_status(self.cp.SoftDeleteTable(tobytes(table_name))) + + def recall_table(self, table_id, new_table_name=None): + """ + Recall a deleted but still reserved table. + + Parameters + ---------- + table_id : string + ID of the table to recall. + new_table_name : string + New table name for the recalled table. The recalled table will use the original + table name if the parameter is empty string (i.e. ""). + """ + if new_table_name is not None: + check_status(self.cp.RecallTable(tobytes(table_id), tobytes(new_table_name))) + else: + check_status(self.cp.RecallTable(tobytes(table_id))) + def table_exists(self, table_name): """Return True if the indicated table exists in the Kudu cluster. @@ -563,8 +626,8 @@ cdef class Client: def list_tables(self, match_substring=None): """ - Retrieve a list of table names in the Kudu cluster with an optional - substring filter. + Retrieve a list of non-soft-deleted table names in the Kudu cluster with + an optional substring filter. Parameters ---------- @@ -592,6 +655,37 @@ cdef class Client: result.append(frombytes(tables[i])) return result + def list_soft_deleted_tables(self, match_substring=None): + """ + Retrieve a list of soft-deleted table names in the Kudu cluster with + an optional substring filter. + + Parameters + ---------- + match_substring : string, optional + If passed, the string must be exactly contained in the table names + + Returns + ------- + tables : list[string] + Table names returned from Kudu + """ + cdef: + vector[string] tables + string c_match + size_t i + + if match_substring is not None: + c_match = tobytes(match_substring) + check_status(self.cp.ListSoftDeletedTables(&tables, c_match)) + else: + check_status(self.cp.ListSoftDeletedTables(&tables)) + + result = [] + for i in range(tables.size()): + result.append(frombytes(tables[i])) + return result + def list_tablet_servers(self): """ Retrieve a list of tablet servers currently running in the Kudu cluster diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd index 9cd375f4d..a17259c23 100644 --- a/python/kudu/libkudu_client.pxd +++ b/python/kudu/libkudu_client.pxd @@ -558,6 +558,10 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil: cdef cppclass KuduClient: Status DeleteTable(const string& table_name) + Status SoftDeleteTable(const string& table_name) + Status SoftDeleteTable(const string& table_name, uint32_t reserve_seconds) + Status RecallTable(const string& table_id) + Status RecallTable(const string& table_id, const string& new_table_name) Status OpenTable(const string& table_name, shared_ptr[KuduTable]* table) Status GetTableSchema(const string& table_name, KuduSchema* schema) @@ -570,6 +574,8 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil: Status ListTables(vector[string]* tables) Status ListTables(vector[string]* tables, const string& filter) + Status ListSoftDeletedTables(vector[string]* tables) + Status ListSoftDeletedTables(vector[string]* tables, const string& filter) Status ListTabletServers(vector[KuduTabletServer*]* tablet_servers) diff --git a/python/kudu/tests/common.py b/python/kudu/tests/common.py index 85b4fefad..c86aac4a8 100644 --- a/python/kudu/tests/common.py +++ b/python/kudu/tests/common.py @@ -91,7 +91,11 @@ class KuduTestBase(object): "extraMasterFlags" : [ "--default_num_replicas=1", "--ipki_ca_key_size=2048", - "--ipki_server_key_size=2048" ], + "--ipki_server_key_size=2048", + # TODO: once setting flags per unittest is implemented, + # remove this line here and add it to the test: + # 'test_soft_delete_and_recall_table_after_reserve_time' + "--check_expired_table_interval_seconds=2" ], "extraTserverFlags" : [ "--ipki_server_key_size=2048" ], "mini_oidc_options" : { "expiration_time" : "300000", diff --git a/python/kudu/tests/test_client.py b/python/kudu/tests/test_client.py index 3a1e43e9e..020dec96a 100755 --- a/python/kudu/tests/test_client.py +++ b/python/kudu/tests/test_client.py @@ -24,11 +24,13 @@ from kudu.client import (Partitioning, ENCRYPTION_REQUIRED, ENCRYPTION_REQUIRED_REMOTE) from kudu.errors import (KuduInvalidArgument, - KuduBadStatus) + KuduBadStatus, + KuduNotFound) from kudu.schema import (Schema, KuduValue) import kudu import datetime +import time from pytz import utc try: from urllib.error import HTTPError @@ -97,6 +99,13 @@ class TestClient(KuduTestBase, CompatUnitTest): for name in to_create: self.client.delete_table(name) + self.client.create_table('foo4', schema, partitioning) + assert len(self.client.list_soft_deleted_tables()) == 0 + self.client.soft_delete_table('foo4', 1000) + assert len(self.client.list_soft_deleted_tables()) == 1 + # Force delete the table + self.client.soft_delete_table('foo4') + def test_is_multimaster(self): assert self.client.is_multimaster @@ -966,3 +975,184 @@ class TestMonoDelta(CompatUnitTest): delta = kudu.timedelta(nanos=3500) assert delta.to_nanos() == 3500 + +class TestSoftDelete(KuduTestBase, CompatUnitTest): + + def setUp(self): + pass + + def create_test_table(self, table_name, nrows): + self.client.create_table(table_name, self.schema, self.partitioning) + table = self.client.table(table_name) + session = self.client.new_session() + + # Insert a few rows, and scan them back. This is to populate the MetaCache. + for i in range(nrows): + op = table.new_insert((i, 2, 'hello')) + session.apply(op) + session.flush() + + scanner = table.scanner().open() + tuples = scanner.read_all_tuples() + assert len(tuples) == nrows + + def test_soft_deleted_table_alter_operations(self): + try: + table_name = 'test_soft_deleted_table_alter_operations' + self.create_test_table(table_name, 10) + table = self.client.table(table_name) + + # Soft-delete the table. + assert len(self.client.list_tables()) == 2 + assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name]) + assert len(self.client.list_soft_deleted_tables()) == 0 + self.client.soft_delete_table(table_name, 600) + + # Soft-deleted table is still visible + assert self.client.table_exists(table_name) == True + + # The table has been moved into the soft_deleted list. + assert len(self.client.list_tables()) == 1 + assert self.client.list_tables() == [self.ex_table] + assert len(self.client.list_soft_deleted_tables()) == 1 + assert self.client.list_soft_deleted_tables() == [table_name] + + # Altering a soft-deleted table is not allowed. + # Not allowed to rename. + alterer = self.client.new_table_alterer(table) + alterer.rename("new_table_name") + error_msg = 'soft_deleted table {0} should not be altered'.format(table_name) + with self.assertRaisesRegex(KuduInvalidArgument, error_msg): + alterer.alter() + + # Not allowed to add column. + alterer = self.client.new_table_alterer(table) + alterer.add_column('new_column', type_='int64', default=0) + error_msg = 'soft_deleted table {0} should not be altered'.format(table_name) + with self.assertRaisesRegex(KuduInvalidArgument, error_msg): + alterer.alter() + + # Not allowed to delete the soft-deleted table with new reserve_seconds value. + error_msg = 'soft_deleted table {0} should not be soft deleted again'.format(table_name) + with self.assertRaisesRegex(KuduInvalidArgument, error_msg): + self.client.soft_delete_table(table_name, 600) + + # Not allowed to set extra configs. + # TODO: Once the Python client supports changing extra configs through table alterer, + # check that this can't be performed for soft-deleted table. + + # It is not allowed to create a new table with the same name. + error_msg = 'table {0} already exists with id {1}'.format(table_name, table.id) + with self.assertRaisesRegex(KuduBadStatus, error_msg): + self.client.create_table(table_name, self.schema, self.partitioning) + finally: + try: + # Force delete the soft-deleted table. + self.client.delete_table(table_name) + except: + pass + + def test_soft_delete_and_recall_table_positive(self): + try: + # Create and open the table before soft-deleting it. + table_name = "test_soft_delete_and_recall_table_positive" + nrows = 10 + self.create_test_table(table_name, nrows) + table = self.client.table(table_name) + session = self.client.new_session() + + # Remove the table. Perform sanity checks. + self.client.soft_delete_table(table_name, 600) + assert len(self.client.list_tables()) == 1 + assert self.client.list_tables() == [self.ex_table] + assert len(self.client.list_soft_deleted_tables()) == 1 + assert self.client.list_soft_deleted_tables() == [table_name] + + # Read and write are allowed for soft-deleted table. + for i in range(nrows): + op = table.new_insert((i+nrows, 2, 'hello')) + session.apply(op) + session.flush() + + scanner = table.scanner().open() + tuples = scanner.read_all_tuples() + assert len(tuples) == 2*nrows + + # Recall and reopen table. Perform sanity checks. + self.client.recall_table(table.id) + assert len(self.client.list_tables()) == 2 + assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name]) + assert len(self.client.list_soft_deleted_tables()) == 0 + + # Check the data in the table. + scanner = table.scanner().open() + tuples = scanner.read_all_tuples() + assert len(tuples) == 2*nrows + finally: + try: + # Force delete the soft-deleted table. + self.client.delete_table(table_name) + except: + pass + + def test_soft_delete_and_recall_table_with_new_name_positive(self): + try: + # Create and open the table before soft-deleting it. + table_name = "test_soft_delete_and_recall_table_with_new_name_positive" + nrows = 10 + self.create_test_table(table_name, nrows) + table = self.client.table(table_name) + + # Remove the table. Perform sanity checks. + self.client.soft_delete_table(table_name, 600) + assert len(self.client.list_tables()) == 1 + assert self.client.list_tables() == [self.ex_table] + assert len(self.client.list_soft_deleted_tables()) == 1 + assert self.client.list_soft_deleted_tables() == [table_name] + + # Recall and reopen table. Perform sanity checks. + table_name = "new_table_name" + self.client.recall_table(table.id, table_name) + assert len(self.client.list_tables()) == 2 + assert sorted(self.client.list_tables()) == sorted([self.ex_table, table_name]) + assert len(self.client.list_soft_deleted_tables()) == 0 + + # Re-open the table. Check the data in the table. + table = self.client.table(table_name) + scanner = table.scanner().open() + tuples = scanner.read_all_tuples() + assert len(tuples) == nrows + assert table.name == table_name + finally: + try: + # Force delete the soft-deleted table. + self.client.delete_table(table_name) + except: + pass + + def test_soft_delete_and_recall_table_after_reserve_time(self): + # Create and open the table before soft-deleting it. + table_name = "test_soft_delete_and_recall_table_after_reserve_time" + self.create_test_table(table_name, 10) + + # Remove the table. Wait until the table is removed completely. + self.client.soft_delete_table(table_name, 1) + error_msg = 'the table does not exist' + table_exists = True + while table_exists: + try: + table = self.client.table(table_name) + except KuduNotFound as kudu_error: + assert error_msg in str(kudu_error) + table_exists = False + time.sleep(1) + + # Try to recall the table. + error_msg = 'soft-deleted state false, expired state false, can\'t recall' + with self.assertRaisesRegex(KuduNotFound, error_msg): + self.client.recall_table(table.id) + + # Perform sanity checks to validate that the table is removed entirely. + assert len(self.client.list_tables()) == 1 + assert self.client.list_tables() == [self.ex_table] + assert len(self.client.list_soft_deleted_tables()) == 0
