David Capwell created CASSANDRA-16220: -----------------------------------------
Summary: python dtest pending_range_test.py::TestPendingRangeMovements::test_pending_range (@pytest.mark.resource_intensive) fails on trunk Key: CASSANDRA-16220 URL: https://issues.apache.org/jira/browse/CASSANDRA-16220 Project: Cassandra Issue Type: Bug Components: Test/dtest/python Reporter: David Capwell The test has the following section {code} if cluster.version() >= '2.2': node2.watch_log_for('127.0.0.1 state moving', timeout=10, filename='debug.log’) {code} The issue is that in trunk we have the port attached to the log, so the log is now {code} DEBUG [GossipStage:1] 2020-10-21 00:48:20,104 StorageService.java:2452 - Node /127.0.0.1:7000 state MOVING, tokens [-9223372036854775808] DEBUG [GossipStage:1] 2020-10-21 00:48:20,105 StorageService.java:2670 - Node /127.0.0.1:7000 state moving, new token -634023222112864484 {code} Since the log now contains the port, this test always times out on trunk when it hits this {code} self = <pending_range_test.TestPendingRangeMovements object at 0x7fc5d35c85f8> @pytest.mark.resource_intensive def test_pending_range(self): """ @jira_ticket CASSANDRA-10887 """ cluster = self.cluster # If we are on 2.1, we need to set the log level to debug or higher, as debug.log does not exist. if cluster.version() < '2.2': cluster.set_log_level('DEBUG') # Create 5 node cluster cluster.populate(5).start() node1, node2 = cluster.nodelist()[0:2] # Set up RF=3 keyspace session = self.patient_cql_connection(node1) create_ks(session, 'ks', 3) session.execute("CREATE TABLE users (login text PRIMARY KEY, email text, name text, login_count int)") # We use the partition key 'jdoe3' because it belongs to node1. # The key MUST belong to node1 to repro the bug. session.execute("INSERT INTO users (login, email, name, login_count) VALUES ('jdoe3', 'j...@abc.com', 'Jane Doe', 1) IF NOT EXISTS;") lwt_query = SimpleStatement("UPDATE users SET email = 'jane...@abc.com' WHERE login = 'jdoe3' IF email = 'j...@abc.com'") # Show we can execute LWT no problem for i in range(1000): session.execute(lwt_query) token = '-634023222112864484' mark = node1.mark_log() # Move a node node1.nodetool('move {}'.format(token)) # Watch the log so we know when the node is moving node1.watch_log_for('Moving .* to {}'.format(token), timeout=10, from_mark=mark) node1.watch_log_for('Sleeping 30000 ms before start streaming/fetching ranges', timeout=10, from_mark=mark) if cluster.version() >= '2.2': > node2.watch_log_for('127.0.0.1 state moving', timeout=10, > filename='debug.log') pending_range_test.py:57: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <ccmlib.node.Node object at 0x7fc5d3b51208> exprs = '127.0.0.1 state moving', from_mark = None, timeout = 10, process = None verbose = False, filename = 'debug.log' def watch_log_for(self, exprs, from_mark=None, timeout=600, process=None, verbose=False, filename='system.log'): """ Watch the log until one or more (regular) expressions are found or timeouts (a TimeoutError is then raised). On successful completion, a list of pair (line matched, match object) is returned. """ start = time.time() tofind = [exprs] if isinstance(exprs, string_types) else exprs tofind = [re.compile(e) for e in tofind] matchings = [] reads = "" if len(tofind) == 0: return None log_file = os.path.join(self.log_directory(), filename) output_read = False while not os.path.exists(log_file): time.sleep(.5) if start + timeout < time.time(): raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", time.gmtime()) + " [" + self.name + "] Timed out waiting for {} to be created.".format(log_file)) if process and not output_read: process.poll() if process.returncode is not None: self.print_process_output(self.name, process, verbose) output_read = True if process.returncode != 0: raise RuntimeError() # Shouldn't reuse RuntimeError but I'm lazy with open(log_file) as f: if from_mark: f.seek(from_mark) while True: # First, if we have a process to check, then check it. # Skip on Windows - stdout/stderr is cassandra.bat if not common.is_win() and not output_read: if process: process.poll() if process.returncode is not None: self.print_process_output(self.name, process, verbose) output_read = True if process.returncode != 0: raise RuntimeError() # Shouldn't reuse RuntimeError but I'm lazy line = f.readline() if line: reads = reads + line for e in tofind: m = e.search(line) if m: matchings.append((line, m)) tofind.remove(e) if len(tofind) == 0: return matchings[0] if isinstance(exprs, string_types) else matchings else: # yep, it's ugly time.sleep(1) if start + timeout < time.time(): > raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", > time.gmtime()) + " [" + self.name + "] Missing: " + str([e.pattern for e in > tofind]) + ":\n" + reads[:50] + ".....\nSee {} for > remainder".format(filename)) E ccmlib.node.TimeoutError: 21 Oct 2020 00:50:26 [node2] Missing: ['127.0.0.1 state moving']: E INFO [main] 2020-10-21 00:49:15,878 YamlConfigura..... E See debug.log for remainder /opt/rh/rh-python36/root/usr/lib/python3.6/site-packages/ccmlib/node.py:550: TimeoutError {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org