David Capwell created CASSANDRA-16220:
-----------------------------------------
Summary: python dtest
pending_range_test.py::TestPendingRangeMovements::test_pending_range
(@pytest.mark.resource_intensive) fails on trunk
Key: CASSANDRA-16220
URL: https://issues.apache.org/jira/browse/CASSANDRA-16220
Project: Cassandra
Issue Type: Bug
Components: Test/dtest/python
Reporter: David Capwell
The test has the following section
{code}
if cluster.version() >= '2.2':
node2.watch_log_for('127.0.0.1 state moving', timeout=10,
filename='debug.log’)
{code}
The issue is that in trunk we have the port attached to the log, so the log is
now
{code}
DEBUG [GossipStage:1] 2020-10-21 00:48:20,104 StorageService.java:2452 - Node
/127.0.0.1:7000 state MOVING, tokens [-9223372036854775808]
DEBUG [GossipStage:1] 2020-10-21 00:48:20,105 StorageService.java:2670 - Node
/127.0.0.1:7000 state moving, new token -634023222112864484
{code}
Since the log now contains the port, this test always times out on trunk when
it hits this
{code}
self = <pending_range_test.TestPendingRangeMovements object at 0x7fc5d35c85f8>
@pytest.mark.resource_intensive
def test_pending_range(self):
"""
@jira_ticket CASSANDRA-10887
"""
cluster = self.cluster
# If we are on 2.1, we need to set the log level to debug or higher, as
debug.log does not exist.
if cluster.version() < '2.2':
cluster.set_log_level('DEBUG')
# Create 5 node cluster
cluster.populate(5).start()
node1, node2 = cluster.nodelist()[0:2]
# Set up RF=3 keyspace
session = self.patient_cql_connection(node1)
create_ks(session, 'ks', 3)
session.execute("CREATE TABLE users (login text PRIMARY KEY, email
text, name text, login_count int)")
# We use the partition key 'jdoe3' because it belongs to node1.
# The key MUST belong to node1 to repro the bug.
session.execute("INSERT INTO users (login, email, name, login_count)
VALUES ('jdoe3', '[email protected]', 'Jane Doe', 1) IF NOT EXISTS;")
lwt_query = SimpleStatement("UPDATE users SET email = '[email protected]'
WHERE login = 'jdoe3' IF email = '[email protected]'")
# Show we can execute LWT no problem
for i in range(1000):
session.execute(lwt_query)
token = '-634023222112864484'
mark = node1.mark_log()
# Move a node
node1.nodetool('move {}'.format(token))
# Watch the log so we know when the node is moving
node1.watch_log_for('Moving .* to {}'.format(token), timeout=10,
from_mark=mark)
node1.watch_log_for('Sleeping 30000 ms before start streaming/fetching
ranges', timeout=10, from_mark=mark)
if cluster.version() >= '2.2':
> node2.watch_log_for('127.0.0.1 state moving', timeout=10,
> filename='debug.log')
pending_range_test.py:57:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ccmlib.node.Node object at 0x7fc5d3b51208>
exprs = '127.0.0.1 state moving', from_mark = None, timeout = 10, process = None
verbose = False, filename = 'debug.log'
def watch_log_for(self, exprs, from_mark=None, timeout=600, process=None,
verbose=False, filename='system.log'):
"""
Watch the log until one or more (regular) expressions are found or
timeouts (a
TimeoutError is then raised). On successful completion, a list of
pair (line matched,
match object) is returned.
"""
start = time.time()
tofind = [exprs] if isinstance(exprs, string_types) else exprs
tofind = [re.compile(e) for e in tofind]
matchings = []
reads = ""
if len(tofind) == 0:
return None
log_file = os.path.join(self.log_directory(), filename)
output_read = False
while not os.path.exists(log_file):
time.sleep(.5)
if start + timeout < time.time():
raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S",
time.gmtime()) + " [" + self.name + "] Timed out waiting for {} to be
created.".format(log_file))
if process and not output_read:
process.poll()
if process.returncode is not None:
self.print_process_output(self.name, process, verbose)
output_read = True
if process.returncode != 0:
raise RuntimeError() # Shouldn't reuse RuntimeError
but I'm lazy
with open(log_file) as f:
if from_mark:
f.seek(from_mark)
while True:
# First, if we have a process to check, then check it.
# Skip on Windows - stdout/stderr is cassandra.bat
if not common.is_win() and not output_read:
if process:
process.poll()
if process.returncode is not None:
self.print_process_output(self.name, process,
verbose)
output_read = True
if process.returncode != 0:
raise RuntimeError() # Shouldn't reuse
RuntimeError but I'm lazy
line = f.readline()
if line:
reads = reads + line
for e in tofind:
m = e.search(line)
if m:
matchings.append((line, m))
tofind.remove(e)
if len(tofind) == 0:
return matchings[0] if isinstance(exprs,
string_types) else matchings
else:
# yep, it's ugly
time.sleep(1)
if start + timeout < time.time():
> raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S",
> time.gmtime()) + " [" + self.name + "] Missing: " + str([e.pattern for e in
> tofind]) + ":\n" + reads[:50] + ".....\nSee {} for
> remainder".format(filename))
E ccmlib.node.TimeoutError: 21 Oct 2020 00:50:26 [node2]
Missing: ['127.0.0.1 state moving']:
E INFO [main] 2020-10-21 00:49:15,878 YamlConfigura.....
E See debug.log for remainder
/opt/rh/rh-python36/root/usr/lib/python3.6/site-packages/ccmlib/node.py:550:
TimeoutError
{code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]