This is an automated email from the ASF dual-hosted git repository. mmartell pushed a commit to branch develop in repository https://gitbox.apache.org/repos/asf/geode-native.git
The following commit(s) were added to refs/heads/develop by this push: new a7dab24 GEODE-10073: Print bytes of ClientConnectionRequest/Response in logs (#927) a7dab24 is described below commit a7dab245483be28b8c7443839b10fd29c7063b03 Author: Blake Bender <bben...@pivotal.io> AuthorDate: Tue Feb 22 12:48:49 2022 -0800 GEODE-10073: Print bytes of ClientConnectionRequest/Response in logs (#927) * Print bytes of ClientConnectionRequest/Response in logs * Dump the bytes at debug log level, so we can parse * Also added parsing of these to gnmsg tool Co-authored-by: Blake Bender <bbl...@vmware.com> --- cppcache/src/FunctionMacros.hpp | 28 +++++ cppcache/src/ThinClientLocatorHelper.cpp | 56 +++++---- tools/gnmsg/client_messages.py | 6 +- tools/gnmsg/ds_fids.py | 40 +++++++ tools/gnmsg/gnmsg.py | 19 --- tools/gnmsg/handshake_decoder.py | 194 +++++++++++++++++++++++++++---- tools/gnmsg/modified_utf8.py | 2 +- tools/gnmsg/read_values.py | 71 ++++++++++- 8 files changed, 347 insertions(+), 69 deletions(-) diff --git a/cppcache/src/FunctionMacros.hpp b/cppcache/src/FunctionMacros.hpp new file mode 100644 index 0000000..03d426b --- /dev/null +++ b/cppcache/src/FunctionMacros.hpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#ifndef __INC_FUNCTIONMACROS__ +#define __INC_FUNCTIONMACROS__ + +#define INIT_GNFN(this_class) \ + namespace { \ + const auto classname = std::string(this_class); \ + } + +#define __GNFN__ (classname + "::" + __func__).c_str() + +#endif // __INC_FUNCTIONMACROS__ diff --git a/cppcache/src/ThinClientLocatorHelper.cpp b/cppcache/src/ThinClientLocatorHelper.cpp index 7d97443..d896d65 100644 --- a/cppcache/src/ThinClientLocatorHelper.cpp +++ b/cppcache/src/ThinClientLocatorHelper.cpp @@ -30,6 +30,7 @@ #include "ClientConnectionRequest.hpp" #include "ClientConnectionResponse.hpp" #include "ClientReplacementRequest.hpp" +#include "FunctionMacros.hpp" #include "LocatorListRequest.hpp" #include "LocatorListResponse.hpp" #include "QueueConnectionRequest.hpp" @@ -40,6 +41,8 @@ #include "ThinClientPoolDM.hpp" #include "Version.hpp" +INIT_GNFN("ThinClientLocatorHelper") + namespace apache { namespace geode { namespace client { @@ -129,6 +132,11 @@ std::shared_ptr<Serializable> ThinClientLocatorHelper::sendRequest( data.writeInt(kGossipVersion); data.writeInt(Version::current().getOrdinal()); data.writeObject(request); + LOGDEBUG( + "%s(%p): sending %d bytes to locator: %s", __GNFN__, this, + data.getBufferLength(), + Utils::convertBytesToString(data.getBuffer(), data.getBufferLength()) + .c_str()); auto sentLength = conn->send( reinterpret_cast<char*>(const_cast<uint8_t*>(data.getBuffer())), data.getBufferLength(), m_poolDM->getReadTimeout()); @@ -141,12 +149,19 @@ std::shared_ptr<Serializable> ThinClientLocatorHelper::sendRequest( return nullptr; } + LOGDEBUG("%s(%p): received %d bytes from locator: %s", __GNFN__, this, + receivedLength, + Utils::convertBytesToString(reinterpret_cast<uint8_t*>(buff), + receivedLength) + .c_str()); + auto di = m_poolDM->getConnectionManager().getCacheImpl()->createDataInput( reinterpret_cast<uint8_t*>(buff), receivedLength); if (di.read() == REPLY_SSL_ENABLED && !sys_prop.sslEnabled()) { - LOGERROR("SSL is enabled on locator %s, enable SSL in client as well", - location.toString().c_str()); + LOGERROR( + "%s(%p): SSL is enabled on locator %s, enable SSL in client as well", + __GNFN__, this, location.toString().c_str()); throw AuthenticationRequiredException( "SSL is enabled on locator, enable SSL in client as well"); } @@ -156,10 +171,10 @@ std::shared_ptr<Serializable> ThinClientLocatorHelper::sendRequest( } catch (const AuthenticationRequiredException& excp) { throw excp; } catch (const Exception& excp) { - LOGFINE("Exception while querying locator: %s: %s", excp.getName().c_str(), - excp.what()); + LOGFINE("%s(%p): Exception while querying locator: %s: %s", __GNFN__, this, + excp.getName().c_str(), excp.what()); } catch (...) { - LOGFINE("Exception while querying locator"); + LOGFINE("%s(%p): Exception while querying locator", __GNFN__, this); } return nullptr; @@ -169,8 +184,8 @@ GfErrType ThinClientLocatorHelper::getAllServers( std::vector<std::shared_ptr<ServerLocation> >& servers, const std::string& serverGrp) const { for (const auto& loc : getLocators()) { - LOGDEBUG("getAllServers getting servers from server = %s ", - loc.getServerName().c_str()); + LOGDEBUG("%s(%p): getAllServers getting servers from server = %s ", + __GNFN__, this, loc.getServerName().c_str()); auto request = std::make_shared<GetAllServersRequest>(serverGrp); auto response = std::dynamic_pointer_cast<GetAllServersResponse>( @@ -194,10 +209,7 @@ GfErrType ThinClientLocatorHelper::getEndpointForNewCallBackConn( auto locatorsSize = locators.size(); auto maxAttempts = getConnRetries(); - LOGFINER( - "ThinClientLocatorHelper::getEndpointForNewCallBackConn maxAttempts = " - "%zu", - maxAttempts); + LOGFINER("%s(%p): maxAttempts = %zu", __GNFN__, this, maxAttempts); for (auto attempt = 0ULL; attempt < maxAttempts;) { const auto& loc = locators[attempt++ % locatorsSize]; @@ -229,8 +241,9 @@ GfErrType ThinClientLocatorHelper::getEndpointForNewFwdConn( auto maxAttempts = getConnRetries(); LOGFINER( - "ThinClientLocatorHelper::getEndpointForNewFwdConn maxAttempts = %zu", - maxAttempts); + "%s(%p): ThinClientLocatorHelper::getEndpointForNewFwdConn maxAttempts = " + "%zu", + __GNFN__, this, maxAttempts); for (auto attempt = 0ULL; attempt < maxAttempts;) { const auto& loc = locators[attempt++ % locatorsSize]; @@ -239,11 +252,12 @@ GfErrType ThinClientLocatorHelper::getEndpointForNewFwdConn( std::shared_ptr<Serializable> request; if (currentServer == nullptr) { - LOGDEBUG("Creating ClientConnectionRequest"); + LOGDEBUG("%s(%p): Creating ClientConnectionRequest", __GNFN__, this); request = std::make_shared<ClientConnectionRequest>(exclEndPts, serverGrp); } else { - LOGDEBUG("Creating ClientReplacementRequest for connection: %s", + LOGDEBUG("%s(%p): Creating ClientReplacementRequest for connection: %s", + __GNFN__, this, currentServer->getEndpointObject()->name().c_str()); request = std::make_shared<ClientReplacementRequest>( currentServer->getEndpointObject()->name(), exclEndPts, serverGrp); @@ -257,14 +271,14 @@ GfErrType ThinClientLocatorHelper::getEndpointForNewFwdConn( response->printInfo(); if (!response->serverFound()) { - LOGFINE("Server not found"); + LOGFINE("%s(%p): Server not found", __GNFN__, this); locatorFound = true; continue; } outEndpoint = response->getServerLocation(); - LOGFINE("Server found at [%s:%d]", outEndpoint.getServerName().c_str(), - outEndpoint.getPort()); + LOGFINE("%s(%p): Server found at [%s:%d]", __GNFN__, this, + outEndpoint.getServerName().c_str(), outEndpoint.getPort()); return GF_NOERR; } @@ -280,8 +294,10 @@ GfErrType ThinClientLocatorHelper::updateLocators( const std::string& serverGrp) { auto locators = getLocators(); for (const auto& loc : locators) { - LOGFINER("Querying locator list at: [%s:%d] for update from group [%s]", - loc.getServerName().c_str(), loc.getPort(), serverGrp.c_str()); + LOGFINER( + "%s(%p): Querying locator list at: [%s:%d] for update from group [%s]", + __GNFN__, this, loc.getServerName().c_str(), loc.getPort(), + serverGrp.c_str()); auto request = std::make_shared<LocatorListRequest>(serverGrp); auto response = std::dynamic_pointer_cast<LocatorListResponse>( diff --git a/tools/gnmsg/client_messages.py b/tools/gnmsg/client_messages.py index 3ff4428..f63333e 100644 --- a/tools/gnmsg/client_messages.py +++ b/tools/gnmsg/client_messages.py @@ -23,7 +23,7 @@ from read_values import ( read_byte_value, read_cacheable, read_int_value, - read_jmutf8_string_value, + read_geode_jmutf8_string_value, read_long_value, read_short_value, read_string_value, @@ -177,8 +177,8 @@ def parse_raw_string_part(message_bytes, offset): (string_part["IsObject"], offset) = call_reader_function( message_bytes, offset, read_byte_value ) - (string_part["Value"], offset) = read_jmutf8_string_value( - message_bytes, string_part["Size"], offset + (string_part["Value"], offset) = read_geode_jmutf8_string_value( + message_bytes, offset ) return (string_part, offset) diff --git a/tools/gnmsg/ds_fids.py b/tools/gnmsg/ds_fids.py new file mode 100644 index 0000000..d011cc8 --- /dev/null +++ b/tools/gnmsg/ds_fids.py @@ -0,0 +1,40 @@ +#!/usr/local/bin/python3 + +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +ds_fids = { + -135: "GatewaySenderEventCallbackArgument", + -126: "ClientHealthStats", + -120: "VersionTag", + -59: "CollectionTypeImpl", + -54: "LocatorListRequest", + -53: "ClientConnectionRequest", + -52: "QueueConnectionRequest", + -51: "LocatorListResponse", + -50: "ClientConnectionResponse", + -49: "QueueConnectionResponse", + -48: "ClientReplacementRequest", + -43: "GetAllServersRequest", + -42: "GetAllServersResponse", + 7: "VersionedObjectPartList", + 9: "EnumInfo", + 25: "CacheableObjectPartList", + 31: "CacheableUndefined", + 32: "Struct", + 36: "EventId", + 92: "InternalDistributedMember", + 110: "TXCommitMessage", + 2131: "DiskVersionTag", + 2133: "DiskStoreId", +} diff --git a/tools/gnmsg/gnmsg.py b/tools/gnmsg/gnmsg.py index 3b46257..b616729 100755 --- a/tools/gnmsg/gnmsg.py +++ b/tools/gnmsg/gnmsg.py @@ -16,31 +16,12 @@ import json import queue import os -import re import sys -import threading import traceback -from modified_utf8 import utf8m_to_utf8s -from numeric_conversion import to_hex_digit import command_line -from ds_codes import ds_codes -from connection_types import ConnectionTypes, ConnectionTypeStrings -from read_values import ( - read_number_from_hex_string, - read_byte_value, - read_number_from_hex_string, - read_short_value, - read_number_from_hex_string, - read_int_value, - read_long_value, - read_string_value, - read_jmutf8_string_value, - read_number_from_hex_string, - call_reader_function, -) from client_message_decoder import ClientMessageDecoder from server_message_decoder import ServerMessageDecoder from handshake_decoder import HandshakeDecoder diff --git a/tools/gnmsg/handshake_decoder.py b/tools/gnmsg/handshake_decoder.py index 8e7c1d3..b865df2 100644 --- a/tools/gnmsg/handshake_decoder.py +++ b/tools/gnmsg/handshake_decoder.py @@ -13,28 +13,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json import re +from dateutil import parser + from decoder_base import DecoderBase from ds_codes import ds_codes +from ds_fids import ds_fids from modified_utf8 import utf8m_to_utf8s from connection_types import ConnectionTypes, ConnectionTypeStrings from read_values import ( - read_number_from_hex_string, + call_reader_function, read_byte_value, - read_number_from_hex_string, - read_short_value, - read_number_from_hex_string, + read_cacheable_ascii_string_value, + read_fixed_id_byte_value, + read_geode_jmutf8_string_value, read_int_value, - read_long_value, - read_string_value, - read_jmutf8_string_value, - read_number_from_hex_string, - call_reader_function, + read_short_value, + read_unsigned_byte_value, ) +# TODO: Find a more reasonable place for this and other REPLY_* constants +REPLY_SSL_ENABLED = 21 + class HandshakeDecoder(DecoderBase): def __init__(self, output_queue): @@ -46,8 +48,50 @@ class HandshakeDecoder(DecoderBase): 2: "SECURITY_CREDENTIALS_DHENCRYPT", 3: "SECURITY_MULTIUSER_NOTIFICATIONCHANNEL", } + self.client_connection_request_expression_ = re.compile( + r"(\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d\.\d+).*([\d|a-f|A-F|x|X]+) .*\]\s*ThinClientLocatorHelper::sendRequest\([0-9|a-f|A-F]+\): sending \d+ bytes to locator:\s*([0-9|a-f|A-F]+)" + ) + self.client_connection_response_expression_ = re.compile( + r"(\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d\.\d+).*([\d|a-f|A-F|x|X]+) .*\]\s*ThinClientLocatorHelper::sendRequest\([0-9|a-f|A-F]+\): received \d+ bytes from locator:\s*([0-9|a-f|A-F]+)" + ) - def is_handshake_trace(self, line): + def is_client_connection_request(self, line): + match = self.client_connection_request_expression_.search(line) + if match: + return True + else: + return False + + def get_client_connection_request_parts(self, line, parts): + result = False + match = self.client_connection_request_expression_.search(line) + if match: + parts.append(parser.parse(match.group(1))) + parts.append(match.group(2)) + parts.append(match.group(3)) + result = True + + return result + + def is_client_connection_response(self, line): + match = self.client_connection_response_expression_.search(line) + if match: + return True + else: + return False + + def get_client_connection_response_parts(self, line, parts): + result = False + match = self.client_connection_response_expression_.search(line) + if match: + parts.append(parser.parse(match.group(1))) + parts.append(match.group(2)) + parts.append(match.group(3)) + result = True + + return result + + def is_server_handshake_trace(self, line): expression = re.compile(r"Handshake bytes: \(\d+\):\s*([0-9|a-f|A-F]+)") match = expression.search(line) if match: @@ -55,7 +99,7 @@ class HandshakeDecoder(DecoderBase): else: return False - def get_handshake_bytes(self, line): + def get_server_handshake_bytes(self, line): expression = re.compile(r"Handshake bytes: \(\d+\):\s*([0-9|a-f|A-F]+)") match = expression.search(line) if match: @@ -72,10 +116,18 @@ class HandshakeDecoder(DecoderBase): result = "" if int(address_size) == 4: - (octet1, offset) = call_reader_function(string, offset, read_byte_value) - (octet2, offset) = call_reader_function(string, offset, read_byte_value) - (octet3, offset) = call_reader_function(string, offset, read_byte_value) - (octet4, offset) = call_reader_function(string, offset, read_byte_value) + (octet1, offset) = call_reader_function( + string, offset, read_unsigned_byte_value + ) + (octet2, offset) = call_reader_function( + string, offset, read_unsigned_byte_value + ) + (octet3, offset) = call_reader_function( + string, offset, read_unsigned_byte_value + ) + (octet4, offset) = call_reader_function( + string, offset, read_unsigned_byte_value + ) result = ( str(octet1) + "." + str(octet2) + "." + str(octet3) + "." + str(octet4) ) @@ -104,7 +156,7 @@ class HandshakeDecoder(DecoderBase): string_bytes = string[offset : offset + string_length * 2] hostname = utf8m_to_utf8s( self.convert_to_bytes(string_bytes, string_length * 2) - ).decode("utf-8") + ) offset += string_length * 2 elif string_type == "CacheableStringHuge": (length_byte_3, offset) = call_reader_function( @@ -173,8 +225,8 @@ class HandshakeDecoder(DecoderBase): ) return (self.credentials_types[credential_type], offset) - def get_handshake_info(self, line, handshake_info): - handshake_bytes = self.get_handshake_bytes(line) + def get_server_handshake_info(self, line, handshake_info): + handshake_bytes = self.get_server_handshake_bytes(line) (connection_type, offset) = call_reader_function( handshake_bytes, 0, read_byte_value ) @@ -290,8 +342,108 @@ class HandshakeDecoder(DecoderBase): handshake_bytes, offset ) + def decode_client_connection_request(self, line, handshake_request): + parts = [] + if self.get_client_connection_request_parts(line, parts): + offset = 0 + handshake_request["Timestamp"] = parts[0] + handshake_request["tid"] = parts[1] + handshake_request["Direction"] = "--->" + handshake_request["Type"] = "ClientConnectionRequest" + request_bytes = parts[2] + + (handshake_request["GossipVersion"], offset) = call_reader_function( + request_bytes, offset, read_int_value + ) + (handshake_request["ProtocolOrdinal"], offset) = call_reader_function( + request_bytes, offset, read_short_value + ) + + (ds_code, offset) = call_reader_function( + request_bytes, offset, read_byte_value + ) + + (dsfid, offset) = call_reader_function( + request_bytes, offset, read_byte_value + ) + if ds_fids[dsfid] != "ClientConnectionRequest": + raise TypeError("Expected type 'ClientConnectionRequest'") + + server_group = {} + (ds_code, offset) = call_reader_function( + request_bytes, offset, read_byte_value + ) + server_group["DSCode"] = ds_codes[ds_code] + + (server_group["Name"], offset) = read_geode_jmutf8_string_value( + request_bytes, offset + ) + handshake_request["ServerGroup"] = server_group + + (server_location_count, offset) = call_reader_function( + request_bytes, offset, read_int_value + ) + handshake_request["ServerLocations"] = server_location_count + + # TODO: Decode server locations. Not concerned about this right now because we don't have a log showing + # native client actually sending any. + + def read_server_location(self, line, handshake_response, offset): + server_location = {} + (server_location["hostname"], offset) = read_cacheable_ascii_string_value( + line, offset + ) + (server_location["port"], offset) = call_reader_function( + line, offset, read_int_value + ) + + handshake_response["ServerLocation"] = server_location + return offset + + def decode_client_connection_response(self, line, handshake_response): + parts = [] + if self.get_client_connection_response_parts(line, parts): + handshake_response["Timestamp"] = parts[0] + handshake_response["tid"] = parts[1] + handshake_response["Direction"] = "--->" + handshake_response["Type"] = "ClientConnectionResponse" + response_bytes = parts[2] + offset = 0 + + handshake_response["Direction"] = "<---" + (ssl_enabled, offset) = call_reader_function( + response_bytes, offset, read_byte_value + ) + if ssl_enabled == REPLY_SSL_ENABLED: + handshake_response["SSLEnabled"] = "True" + else: + handshake_response["SSLEnabled"] = "False" + offset = 0 + + (fixed_id, offset) = read_fixed_id_byte_value(response_bytes, offset) + if ds_fids[fixed_id] == "ClientConnectionResponse": + (server_found, offset) = call_reader_function( + response_bytes, offset, read_byte_value + ) + handshake_response["ServerFound"] = ( + "True" if server_found == 1 else "False" + ) + + if server_found == 1: + offset = self.read_server_location( + response_bytes, handshake_response, offset + ) + else: + raise TypeError("Expected type 'ClientConnectionRequest'") + def process_line(self, line): handshake = {} - if self.is_handshake_trace(line): - self.get_handshake_info(line, handshake) + if self.is_client_connection_request(line): + self.decode_client_connection_request(line, handshake) + self.output_queue_.put({"handshake": handshake}) + elif self.is_client_connection_response(line): + self.decode_client_connection_response(line, handshake) + self.output_queue_.put({"handshake": handshake}) + elif self.is_server_handshake_trace(line): + self.get_server_handshake_info(line, handshake) self.output_queue_.put({"handshake": handshake}) diff --git a/tools/gnmsg/modified_utf8.py b/tools/gnmsg/modified_utf8.py index a895966..9c4cb9d 100644 --- a/tools/gnmsg/modified_utf8.py +++ b/tools/gnmsg/modified_utf8.py @@ -129,4 +129,4 @@ def utf8m_to_utf8s(string): new_string.append(byte2) new_string.append(byte3) i += 1 - return bytes(new_string) + return bytes(new_string).decode("utf-8") diff --git a/tools/gnmsg/read_values.py b/tools/gnmsg/read_values.py index 2f18ab5..24a99a9 100644 --- a/tools/gnmsg/read_values.py +++ b/tools/gnmsg/read_values.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from ds_codes import ds_codes +from modified_utf8 import utf8m_to_utf8s def read_number_from_hex_string(string, offset, size): @@ -75,11 +76,71 @@ def read_string_value(string, length, offset): return (string_value, offset + (length * 2)) -def read_jmutf8_string_value(string, length, offset): - # TODO: Read Java Modified utf-8 string from bytes. Cheating is okay for - # now, cause it's super unlikely I'll hit a string where it makes a - # difference - return read_string_value(string, length, offset) +def read_fixed_id_byte_value(string, offset): + (ds_code, offset) = call_reader_function(string, offset, read_byte_value) + if ds_codes[ds_code] == "FixedIDByte": + (byte_value, offset) = call_reader_function(string, offset, read_byte_value) + else: + raise TypeError("Expected DSCode 'FixedIDByte'") + + return (byte_value, offset) + + +def read_cacheable_ascii_string_value(string, offset): + (ds_code, offset) = call_reader_function(string, offset, read_byte_value) + string_value = [] + if ds_codes[ds_code] == "CacheableASCIIString": + (size, offset) = call_reader_function(string, offset, read_short_value) + for i in range(size): + (ascii_char, offset) = call_reader_function(string, offset, read_byte_value) + string_value.append(ascii_char) + else: + raise TypeError("Attempt to decode another type as CacheableASCIIString") + + return (bytes(string_value).decode("ascii"), offset) + + +# Decodes a hex string to JM utf-8 bytes, returns plain utf-8 string +def read_geode_jmutf8_string_value(buffer, offset): + cursor = offset + string = [] + bad_length = IndexError("Insufficient length for JM utf-8 string") + + while cursor < len(buffer): + code_point, cursor = call_reader_function(buffer, cursor, read_byte_value) + if code_point == 0: + if cursor < len(buffer) - 1: + # special treatment for Geode - rather than encode actual JM utf-8 + # NULL char, they chose to just put 0 in for empty strings in the + # protocol. Le sigh + break + else: + raise bad_length + elif code_point < 0x7F: # one-byte encoding + string.append(code_point) + elif (code_point & 0xE0) == 0xC0: # two-byte encoding + if cursor < len(buffer) - 1: + (byte2, cursor) = call_reader_function(buffer, cursor, read_byte_value) + string.append(code_point) + string.append(byte2) + if (byte2 & 0x80) == 0x80: # Null char, end of string(???) + break + else: + raise bad_length + # 3-byte or 6-byte encoding. We don't care which here, because we'll + # just pick up the next 3-byte encoding in the loop, and the conversion + # at the end will raise an exception if there's a problem. + elif (code_point & 0xF0) == 0xE0: + if cursor < len(buffer) - 3: + (byte2, cursor) = call_reader_function(buffer, cursor, read_byte_value) + (byte3, cursor) = call_reader_function(buffer, cursor, read_byte_value) + string.append(code_point) + string.append(byte2) + string.append(byte3) + else: + raise bad_length + + return (utf8m_to_utf8s(string), cursor) def call_reader_function(string, offset, fn):