Changeset: e2d39b39fc91 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e2d39b39fc91
Modified Files:
	clients/iotclient/requirements.txt
	clients/iotclient/src/Streams/datatypes.py
	clients/iotclient/src/Streams/jsonschemas.py
Branch: iot
Log Message:
Corrected binary representations diffs (truncated from 334 to 300 lines): diff --git a/clients/iotclient/requirements.txt b/clients/iotclient/requirements.txt --- a/clients/iotclient/requirements.txt +++ b/clients/iotclient/requirements.txt @@ -6,5 +6,5 @@ pytz==2016.4 rfc3987==1.3.5 Sphinx==1.4.1 sphinx-rtd-theme==0.1.9 -strict-rfc3339==0.6 +strict-rfc3339==0.7 tzlocal==1.2.2 diff --git a/clients/iotclient/src/Streams/datatypes.py b/clients/iotclient/src/Streams/datatypes.py --- a/clients/iotclient/src/Streams/datatypes.py +++ b/clients/iotclient/src/Streams/datatypes.py @@ -9,14 +9,17 @@ import re from abc import ABCMeta, abstractmethod from dateutil import parser -from jsonschemas import UUID_REGEX, MAC_ADDRESS_REGEX, TIME_REGEX +from jsonschemas import UUID_REGEX, MAC_ADDRESS_REGEX, TIME_REGEX, IPV4_REGEX # Later check the byte order https://docs.python.org/2/library/struct.html#byte-order-size-and-alignment # Also check the consequences of aligment on packing HUGEINTs! # The null constants might change from system to system due to different CPU's -ALIGNMENT = '<' # for now is little-endian for Intel CPU's +LITTLE_ENDIAN_ALIGNMENT = '<' # for now is little-endian for Intel CPU's +BIG_ENDIAN_ALIGNMENT = '>' +UUID_SIZE = 16 NIL_STRING = "\200" +NIL_UUID = "00000000-0000-0000-0000-000000000000" INT8_MIN = 0x80 INT16_MIN = 0x8000 @@ -51,12 +54,12 @@ class StreamDataType(object): def is_nullable(self): # check if the column is nullable or not return self._is_nullable + @abstractmethod def get_nullable_constant(self): # get the nullable constant if the column is nullable return None - @abstractmethod def set_default_value(self, default_value): # set the default value representation in the data type - self._default_value = None + self._default_value = default_value def get_default_value(self): # get the default value representation in the data type return self._default_value @@ -120,9 +123,6 @@ class TextType(StreamDataType): def get_nullable_constant(self): return 
NIL_STRING - def set_default_value(self, default_value): - self._default_value = default_value - def add_json_schema_entry(self, schema): super(TextType, self).add_json_schema_entry(schema) schema[self._column_name]['type'] = 'string' @@ -137,28 +137,7 @@ class TextType(StreamDataType): def pack_parsed_values(self, extracted_values, counter, parameters): string_pack = "".join(extracted_values) - return struct.pack(ALIGNMENT + str(parameters['lengths_sum']) + 's', string_pack) - - -class UUIDType(TextType): - """Covers: UUID""" - - def __init__(self, **kwargs): - super(UUIDType, self).__init__(**kwargs) - - def add_json_schema_entry(self, schema): - super(UUIDType, self).add_json_schema_entry(schema) - schema[self._column_name]['pattern'] = UUID_REGEX - - def prepare_parameters(self): - return {} - - def process_next_value(self, entry, counter, parameters, errors): - return str(entry) + '\n' - - def pack_parsed_values(self, extracted_values, counter, parameters): - string_pack = "".join(extracted_values) - return struct.pack(ALIGNMENT + str(37 * counter) + 's', string_pack) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(parameters['lengths_sum']) + 's', string_pack) class MACType(TextType): @@ -171,15 +150,12 @@ class MACType(TextType): super(MACType, self).add_json_schema_entry(schema) schema[self._column_name]['pattern'] = MAC_ADDRESS_REGEX - def prepare_parameters(self): - return {} - def process_next_value(self, entry, counter, parameters, errors): return str(entry) + '\n' def pack_parsed_values(self, extracted_values, counter, parameters): string_pack = "".join(extracted_values) - return struct.pack(ALIGNMENT + str(18 * counter) + 's', string_pack) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(18 * counter) + 's', string_pack) def process_sql_parameters(self, array): array[2] = 'char(17)' # A MAC Address has 17 characters @@ -196,32 +172,6 @@ class URLType(TextType): schema[self._column_name]['format'] = 'uri' -class INet(TextType): - """Covers: 
Inet""" - - def __init__(self, **kwargs): - super(INet, self).__init__(**kwargs) - - def add_json_schema_entry(self, schema): - super(INet, self).add_json_schema_entry(schema) - schema[self._column_name]['format'] = 'ipv4' - - -class INetSix(TextType): - """Covers: Inet6""" - - def __init__(self, **kwargs): - super(INetSix, self).__init__(**kwargs) - - def add_json_schema_entry(self, schema): - super(INetSix, self).add_json_schema_entry(schema) - schema[self._column_name]['format'] = 'ipv6' - - # http://stackoverflow.com/questions/166132/maximum-length-of-the-textual-representation-of-an-ipv6-address - def process_sql_parameters(self, array): - array[2] = 'char(45)' - - class RegexType(TextType): """Covers: Regex""" @@ -309,6 +259,108 @@ class EnumType(TextType): return ''.join(array) +class INetSix(TextType): + """Covers: Inet6""" + + def __init__(self, **kwargs): + super(INetSix, self).__init__(**kwargs) + + def add_json_schema_entry(self, schema): + super(INetSix, self).add_json_schema_entry(schema) + schema[self._column_name]['format'] = 'ipv6' + + # http://stackoverflow.com/questions/166132/maximum-length-of-the-textual-representation-of-an-ipv6-address + def process_sql_parameters(self, array): + array[2] = 'char(45)' + + +class INet(StreamDataType): + """Covers: Inet""" + + def __init__(self, **kwargs): + super(INet, self).__init__(**kwargs) + + def get_nullable_constant(self): + return "0" # has to trick because it is impossible to get a null value from a valid IPv4 address in MonetDB + + def add_json_schema_entry(self, schema): + super(INet, self).add_json_schema_entry(schema) + schema[self._column_name]['pattern'] = IPV4_REGEX + + def process_next_value(self, entry, counter, parameters, errors): + array = [0, 0, 0, 0, 0, 0, 0, 0] # according to MonetDB's source code + + if entry == self.get_nullable_constant(): + array[7] = 1 + return array + + components = re.split(r'[./]+', entry) + for i in range(4): + array[i] = int(components[i]) + if len(components) 
> 4: # if it has a mask add it to the array + array[4] = int(components[4]) + else: + array[4] = 32 + + return array + + def pack_parsed_values(self, extracted_values, counter, parameters): + extracted_values = list(itertools.chain(*extracted_values)) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter << 3) + 'B', *extracted_values) # arrays of 8 uchars + + +class UUIDType(StreamDataType): + """Covers: UUID""" + + def __init__(self, **kwargs): + super(UUIDType, self).__init__(**kwargs) + + def get_nullable_constant(self): + return NIL_UUID + + def add_json_schema_entry(self, schema): + super(UUIDType, self).add_json_schema_entry(schema) + schema[self._column_name]['pattern'] = UUID_REGEX + + def process_next_value(self, entry, counter, parameters, errors): + array = UUID_SIZE * [0] + j = 0 + s = 0 + + for i in range(UUID_SIZE): + if j in (8, 12, 16, 20): # do nothing with the dashes + s += 1 + + next_char = ord(entry[s]) + if 48 <= next_char <= 57: # between '0' and '9' + array[i] = next_char - 48 + elif 97 <= next_char <= 102: # between 'a' and 'f' + array[i] = next_char - 87 + elif 65 <= next_char <= 70: # between 'A' and 'F' + array[i] = next_char - 55 + + s += 1 + j += 1 + array[i] <<= 4 + next_char = ord(entry[s]) + + if 48 <= next_char <= 57: # between '0' and '9' + array[i] |= next_char - 48 + elif 97 <= next_char <= 102: # between 'a' and 'f' + array[i] |= next_char - 87 + elif 65 <= next_char <= 70: # between 'A' and 'F' + array[i] |= next_char - 55 + + s += 1 + j += 1 + + return array + + def pack_parsed_values(self, extracted_values, counter, parameters): + extracted_values = list(itertools.chain(*extracted_values)) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter << 4) + 'B', *extracted_values) # arrays of 16 uchars + + class BooleanType(StreamDataType): """Covers: BOOL[EAN]""" @@ -331,7 +383,7 @@ class BooleanType(StreamDataType): return 0 def pack_parsed_values(self, extracted_values, counter, parameters): - return 
struct.pack(ALIGNMENT + str(counter) + 'b', *extracted_values) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter) + 'b', *extracted_values) class NumberBaseType(StreamDataType): @@ -406,7 +458,7 @@ class SmallIntegerType(NumberBaseType): return int(entry) def pack_parsed_values(self, extracted_values, counter, parameters): - return struct.pack(ALIGNMENT + str(counter) + self._pack_sym, *extracted_values) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter) + self._pack_sym, *extracted_values) class HugeIntegerType(NumberBaseType): @@ -427,7 +479,7 @@ class HugeIntegerType(NumberBaseType): def pack_parsed_values(self, extracted_values, counter, parameters): extracted_values = list(itertools.chain(*extracted_values)) - return struct.pack(ALIGNMENT + str(counter << 1) + 'Q', *extracted_values) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter << 1) + 'Q', *extracted_values) class FloatType(NumberBaseType): @@ -450,7 +502,7 @@ class FloatType(NumberBaseType): return float(entry) def pack_parsed_values(self, extracted_values, counter, parameters): - return struct.pack(ALIGNMENT + str(counter) + self._pack_sym, *extracted_values) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter) + self._pack_sym, *extracted_values) class DecimalType(NumberBaseType): @@ -515,7 +567,7 @@ class DecimalType(NumberBaseType): if self._pack_sym == 'Q': extracted_values = list(itertools.chain(*extracted_values)) counter <<= 1 # duplicate the counter for packing - return struct.pack(ALIGNMENT + str(counter) + self._pack_sym, *extracted_values) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter) + self._pack_sym, *extracted_values) def to_json_representation(self): json_value = super(DecimalType, self).to_json_representation() @@ -610,7 +662,7 @@ class DateType(BaseDateTimeType): # Sto return int((parsed - day0).days) + 366 def pack_parsed_values(self, extracted_values, counter, parameters): - return struct.pack(ALIGNMENT + str(counter) + 'I', 
*extracted_values) + return struct.pack(LITTLE_ENDIAN_ALIGNMENT + str(counter) + 'I', *extracted_values)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list
