This is an automated email from the ASF dual-hosted git repository. tvb pushed a commit to branch bschubert/optimize-mapping-node in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 712e6b642e5263a24069f8b2300bd0cd76da1a6f Author: Tristan van Berkom <[email protected]> AuthorDate: Mon Jun 29 22:31:11 2020 +0900 Node code: Node now has internal _set() method to set values Summary of changes: * node.[pyx,pxd]: Added _set() interface Now __setitem__ uses _set() and the content is moved to the internal cdef _set() interface. This is a single point for setting values on dictionaries, and now ensures that all MappingNode keys use interned strings. * _yaml.pyx: Use _set() instead of accessing the MappingNode.value directly. --- src/buildstream/_yaml.pyx | 12 +++++--- src/buildstream/node.pxd | 1 + src/buildstream/node.pyx | 75 ++++++++++++++++++++++++++++------------------- 3 files changed, 54 insertions(+), 34 deletions(-) diff --git a/src/buildstream/_yaml.pyx b/src/buildstream/_yaml.pyx index 1e59b2a..26c7286 100644 --- a/src/buildstream/_yaml.pyx +++ b/src/buildstream/_yaml.pyx @@ -177,14 +177,18 @@ cdef class Representer: cdef RepresenterState _handle_wait_value_ScalarEvent(self, object ev): key = self.keys.pop() - (<MappingNode> self.output[-1]).value[key] = \ - ScalarNode.__new__(ScalarNode, self._file_index, ev.start_mark.line, ev.start_mark.column, ev.value) + (<MappingNode> self.output[-1])._set(key, + ScalarNode.__new__(ScalarNode, + self._file_index, + ev.start_mark.line, + ev.start_mark.column, + ev.value)) return RepresenterState.wait_key cdef RepresenterState _handle_wait_value_MappingStartEvent(self, object ev): cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev) key = self.keys.pop() - (<MappingNode> self.output[-2]).value[key] = self.output[-1] + (<MappingNode> self.output[-2])._set(key, self.output[-1]) return new_state cdef RepresenterState _handle_wait_key_MappingEndEvent(self, object ev): @@ -202,7 +206,7 @@ cdef class Representer: cdef RepresenterState _handle_wait_value_SequenceStartEvent(self, object ev): self.output.append(SequenceNode.__new__( SequenceNode, self._file_index, ev.start_mark.line, ev.start_mark.column, [])) - (<MappingNode> self.output[-2]).value[self.keys[-1]] = self.output[-1] + (<MappingNode> self.output[-2])._set(self.keys[-1], self.output[-1]) return RepresenterState.wait_list_item cdef RepresenterState _handle_wait_list_item_SequenceStartEvent(self, object ev): diff --git a/src/buildstream/node.pxd b/src/buildstream/node.pxd index c26d78a..769de9a 100644 --- a/src/buildstream/node.pxd +++ b/src/buildstream/node.pxd @@ -71,6 +71,7 @@ cdef class MappingNode(Node): # Private Methods cdef void __composite(self, MappingNode target, list path) except * + cdef void _set(self, str key, object value) cdef Node _get(self, str key, default, default_constructor) diff --git a/src/buildstream/node.pyx b/src/buildstream/node.pyx index 3941383..42a0794 100644 --- a/src/buildstream/node.pyx +++ b/src/buildstream/node.pyx @@ -53,6 +53,7 @@ Class Reference """ import string +import sys from ._exceptions import LoadError from .exceptions import LoadErrorReason @@ -503,36 +504,7 @@ cdef class MappingNode(Node): del self.value[key] def __setitem__(self, str key, object value): - cdef Node old_value - - if type(value) in [MappingNode, ScalarNode, SequenceNode]: - self.value[key] = value - else: - node = __create_node_recursive(value, self) - - # FIXME: Do we really want to override provenance? - # - # Related to https://gitlab.com/BuildStream/buildstream/issues/1058 - # - # There are only two cases were nodes are set in the code (hence without provenance): - # - When automatic variables are set by the core (e-g: max-jobs) - # - when plugins call Element.set_public_data - # - # The first case should never throw errors, so it is of limited interests. - # - # The second is more important. What should probably be done here is to have 'set_public_data' - # able of creating a fake provenance with the name of the plugin, the project and probably the - # element name. - # - # We would therefore have much better error messages, and would be able to get rid of most synthetic - # nodes. - old_value = self.value.get(key) - if old_value: - node.file_index = old_value.file_index - node.line = old_value.line - node.column = old_value.column - - self.value[key] = node + self._set(key, value) ############################################################# # Public Methods # @@ -1126,6 +1098,49 @@ cdef class MappingNode(Node): target.line = self.line target.column = self.column + # _set(key, value) + # + # Internal helper method to set an entry. This is used + # everywhere that a MappingNode value can be set, and ensures + # that MappingNode keys are always interned. + # + # Args: + # key (str): the key for which to set the value + # value (object): the value to set + # + cdef void _set(self, str key, object value): + cdef Node old_value + cdef intern_key = sys.intern(key) + + if type(value) in [MappingNode, ScalarNode, SequenceNode]: + self.value[intern_key] = value + else: + node = __create_node_recursive(value, self) + + # FIXME: Do we really want to override provenance? + # + # Related to https://gitlab.com/BuildStream/buildstream/issues/1058 + # + # There are only two cases were nodes are set in the code (hence without provenance): + # - When automatic variables are set by the core (e-g: max-jobs) + # - when plugins call Element.set_public_data + # + # The first case should never throw errors, so it is of limited interests. + # + # The second is more important. What should probably be done here is to have 'set_public_data' + # able of creating a fake provenance with the name of the plugin, the project and probably the + # element name. + # + # We would therefore have much better error messages, and would be able to get rid of most synthetic + # nodes. + old_value = self.value.get(intern_key) + if old_value: + node.file_index = old_value.file_index + node.line = old_value.line + node.column = old_value.column + + self.value[intern_key] = node + # _get(key, default, default_constructor) # # Internal helper method to get an entry from the underlying dictionary.
