On 11/12/22, Undescribed Horrific Abuse, One Victim & Survivor of Many <[email protected]> wrote: > Here's the diff of the file git is specifically complaining about: > > diff --git a/flat_tree/mix_indices.py b/flat_tree/mix_indices.py > index d49ec94..6742218 100644 > --- a/flat_tree/mix_indices.py > +++ b/flat_tree/mix_indices.py > @@ -19,35 +19,30 @@ class append_indices(list): > assert spliced_out_start == self.size # until testing > truncation appends > assert spliced_out_stop == self.size > > - running_size = 0 > - running_leaf_count = 0 > + balanced_size = 0 > + balanced_leaf_count = 0
I'm guessing this change is just naming clarity. > #leaf_count_of_partial_index_at_end_tmp = 0 > #proposed_leaf_count = self.leaf_count - > leaf_count_of_partial_index_at_end_tmp > #new_node_leaf_count = self.leaf_count # + 1 > - > - new_leaf_count = self.leaf_count > - new_size = self.size Reasoning for this likely comes out later on in the file. Looks harmless so far. > > for idx, (branch_leaf_count, branch_offset, branch_size, > branch_id) in enumerate( > self): > - if branch_leaf_count * self.degree <= new_leaf_count: > #proposed_leaf_count > + if branch_leaf_count * self.degree <= self.leaf_count > - balanced_leaf_count: > #proposed_leaf_count > break Okay, I chose to remove the local variables and reference the members directly. I guess that's harmless. > #if new_node_offset + branch_size > spliced_out_start: > # break > - running_size += branch_size > - running_leaf_count += branch_leaf_count > + balanced_size += branch_size > + balanced_leaf_count += branch_leaf_count Here's a propagation of the naming clarity change. Naming clarity changes are likely good things. I probably changed this to help me remember what was going on when I was confused. > #proposed_leaf_count -= branch_leaf_count > #new_node_leaf_count -= branch_leaf_count > #new_total_leaf_count += branch_leaf_count > - new_leaf_count -= branch_leaf_count > - new_size -= branch_size Okay, these are removed because the value is calculated from member variables, above. > else: > idx = len(self) > > - assert new_size == sum((size for leaf_count, offset, > size, value in self[idx:])) > + assert self.size - balanced_size == sum((size for > leaf_count, offset, size, value in self[idx:])) Here, again, making a calculation based on member variables, rather than caching it with local variables. Looks equivalent. If I have to merge something here, I'll probably want to memorise this change I made, so as to understand how to unify it with something else, or whether to choose to reject it. > > self[idx:] = ( > - #(leaf_count_of_partial_index_at_end_tmp, > running_size, spliced_out_start - running_size, last_publish), > - (new_leaf_count, running_size, new_size, last_publish), > + #(leaf_count_of_partial_index_at_end_tmp, > balanced_size, spliced_out_start - balanced_size, last_publish), > + (self.leaf_count - balanced_leaf_count, > balanced_size, self.size - balanced_size, last_publish), > (-1, 0, spliced_in_size, spliced_in_data) > ) > And finally, at the end, um ..... looks like I just made the same change to calculate based on member variables rather than tracking a local variable. I guess it's harmless enough. Seems like it might help one think about bounds of thread safety a little.
