Re: [PATCH v2] cleanup: fix possible overflow errors in binary search

2017-10-09 Thread Jeff King
On Sun, Oct 08, 2017 at 02:29:37PM -0400, Derrick Stolee wrote:

> A common mistake when writing binary search is to allow possible
> integer overflow by using the simple average:
> 
>   mid = (min + max) / 2;
> 
> Instead, use the overflow-safe version:
> 
>   mid = min + (max - min) / 2;
> 
> This translation is safe since the operation occurs inside a loop
> conditioned on "min < max". The included changes were found using
> the following git grep:
> 
>   git grep '/ *2;' '*.c'
> 
> Making this cleanup will prevent future review friction when a new
> binary search is contructed based on existing code.

Thanks, this version looks good to me.

> diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c
> index d4121f2f4..98342b831 100644
> --- a/compat/regex/regex_internal.c
> +++ b/compat/regex/regex_internal.c
> @@ -613,7 +613,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int 
> eflags)
> int low = 0, high = pstr->valid_len, mid;
> do
>   {
> -   mid = (high + low) / 2;
> +   mid = low + (high - low) / 2;
> if (pstr->offsets[mid] > offset)
>   high = mid;
> else if (pstr->offsets[mid] < offset)

This one is a do-while, so it's less obvious that "high" is always more
than "low" when entering the loop. But one assumes it is so, since the
binary search wouldn't work otherwise.

-Peff


[PATCH v2] cleanup: fix possible overflow errors in binary search

2017-10-08 Thread Derrick Stolee
A common mistake when writing binary search is to allow possible
integer overflow by using the simple average:

mid = (min + max) / 2;

Instead, use the overflow-safe version:

mid = min + (max - min) / 2;

This translation is safe since the operation occurs inside a loop
conditioned on "min < max". The included changes were found using
the following git grep:

git grep '/ *2;' '*.c'

Making this cleanup will prevent future review friction when a new
binary search is contructed based on existing code.

Signed-off-by: Derrick Stolee 
---
 builtin/index-pack.c  | 4 ++--
 builtin/pack-objects.c| 2 +-
 builtin/unpack-objects.c  | 2 +-
 cache-tree.c  | 2 +-
 compat/regex/regex_internal.c | 4 ++--
 compat/regex/regexec.c| 2 +-
 packfile.c| 2 +-
 sha1-lookup.c | 4 ++--
 sha1_name.c   | 2 +-
 string-list.c | 2 +-
 utf8.c| 2 +-
 xdiff/xpatience.c | 2 +-
 12 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index f2be145e1..8ec459f52 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -633,7 +633,7 @@ static int find_ofs_delta(const off_t offset, enum 
object_type type)
int first = 0, last = nr_ofs_deltas;
 
while (first < last) {
-   int next = (first + last) / 2;
+   int next = first + (last - first) / 2;
struct ofs_delta_entry *delta = _deltas[next];
int cmp;
 
@@ -687,7 +687,7 @@ static int find_ref_delta(const unsigned char *sha1, enum 
object_type type)
int first = 0, last = nr_ref_deltas;
 
while (first < last) {
-   int next = (first + last) / 2;
+   int next = first + (last - first) / 2;
struct ref_delta_entry *delta = _deltas[next];
int cmp;
 
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 5ee2c48ff..6e77dfd44 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1277,7 +1277,7 @@ static int done_pbase_path_pos(unsigned hash)
int lo = 0;
int hi = done_pbase_paths_num;
while (lo < hi) {
-   int mi = (hi + lo) / 2;
+   int mi = lo + (hi - lo) / 2;
if (done_pbase_paths[mi] == hash)
return mi;
if (done_pbase_paths[mi] < hash)
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 689a29fac..62ea264c4 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -394,7 +394,7 @@ static void unpack_delta_entry(enum object_type type, 
unsigned long delta_size,
lo = 0;
hi = nr;
while (lo < hi) {
-   mid = (lo + hi)/2;
+   mid = lo + (hi - lo) / 2;
if (base_offset < obj_list[mid].offset) {
hi = mid;
} else if (base_offset > obj_list[mid].offset) {
diff --git a/cache-tree.c b/cache-tree.c
index 71d092ed5..d3f740127 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -49,7 +49,7 @@ static int subtree_pos(struct cache_tree *it, const char 
*path, int pathlen)
lo = 0;
hi = it->subtree_nr;
while (lo < hi) {
-   int mi = (lo + hi) / 2;
+   int mi = lo + (hi - lo) / 2;
struct cache_tree_sub *mdl = down[mi];
int cmp = subtree_name_cmp(path, pathlen,
   mdl->name, mdl->namelen);
diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c
index d4121f2f4..98342b831 100644
--- a/compat/regex/regex_internal.c
+++ b/compat/regex/regex_internal.c
@@ -613,7 +613,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int 
eflags)
  int low = 0, high = pstr->valid_len, mid;
  do
{
- mid = (high + low) / 2;
+ mid = low + (high - low) / 2;
  if (pstr->offsets[mid] > offset)
high = mid;
  else if (pstr->offsets[mid] < offset)
@@ -1394,7 +1394,7 @@ re_node_set_contains (const re_node_set *set, int elem)
   right = set->nelem - 1;
   while (idx < right)
 {
-  mid = (idx + right) / 2;
+  mid = idx + (right - idx) / 2;
   if (set->elems[mid] < elem)
idx = mid + 1;
   else
diff --git a/compat/regex/regexec.c b/compat/regex/regexec.c
index 0a745d9c3..6f2b48a78 100644
--- a/compat/regex/regexec.c
+++ b/compat/regex/regexec.c
@@ -4284,7 +4284,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, 
int str_idx)
   last = right = mctx->nbkref_ents;
   for (left = 0; left < right;)
 {
-  mid = (left + right) / 2;
+  mid = left + (right - left) / 2;
   if (mctx->bkref_ents[mid].str_idx < str_idx)
left = mid +