kou commented on code in PR #40520:
URL: https://github.com/apache/arrow/pull/40520#discussion_r1551881822


##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +64,71 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, lines):
+        return [line for line in lines if not re.search(r'\s[Ww]\s', line)]
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def find_library_paths(self, libraries):
+        paths = {}
+        system = platform.system()
+        for lib in libraries:
+            paths[lib] = []
+            if system == 'Linux':
+                result = _ldconfig.run('-p', stdout=subprocess.PIPE)

Review Comment:
   Why do you use `ldconfig` not `ldd`?
   In general, `ldconfig` exists at `/sbin/ldconfig` not `/bin/` and normal 
user's `PATH` doesn't include `/sbin/`. So `ldd` is better than `ldconfig`.



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +64,71 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, lines):
+        return [line for line in lines if not re.search(r'\s[Ww]\s', line)]
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def find_library_paths(self, libraries):
+        paths = {}
+        system = platform.system()
+        for lib in libraries:
+            paths[lib] = []
+            if system == 'Linux':
+                result = _ldconfig.run('-p', stdout=subprocess.PIPE)
+                lines = result.stdout.decode('utf-8').splitlines()
+                for line in lines:
+                    if lib in line:
+                        match = re.search(r' => (.*)', line)
+                        if match:
+                            paths[lib].append(match.group(1))
+            else:
+                raise ValueError(f"{platform} is not supported")
+        return paths
+
+
+def _check_undefined_symbols(dylib, allowed):
+    # Check for undefined symbols
+    undefined_symbols = 
dylib.list_undefined_symbols_for_dependency(dylib.path, True)
+    expected_lib_paths = dylib.find_library_paths(allowed)
+    all_paths = []
+
+    for paths in expected_lib_paths.values():
+        all_paths.extend(paths)
+
+    for lb_path in all_paths:
+        expected_symbols = dylib.list_symbols_for_dependency(lb_path, True)
+        for exp_sym in expected_symbols:
+            if exp_sym in undefined_symbols:
+                undefined_symbols.remove(exp_sym)

Review Comment:
   Can we simplify this?
   
   ```suggestion
           undefined_symbols = [symbol for symbol in undefined_symbols if 
symbol not in expected_symbols]
   ```



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +64,71 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, lines):
+        return [line for line in lines if not re.search(r'\s[Ww]\s', line)]
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def find_library_paths(self, libraries):
+        paths = {}
+        system = platform.system()
+        for lib in libraries:
+            paths[lib] = []
+            if system == 'Linux':
+                result = _ldconfig.run('-p', stdout=subprocess.PIPE)
+                lines = result.stdout.decode('utf-8').splitlines()
+                for line in lines:
+                    if lib in line:
+                        match = re.search(r' => (.*)', line)
+                        if match:
+                            paths[lib].append(match.group(1))
+            else:
+                raise ValueError(f"{platform} is not supported")
+        return paths
+
+
+def _check_undefined_symbols(dylib, allowed):
+    # Check for undefined symbols
+    undefined_symbols = 
dylib.list_undefined_symbols_for_dependency(dylib.path, True)
+    expected_lib_paths = dylib.find_library_paths(allowed)
+    all_paths = []
+
+    for paths in expected_lib_paths.values():
+        all_paths.extend(paths)
+
+    for lb_path in all_paths:

Review Comment:
   Could you use `lib_path` or something instead of abbreviated `lb_path`?



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +64,71 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, lines):
+        return [line for line in lines if not re.search(r'\s[Ww]\s', line)]
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]

Review Comment:
   How about extracting this as a method because this is used in 
`list_undefined_symbols_for_dependency()` too?



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +64,71 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]

Review Comment:
   It seems that `lines` is better than `symbol_info` because you call it as 
`lines` in other places.
   
   ```suggestion
       def _extract_symbols(self, lines):
           return [re.search(r'\S+$', line).group() for line in lines if line]
   ```
   
   Or we can use `symbol_info` in other places instead.



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +64,71 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, lines):
+        return [line for line in lines if not re.search(r'\s[Ww]\s', line)]
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def find_library_paths(self, libraries):
+        paths = {}
+        system = platform.system()
+        for lib in libraries:
+            paths[lib] = []
+            if system == 'Linux':
+                result = _ldconfig.run('-p', stdout=subprocess.PIPE)
+                lines = result.stdout.decode('utf-8').splitlines()
+                for line in lines:
+                    if lib in line:
+                        match = re.search(r' => (.*)', line)
+                        if match:
+                            paths[lib].append(match.group(1))
+            else:
+                raise ValueError(f"{platform} is not supported")
+        return paths
+
+
+def _check_undefined_symbols(dylib, allowed):
+    # Check for undefined symbols
+    undefined_symbols = 
dylib.list_undefined_symbols_for_dependency(dylib.path, True)
+    expected_lib_paths = dylib.find_library_paths(allowed)
+    all_paths = []
+
+    for paths in expected_lib_paths.values():
+        all_paths.extend(paths)
+
+    for lb_path in all_paths:

Review Comment:
   Can we simplify this?
   
   ```suggestion
       expected_lib_paths = dylib.find_library_paths(allowed)
       for lb_path in expected_lib_paths.values():
   ```



##########
dev/archery/archery/linking.py:
##########
@@ -61,9 +64,71 @@ def list_dependency_names(self):
             names.append(name)
         return names
 
+    def _extract_symbols(self, symbol_info):
+        return [re.search(r'\S+$', line).group() for line in symbol_info if 
line]
+
+    def _remove_weak_symbols(self, lines):
+        return [line for line in lines if not re.search(r'\s[Ww]\s', line)]
+
+    def list_symbols_for_dependency(self, dependency, 
remove_symbol_versions=False):
+        result = _nm.run('-D', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        return self._extract_symbols(lines)
+
+    def list_undefined_symbols_for_dependency(self, dependency,
+                                              remove_symbol_versions=False):
+        result = _nm.run('-u', dependency, stdout=subprocess.PIPE)
+        lines = result.stdout.decode('utf-8').splitlines()
+        if remove_symbol_versions:
+            lines = [re.split('@@', line)[0] for line in lines]
+        lines = self._remove_weak_symbols(lines)
+        return self._extract_symbols(lines)
+
+    def find_library_paths(self, libraries):
+        paths = {}
+        system = platform.system()
+        for lib in libraries:
+            paths[lib] = []
+            if system == 'Linux':
+                result = _ldconfig.run('-p', stdout=subprocess.PIPE)
+                lines = result.stdout.decode('utf-8').splitlines()
+                for line in lines:
+                    if lib in line:

Review Comment:
   Can we simplify this?
   
   ```suggestion
                   for line in [line for line in lines if lib in line]:
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to