JingsongLi commented on code in PR #7716:
URL: https://github.com/apache/paimon/pull/7716#discussion_r3151165851


##########
paimon-python/pypaimon/filesystem/pyarrow_file_io.py:
##########
@@ -256,13 +256,61 @@ def _initialize_hdfs_fs(self, scheme: str, netloc: 
Optional[str]) -> FileSystem:
         )
         os.environ['CLASSPATH'] = class_paths.stdout.strip()
 
+        principal = (self.properties.get(SecurityOptions.KERBEROS_PRINCIPAL)
+                     or self._get_property("security.principal"))
+        keytab = (self.properties.get(SecurityOptions.KERBEROS_KEYTAB)
+                  or self._get_property("security.keytab"))
+        use_ticket_cache = 
self.properties.get(SecurityOptions.KERBEROS_USE_TICKET_CACHE)
+
+        if bool(principal) != bool(keytab):
+            raise ValueError(
+                "security.kerberos.login.principal and 
security.kerberos.login.keytab "
+                "must be both set or both unset")
+
         host, port_str = splitport(netloc)
-        return pafs.HadoopFileSystem(
-            host=host,
-            port=int(port_str),
-            user=os.environ.get('HADOOP_USER_NAME', 'hadoop')
+        port = int(port_str) if port_str else 0
+
+        kerb_ticket = None
+        if principal and keytab:
+            self._kerberos_login_from_keytab(principal, keytab)
+            kerb_ticket = self._get_ticket_cache_path()
+        elif use_ticket_cache:
+            cache_path = self._get_ticket_cache_path()
+            if cache_path and os.path.exists(cache_path):
+                kerb_ticket = cache_path
+
+        if kerb_ticket:
+            return pafs.HadoopFileSystem(host=host, port=port, 
kerb_ticket=kerb_ticket)
+        else:
+            return pafs.HadoopFileSystem(
+                host=host,
+                port=port,
+                user=os.environ.get('HADOOP_USER_NAME', 'hadoop')
+            )
+
+    @staticmethod
+    def _kerberos_login_from_keytab(principal: str, keytab: str):
+        if not os.path.isfile(keytab):
+            raise FileNotFoundError(f"Kerberos keytab file not found: 
{keytab}")
+        if not os.access(keytab, os.R_OK):
+            raise PermissionError(f"Kerberos keytab file is not readable: 
{keytab}")
+        subprocess.run(
+            ['kinit', '-kt', keytab, principal],
+            check=True, capture_output=True, text=True
         )
 
+    @staticmethod
+    def _get_ticket_cache_path() -> Optional[str]:
+        cc = os.environ.get('KRB5CCNAME')
+        if cc:
+            if cc.startswith('FILE:'):
+                return cc[5:]
+            return cc
+        default_path = f'/tmp/krb5cc_{os.getuid()}'
+        if os.path.exists(default_path):
+            return default_path
+        return None

Review Comment:
   After calling `kinit`, the code calls `_get_ticket_cache_path()` to find the 
resulting ticket cache. But if `KRB5CCNAME` is not set and the default 
`/tmp/krb5cc_{os.getuid()}` doesn't exist yet (e.g., first-time kinit on a 
fresh container), `_get_ticket_cache_path()` returns None, `kerb_ticket` stays 
unset, and the code silently falls through to the `user=...` branch, i.e. 
SIMPLE auth. This is a real bug — `kinit` succeeded but the connection won't 
use Kerberos.
   
   Suggestion: after a successful kinit, either trust that the default cache 
path exists (skip the os.path.exists check) or raise an error if no ticket 
cache is found.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to