GitHub user LucasRoesler added a comment to the discussion: Sharepoint ingest
using Microsoft Graph Filesystem
I submitted too early. I added some more logging to `msgraphfs/core.py` to
see what values are being passed into the `MSGDriveFS` constructor, and I have
found that the filesystem is being initialized _twice_!
```python
def __init__(
    self,
    drive_id: str | None = None,
    client_id: str | None = None,
    tenant_id: str | None = None,
    client_secret: str | None = None,
    site_name: str | None = None,
    drive_name: str | None = None,
    oauth2_client_params: dict | None = None,
    asynchronous: bool = False,
    loop=None,
    url_path: str | None = None,
    **kwargs,
):
    """Initialize the filesystem and resolve its OAuth2 credentials.

    Credentials are taken from the explicit arguments first, then from the
    ``MSGRAPHFS_*`` environment variables, then from the ``AZURE_*`` ones.
    When ``oauth2_client_params`` is given it is passed to the parent
    constructor as-is, and the credentials it carries are mirrored onto the
    instance.

    Args:
        drive_id: Drive identifier; when set, ``drive_url`` is derived from it.
        client_id: OAuth2 client id.
        tenant_id: Tenant id, used to build the token endpoint.
        client_secret: OAuth2 client secret.
        site_name: Site name; together with ``drive_name`` it selects
            single-site mode.
        drive_name: Drive name; see ``site_name``.
        oauth2_client_params: Ready-made OAuth2 client parameters. When
            ``None``, they are built from the resolved credentials.
        asynchronous: Forwarded to the parent constructor.
        loop: Forwarded to the parent constructor.
        url_path: Optional URL path; a site or drive name parsed from it
            overrides ``site_name`` / ``drive_name``.
        **kwargs: Forwarded to the parent constructor.

    Raises:
        ValueError: If ``oauth2_client_params`` is ``None`` and any of
            client id, tenant id or client secret could not be resolved.
    """
    # NOTE(review): temporary debug logging. It prints client_secret (and
    # oauth2_client_params, which also carries it); remove or mask before
    # this is used anywhere logs are not scrubbed.
    _logger.warning(
        f"MSGDriveFS drive_id={drive_id} client_id={client_id} "
        f"tenant_id={tenant_id} client_secret={client_secret} "
        f"oauth2_client_params={oauth2_client_params}"
    )

    # Get OAuth2 credentials from parameters or environment variables.
    # Check MSGRAPHFS_* variables first, then fall back to standard AZURE_*
    # variables.
    self.client_id = (
        client_id
        or os.getenv("MSGRAPHFS_CLIENT_ID")
        or os.getenv("AZURE_CLIENT_ID")
    )
    self.tenant_id = (
        tenant_id
        or os.getenv("MSGRAPHFS_TENANT_ID")
        or os.getenv("AZURE_TENANT_ID")
    )
    self.client_secret = (
        client_secret
        or os.getenv("MSGRAPHFS_CLIENT_SECRET")
        or os.getenv("AZURE_CLIENT_SECRET")
    )

    # Parse URL path if provided to extract site_name and drive_name
    if url_path:
        parsed_site, parsed_drive, _ = parse_msgraph_url(url_path)
        # URL parameters override direct parameters
        site_name = parsed_site or site_name
        drive_name = parsed_drive or drive_name

    # Determine operation mode (single-site if site and drive provided, OR
    # drive_id provided)
    self._multi_site_mode = not ((site_name and drive_name) or drive_id)
    _logger.warning(
        f"MSGDriveFS initialized in multi_site_mode={self._multi_site_mode}"
    )

    # Set site_name and drive_name attributes for all modes
    self.site_name = site_name
    self.drive_name = drive_name

    if self._multi_site_mode:
        # Multi-site mode: cache for drive filesystem instances
        self._drive_cache = {}
        # Store credentials for creating drive instances.
        # NOTE(review): these are captured before credentials are extracted
        # from oauth2_client_params below, so they only reflect the explicit
        # arguments / environment - confirm that is intended.
        self._stored_client_id = self.client_id
        self._stored_tenant_id = self.tenant_id
        self._stored_client_secret = self.client_secret
        self._stored_oauth2_client_params = oauth2_client_params
        self._stored_kwargs = kwargs.copy()

    # Build oauth2_client_params if not provided
    if oauth2_client_params is None:
        if not all([self.client_id, self.tenant_id, self.client_secret]):
            raise ValueError(
                "Either oauth2_client_params must be provided, or all of "
                "client_id, tenant_id, and client_secret must be provided "
                "(either as parameters or environment variables "
                "MSGRAPHFS_CLIENT_ID/"
                "AZURE_CLIENT_ID, MSGRAPHFS_TENANT_ID/AZURE_TENANT_ID, "
                "MSGRAPHFS_CLIENT_SECRET/AZURE_CLIENT_SECRET)"
            )
        # Build OAuth2 client parameters with proper configuration
        oauth2_client_params = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "token_endpoint": (
                "https://login.microsoftonline.com/"
                f"{self.tenant_id}/oauth2/v2.0/token"
            ),
            "scope": " ".join(self.DEFAULT_SCOPES),
            "grant_type": "client_credentials",
        }
    else:
        # Extract credentials from provided params for later use.
        # A value missing from the params must not clobber one already
        # resolved from the arguments or environment. Callers may supply a
        # plain "tenant_id" key instead of a "token_endpoint"; without this
        # fallback the tenant is lost and a later construction that reuses
        # self.tenant_id fails the credential check above.
        self.client_id = oauth2_client_params.get("client_id") or self.client_id
        self.tenant_id = (
            self._extract_tenant_from_token_endpoint(
                oauth2_client_params.get("token_endpoint", "")
            )
            or oauth2_client_params.get("tenant_id")
            or self.tenant_id
        )
        self.client_secret = (
            oauth2_client_params.get("client_secret") or self.client_secret
        )

    _logger.warning(f"MSGDriveFS oauth2_client_params={oauth2_client_params}")
    super().__init__(
        oauth2_client_params=oauth2_client_params,
        asynchronous=asynchronous,
        loop=loop,
        **kwargs,
    )
    _logger.warning(
        f"MSGDriveFS post-super site_name={site_name} "
        f"drive_name={drive_name} drive_id={drive_id}"
    )

    self.site_name = site_name
    self.drive_name = drive_name
    self.drive_id = drive_id
    # We'll set the drive_url later if drive_id is determined
    if self.drive_id:
        self.drive_url = f"https://graph.microsoft.com/v1.0/drives/{self.drive_id}"
    else:
        self.drive_url = None
```
```text
2025-11-24, 14:59:54] INFO - Connection: sharepoint,
Connection(conn_id='sharepoint', conn_type='msgraph', description=None,
host='secret-d265-4611-8dbb-secret', schema=None,
login='secret-63e2-4f4d-8270-secret', password='***', port=None, extra='{\n
"scope": [\n "https://graph.microsoft.com/.default"\n ]\n}'):
chan="stdout": source="task"
[2025-11-24, 14:59:54] WARNING - MSGraphFS options: {'client_id':
'secret-63e2-4f4d-8270-secret', 'client_secret': '***', 'tenant_id':
'secret-d265-4611-8dbb-secret', 'scope':
['https://graph.microsoft.com/.default']}:
source="airflow.providers.microsoft.azure.fs.msgraph"
[2025-11-24, 14:59:54] WARNING - OAuth2 client params: {'client_id':
'secret-63e2-4f4d-8270-secret', 'client_secret': '***', 'tenant_id':
'secret-d265-4611-8dbb-secret', 'scope':
['https://graph.microsoft.com/.default']}:
source="airflow.providers.microsoft.azure.fs.msgraph"
[2025-11-24, 14:59:54] WARNING - MSGDriveFS drive_id=None client_id=None
tenant_id=None client_secret=None oauth2_client_params={'client_id':
'secret-63e2-4f4d-8270-secret', 'client_secret': '***', 'tenant_id':
'secret-d265-4611-8dbb-secret', 'scope':
['https://graph.microsoft.com/.default']}: source="msgraphfs.core"
[2025-11-24, 14:59:54] WARNING - MSGDriveFS initialized in
multi_site_mode=True: source="msgraphfs.core"
[2025-11-24, 14:59:54] WARNING - MSGDriveFS oauth2_client_params={'client_id':
'secret-63e2-4f4d-8270-secret', 'client_secret': '***', 'tenant_id':
'secret-d265-4611-8dbb-secret', 'scope':
['https://graph.microsoft.com/.default']}: source="msgraphfs.core"
[2025-11-24, 14:59:54] WARNING - MSGDriveFS post-super site_name=None
drive_name=None drive_id=None: source="msgraphfs.core"
[2025-11-24, 14:59:54] WARNING - MSGDriveFS drive_id=None
client_id=secret-63e2-4f4d-8270-secret tenant_id=None client_secret=***
oauth2_client_params=None: source="msgraphfs.core"
[2025-11-24, 14:59:54] WARNING - MSGDriveFS initialized in
multi_site_mode=False: source="msgraphfs.core"
[2025-11-24, 14:59:54] ERROR - Task failed with exception: source="task"
ValueError: Either oauth2_client_params must be provided, or all of client_id,
tenant_id, and client_secret must be provided (either as parameters or
environment variables MSGRAPHFS_CLIENT_ID/AZURE_CLIENT_ID,
MSGRAPHFS_TENANT_ID/AZURE_TENANT_ID,
MSGRAPHFS_CLIENT_SECRET/AZURE_CLIENT_SECRET)
```
Note that we clearly get the log emitted after `super().__init__`, but it is
then followed by two more logs that look exactly like the earlier ones, only
with different parameters this time.
GitHub link:
https://github.com/apache/airflow/discussions/58221#discussioncomment-15063055
----
This is an automatically sent email for [email protected].
To unsubscribe, please send an email to: [email protected]