jedcunningham commented on code in PR #47371:
URL: https://github.com/apache/airflow/pull/47371#discussion_r1992881525
##########
airflow/cli/commands/remote_commands/config_command.py:
##########
@@ -546,6 +563,12 @@ def lint_config(args) -> None:
):
lint_issues.append(configuration.message)
+ if not skip_default_checks:
Review Comment:
```suggestion
if not args.skip_problematic_default_checks:
```
##########
airflow/cli/commands/remote_commands/config_command.py:
##########
@@ -531,6 +545,9 @@ def lint_config(args) -> None:
ignore_sections = args.ignore_section or []
ignore_options = args.ignore_option or []
+ skip_default_checks = args.skip_default_checks or False
+ upgrade_defaults = args.upgrade_defaults or True
Review Comment:
```suggestion
```
With `or True`, won't this always be true 🙃
##########
airflow/cli/cli_config.py:
##########
@@ -791,6 +791,16 @@ def string_lower_type(val):
("--section",),
help="The section name",
)
+ARG_LINT_CONFIG_UPGRADE_DEFAULTS = Arg(
+ ("--upgrade-defaults",),
Review Comment:
```suggestion
("--upgrade-problematic-defaults",),
```
It's probably worth renaming these flags to include `problematic` - without
it, it implies it'll upgrade all of the old defaults (which isn't a bad idea!).
Probably also means the Arg variable name should change too.
##########
airflow/cli/commands/remote_commands/config_command.py:
##########
@@ -546,6 +563,12 @@ def lint_config(args) -> None:
):
lint_issues.append(configuration.message)
+ if not skip_default_checks:
+ if upgrade_defaults:
+ conf.handle_incompatible_airflow2_defaults(upgrade=True)
+ else:
+ conf.handle_incompatible_airflow2_defaults(upgrade=False)
Review Comment:
```suggestion
conf.handle_incompatible_airflow2_defaults(upgrade=args.upgrade_problematic_defaults)
```
(looks funny, but the whitespace is right I believe)
No need to do a conditional against a bool, to then just hard code a bool.
##########
airflow/cli/cli_config.py:
##########
@@ -791,6 +791,16 @@ def string_lower_type(val):
("--section",),
help="The section name",
)
+ARG_LINT_CONFIG_UPGRADE_DEFAULTS = Arg(
+ ("--upgrade-defaults",),
+ help="Automatically upgrade problematic default values from Airflow 2 to
the new recommended values.",
+ action="store_true",
+)
+ARG_LINT_CONFIG_SKIP_DEFAULT_CHECKS = Arg(
+ ("--skip-default-checks",),
Review Comment:
```suggestion
("--skip-problematic-default-checks",),
```
##########
airflow/configuration.py:
##########
@@ -403,6 +403,64 @@ def inversed_deprecated_sections(self):
upgraded_values: dict[tuple[str, str], str]
"""Mapping of (section,option) to the old value that was upgraded"""
+ legacy_incompatible_defaults: dict[tuple[str, str], tuple[Pattern, str,
str]] = {
+ ("logging", "log_filename_template"): (
+ re.compile(
+ r"^(?:" + re.escape("{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts
}}/{{ try_number }}.log") + r"|"
+
r"dag_id=\{\s*ti\.dag_id\s*\}/run_id=\{\s*ti\.run_id\s*\}/task_id=\{\s*ti\.task_id\s*\}/"
+
r"\{%%\s*if\s+ti\.map_index\s*>=\s*0\s*%%\}map_index=\{\s*ti\.map_index\s*\}/\{%%\s*endif\s*%%\}"
+ r"attempt=\{\s*try_number\s*\}\.log" + r")$"
+ ),
+ "3.0",
+ "The default value for 'log_filename_template' from Airflow 2 may
break task logs in the new UI.",
+ ),
+ ("core", "dag_ignore_file_syntax"): (
+ re.compile(r"^regexp$"),
+ "3.0",
+ "The default value changed from 'regexp' in Airflow 2.x to 'glob'
in Airflow 3.0.",
+ ),
+ ("api", "auth_backends"): (
+ re.compile(r"^airflow\.api\.auth\.backend\.session$"),
+ "3.0",
+ "The default auth backend has changed from
'airflow.api.auth.backend.session' to "
+ "'airflow.providers.fab.auth_manager.api.auth.backend.session' in
Airflow 3.0.",
+ ),
Review Comment:
```suggestion
```
This has been deleted in #47399.
##########
airflow/cli/commands/remote_commands/config_command.py:
##########
@@ -531,6 +545,9 @@ def lint_config(args) -> None:
ignore_sections = args.ignore_section or []
ignore_options = args.ignore_option or []
+ skip_default_checks = args.skip_default_checks or False
Review Comment:
```suggestion
```
This should just work as-is. Probably not worth assigning to a separate
variable. (1 of 2)
##########
airflow/configuration.py:
##########
@@ -403,6 +403,64 @@ def inversed_deprecated_sections(self):
upgraded_values: dict[tuple[str, str], str]
"""Mapping of (section,option) to the old value that was upgraded"""
+ legacy_incompatible_defaults: dict[tuple[str, str], tuple[Pattern, str,
str]] = {
+ ("logging", "log_filename_template"): (
+ re.compile(
+ r"^(?:" + re.escape("{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts
}}/{{ try_number }}.log") + r"|"
+
r"dag_id=\{\s*ti\.dag_id\s*\}/run_id=\{\s*ti\.run_id\s*\}/task_id=\{\s*ti\.task_id\s*\}/"
+
r"\{%%\s*if\s+ti\.map_index\s*>=\s*0\s*%%\}map_index=\{\s*ti\.map_index\s*\}/\{%%\s*endif\s*%%\}"
+ r"attempt=\{\s*try_number\s*\}\.log" + r")$"
+ ),
+ "3.0",
+ "The default value for 'log_filename_template' from Airflow 2 may
break task logs in the new UI.",
+ ),
+ ("core", "dag_ignore_file_syntax"): (
+ re.compile(r"^regexp$"),
+ "3.0",
+ "The default value changed from 'regexp' in Airflow 2.x to 'glob'
in Airflow 3.0.",
+ ),
+ ("api", "auth_backends"): (
+ re.compile(r"^airflow\.api\.auth\.backend\.session$"),
+ "3.0",
+ "The default auth backend has changed from
'airflow.api.auth.backend.session' to "
+ "'airflow.providers.fab.auth_manager.api.auth.backend.session' in
Airflow 3.0.",
+ ),
+ ("scheduler", "task_instance_heartbeat_timeout"): (
+ re.compile(r"^300$"),
+ "3.0",
+ "Airflow 2.x used 'scheduler_zombie_task_threshold' with a default
of 300. In 3.0 this setting "
+ "has been renamed to 'task_instance_heartbeat_timeout' (with the
same numeric default), which may "
+ "cause legacy configurations to be ignored.",
+ ),
+ }
+
+ def handle_incompatible_airflow2_defaults(self, upgrade: bool | None =
True) -> None:
Review Comment:
```suggestion
def handle_incompatible_airflow2_defaults(self, upgrade: bool) -> None:
```
Should always be a bool.
##########
airflow/configuration.py:
##########
@@ -403,6 +403,64 @@ def inversed_deprecated_sections(self):
upgraded_values: dict[tuple[str, str], str]
"""Mapping of (section,option) to the old value that was upgraded"""
+ legacy_incompatible_defaults: dict[tuple[str, str], tuple[Pattern, str,
str]] = {
+ ("logging", "log_filename_template"): (
+ re.compile(
+ r"^(?:" + re.escape("{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts
}}/{{ try_number }}.log") + r"|"
+
r"dag_id=\{\s*ti\.dag_id\s*\}/run_id=\{\s*ti\.run_id\s*\}/task_id=\{\s*ti\.task_id\s*\}/"
+
r"\{%%\s*if\s+ti\.map_index\s*>=\s*0\s*%%\}map_index=\{\s*ti\.map_index\s*\}/\{%%\s*endif\s*%%\}"
+ r"attempt=\{\s*try_number\s*\}\.log" + r")$"
+ ),
+ "3.0",
+ "The default value for 'log_filename_template' from Airflow 2 may
break task logs in the new UI.",
+ ),
+ ("core", "dag_ignore_file_syntax"): (
+ re.compile(r"^regexp$"),
+ "3.0",
+ "The default value changed from 'regexp' in Airflow 2.x to 'glob'
in Airflow 3.0.",
+ ),
+ ("api", "auth_backends"): (
+ re.compile(r"^airflow\.api\.auth\.backend\.session$"),
+ "3.0",
+ "The default auth backend has changed from
'airflow.api.auth.backend.session' to "
+ "'airflow.providers.fab.auth_manager.api.auth.backend.session' in
Airflow 3.0.",
+ ),
+ ("scheduler", "task_instance_heartbeat_timeout"): (
+ re.compile(r"^300$"),
+ "3.0",
+ "Airflow 2.x used 'scheduler_zombie_task_threshold' with a default
of 300. In 3.0 this setting "
+ "has been renamed to 'task_instance_heartbeat_timeout' (with the
same numeric default), which may "
+ "cause legacy configurations to be ignored.",
+ ),
+ }
+
+ def handle_incompatible_airflow2_defaults(self, upgrade: bool | None =
True) -> None:
+ """
+ Check and optionally upgrade default configuration values from Airflow
2 that are incompatible with Airflow 3.
+
+ :param upgrade: If True, auto-upgrade legacy defaults. If False, only
warn about the legacy defaults without making any changes.
+ """
+ for (section, key), (old_pattern, _version, message) in
self.legacy_incompatible_defaults.items():
+ default_val = self.get_default_value(section, key, raw=True)
+ current_value = self.get(section, key, fallback="")
+ if self._using_old_value(old_pattern, current_value):
+ if upgrade:
+ self.upgraded_values[(section, key)] = default_val
+ updated_val = old_pattern.sub(default_val, current_value)
+ self._update_env_var(section, key, updated_val)
+ warnings.warn(
+ f"[{section}] {key}: {message} Auto-upgraded from
'{current_value}' to '{updated_val}'.",
+ FutureWarning,
+ stacklevel=1,
+ )
+ else:
+ warnings.warn(
+ f"[{section}] {key}: {message} Current value:
'{current_value}'. "
+ f"Consider updating to '{default_val}'.",
+ FutureWarning,
+ stacklevel=1,
+ )
Review Comment:
```suggestion
current_value = self.get(section, key, fallback="")
if not self._using_old_value(old_pattern, current_value):
continue
default_val = self.get_default_value(section, key, raw=True)
if upgrade:
self.upgraded_values[(section, key)] = default_val
updated_val = old_pattern.sub(default_val, current_value)
self._update_env_var(section, key, updated_val)
warnings.warn(
f"[{section}] {key}: {message} Auto-upgraded from
'{current_value}' to '{updated_val}'.",
FutureWarning,
stacklevel=1,
)
else:
warnings.warn(
f"[{section}] {key}: {message} Current value:
'{current_value}'. "
f"Consider updating to '{default_val}'.",
FutureWarning,
stacklevel=1,
)
```
Less nesting, also only gets the default value if we will need it.
##########
airflow/configuration.py:
##########
@@ -403,6 +403,64 @@ def inversed_deprecated_sections(self):
upgraded_values: dict[tuple[str, str], str]
"""Mapping of (section,option) to the old value that was upgraded"""
+ legacy_incompatible_defaults: dict[tuple[str, str], tuple[Pattern, str,
str]] = {
+ ("logging", "log_filename_template"): (
+ re.compile(
+ r"^(?:" + re.escape("{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts
}}/{{ try_number }}.log") + r"|"
+
r"dag_id=\{\s*ti\.dag_id\s*\}/run_id=\{\s*ti\.run_id\s*\}/task_id=\{\s*ti\.task_id\s*\}/"
+
r"\{%%\s*if\s+ti\.map_index\s*>=\s*0\s*%%\}map_index=\{\s*ti\.map_index\s*\}/\{%%\s*endif\s*%%\}"
+ r"attempt=\{\s*try_number\s*\}\.log" + r")$"
+ ),
+ "3.0",
+ "The default value for 'log_filename_template' from Airflow 2 may
break task logs in the new UI.",
+ ),
+ ("core", "dag_ignore_file_syntax"): (
+ re.compile(r"^regexp$"),
+ "3.0",
+ "The default value changed from 'regexp' in Airflow 2.x to 'glob'
in Airflow 3.0.",
+ ),
+ ("api", "auth_backends"): (
+ re.compile(r"^airflow\.api\.auth\.backend\.session$"),
+ "3.0",
+ "The default auth backend has changed from
'airflow.api.auth.backend.session' to "
+ "'airflow.providers.fab.auth_manager.api.auth.backend.session' in
Airflow 3.0.",
+ ),
+ ("scheduler", "task_instance_heartbeat_timeout"): (
+ re.compile(r"^300$"),
+ "3.0",
+ "Airflow 2.x used 'scheduler_zombie_task_threshold' with a default
of 300. In 3.0 this setting "
+ "has been renamed to 'task_instance_heartbeat_timeout' (with the
same numeric default), which may "
+ "cause legacy configurations to be ignored.",
+ ),
Review Comment:
```suggestion
```
Shouldn't this just go to the normal [deprecated_options
list](https://github.com/apache/airflow/blob/46759a355d603167535591745c70037d691c866a/airflow/configuration.py#L327)
instead?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]