Copilot commented on code in PR #64540:
URL: https://github.com/apache/airflow/pull/64540#discussion_r3025328790
##########
airflow-core/src/airflow/ui/tests/e2e/global-setup.ts:
##########
@@ -53,6 +54,25 @@ async function globalSetup(config: FullConfig) {
} finally {
await browser.close();
}
+
+ // Pre-warm: wait for all Dags used by E2E tests to be parsed before workers
start.
+ const apiContext = await request.newContext({
+ baseURL,
+ storageState: AUTH_FILE,
+ });
+
+ try {
+ await Promise.all(
+ [
+ testConfig.testDag.id,
+ testConfig.testDag.hitlId,
+ testConfig.xcomDag.id,
+ "example_python_operator",
+ ].map((dagId) => waitForDagReady(apiContext, dagId, { timeout: 300_000
})),
Review Comment:
The pre-warm DAG list is hard-coded here (`example_python_operator` etc.)
and partially overlaps with other constants (e.g. `asset_produces_1` is treated
in global-teardown but not pre-warmed). To avoid drift as specs/fixtures
change, consider centralizing the set of E2E DAG IDs in
`playwright.config.ts`/`testConfig` (or a shared constants module) and using it
for both global setup and teardown.
```suggestion
const e2eDagIds =
"e2eDagIds" in testConfig && Array.isArray((testConfig as {
e2eDagIds?: unknown }).e2eDagIds)
? (testConfig as { e2eDagIds: string[] }).e2eDagIds
: [
testConfig.testDag.id,
testConfig.testDag.hitlId,
testConfig.xcomDag.id,
"example_python_operator",
];
await Promise.all(
e2eDagIds.map((dagId) => waitForDagReady(apiContext, dagId, { timeout:
300_000 })),
```
##########
airflow-core/src/airflow/ui/tests/e2e/utils/test-helpers.ts:
##########
@@ -497,29 +497,40 @@ export async function apiDeleteDagRun(source:
RequestLike, dagId: string, runId:
* Delete a DAG run, logging (not throwing) unexpected errors.
* Use this in fixture teardown where cleanup must not abort the loop.
* 404 is already handled inside `apiDeleteDagRun`.
- * 409 (running state) is handled by force-failing the run first, then
retrying.
+ *
+ * Strategy: force-fail the run first so the server doesn't wait for
+ * running tasks during deletion, then delete with one retry on timeout.
*/
export async function safeCleanupDagRun(source: RequestLike, dagId: string,
runId: string): Promise<void> {
+ const request = getRequestContext(source);
+
try {
- await apiDeleteDagRun(source, dagId, runId);
- } catch (error) {
- const message = error instanceof Error ? error.message : String(error);
-
- // 409 = DAG run is still running — force-fail, then retry deletion.
- if (message.includes("409")) {
- try {
- await apiSetDagRunState(source, { dagId, runId, state: "failed" });
- await apiDeleteDagRun(source, dagId, runId);
- } catch (retryError) {
- console.warn(
- `[e2e cleanup] Retry failed for DAG run ${dagId}/${runId}:
${retryError instanceof Error ? retryError.message : String(retryError)}`,
- );
+ await request.patch(`${baseUrl}/api/v2/dags/${dagId}/dagRuns/${runId}`, {
+ data: { state: "failed" },
+ headers: { "Content-Type": "application/json" },
+ timeout: 10_000,
+ });
+ } catch {
+ // Run may already be terminal or deleted — ignore.
+ }
+
+ for (let attempt = 0; attempt < 2; attempt++) {
+ try {
+ await apiDeleteDagRun(source, dagId, runId);
+
+ return;
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ const isTimeout = message.includes("Timeout");
+
+ if (isTimeout && attempt === 0) {
+ continue;
}
Review Comment:
Timeout retry detection in `safeCleanupDagRun` relies on
`message.includes("Timeout")`, which is brittle across Playwright/Node versions
and may miss actual timeout errors (or match unrelated messages). Prefer
checking `error` type/shape (e.g. `error instanceof Error && error.name ===
"TimeoutError"`, or inspecting Playwright’s known timeout error pattern) so the
retry only triggers when it’s truly a timeout.
##########
airflow-core/src/airflow/ui/playwright.config.ts:
##########
@@ -50,7 +50,7 @@ export const AUTH_FILE = path.join(currentDirname,
"playwright/.auth/user.json")
export default defineConfig({
expect: {
- timeout: 5000,
+ timeout: 10_000,
},
Review Comment:
Config uses mixed numeric separator styles (`5000` -> `10_000` while other
options already use `60_000`, `120_000`, etc.). Consider consistently using
numeric separators across this config (or none) to keep formatting predictable.
##########
airflow-core/src/airflow/ui/tests/e2e/global-teardown.ts:
##########
@@ -48,8 +48,8 @@ async function globalTeardown() {
timeout: 10_000,
});
} catch (error) {
- console.warn(
- `[e2e teardown] Failed to re-pause DAG ${dagId}: ${error instanceof
Error ? error.message : String(error)}`,
+ console.debug(
+ `[e2e teardown] Could not re-pause DAG ${dagId}: ${error instanceof
Error ? error.message : String(error)}`,
Review Comment:
Switching the teardown failure log from `console.warn` to `console.debug`
may effectively hide teardown failures in CI logs, making it harder to diagnose
residual state/flaky follow-on runs. If the intent is to reduce noise, consider
keeping `warn` (or using `info`) but making the message less alarming, or
conditionally using debug only for known/expected statuses while warning on
unexpected errors.
```suggestion
console.warn(
`[e2e teardown] Cleanup: failed to re-pause DAG ${dagId}: ${
error instanceof Error ? error.message : String(error)
}`,
```
##########
airflow-core/src/airflow/ui/tests/e2e/utils/test-helpers.ts:
##########
@@ -497,29 +497,40 @@ export async function apiDeleteDagRun(source:
RequestLike, dagId: string, runId:
* Delete a DAG run, logging (not throwing) unexpected errors.
* Use this in fixture teardown where cleanup must not abort the loop.
* 404 is already handled inside `apiDeleteDagRun`.
- * 409 (running state) is handled by force-failing the run first, then
retrying.
+ *
+ * Strategy: force-fail the run first so the server doesn't wait for
+ * running tasks during deletion, then delete with one retry on timeout.
*/
export async function safeCleanupDagRun(source: RequestLike, dagId: string,
runId: string): Promise<void> {
+ const request = getRequestContext(source);
+
try {
- await apiDeleteDagRun(source, dagId, runId);
- } catch (error) {
- const message = error instanceof Error ? error.message : String(error);
-
- // 409 = DAG run is still running — force-fail, then retry deletion.
- if (message.includes("409")) {
- try {
- await apiSetDagRunState(source, { dagId, runId, state: "failed" });
- await apiDeleteDagRun(source, dagId, runId);
- } catch (retryError) {
- console.warn(
- `[e2e cleanup] Retry failed for DAG run ${dagId}/${runId}:
${retryError instanceof Error ? retryError.message : String(retryError)}`,
- );
+ await request.patch(`${baseUrl}/api/v2/dags/${dagId}/dagRuns/${runId}`, {
+ data: { state: "failed" },
+ headers: { "Content-Type": "application/json" },
+ timeout: 10_000,
+ });
Review Comment:
`safeCleanupDagRun` calls `request.patch(...)` to force-fail the run but
never checks `response.ok()`/`response.status()`. Playwright does not throw on
non-2xx HTTP responses, so a 4xx/5xx will be silently treated as success and
the run may remain running, defeating the cleanup strategy. Please capture the
response and either (a) reuse `apiSetDagRunState(...)` here, or (b) explicitly
verify acceptable statuses (e.g. 200/204/404/409) and log/throw otherwise.
```suggestion
const response = await
request.patch(`${baseUrl}/api/v2/dags/${dagId}/dagRuns/${runId}`, {
data: { state: "failed" },
headers: { "Content-Type": "application/json" },
timeout: 10_000,
});
const status = response.status();
// Acceptable statuses: success (200/204) or "already terminal/gone"
(404/409).
if (![200, 204, 404, 409].includes(status)) {
let body: string;
try {
body = await response.text();
} catch {
body = "<unable to read response body>";
}
console.warn(
`[e2e cleanup] Failed to set DAG run ${dagId}/${runId} state to
failed (${status}): ${body}`,
);
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]