Copilot commented on code in PR #5675:
URL: https://github.com/apache/texera/pull/5675#discussion_r3444862464
##########
frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts:
##########
@@ -42,29 +48,79 @@ import { NgxJsonViewerModule } from "ngx-json-viewer";
selector: "texera-row-modal-content",
templateUrl: "./result-panel-modal.component.html",
styleUrls: ["./result-panel-model.component.scss"],
- imports: [NgxJsonViewerModule],
+ imports: [CommonModule, NzButtonModule, NzIconModule, NgxJsonViewerModule],
})
export class RowModalComponent implements OnChanges {
+ rowEntries: { key: string; value: string; mediaSrc: string; isVideo:
boolean; isImage: boolean; isAudio: boolean }[] =
+ [];
// Index of current displayed row in currentResult
- readonly operatorId: string = inject(NZ_MODAL_DATA).operatorId;
- rowIndex: number = inject(NZ_MODAL_DATA).rowIndex;
+ private readonly modalData: { operatorId: string; rowIndex: number;
rowData?: Record<string, unknown> } =
+ inject(NZ_MODAL_DATA);
+ readonly operatorId: string = this.modalData.operatorId;
+ rowIndex: number = this.modalData.rowIndex;
currentDisplayRowData: Record<string, unknown> = {};
constructor(
public modal: NzModalRef<any, number>,
private workflowResultService: WorkflowResultService,
- private resizeService: PanelResizeService
+ private resizeService: PanelResizeService,
+ private notificationService: NotificationService
) {
+ if (this.modalData.rowData) {
+ this.currentDisplayRowData = this.modalData.rowData;
+ this.rowEntries = this.buildRowEntries(this.currentDisplayRowData);
+ }
this.ngOnChanges();
}
+ get prettyRowJson(): string {
+ return JSON.stringify(this.currentDisplayRowData, null, 2);
+ }
+
+ copyText(text: string): void {
+ navigator.clipboard.writeText(text).then(
+ () => this.notificationService.success("Copied to clipboard"),
+ () => this.notificationService.error("Failed to copy")
+ );
+ }
+
ngOnChanges(): void {
this.workflowResultService
.getPaginatedResultService(this.operatorId)
?.selectTuple(this.rowIndex, this.resizeService.pageSize)
.pipe(untilDestroyed(this))
.subscribe(res => {
- this.currentDisplayRowData = res.tuple;
+ if (res?.tuple) {
+ this.currentDisplayRowData = res.tuple;
+ this.rowEntries = this.buildRowEntries(this.currentDisplayRowData);
+ }
});
}
+
+ trackByEntryKey(_index: number, entry: { key: string }): string {
+ return entry.key;
+ }
+
+ private resolveMediaSrc(value: string): string {
+ if (!value.startsWith("http://") && !value.startsWith("https://")) {
+ return value;
+ }
+ return
`${AppSettings.getApiEndpoint()}/huggingface/media-proxy?url=${encodeURIComponent(value)}`;
+ }
+
+ private buildRowEntries(
+ rowData: Record<string, unknown>
+ ): { key: string; value: string; mediaSrc: string; isVideo: boolean;
isImage: boolean; isAudio: boolean }[] {
+ return Object.entries(rowData).map(([key, val]) => {
+ const value = typeof val === "string" ? val : JSON.stringify(val);
+ return {
+ key,
+ value,
+ mediaSrc: this.resolveMediaSrc(value),
+ isVideo: typeof val === "string" && isVideoUrl(val),
+ isImage: typeof val === "string" && isImageUrl(val),
+ isAudio: typeof val === "string" && isAudioUrl(val),
+ };
+ });
Review Comment:
`JSON.stringify(val)` can return `undefined` (e.g., when `val` is
`undefined`), but `value` is treated as a `string` and passed into
`resolveMediaSrc(value)` which calls `startsWith`. This can throw at runtime
and also violates the declared `value: string` type. Coalesce to a real string
for non-JSON-serializable values.
##########
frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts:
##########
@@ -0,0 +1,637 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Component, OnInit, OnDestroy, ChangeDetectorRef } from
"@angular/core";
+import { CommonModule } from "@angular/common";
+import { FormsModule } from "@angular/forms";
+import { FieldType, FieldTypeConfig, FormlyModule } from "@ngx-formly/core";
+import { HttpClient } from "@angular/common/http";
+import { NzSelectModule } from "ng-zorro-antd/select";
+import { NzInputModule } from "ng-zorro-antd/input";
+import { NzSpinModule } from "ng-zorro-antd/spin";
+import { NzButtonModule } from "ng-zorro-antd/button";
+import { NzIconModule } from "ng-zorro-antd/icon";
+import { AppSettings } from "../../../common/app-setting";
+import { Subject, Subscription } from "rxjs";
+import { debounceTime, finalize, switchMap, takeUntil } from "rxjs/operators";
+
+export interface HuggingFaceModelOption {
+ id: string;
+ label: string;
+ pipeline_tag?: string;
+ downloads?: number;
+ likes?: number;
+}
+
+export interface HuggingFaceTaskOption {
+ tag: string;
+ label: string;
+}
+
+// ── Static fallback task list (used when the dynamic fetch fails) ──
+export const STATIC_TASK_OPTIONS: HuggingFaceTaskOption[] = [
+ { tag: "text-generation", label: "Text Generation" },
+ { tag: "automatic-speech-recognition", label: "Automatic Speech Recognition"
},
+ { tag: "audio-classification", label: "Audio Classification" },
+ { tag: "text-classification", label: "Text Classification" },
+ { tag: "text-to-speech", label: "Text to Speech" },
+ { tag: "token-classification", label: "Token Classification" },
+ { tag: "question-answering", label: "Question Answering" },
+ { tag: "table-question-answering", label: "Table Question Answering" },
+ { tag: "zero-shot-classification", label: "Zero-Shot Classification" },
+ { tag: "translation", label: "Translation" },
+ { tag: "summarization", label: "Summarization" },
+ { tag: "feature-extraction", label: "Feature Extraction" },
+ { tag: "fill-mask", label: "Fill-Mask" },
+ { tag: "sentence-similarity", label: "Sentence Similarity" },
+ { tag: "text-ranking", label: "Text Ranking" },
+ { tag: "image-classification", label: "Image Classification" },
+ { tag: "object-detection", label: "Object Detection" },
+ { tag: "image-segmentation", label: "Image Segmentation" },
+ { tag: "image-to-text", label: "Image to Text" },
+ { tag: "visual-question-answering", label: "Visual Question Answering" },
+ { tag: "document-question-answering", label: "Document Question Answering" },
+ { tag: "zero-shot-image-classification", label: "Zero-Shot Image
Classification" },
+];
+
+const PAGE_SIZE = 50;
+
+const TRUNCATED_HEADER = "X-Texera-Truncated";
+
+// ── Module-level caches (reused across component instances) ──
+const allModelsByTag: Map<string, HuggingFaceModelOption[]> = new Map();
+const truncatedByTag: Set<string> = new Set();
+const inFlightByTag: Map<string, Subscription> = new Map();
+const errorByTag: Map<string, string> = new Map();
+
+let cachedTaskOptions: HuggingFaceTaskOption[] | null = null;
+let tasksFetchSubscription: Subscription | null = null;
+let tasksFetchError: string | null = null;
+
+/** Clear all cached data (useful for tests or manual invalidation). */
+export function invalidateHuggingFaceModelCache(): void {
+ allModelsByTag.clear();
+ truncatedByTag.clear();
+ errorByTag.clear();
+ inFlightByTag.forEach(sub => sub.unsubscribe());
+ inFlightByTag.clear();
+ cachedTaskOptions = null;
+ tasksFetchError = null;
+ tasksFetchSubscription?.unsubscribe();
+ tasksFetchSubscription = null;
+}
+
+@Component({
+ selector: "texera-hugging-face-model-select",
+ templateUrl: "./hugging-face.component.html",
+ styleUrls: ["hugging-face.component.scss"],
+ imports: [
+ CommonModule,
+ FormsModule,
+ NzSelectModule,
+ NzInputModule,
+ NzSpinModule,
+ NzButtonModule,
+ NzIconModule,
+ FormlyModule,
+ ],
+})
+export class HuggingFaceComponent extends FieldType<FieldTypeConfig>
implements OnInit, OnDestroy {
+ private readonly taskScopedKeys = [
+ "modelId",
+ "promptColumn",
+ "imageInput",
+ "audioInput",
+ "inputImageColumn",
+ "inputAudioColumn",
+ "candidateLabels",
+ "sentencesColumn",
+ "contextColumn",
+ "systemPrompt",
+ "maxNewTokens",
+ "temperature",
+ ] as const;
+ private readonly taskStateByTag = new Map<string, Partial<Record<(typeof
this.taskScopedKeys)[number], unknown>>>();
+ // ── Task state ──
+ taskOptions: HuggingFaceTaskOption[] = cachedTaskOptions ??
STATIC_TASK_OPTIONS;
+ selectedTaskTag = "text-generation";
+ tasksLoading = false;
+ tasksError: string | null = null;
+
+ // ── All models for the current task (fetched once from backend, cached) ──
+ private allModels: HuggingFaceModelOption[] = [];
+
+ // ── Displayed state ──
+ pagedModels: HuggingFaceModelOption[] = [];
+ currentPage = 0;
+ totalPages = 0;
+
+ loading = false;
+ errorMessage: string | null = null;
+
+ // ── Truncation notice ──
+ truncated = false;
+
+ // ── Search state ──
+ searchText = "";
+ searchLoading = false;
+ private filteredModels: HuggingFaceModelOption[] | null = null;
+ private readonly searchSubject$ = new Subject<string>();
+ private searchSubscription: Subscription | null = null;
+
+ private readonly destroy$ = new Subject<void>();
+ private subscription: Subscription | null = null;
+ private taskPollInterval: ReturnType<typeof setInterval> | null = null;
+ private modelPollInterval: ReturnType<typeof setInterval> | null = null;
+ private initTimeout: ReturnType<typeof setTimeout> | null = null;
+
+ constructor(
+ private http: HttpClient,
+ private cdr: ChangeDetectorRef
+ ) {
+ super();
+ }
+
+ ngOnInit(): void {
+ const savedTag = this.getCurrentTaskTag();
+ this.selectedTaskTag = savedTag ?? this.selectedTaskTag;
+ this.syncTaskSelection(this.selectedTaskTag, false);
+ this.loadTasks();
+ this.loadAllModels();
+ this.setupServerSearch();
+ // Formly can attach sibling controls after this field initializes.
+ // Re-sync once the control tree settles so a fresh operator starts in a
valid task state.
+ this.initTimeout = setTimeout(
+ () => this.syncTaskSelection(this.getCurrentTaskTag() ??
this.selectedTaskTag, false),
+ 0
+ );
+ }
+
+ ngOnDestroy(): void {
+ this.destroy$.next();
+ this.destroy$.complete();
+ this.subscription?.unsubscribe();
+ this.searchSubscription?.unsubscribe();
+ this.searchSubject$.complete();
+ if (this.taskPollInterval !== null) {
+ clearInterval(this.taskPollInterval);
+ }
+ if (this.modelPollInterval !== null) {
+ clearInterval(this.modelPollInterval);
+ }
+ if (this.initTimeout !== null) {
+ clearTimeout(this.initTimeout);
+ }
+ }
+
+ // ── Task loading ──
+
+ /**
+ * Fetch available pipeline tags from the backend, which proxies
HuggingFace's /api/tasks.
+ * Falls back to STATIC_TASK_OPTIONS if the fetch fails.
+ */
+ private loadTasks(): void {
+ // Already fetched and cached
+ if (cachedTaskOptions !== null) {
+ this.taskOptions = cachedTaskOptions;
+ return;
+ }
+
+ // Previous fetch errored — show static list, don't retry automatically
+ if (tasksFetchError !== null) {
+ this.tasksError = tasksFetchError;
+ this.taskOptions = STATIC_TASK_OPTIONS;
+ return;
+ }
+
+ // Another component instance already has a fetch in flight — wait for it
+ if (tasksFetchSubscription !== null) {
+ this.tasksLoading = true;
+ // Poll for completion (the module-level cache will be set when done)
+ this.taskPollInterval = setInterval(() => {
+ if (cachedTaskOptions !== null || tasksFetchError !== null) {
+ clearInterval(this.taskPollInterval!);
+ this.taskPollInterval = null;
+ this.tasksLoading = false;
+ this.taskOptions = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+ if (tasksFetchError) this.tasksError = tasksFetchError;
+ this.cdr.detectChanges();
+ }
+ }, 200);
+ return;
+ }
+
+ this.tasksLoading = true;
+ this.tasksError = null;
+ this.cdr.detectChanges();
+
+ tasksFetchSubscription = this.http
+
.get<HuggingFaceTaskOption[]>(`${AppSettings.getApiEndpoint()}/huggingface/tasks`)
+ .pipe(
+ takeUntil(this.destroy$),
+ finalize(() => {
+ // If takeUntil fires before next/error, reset the module-level guard
+ // so the next component instance can start a fresh fetch.
+ if (cachedTaskOptions === null && tasksFetchError === null) {
+ tasksFetchSubscription = null;
+ }
+ })
+ )
+ .subscribe({
+ next: tasks => {
+ tasksFetchSubscription = null;
+ cachedTaskOptions = tasks.length > 0 ? tasks : STATIC_TASK_OPTIONS;
+ this.taskOptions = cachedTaskOptions;
+ this.tasksLoading = false;
+ this.cdr.detectChanges();
+ },
+ error: (err: unknown) => {
+ console.error("Failed to load HuggingFace tasks:", err);
+ tasksFetchSubscription = null;
+ tasksFetchError = "Could not load tasks from Hugging Face. Using
default list.";
+ this.tasksError = tasksFetchError;
+ this.taskOptions = STATIC_TASK_OPTIONS;
+ this.tasksLoading = false;
+ this.cdr.detectChanges();
+ },
+ });
+ }
+
+ retryTasksLoad(): void {
+ tasksFetchError = null;
+ this.tasksError = null;
+ this.loadTasks();
+ }
+
+ // ── Task selection ──
+
+ onTaskSelected(tag: string): void {
+ const previousTask = this.getCurrentTaskTag() ?? this.selectedTaskTag;
+ this.snapshotTaskState(previousTask);
+ this.syncTaskSelection(tag, true);
+ this.restoreTaskState(tag);
+ this.searchText = "";
+ this.filteredModels = null;
+ this.loadAllModels();
+ }
+
+ // ── Data loading ──
+
+ /**
+ * Fetch ALL models for the selected task.
+ * The backend paginates through HF Hub internally and caches the result.
+ * The first request per task may be slow; subsequent requests are instant.
+ */
+ private loadAllModels(): void {
+ const tag = this.selectedTaskTag || "text-generation";
+
+ this.loading = false;
+ this.errorMessage = null;
+
+ // Fast path: cached on the frontend
+ if (allModelsByTag.has(tag)) {
+ this.allModels = allModelsByTag.get(tag)!;
+ this.truncated = truncatedByTag.has(tag);
+ this.goToPage(0);
+ return;
+ }
+
+ // Previous error
+ if (errorByTag.has(tag)) {
+ this.errorMessage = errorByTag.get(tag)!;
+ this.allModels = [];
+ this.pagedModels = [];
+ this.totalPages = 0;
+ return;
+ }
+
+ // Another instance is already fetching this task — wait for it
+ if (inFlightByTag.has(tag)) {
+ this.loading = true;
+ this.modelPollInterval = setInterval(() => {
+ if (allModelsByTag.has(tag) || errorByTag.has(tag)) {
+ clearInterval(this.modelPollInterval!);
+ this.modelPollInterval = null;
+ this.loading = false;
+ if (allModelsByTag.has(tag)) {
+ this.allModels = allModelsByTag.get(tag)!;
+ this.truncated = truncatedByTag.has(tag);
+ this.goToPage(0);
+ } else {
+ this.errorMessage = errorByTag.get(tag)!;
+ this.cdr.detectChanges();
+ }
+ }
+ }, 200);
+ return;
+ }
+
+ // Cancel previous
+ this.subscription?.unsubscribe();
+ this.subscription = null;
+
+ this.allModels = [];
+ this.pagedModels = [];
+ this.totalPages = 0;
+
+ // Show spinner immediately for the initial fetch — it can take a while
+ // as the backend pages through HF Hub for the first time.
+ this.loading = true;
+ this.cdr.detectChanges();
+
+ this.subscription = this.http
+ .get<
+ HuggingFaceModelOption[]
+
>(`${AppSettings.getApiEndpoint()}/huggingface/models?task=${encodeURIComponent(tag)}`)
+ .subscribe({
+ next: resp => {
+ const models = resp.body ?? [];
+ if (resp.headers.get(TRUNCATED_HEADER) === "true") {
+ truncatedByTag.add(tag);
+ }
+ allModelsByTag.set(tag, models);
+ inFlightByTag.delete(tag);
+ this.loading = false;
+ this.truncated = truncatedByTag.has(tag);
+ this.allModels = models;
+ this.goToPage(0);
+ },
+ error: (err: unknown) => {
+ console.error(`Failed to load HuggingFace models for task
'${tag}':`, err);
+ const msg = "Failed to load models. Click retry to try again.";
+ errorByTag.set(tag, msg);
+ inFlightByTag.delete(tag);
+ this.loading = false;
+ this.errorMessage = msg;
+ this.cdr.detectChanges();
+ },
+ });
Review Comment:
`HttpClient.get<T>()` returns an `Observable<T>` that emits only the response
body by default, but this code treats the emitted value as an `HttpResponse`
(uses `resp.body` / `resp.headers`). This will not compile under `strict` TS
and would break the truncation-header logic. Use `observe: "response"` (and
optionally `finalize` to always clear `inFlightByTag`) so headers/body are
available and `inFlightByTag` can't get stuck if the request is unsubscribed.
##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDesc.scala:
##########
@@ -128,29 +128,15 @@ class HuggingFaceInferenceOpDesc extends
PythonOperatorDescriptor {
private def codegenForTask(t: String): TaskCodegen =
registeredCodegens.getOrElse(t, TextGenCodegen)
- /**
- * The output column name to use in generated Python and in the output
- * schema. Falls back to the `"hf_response"` sentinel when the user
- * leaves the field null or blank.
- *
- * Shared between [[generatePythonCode]] and [[getOutputSchemas]] so the
- * two never drift apart (a divergence would cause the Python operator
- * to write to a column the schema didn't declare). Returns
- * [[EncodableString]] rather than `String` so the value flows into the
- * `pyb` template with the encoding annotation intact.
- */
- private def resolvedResultColumn: EncodableString =
- if (resultColumn == null || resultColumn.trim.isEmpty) "hf_response"
- else resultColumn
-
override def generatePythonCode(): String = {
val safeTask: EncodableString =
if (task == null || task.trim.isEmpty) "text-generation" else task
val safeModelId: EncodableString =
if (modelId == null) "" else modelId.trim
val safePromptCol: EncodableString =
if (promptColumn == null) "" else promptColumn
- val safeResultCol: EncodableString = resolvedResultColumn
+ val safeResultCol: EncodableString =
+ if (resultColumn == null || resultColumn.trim.isEmpty) "hf_response"
else resultColumn
val safeSystemPrompt: EncodableString =
if (systemPrompt == null) "" else systemPrompt
Review Comment:
The operator currently only registers `TextGenCodegen` in
`registeredCodegens`, so any non-text-generation task selected in the UI
(image/audio/video/QA/etc.) will silently fall back to the text-generation
payload/parse logic. Given this PR adds UI support for many tasks (and adds new
`*Codegen` objects), those codegens also need to be wired into
`registeredCodegens` (and imported) so the backend generates task-correct
Python.
##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/ImageTaskCodegen.scala:
##########
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.huggingFace.codegen
+
+/**
+ * Codegen for the Hugging Face image-pipeline task family.
+ *
+ * Splits into two sub-families:
+ * - "image-only" tasks send raw image bytes as the request body and don't
+ * consume the prompt column: image-classification, object-detection,
+ * image-segmentation, image-to-text.
+ * - "image + prompt" tasks bundle a base64 image and a text prompt in a
+ * JSON payload: visual-question-answering, document-question-answering,
+ * zero-shot-image-classification, image-text-to-text, image-to-image.
+ *
+ * Per-row `current_image_bytes` is resolved upstream in
+ * [[PythonCodegenBase]]'s `process_table` (either from the operator's
+ * uploaded image or from `INPUT_IMAGE_COLUMN`). The image helpers
+ * (`_read_image_input`, `_compress_image_bytes`, `_image_input_as_base64`,
+ * `_read_binary_value`, `_looks_like_html`, `_html_to_image_bytes`,
+ * `_extract_json_arg`) live in PythonCodegenBase alongside the per-task
+ * tuples (`image_only_tasks`, `image_prompt_tasks`, `image_tasks`).
+ */
+object ImageTaskCodegen extends TaskCodegen {
+
+ /** Primary key for registration; the dispatcher maps every task in
+ * [[tasks]] to this codegen.
+ */
+ override val task: String = "image-classification"
+
+ /** All HF tasks routed through this codegen. */
+ override val tasks: Set[String] = Set(
+ // image-only
+ "image-classification",
+ "object-detection",
+ "image-segmentation",
+ "image-to-text",
+ // image + prompt
+ "visual-question-answering",
+ "document-question-answering",
+ "zero-shot-image-classification",
+ "image-text-to-text",
+ "image-to-image"
+ )
+
+ override def payloadPython(ctx: CodegenContext): String =
+ """ if task in image_only_tasks:
+ | payload = current_image_bytes
+ | use_raw_binary_body = True
+ | raw_binary_headers = image_headers
+ | elif task in ("visual-question-answering",
"document-question-answering"):
+ | payload = {
+ | "inputs": {
+ | "image":
self._image_input_as_base64(current_image_bytes),
+ | "question": prompt_value,
+ | }
+ | }
+ | elif task == "image-text-to-text":
+ | img_b64 =
self._image_input_as_base64(current_image_bytes)
+ | payload = {
+ | "model": self.MODEL_ID,
+ | "messages": [{
+ | "role": "user",
+ | "content": [
+ | {"type": "image_url", "image_url": {"url":
f"data:image/png;base64,{img_b64}"}},
+ | {"type": "text", "text": prompt_value if
prompt_value else "Describe this image."},
+ | ],
+ | }],
+ | "max_tokens": self.MAX_NEW_TOKENS,
+ | }
+ | elif task == "image-to-image":
+ | payload = current_image_bytes
+ | use_raw_binary_body = True
+ | raw_binary_headers = image_headers
+ | elif task == "zero-shot-image-classification":
+ | # Zero-shot requires the caller to supply candidate
labels.
+ | # We reuse the prompt column as a comma-separated label
list so
+ | # the task is shippable without a dedicated operator
field.
+ | # TODO: replace with a first-class `candidateLabels`
field once
+ | # the property panel supports task-specific inputs.
+ | #
+ | # Fail fast if usable labels can't be derived. Both
modes lead to
+ | # a meaningless inference call:
+ | # 1. Empty prompt column -> labels = []
+ | # The HF API rejects candidate_labels: [] with an
opaque 400.
+ | # 2. Missing prompt column -> upstream sets
prompt_value
+ | # to the fallback "What is shown in this image?",
which has
+ | # no comma, so labels collapses to a single
nonsense entry.
+ | # Zero-shot classification needs >= 2 candidate labels
to be
+ | # meaningful — surface a configuration error in both
cases.
+ | labels = [s.strip() for s in prompt_value.split(",") if
s.strip()]
+ | if len(labels) < 2:
+ | raise ValueError(
+ | "zero-shot-image-classification requires at
least 2 candidate "
+ | "labels: provide a comma-separated list in the
prompt column."
+ | )
+ | payload = {
+ | "inputs":
self._image_input_as_base64(current_image_bytes),
+ | "parameters": {"candidate_labels": labels},
+ | }
Review Comment:
`zero-shot-image-classification` still reuses `prompt_value` as the
comma-separated candidate-label list (and the error message tells the user to
put labels in the prompt column). In this PR, the UI/schema introduces a
dedicated `candidateLabels` field (and other codegen, e.g. `QaRankingCodegen`,
already uses `self.CANDIDATE_LABELS`). This mismatch will make the UI
configuration misleading and can cause runtime failures even when
`candidateLabels` is provided.
##########
frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts:
##########
@@ -42,29 +48,79 @@ import { NgxJsonViewerModule } from "ngx-json-viewer";
selector: "texera-row-modal-content",
templateUrl: "./result-panel-modal.component.html",
styleUrls: ["./result-panel-model.component.scss"],
- imports: [NgxJsonViewerModule],
+ imports: [CommonModule, NzButtonModule, NzIconModule, NgxJsonViewerModule],
})
Review Comment:
`NgxJsonViewerModule` is still listed in the component `imports`, but the
template no longer uses `<ngx-json-viewer>`. Keeping unused standalone imports
increases bundle size and can hide dead dependencies; it should be removed from
`imports` (and then the TS import can be removed separately).
##########
frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html:
##########
@@ -161,7 +161,40 @@ <h5 class="rightAlign"><span
[innerHTML]="compare(column.header, 'other')"></spa
class="table-cell"
nzEllipsis
(click)="open(i, row)">
- <span class="cell-content">{{ column.getCell(row) }}</span>
+ <span class="cell-content">
+ <ng-container *ngIf="isVideoCell(column.getCell(row)); else
checkAudio">
+ <i
+ nz-icon
+ nzType="play-circle"
+ nzTheme="outline"
Review Comment:
The result-table cell template currently renders only an icon + "Play/View"
text for media cells, but the PR description says media should be rendered
inline in the result table via `<img>`, `<audio>`, and `<video>` tags. Either
update the implementation to render the actual media elements in-table, or
adjust the PR description/UX expectation if the intended behavior is "show
label and open modal to view".
##########
frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts:
##########
@@ -0,0 +1,637 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Component, OnInit, OnDestroy, ChangeDetectorRef } from
"@angular/core";
+import { CommonModule } from "@angular/common";
+import { FormsModule } from "@angular/forms";
+import { FieldType, FieldTypeConfig, FormlyModule } from "@ngx-formly/core";
+import { HttpClient } from "@angular/common/http";
+import { NzSelectModule } from "ng-zorro-antd/select";
+import { NzInputModule } from "ng-zorro-antd/input";
+import { NzSpinModule } from "ng-zorro-antd/spin";
+import { NzButtonModule } from "ng-zorro-antd/button";
+import { NzIconModule } from "ng-zorro-antd/icon";
+import { AppSettings } from "../../../common/app-setting";
+import { Subject, Subscription } from "rxjs";
+import { debounceTime, finalize, switchMap, takeUntil } from "rxjs/operators";
+
+export interface HuggingFaceModelOption {
+ id: string;
+ label: string;
+ pipeline_tag?: string;
+ downloads?: number;
+ likes?: number;
+}
+
+export interface HuggingFaceTaskOption {
+ tag: string;
+ label: string;
+}
+
+// ── Static fallback task list (used when the dynamic fetch fails) ──
+export const STATIC_TASK_OPTIONS: HuggingFaceTaskOption[] = [
+ { tag: "text-generation", label: "Text Generation" },
+ { tag: "automatic-speech-recognition", label: "Automatic Speech Recognition"
},
+ { tag: "audio-classification", label: "Audio Classification" },
+ { tag: "text-classification", label: "Text Classification" },
+ { tag: "text-to-speech", label: "Text to Speech" },
+ { tag: "token-classification", label: "Token Classification" },
+ { tag: "question-answering", label: "Question Answering" },
+ { tag: "table-question-answering", label: "Table Question Answering" },
+ { tag: "zero-shot-classification", label: "Zero-Shot Classification" },
+ { tag: "translation", label: "Translation" },
+ { tag: "summarization", label: "Summarization" },
+ { tag: "feature-extraction", label: "Feature Extraction" },
+ { tag: "fill-mask", label: "Fill-Mask" },
+ { tag: "sentence-similarity", label: "Sentence Similarity" },
+ { tag: "text-ranking", label: "Text Ranking" },
+ { tag: "image-classification", label: "Image Classification" },
+ { tag: "object-detection", label: "Object Detection" },
+ { tag: "image-segmentation", label: "Image Segmentation" },
+ { tag: "image-to-text", label: "Image to Text" },
+ { tag: "visual-question-answering", label: "Visual Question Answering" },
+ { tag: "document-question-answering", label: "Document Question Answering" },
+ { tag: "zero-shot-image-classification", label: "Zero-Shot Image
Classification" },
+];
+
+const PAGE_SIZE = 50;
+
+const TRUNCATED_HEADER = "X-Texera-Truncated";
+
+// ── Module-level caches (reused across component instances) ──
+const allModelsByTag: Map<string, HuggingFaceModelOption[]> = new Map();
+const truncatedByTag: Set<string> = new Set();
+const inFlightByTag: Map<string, Subscription> = new Map();
+const errorByTag: Map<string, string> = new Map();
+
+let cachedTaskOptions: HuggingFaceTaskOption[] | null = null;
+let tasksFetchSubscription: Subscription | null = null;
+let tasksFetchError: string | null = null;
+
+/** Clear all cached data (useful for tests or manual invalidation). */
+export function invalidateHuggingFaceModelCache(): void {
+ allModelsByTag.clear();
+ truncatedByTag.clear();
+ errorByTag.clear();
+ inFlightByTag.forEach(sub => sub.unsubscribe());
+ inFlightByTag.clear();
+ cachedTaskOptions = null;
+ tasksFetchError = null;
+ tasksFetchSubscription?.unsubscribe();
+ tasksFetchSubscription = null;
+}
+
+@Component({
+ selector: "texera-hugging-face-model-select",
+ templateUrl: "./hugging-face.component.html",
+ styleUrls: ["hugging-face.component.scss"],
+ imports: [
+ CommonModule,
+ FormsModule,
+ NzSelectModule,
+ NzInputModule,
+ NzSpinModule,
+ NzButtonModule,
+ NzIconModule,
+ FormlyModule,
+ ],
+})
+export class HuggingFaceComponent extends FieldType<FieldTypeConfig>
implements OnInit, OnDestroy {
+ private readonly taskScopedKeys = [
+ "modelId",
+ "promptColumn",
+ "imageInput",
+ "audioInput",
+ "inputImageColumn",
+ "inputAudioColumn",
+ "candidateLabels",
+ "sentencesColumn",
+ "contextColumn",
+ "systemPrompt",
+ "maxNewTokens",
+ "temperature",
+ ] as const;
+ private readonly taskStateByTag = new Map<string, Partial<Record<(typeof
this.taskScopedKeys)[number], unknown>>>();
+ // ── Task state ──
+ taskOptions: HuggingFaceTaskOption[] = cachedTaskOptions ??
STATIC_TASK_OPTIONS;
+ selectedTaskTag = "text-generation";
+ tasksLoading = false;
+ tasksError: string | null = null;
+
+ // ── All models for the current task (fetched once from backend, cached) ──
+ private allModels: HuggingFaceModelOption[] = [];
+
+ // ── Displayed state ──
+ pagedModels: HuggingFaceModelOption[] = [];
+ currentPage = 0;
+ totalPages = 0;
+
+ loading = false;
+ errorMessage: string | null = null;
+
+ // ── Truncation notice ──
+ truncated = false;
+
+ // ── Search state ──
+ searchText = "";
+ searchLoading = false;
+ private filteredModels: HuggingFaceModelOption[] | null = null;
+ private readonly searchSubject$ = new Subject<string>();
+ private searchSubscription: Subscription | null = null;
+
+ private readonly destroy$ = new Subject<void>();
+ private subscription: Subscription | null = null;
+ private taskPollInterval: ReturnType<typeof setInterval> | null = null;
+ private modelPollInterval: ReturnType<typeof setInterval> | null = null;
+ private initTimeout: ReturnType<typeof setTimeout> | null = null;
+
+ constructor(
+ private http: HttpClient,
+ private cdr: ChangeDetectorRef
+ ) {
+ super();
+ }
+
+ ngOnInit(): void {
+ const savedTag = this.getCurrentTaskTag();
+ this.selectedTaskTag = savedTag ?? this.selectedTaskTag;
+ this.syncTaskSelection(this.selectedTaskTag, false);
+ this.loadTasks();
+ this.loadAllModels();
+ this.setupServerSearch();
+ // Formly can attach sibling controls after this field initializes.
+ // Re-sync once the control tree settles so a fresh operator starts in a
valid task state.
+ this.initTimeout = setTimeout(
+ () => this.syncTaskSelection(this.getCurrentTaskTag() ??
this.selectedTaskTag, false),
+ 0
+ );
+ }
+
+ ngOnDestroy(): void {
+ this.destroy$.next();
+ this.destroy$.complete();
+ this.subscription?.unsubscribe();
+ this.searchSubscription?.unsubscribe();
+ this.searchSubject$.complete();
+ if (this.taskPollInterval !== null) {
+ clearInterval(this.taskPollInterval);
+ }
+ if (this.modelPollInterval !== null) {
+ clearInterval(this.modelPollInterval);
+ }
+ if (this.initTimeout !== null) {
+ clearTimeout(this.initTimeout);
+ }
+ }
+
+ // ── Task loading ──
+
+ /**
+ * Fetch available pipeline tags from the backend, which proxies
HuggingFace's /api/tasks.
+ * Falls back to STATIC_TASK_OPTIONS if the fetch fails.
+ */
+ private loadTasks(): void {
+ // Already fetched and cached
+ if (cachedTaskOptions !== null) {
+ this.taskOptions = cachedTaskOptions;
+ return;
+ }
+
+ // Previous fetch errored — show static list, don't retry automatically
+ if (tasksFetchError !== null) {
+ this.tasksError = tasksFetchError;
+ this.taskOptions = STATIC_TASK_OPTIONS;
+ return;
+ }
+
+ // Another component instance already has a fetch in flight — wait for it
+ if (tasksFetchSubscription !== null) {
+ this.tasksLoading = true;
+ // Poll for completion (the module-level cache will be set when done)
+ this.taskPollInterval = setInterval(() => {
+ if (cachedTaskOptions !== null || tasksFetchError !== null) {
+ clearInterval(this.taskPollInterval!);
+ this.taskPollInterval = null;
+ this.tasksLoading = false;
+ this.taskOptions = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+ if (tasksFetchError) this.tasksError = tasksFetchError;
+ this.cdr.detectChanges();
+ }
+ }, 200);
+ return;
+ }
+
+ this.tasksLoading = true;
+ this.tasksError = null;
+ this.cdr.detectChanges();
+
+ tasksFetchSubscription = this.http
+
.get<HuggingFaceTaskOption[]>(`${AppSettings.getApiEndpoint()}/huggingface/tasks`)
+ .pipe(
+ takeUntil(this.destroy$),
+ finalize(() => {
+ // If takeUntil fires before next/error, reset the module-level guard
+ // so the next component instance can start a fresh fetch.
+ if (cachedTaskOptions === null && tasksFetchError === null) {
+ tasksFetchSubscription = null;
+ }
+ })
+ )
+ .subscribe({
Review Comment:
`tasksFetchSubscription` is module-level/shared across component instances,
but the request is canceled via `takeUntil(this.destroy$)`. If the initiating
component is destroyed before the request completes,
`cachedTaskOptions`/`tasksFetchError` remain null and any other instances
currently polling will spin indefinitely. Since this is a shared fetch, it
shouldn’t be tied to a single component’s lifecycle.
##########
frontend/src/app/common/util/media-type.util.ts:
##########
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+export function isVideoUrl(value: string): boolean {
+ if (typeof value !== "string") return false;
+ return (
+ value.match(/\.(mp4|webm|ogg)(\?.*)?$/i) !== null ||
+ value.startsWith("data:video/") ||
+ value.startsWith("https://v3b.fal.media/files/")
+ );
Review Comment:
The helpers are not mutually exclusive for `.ogg` URLs: `isVideoUrl` and
`isAudioUrl` both return true for `*.ogg`, so callers that check video first
will render many Ogg Vorbis audio files as `<video>`. Since video Ogg is
typically `.ogv`, it’s safer to treat `.ogg` as audio-only and use `.ogv` for
video extension detection.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]