Re: [PR] feat(huggingFace): add image task family via ImageTaskCodegen [texera]

via GitHub Wed, 17 Jun 2026 14:41:34 -0700


PG1204 commented on code in PR #5320:
URL: https://github.com/apache/texera/pull/5320#discussion_r3431636395



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/PythonCodegenBase.scala:
##########
@@ -361,6 +605,186 @@ object PythonCodegenBase {
        |            detail = "<empty response>"
        |        return f"{title} [status={status_code}] response={detail}"
        |
+       |    # ──────────────────────────────────────────────────────────────────
+       |    # Image-task helpers (used by ImageTaskCodegen and image-related
+       |    # branches of _call_provider).
+       |    # ──────────────────────────────────────────────────────────────────
+       |
+       |    def _read_image_input(self):
+       |        image_input = str(self.IMAGE_INPUT or "").strip()
+       |        if image_input.startswith("data:"):
+       |            _, encoded = image_input.split(",", 1)
+       |            return base64.b64decode(encoded)
+       |        if image_input.startswith("http://") or image_input.startswith("https://"):
+       |            resp = requests.get(image_input, timeout=120)
+       |            resp.raise_for_status()
+       |            return resp.content
+       |        if not os.path.exists(image_input):
+       |            raise FileNotFoundError(f"Image file not found at path: {image_input}")
+       |        if not os.path.isfile(image_input):
+       |            raise ValueError(f"Image input path is not a file: {image_input}")
+       |        with open(image_input, "rb") as image_file:
+       |            return image_file.read()
+       |
+       |    def _compress_image_bytes(self, image_bytes, max_bytes=33000):
+       |        from io import BytesIO
+       |        from PIL import Image as PILImage
+       |        if len(image_bytes) <= max_bytes:
+       |            return image_bytes
+       |        try:
+       |            img = PILImage.open(BytesIO(image_bytes))
+       |            img = img.convert("RGB")
+       |            max_dim = 512
+       |            quality = 75
+       |            while max_dim >= 160:
+       |                scale = min(1, max_dim / max(img.width, img.height))
+       |                w = max(1, round(img.width * scale))
+       |                h = max(1, round(img.height * scale))
+       |                resized = img.resize((w, h), PILImage.LANCZOS)
+       |                q = quality
+       |                while q >= 35:
+       |                    buf = BytesIO()
+       |                    resized.save(buf, format="JPEG", quality=q)
+       |                    if buf.tell() <= max_bytes:
+       |                        return buf.getvalue()
+       |                    q -= 10
+       |                max_dim = int(max_dim * 0.75)
+       |            buf = BytesIO()
+       |            resized.save(buf, format="JPEG", quality=35)
+       |            return buf.getvalue()
+       |        except Exception:
+       |            return image_bytes
+       |
+       |    def _image_input_as_base64(self, image_bytes):
+       |        return base64.b64encode(image_bytes).decode("utf-8")
+       |
+       |    def _read_binary_value(self, value):
+       |        if value is None or (isinstance(value, float) and pd.isna(value)):
+       |            return None
+       |        if isinstance(value, bytes):
+       |            return value
+       |        val = str(value).strip()
+       |        if not val:
+       |            return None
+       |        if self._looks_like_html(val):
+       |            return self._html_to_image_bytes(val)
+       |        if val.startswith("data:"):
+       |            _, encoded = val.split(",", 1)
+       |            return base64.b64decode(encoded)
+       |        if val.startswith("http://") or val.startswith("https://"):
+       |            resp = requests.get(val, timeout=120)
+       |            resp.raise_for_status()
+       |            return resp.content
+       |        if os.path.exists(val) and os.path.isfile(val):

Review Comment:
   Addressed in 
[35e8348](https://github.com/apache/texera/pull/5320/commits/35e8348019f06dadde74d8c399dd2c7623773475)



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/PythonCodegenBase.scala:
##########
@@ -361,6 +605,186 @@ object PythonCodegenBase {
        |            detail = "<empty response>"
        |        return f"{title} [status={status_code}] response={detail}"
        |
+       |    # ──────────────────────────────────────────────────────────────────
+       |    # Image-task helpers (used by ImageTaskCodegen and image-related
+       |    # branches of _call_provider).
+       |    # ──────────────────────────────────────────────────────────────────
+       |
+       |    def _read_image_input(self):
+       |        image_input = str(self.IMAGE_INPUT or "").strip()
+       |        if image_input.startswith("data:"):
+       |            _, encoded = image_input.split(",", 1)
+       |            return base64.b64decode(encoded)
+       |        if image_input.startswith("http://") or image_input.startswith("https://"):
+       |            resp = requests.get(image_input, timeout=120)
+       |            resp.raise_for_status()
+       |            return resp.content
+       |        if not os.path.exists(image_input):
+       |            raise FileNotFoundError(f"Image file not found at path: {image_input}")
+       |        if not os.path.isfile(image_input):
+       |            raise ValueError(f"Image input path is not a file: {image_input}")
+       |        with open(image_input, "rb") as image_file:
+       |            return image_file.read()

Review Comment:
   Addressed in 
[35e8348](https://github.com/apache/texera/pull/5320/commits/35e8348019f06dadde74d8c399dd2c7623773475)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat(huggingFace): add image task family via ImageTaskCodegen [texera]

Reply via email to