Re: [PR] ML pipelines: RunInference - OSS Image Object detection, OSS Image Captioning, OSS Image Classification [beam]

via GitHub Tue, 10 Mar 2026 06:35:07 -0700


Amar3tto commented on code in PR #37186:
URL: https://github.com/apache/beam/pull/37186#discussion_r2911789850



##########
sdks/python/apache_beam/examples/inference/pytorch_imagenet_rightfit.py:
##########
@@ -0,0 +1,552 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This pipeline performs image classification using an open-source
+PyTorch EfficientNet-B0 model optimized for T4 GPUs.
+It reads image URIs from Pub/Sub, decodes and preprocesses them in parallel,
+and runs inference with adaptive batch sizing for optimal GPU utilization.
+The pipeline ensures exactly-once semantics via stateful deduplication and
+idempotent BigQuery writes, allowing stable and reproducible performance
+measurements under continuous load.
+Resources like Pub/Sub topic/subscription cleanup is handled programmatically.
+"""
+
+import argparse
+import io
+import json
+import logging
+import threading
+import time
+from typing import Iterable
+from typing import Optional
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+
+import apache_beam as beam
+from apache_beam.coders import BytesCoder
+from apache_beam.io.filesystems import FileSystems
+from apache_beam.ml.inference.base import KeyedModelHandler
+from apache_beam.ml.inference.base import PredictionResult
+from apache_beam.ml.inference.base import RunInference
+from apache_beam.ml.inference.pytorch_inference import 
PytorchModelHandlerTensor
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
+from apache_beam.options.pipeline_options import StandardOptions
+from apache_beam.runners.runner import PipelineResult
+from apache_beam.transforms import userstate
+from apache_beam.transforms import window
+
+from google.cloud import pubsub_v1
+import PIL.Image as PILImage
+
+# ============ Utility & Preprocessing ============
+
+IMAGENET_MEAN = [0.485, 0.456, 0.406]
+IMAGENET_STD = [0.229, 0.224, 0.225]
+
+
+def now_millis() -> int:
+  return int(time.time() * 1000)
+
+
+def read_gcs_file_lines(gcs_path: str) -> Iterable[str]:
+  """Reads text lines from a GCS file."""
+  with FileSystems.open(gcs_path) as f:
+    for line in f.read().decode("utf-8").splitlines():
+      yield line.strip()
+
+
+def load_image_from_uri(uri: str) -> bytes:
+  with FileSystems.open(uri) as f:
+    return f.read()
+
+
+def decode_and_preprocess(image_bytes: bytes, size: int = 224) -> torch.Tensor:
+  """Decode bytes->RGB PIL->resize/crop->tensor->normalize."""
+  with PILImage.open(io.BytesIO(image_bytes)) as img:
+    img = img.convert("RGB")
+    img.thumbnail((256, 256))
+    w, h = img.size
+    left = (w - size) // 2
+    top = (h - size) // 2
+    img = img.crop(
+        (max(0, left), max(0, top), min(w, left + size), min(h, top + size)))
+
+    # To tensor [0..1]
+    import numpy as np
+    mean = np.array(IMAGENET_MEAN, dtype=np.float32)
+    std = np.array(IMAGENET_STD, dtype=np.float32)
+    arr = np.asarray(img).astype("float32") / 255.0  # H,W,3
+    # Normalize
+    arr = (arr - mean) / std
+    # HWC -> CHW
+    arr = np.transpose(arr, (2, 0, 1)).astype("float32")
+    return torch.from_numpy(arr).float()  # float32, shape (3,224,224)
+
+
+class RateLimitDoFn(beam.DoFn):
+  def __init__(self, rate_per_sec: float):
+    self.delay = 1.0 / rate_per_sec
+
+  def process(self, element):
+    time.sleep(self.delay)
+    yield element
+
+
+class MakeKeyDoFn(beam.DoFn):
+  """Produce (image_id, payload) stable for dedup & BQ insertId."""
+  def __init__(self, input_mode: str):
+    self.input_mode = input_mode
+
+  def process(self, element: str | bytes):
+    # Input can be raw bytes from Pub/Sub or a GCS URI string, depends on mode
+    if self.input_mode == "bytes":
+      # element is bytes message, assume it includes
+      # {"image_id": "...", "bytes": base64?} or just raw bytes.
+      import hashlib
+      b = element if isinstance(element, (bytes, bytearray)) else 
bytes(element)
+      image_id = hashlib.sha1(b).hexdigest()
+      yield image_id, b
+    else:
+      # gcs_uris: element is uri string; image_id = sha1(uri)
+      import hashlib
+      uri = element.decode("utf-8") if isinstance(
+          element, (bytes, bytearray)) else str(element)
+      image_id = hashlib.sha1(uri.encode("utf-8")).hexdigest()
+      yield image_id, uri
+
+
+class DedupDoFn(beam.DoFn):
+  seen = userstate.ReadModifyWriteStateSpec('seen', BytesCoder())
+
+  def process(self, element, seen=beam.DoFn.StateParam(seen)):
+    if seen.read() == b'1':
+      return
+    seen.write(b'1')
+    yield element
+
+
+class DecodePreprocessDoFn(beam.DoFn):
+  """Turn (image_id, bytes|uri) -> (image_id, torch.Tensor)"""
+  def __init__(
+      self, input_mode: str, image_size: int = 224, decode_threads: int = 4):
+    self.input_mode = input_mode
+    self.image_size = image_size
+    self.decode_threads = decode_threads
+
+  def process(self, kv: Tuple[str, object]):
+    image_id, payload = kv
+    start = now_millis()
+
+    try:
+      if self.input_mode == "bytes":
+        b = payload if isinstance(payload,
+                                  (bytes, bytearray)) else bytes(payload)
+      else:
+        uri = payload if isinstance(payload, str) else payload.decode("utf-8")
+        b = load_image_from_uri(uri)
+
+      tensor = decode_and_preprocess(b, self.image_size)
+      preprocess_ms = now_millis() - start
+      yield image_id, {"tensor": tensor, "preprocess_ms": preprocess_ms}
+    except Exception as e:
+      logging.warning("Decode failed for %s: %s", image_id, e)
+      return
+
+
+class PostProcessDoFn(beam.DoFn):
+  """PredictionResult -> dict row for BQ."""
+  def __init__(self, top_k: int, model_name: str):
+    self.top_k = top_k
+    self.model_name = model_name
+
+  def process(self, kv: Tuple[str, PredictionResult]):
+    image_id, pred = kv
+
+    # pred can be PredictionResult OR raw inference object.
+    inference_obj = pred.inference if hasattr(pred, "inference") else pred
+
+    # inference_obj can be dict {'logits': tensor} OR tensor directly.
+    if isinstance(inference_obj, dict):
+      logits = inference_obj.get("logits", None)
+      if logits is None:
+        # fallback: try first value if dict shape differs
+        try:
+          logits = next(iter(inference_obj.values()))
+        except Exception:
+          logits = None
+    else:
+      logits = inference_obj
+
+    if not isinstance(logits, torch.Tensor):
+      logging.warning(
+          "Unexpected logits type for %s: %s", image_id, type(logits))
+      return
+
+    # Ensure shape [1, C]
+    if logits.ndim == 1:
+      logits = logits.unsqueeze(0)
+
+    probs = F.softmax(logits, dim=-1)  # [B, C]
+    values, indices = torch.topk(
+        probs, k=min(self.top_k, probs.shape[-1]), dim=-1
+    )
+
+    topk = [{
+        "class_id": int(idx.item()), "score": float(val.item())
+    } for idx, val in zip(indices[0], values[0])]
+
+    yield {
+        "image_id": image_id,
+        "model_name": self.model_name,
+        "topk": json.dumps(topk),
+        "infer_ms": now_millis(),
+    }
+
+
+# ============ Args & Helpers ============
+
+
+def parse_known_args(argv):
+  parser = argparse.ArgumentParser()
+  # I/O & runtime
+  parser.add_argument(
+      '--project', default='apache-beam-testing', help='GCP project ID')
+  parser.add_argument(
+      '--mode', default='streaming', choices=['streaming', 'batch'])
+  parser.add_argument(
+      '--output_table',
+      required=True,
+      help='BigQuery output table: dataset.table')
+  parser.add_argument(
+      '--publish_to_big_query', default='true', choices=['true', 'false'])
+  parser.add_argument(
+      '--input_mode', default='gcs_uris', choices=['gcs_uris', 'bytes'])
+  parser.add_argument(
+      '--input',
+      required=True,
+      help='GCS path to file with URIs (for load) OR unused for bytes')
+  parser.add_argument(
+      '--pubsub_topic',
+      default='projects/apache-beam-testing/topics/images_topic')
+  parser.add_argument(
+      '--pubsub_subscription',
+      default='projects/apache-beam-testing/subscriptions/images_subscription')
+  parser.add_argument(
+      '--rate_limit',
+      type=float,
+      default=None,
+      help='Elements per second for load pipeline')
+
+  # Model & inference
+  parser.add_argument(
+      '--pretrained_model_name',
+      default='efficientnet_b0',
+      help='OSS model name (e.g., efficientnet_b0|mobilenetv3_large_100)')
+  parser.add_argument(
+      '--model_state_dict_path',
+      default=None,
+      help='Optional state_dict to load')
+  parser.add_argument('--device', default='GPU', choices=['CPU', 'GPU'])
+  parser.add_argument('--image_size', type=int, default=224)
+  parser.add_argument('--top_k', type=int, default=5)
+  parser.add_argument(
+      '--inference_batch_size',
+      default='auto',
+      help='int or "auto"; auto tries 64→32→16')
+
+  # Windows
+  parser.add_argument('--window_sec', type=int, default=60)
+  parser.add_argument('--trigger_proc_time_sec', type=int, default=30)
+
+  # Dedup
+  parser.add_argument(
+      '--enable_dedup', default='false', choices=['true', 'false'])
+
+  known_args, pipeline_args = parser.parse_known_args(argv)
+  return known_args, pipeline_args
+
+
+def ensure_pubsub_resources(
+    project: str, topic_path: str, subscription_path: str):
+  publisher = pubsub_v1.PublisherClient()
+  subscriber = pubsub_v1.SubscriberClient()
+
+  topic_name = topic_path.split("/")[-1]
+  subscription_name = subscription_path.split("/")[-1]
+
+  full_topic_path = publisher.topic_path(project, topic_name)
+  full_subscription_path = subscriber.subscription_path(
+      project, subscription_name)
+
+  try:
+    publisher.get_topic(request={"topic": full_topic_path})
+  except Exception:
+    publisher.create_topic(name=full_topic_path)
+
+  try:
+    subscriber.get_subscription(
+        request={"subscription": full_subscription_path})
+  except Exception:
+    subscriber.create_subscription(
+        name=full_subscription_path, topic=full_topic_path)
+
+
+def cleanup_pubsub_resources(
+    project: str, topic_path: str, subscription_path: str):
+  publisher = pubsub_v1.PublisherClient()
+  subscriber = pubsub_v1.SubscriberClient()
+
+  topic_name = topic_path.split("/")[-1]
+  subscription_name = subscription_path.split("/")[-1]
+
+  full_topic_path = publisher.topic_path(project, topic_name)
+  full_subscription_path = subscriber.subscription_path(
+      project, subscription_name)
+
+  try:
+    subscriber.delete_subscription(
+        request={"subscription": full_subscription_path})
+    print(f"Deleted subscription: {subscription_name}")
+  except Exception as e:
+    print(f"Failed to delete subscription: {e}")
+
+  try:
+    publisher.delete_topic(request={"topic": full_topic_path})
+    print(f"Deleted topic: {topic_name}")
+  except Exception as e:
+    print(f"Failed to delete topic: {e}")
+
+
+def override_or_add(args, flag, value):
+  if flag in args:
+    idx = args.index(flag)
+    args[idx + 1] = str(value)
+  else:
+    args.extend([flag, str(value)])
+
+
+# ============ Model factory (timm) ============
+
+
+def create_timm_m(model_name: str, num_classes: int = 1000):
+  import timm
+  model = timm.create_model(
+      model_name, pretrained=True, num_classes=num_classes)
+  model.eval()
+  return model
+
+
+def pick_batch_size(arg: str) -> Optional[int]:
+  if isinstance(arg, str) and arg.lower() == 'auto':
+    return None
+  try:
+    return int(arg)
+  except Exception:
+    return None
+
+
+# ============ Load pipeline ============
+
+
+def run_load_pipeline(known_args, pipeline_args):
+  """Reads GCS file with URIs and publishes them to Pub/Sub (for streaming)."""
+  # enforce smaller/CPU-only defaults for feeder
+  override_or_add(pipeline_args, '--device', 'CPU')
+  override_or_add(pipeline_args, '--num_workers', '5')
+  override_or_add(pipeline_args, '--max_num_workers', '10')
+  override_or_add(
+      pipeline_args, '--job_name', f"images-load-pubsub-{int(time.time())}")
+  override_or_add(pipeline_args, '--project', known_args.project)
+  pipeline_args = [
+      arg for arg in pipeline_args if not arg.startswith("--experiments")
+  ]
+
+  pipeline_options = PipelineOptions(pipeline_args)
+  pipeline = beam.Pipeline(options=pipeline_options)
+
+  lines = (
+      pipeline
+      |
+      'ReadGCSFile' >> beam.Create(list(read_gcs_file_lines(known_args.input)))
+      | 'FilterEmpty' >> beam.Filter(lambda line: line.strip()))
+  if known_args.rate_limit:
+    lines = lines | 'RateLimit' >> beam.ParDo(
+        RateLimitDoFn(rate_per_sec=known_args.rate_limit))
+
+  _ = (
+      lines
+      | 'ToBytes' >> beam.Map(lambda line: line.encode('utf-8'))
+      | 'ToPubSub' >> beam.io.WriteToPubSub(topic=known_args.pubsub_topic))
+  return pipeline.run()
+
+
+# ============ Main pipeline ============
+
+
+def run(

Review Comment:
   Fixed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] ML pipelines: RunInference - OSS Image Object detection, OSS Image Captioning, OSS Image Classification [beam]

Reply via email to