feat: HSAP platform v2 — modular navigation, quality review, audit log, world model simulation
Major changes: - New frontend (platform/web/): Vite + React 18 + TypeScript + Tailwind - 4-module navigation: 数据送标 / 模型管理 / 车队管理 / 系统管理 - Data catalog with charts (DMS/ADAS/Lane 3-tab view) - Quality review workflow (标注质检): Good/Fine/Bad scoring with auto-advance - Audit enhancements: batch operations, rejection categories, Feishu notifications - Operation audit log (操作日志) - World model simulation studio (仿真工坊) - Dataset version management with snapshots and diff - ADAS 7-class dataset integration (138K images organized + compressed) - User management with Feishu integration and pagination - CRUD/search/filter on all pages, card layout redesign - PIL-optimized image overlay rendering - Auto-snapshot on build, in_review workflow stage - Removed embedded algorithm code (now in workspace)
This commit is contained in:
@@ -0,0 +1,179 @@
|
||||
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
import torch
|
||||
|
||||
from ultralytics.engine.results import Results
|
||||
from ultralytics.utils import ASSETS, YAML, nms, ops
|
||||
from ultralytics.utils.checks import check_yaml
|
||||
|
||||
|
||||
class YOLOv8Seg:
|
||||
"""YOLOv8 segmentation model for performing instance segmentation using ONNX Runtime.
|
||||
|
||||
This class implements a YOLOv8 instance segmentation model using ONNX Runtime for inference. It handles
|
||||
preprocessing of input images, running inference with the ONNX model, and postprocessing the results to generate
|
||||
bounding boxes and segmentation masks.
|
||||
|
||||
Attributes:
|
||||
session (ort.InferenceSession): ONNX Runtime inference session for model execution.
|
||||
imgsz (tuple[int, int]): Input image size as (height, width) for the model.
|
||||
classes (dict): Dictionary mapping class indices to class names from the dataset.
|
||||
conf (float): Confidence threshold for filtering detections.
|
||||
iou (float): IoU threshold used by non-maximum suppression.
|
||||
|
||||
Methods:
|
||||
letterbox: Resize and pad image while maintaining aspect ratio.
|
||||
preprocess: Preprocess the input image before feeding it into the model.
|
||||
postprocess: Post-process model predictions to extract meaningful results.
|
||||
process_mask: Process prototype masks with predicted mask coefficients to generate instance segmentation masks.
|
||||
|
||||
Examples:
|
||||
>>> model = YOLOv8Seg("yolov8n-seg.onnx", conf=0.25, iou=0.7)
|
||||
>>> img = cv2.imread("image.jpg")
|
||||
>>> results = model(img)
|
||||
>>> cv2.imshow("Segmentation", results[0].plot())
|
||||
"""
|
||||
|
||||
def __init__(self, onnx_model: str, conf: float = 0.25, iou: float = 0.7, imgsz: int | tuple[int, int] = 640):
|
||||
"""Initialize the instance segmentation model using an ONNX model.
|
||||
|
||||
Args:
|
||||
onnx_model (str): Path to the ONNX model file.
|
||||
conf (float, optional): Confidence threshold for filtering detections.
|
||||
iou (float, optional): IoU threshold for non-maximum suppression.
|
||||
imgsz (int | tuple[int, int], optional): Input image size of the model. Can be an integer for square input
|
||||
or a tuple for rectangular input.
|
||||
"""
|
||||
available = ort.get_available_providers()
|
||||
providers = [p for p in ("CUDAExecutionProvider", "CPUExecutionProvider") if p in available]
|
||||
self.session = ort.InferenceSession(onnx_model, providers=providers or available)
|
||||
|
||||
self.imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz
|
||||
self.classes = YAML.load(check_yaml("coco8.yaml"))["names"]
|
||||
self.conf = conf
|
||||
self.iou = iou
|
||||
|
||||
def __call__(self, img: np.ndarray) -> list[Results]:
|
||||
"""Run inference on the input image using the ONNX model.
|
||||
|
||||
Args:
|
||||
img (np.ndarray): The original input image in BGR format.
|
||||
|
||||
Returns:
|
||||
(list[Results]): Processed detection results after post-processing, containing bounding boxes and
|
||||
segmentation masks.
|
||||
"""
|
||||
prep_img = self.preprocess(img, self.imgsz)
|
||||
outs = self.session.run(None, {self.session.get_inputs()[0].name: prep_img})
|
||||
return self.postprocess(img, prep_img, outs)
|
||||
|
||||
def letterbox(self, img: np.ndarray, new_shape: tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""Resize and pad image while maintaining aspect ratio.
|
||||
|
||||
Args:
|
||||
img (np.ndarray): Input image in BGR format.
|
||||
new_shape (tuple[int, int], optional): Target shape as (height, width).
|
||||
|
||||
Returns:
|
||||
(np.ndarray): Resized and padded image.
|
||||
"""
|
||||
shape = img.shape[:2] # current shape [height, width]
|
||||
|
||||
# Scale ratio (new / old)
|
||||
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
|
||||
|
||||
# Compute padding
|
||||
new_unpad = round(shape[1] * r), round(shape[0] * r)
|
||||
dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding
|
||||
|
||||
if shape[::-1] != new_unpad: # resize
|
||||
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
|
||||
top, bottom = round(dh - 0.1), round(dh + 0.1)
|
||||
left, right = round(dw - 0.1), round(dw + 0.1)
|
||||
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
|
||||
|
||||
return img
|
||||
|
||||
def preprocess(self, img: np.ndarray, new_shape: tuple[int, int]) -> np.ndarray:
|
||||
"""Preprocess the input image before feeding it into the model.
|
||||
|
||||
Args:
|
||||
img (np.ndarray): The input image in BGR format.
|
||||
new_shape (tuple[int, int]): The target shape for resizing as (height, width).
|
||||
|
||||
Returns:
|
||||
(np.ndarray): Preprocessed image ready for model inference, with shape (1, 3, height, width) and normalized
|
||||
to [0, 1].
|
||||
"""
|
||||
img = self.letterbox(img, new_shape)
|
||||
img = img[..., ::-1].transpose([2, 0, 1])[None] # BGR to RGB, BHWC to BCHW
|
||||
img = np.ascontiguousarray(img)
|
||||
img = img.astype(np.float32) / 255 # Normalize to [0, 1]
|
||||
return img
|
||||
|
||||
def postprocess(self, img: np.ndarray, prep_img: np.ndarray, outs: list) -> list[Results]:
|
||||
"""Post-process model predictions to extract meaningful results.
|
||||
|
||||
Args:
|
||||
img (np.ndarray): The original input image.
|
||||
prep_img (np.ndarray): The preprocessed image used for inference.
|
||||
outs (list): Model outputs containing predictions and prototype masks.
|
||||
|
||||
Returns:
|
||||
(list[Results]): Processed detection results containing bounding boxes and segmentation masks.
|
||||
"""
|
||||
preds, protos = (torch.from_numpy(p) for p in outs)
|
||||
preds = nms.non_max_suppression(preds, self.conf, self.iou, nc=len(self.classes))
|
||||
|
||||
results = []
|
||||
for i, pred in enumerate(preds):
|
||||
pred[:, :4] = ops.scale_boxes(prep_img.shape[2:], pred[:, :4], img.shape)
|
||||
masks = self.process_mask(protos[i], pred[:, 6:], pred[:, :4], img.shape[:2])
|
||||
results.append(Results(img, path="", names=self.classes, boxes=pred[:, :6], masks=masks))
|
||||
|
||||
return results
|
||||
|
||||
def process_mask(
|
||||
self, protos: torch.Tensor, masks_in: torch.Tensor, bboxes: torch.Tensor, shape: tuple[int, int]
|
||||
) -> torch.Tensor:
|
||||
"""Process prototype masks with predicted mask coefficients to generate instance segmentation masks.
|
||||
|
||||
Args:
|
||||
protos (torch.Tensor): Prototype masks with shape (mask_dim, mask_h, mask_w).
|
||||
masks_in (torch.Tensor): Predicted mask coefficients with shape (N, mask_dim), where N is number of
|
||||
detections.
|
||||
bboxes (torch.Tensor): Bounding boxes with shape (N, 4), where N is number of detections.
|
||||
shape (tuple[int, int]): The size of the input image as (height, width).
|
||||
|
||||
Returns:
|
||||
(torch.Tensor): Binary segmentation masks with shape (N, height, width).
|
||||
"""
|
||||
c, mh, mw = protos.shape # CHW
|
||||
masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw) # Matrix multiplication
|
||||
masks = ops.scale_masks(masks[None], shape)[0] # Scale masks to original image size
|
||||
masks = ops.crop_mask(masks, bboxes) # Crop masks to bounding boxes
|
||||
return masks.gt_(0.0) # Convert to binary masks
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model", type=str, required=True, help="Path to ONNX model")
|
||||
parser.add_argument("--source", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
|
||||
parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
|
||||
parser.add_argument("--iou", type=float, default=0.7, help="NMS IoU threshold")
|
||||
args = parser.parse_args()
|
||||
|
||||
model = YOLOv8Seg(args.model, args.conf, args.iou)
|
||||
img = cv2.imread(args.source)
|
||||
results = model(img)
|
||||
|
||||
cv2.imshow("Segmented Image", results[0].plot())
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
Reference in New Issue
Block a user