ultralytics 8.0.239 Ultralytics Actions and hub-sdk adoption (#7431)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com>
This commit is contained in:
Glenn Jocher 2024-01-10 03:16:08 +01:00 committed by GitHub
parent e795277391
commit fe27db2f6e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
139 changed files with 6870 additions and 5125 deletions

View file

@ -28,7 +28,7 @@ class YOLOv8:
self.iou_thres = iou_thres
# Load the class names from the COCO dataset
self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
self.classes = yaml_load(check_yaml("coco128.yaml"))["names"]
# Generate a color palette for the classes
self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
@ -57,7 +57,7 @@ class YOLOv8:
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
# Create the label text with class name and score
label = f'{self.classes[class_id]}: {score:.2f}'
label = f"{self.classes[class_id]}: {score:.2f}"
# Calculate the dimensions of the label text
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
@ -67,8 +67,9 @@ class YOLOv8:
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
# Draw a filled rectangle as the background for the label text
cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
cv2.FILLED)
cv2.rectangle(
img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED
)
# Draw the label text on the image
cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
@ -182,7 +183,7 @@ class YOLOv8:
output_img: The output image with drawn detections.
"""
# Create an inference session using the ONNX model and specify execution providers
session = ort.InferenceSession(self.onnx_model, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
session = ort.InferenceSession(self.onnx_model, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
# Get the model inputs
model_inputs = session.get_inputs()
@ -202,17 +203,17 @@ class YOLOv8:
return self.postprocess(self.img, outputs) # output image
if __name__ == '__main__':
if __name__ == "__main__":
# Create an argument parser to handle command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='yolov8n.onnx', help='Input your ONNX model.')
parser.add_argument('--img', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
parser.add_argument('--conf-thres', type=float, default=0.5, help='Confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
parser.add_argument("--model", type=str, default="yolov8n.onnx", help="Input your ONNX model.")
parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
args = parser.parse_args()
# Check the requirements and select the appropriate backend (CPU or GPU)
check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime')
check_requirements("onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime")
# Create an instance of the YOLOv8 class with the specified arguments
detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)
@ -221,8 +222,8 @@ if __name__ == '__main__':
output_image = detection.main()
# Display the output image in a window
cv2.namedWindow('Output', cv2.WINDOW_NORMAL)
cv2.imshow('Output', output_image)
cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
cv2.imshow("Output", output_image)
# Wait for a key press to exit
cv2.waitKey(0)

View file

@ -6,7 +6,7 @@ import numpy as np
from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml
CLASSES = yaml_load(check_yaml('coco128.yaml'))['names']
CLASSES = yaml_load(check_yaml("coco128.yaml"))["names"]
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))
@ -23,7 +23,7 @@ def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
"""
label = f'{CLASSES[class_id]} ({confidence:.2f})'
label = f"{CLASSES[class_id]} ({confidence:.2f})"
color = colors[class_id]
cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
@ -76,8 +76,11 @@ def main(onnx_model, input_image):
(minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
if maxScore >= 0.25:
box = [
outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]),
outputs[0][i][2], outputs[0][i][3]]
outputs[0][i][0] - (0.5 * outputs[0][i][2]),
outputs[0][i][1] - (0.5 * outputs[0][i][3]),
outputs[0][i][2],
outputs[0][i][3],
]
boxes.append(box)
scores.append(maxScore)
class_ids.append(maxClassIndex)
@ -92,26 +95,34 @@ def main(onnx_model, input_image):
index = result_boxes[i]
box = boxes[index]
detection = {
'class_id': class_ids[index],
'class_name': CLASSES[class_ids[index]],
'confidence': scores[index],
'box': box,
'scale': scale}
"class_id": class_ids[index],
"class_name": CLASSES[class_ids[index]],
"confidence": scores[index],
"box": box,
"scale": scale,
}
detections.append(detection)
draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))
draw_bounding_box(
original_image,
class_ids[index],
scores[index],
round(box[0] * scale),
round(box[1] * scale),
round((box[0] + box[2]) * scale),
round((box[1] + box[3]) * scale),
)
# Display the image with bounding boxes
cv2.imshow('image', original_image)
cv2.imshow("image", original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
return detections
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.')
parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
parser.add_argument("--model", default="yolov8n.onnx", help="Input your ONNX model.")
parser.add_argument("--img", default=str(ASSETS / "bus.jpg"), help="Path to input image.")
args = parser.parse_args()
main(args.model, args.img)

View file

@ -13,14 +13,9 @@ img_height = 640
class LetterBox:
def __init__(self,
new_shape=(img_width, img_height),
auto=False,
scaleFill=False,
scaleup=True,
center=True,
stride=32):
def __init__(
self, new_shape=(img_width, img_height), auto=False, scaleFill=False, scaleup=True, center=True, stride=32
):
self.new_shape = new_shape
self.auto = auto
self.scaleFill = scaleFill
@ -33,9 +28,9 @@ class LetterBox:
if labels is None:
labels = {}
img = labels.get('img') if image is None else image
img = labels.get("img") if image is None else image
shape = img.shape[:2] # current shape [height, width]
new_shape = labels.pop('rect_shape', self.new_shape)
new_shape = labels.pop("rect_shape", self.new_shape)
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
@ -63,15 +58,16 @@ class LetterBox:
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=(114, 114, 114)) # add border
if labels.get('ratio_pad'):
labels['ratio_pad'] = (labels['ratio_pad'], (left, top)) # for evaluation
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
) # add border
if labels.get("ratio_pad"):
labels["ratio_pad"] = (labels["ratio_pad"], (left, top)) # for evaluation
if len(labels):
labels = self._update_labels(labels, ratio, dw, dh)
labels['img'] = img
labels['resized_shape'] = new_shape
labels["img"] = img
labels["resized_shape"] = new_shape
return labels
else:
return img
@ -79,15 +75,14 @@ class LetterBox:
def _update_labels(self, labels, ratio, padw, padh):
"""Update labels."""
labels['instances'].convert_bbox(format='xyxy')
labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
labels['instances'].scale(*ratio)
labels['instances'].add_padding(padw, padh)
labels["instances"].convert_bbox(format="xyxy")
labels["instances"].denormalize(*labels["img"].shape[:2][::-1])
labels["instances"].scale(*ratio)
labels["instances"].add_padding(padw, padh)
return labels
class Yolov8TFLite:
def __init__(self, tflite_model, input_image, confidence_thres, iou_thres):
"""
Initializes an instance of the Yolov8TFLite class.
@ -105,7 +100,7 @@ class Yolov8TFLite:
self.iou_thres = iou_thres
# Load the class names from the COCO dataset
self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
self.classes = yaml_load(check_yaml("coco128.yaml"))["names"]
# Generate a color palette for the classes
self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
@ -134,7 +129,7 @@ class Yolov8TFLite:
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
# Create the label text with class name and score
label = f'{self.classes[class_id]}: {score:.2f}'
label = f"{self.classes[class_id]}: {score:.2f}"
# Calculate the dimensions of the label text
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
@ -144,8 +139,13 @@ class Yolov8TFLite:
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
# Draw a filled rectangle as the background for the label text
cv2.rectangle(img, (int(label_x), int(label_y - label_height)),
(int(label_x + label_width), int(label_y + label_height)), color, cv2.FILLED)
cv2.rectangle(
img,
(int(label_x), int(label_y - label_height)),
(int(label_x + label_width), int(label_y + label_height)),
color,
cv2.FILLED,
)
# Draw the label text on the image
cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
@ -161,7 +161,7 @@ class Yolov8TFLite:
# Read the input image using OpenCV
self.img = cv2.imread(self.input_image)
print('image befor', self.img)
print("image before", self.img)
# Get the height and width of the input image
self.img_height, self.img_width = self.img.shape[:2]
@ -209,8 +209,10 @@ class Yolov8TFLite:
# Get the box, score, and class ID corresponding to the index
box = boxes[i]
gain = min(img_width / self.img_width, img_height / self.img_height)
pad = round((img_width - self.img_width * gain) / 2 -
0.1), round((img_height - self.img_height * gain) / 2 - 0.1)
pad = (
round((img_width - self.img_width * gain) / 2 - 0.1),
round((img_height - self.img_height * gain) / 2 - 0.1),
)
box[0] = (box[0] - pad[0]) / gain
box[1] = (box[1] - pad[1]) / gain
box[2] = box[2] / gain
@ -242,7 +244,7 @@ class Yolov8TFLite:
output_details = interpreter.get_output_details()
# Store the shape of the input for later use
input_shape = input_details[0]['shape']
input_shape = input_details[0]["shape"]
self.input_width = input_shape[1]
self.input_height = input_shape[2]
@ -251,19 +253,19 @@ class Yolov8TFLite:
img_data = img_data
# img_data = img_data.cpu().numpy()
# Set the input tensor to the interpreter
print(input_details[0]['index'])
print(input_details[0]["index"])
print(img_data.shape)
img_data = img_data.transpose((0, 2, 3, 1))
scale, zero_point = input_details[0]['quantization']
interpreter.set_tensor(input_details[0]['index'], img_data)
scale, zero_point = input_details[0]["quantization"]
interpreter.set_tensor(input_details[0]["index"], img_data)
# Run inference
interpreter.invoke()
# Get the output tensor from the interpreter
output = interpreter.get_tensor(output_details[0]['index'])
scale, zero_point = output_details[0]['quantization']
output = interpreter.get_tensor(output_details[0]["index"])
scale, zero_point = output_details[0]["quantization"]
output = (output.astype(np.float32) - zero_point) * scale
output[:, [0, 2]] *= img_width
@ -273,16 +275,15 @@ class Yolov8TFLite:
return self.postprocess(self.img, output)
if __name__ == '__main__':
if __name__ == "__main__":
# Create an argument parser to handle command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model',
type=str,
default='yolov8n_full_integer_quant.tflite',
help='Input your TFLite model.')
parser.add_argument('--img', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
parser.add_argument('--conf-thres', type=float, default=0.5, help='Confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
parser.add_argument(
"--model", type=str, default="yolov8n_full_integer_quant.tflite", help="Input your TFLite model."
)
parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
args = parser.parse_args()
# Create an instance of the Yolov8TFLite class with the specified arguments
@ -292,7 +293,7 @@ if __name__ == '__main__':
output_image = detection.main()
# Display the output image in a window
cv2.imshow('Output', output_image)
cv2.imshow("Output", output_image)
# Wait for a key press to exit
cv2.waitKey(0)

View file

@ -16,21 +16,22 @@ track_history = defaultdict(list)
current_region = None
counting_regions = [
{
'name': 'YOLOv8 Polygon Region',
'polygon': Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)]), # Polygon points
'counts': 0,
'dragging': False,
'region_color': (255, 42, 4), # BGR Value
'text_color': (255, 255, 255) # Region Text Color
"name": "YOLOv8 Polygon Region",
"polygon": Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)]), # Polygon points
"counts": 0,
"dragging": False,
"region_color": (255, 42, 4), # BGR Value
"text_color": (255, 255, 255), # Region Text Color
},
{
'name': 'YOLOv8 Rectangle Region',
'polygon': Polygon([(200, 250), (440, 250), (440, 550), (200, 550)]), # Polygon points
'counts': 0,
'dragging': False,
'region_color': (37, 255, 225), # BGR Value
'text_color': (0, 0, 0), # Region Text Color
}, ]
"name": "YOLOv8 Rectangle Region",
"polygon": Polygon([(200, 250), (440, 250), (440, 550), (200, 550)]), # Polygon points
"counts": 0,
"dragging": False,
"region_color": (37, 255, 225), # BGR Value
"text_color": (0, 0, 0), # Region Text Color
},
]
def mouse_callback(event, x, y, flags, param):
@ -40,32 +41,33 @@ def mouse_callback(event, x, y, flags, param):
# Mouse left button down event
if event == cv2.EVENT_LBUTTONDOWN:
for region in counting_regions:
if region['polygon'].contains(Point((x, y))):
if region["polygon"].contains(Point((x, y))):
current_region = region
current_region['dragging'] = True
current_region['offset_x'] = x
current_region['offset_y'] = y
current_region["dragging"] = True
current_region["offset_x"] = x
current_region["offset_y"] = y
# Mouse move event
elif event == cv2.EVENT_MOUSEMOVE:
if current_region is not None and current_region['dragging']:
dx = x - current_region['offset_x']
dy = y - current_region['offset_y']
current_region['polygon'] = Polygon([
(p[0] + dx, p[1] + dy) for p in current_region['polygon'].exterior.coords])
current_region['offset_x'] = x
current_region['offset_y'] = y
if current_region is not None and current_region["dragging"]:
dx = x - current_region["offset_x"]
dy = y - current_region["offset_y"]
current_region["polygon"] = Polygon(
[(p[0] + dx, p[1] + dy) for p in current_region["polygon"].exterior.coords]
)
current_region["offset_x"] = x
current_region["offset_y"] = y
# Mouse left button up event
elif event == cv2.EVENT_LBUTTONUP:
if current_region is not None and current_region['dragging']:
current_region['dragging'] = False
if current_region is not None and current_region["dragging"]:
current_region["dragging"] = False
def run(
weights='yolov8n.pt',
weights="yolov8n.pt",
source=None,
device='cpu',
device="cpu",
view_img=False,
save_img=False,
exist_ok=False,
@ -100,8 +102,8 @@ def run(
raise FileNotFoundError(f"Source path '{source}' does not exist.")
# Setup Model
model = YOLO(f'{weights}')
model.to('cuda') if device == '0' else model.to('cpu')
model = YOLO(f"{weights}")
model.to("cuda") if device == "0" else model.to("cpu")
# Extract classes names
names = model.model.names
@ -109,12 +111,12 @@ def run(
# Video setup
videocapture = cv2.VideoCapture(source)
frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4))
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*'mp4v')
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*"mp4v")
# Output setup
save_dir = increment_path(Path('ultralytics_rc_output') / 'exp', exist_ok)
save_dir = increment_path(Path("ultralytics_rc_output") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
video_writer = cv2.VideoWriter(str(save_dir / f'{Path(source).stem}.mp4'), fourcc, fps, (frame_width, frame_height))
video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.mp4"), fourcc, fps, (frame_width, frame_height))
# Iterate over video frames
while videocapture.isOpened():
@ -146,43 +148,48 @@ def run(
# Check if detection inside region
for region in counting_regions:
if region['polygon'].contains(Point((bbox_center[0], bbox_center[1]))):
region['counts'] += 1
if region["polygon"].contains(Point((bbox_center[0], bbox_center[1]))):
region["counts"] += 1
# Draw regions (Polygons/Rectangles)
for region in counting_regions:
region_label = str(region['counts'])
region_color = region['region_color']
region_text_color = region['text_color']
region_label = str(region["counts"])
region_color = region["region_color"]
region_text_color = region["text_color"]
polygon_coords = np.array(region['polygon'].exterior.coords, dtype=np.int32)
centroid_x, centroid_y = int(region['polygon'].centroid.x), int(region['polygon'].centroid.y)
polygon_coords = np.array(region["polygon"].exterior.coords, dtype=np.int32)
centroid_x, centroid_y = int(region["polygon"].centroid.x), int(region["polygon"].centroid.y)
text_size, _ = cv2.getTextSize(region_label,
cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.7,
thickness=line_thickness)
text_size, _ = cv2.getTextSize(
region_label, cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.7, thickness=line_thickness
)
text_x = centroid_x - text_size[0] // 2
text_y = centroid_y + text_size[1] // 2
cv2.rectangle(frame, (text_x - 5, text_y - text_size[1] - 5), (text_x + text_size[0] + 5, text_y + 5),
region_color, -1)
cv2.putText(frame, region_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, region_text_color,
line_thickness)
cv2.rectangle(
frame,
(text_x - 5, text_y - text_size[1] - 5),
(text_x + text_size[0] + 5, text_y + 5),
region_color,
-1,
)
cv2.putText(
frame, region_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, region_text_color, line_thickness
)
cv2.polylines(frame, [polygon_coords], isClosed=True, color=region_color, thickness=region_thickness)
if view_img:
if vid_frame_count == 1:
cv2.namedWindow('Ultralytics YOLOv8 Region Counter Movable')
cv2.setMouseCallback('Ultralytics YOLOv8 Region Counter Movable', mouse_callback)
cv2.imshow('Ultralytics YOLOv8 Region Counter Movable', frame)
cv2.namedWindow("Ultralytics YOLOv8 Region Counter Movable")
cv2.setMouseCallback("Ultralytics YOLOv8 Region Counter Movable", mouse_callback)
cv2.imshow("Ultralytics YOLOv8 Region Counter Movable", frame)
if save_img:
video_writer.write(frame)
for region in counting_regions: # Reinitialize count for each region
region['counts'] = 0
region["counts"] = 0
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
del vid_frame_count
@ -194,16 +201,16 @@ def run(
def parse_opt():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov8n.pt', help='initial weights path')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--source', type=str, required=True, help='video file path')
parser.add_argument('--view-img', action='store_true', help='show results')
parser.add_argument('--save-img', action='store_true', help='save results')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
parser.add_argument('--line-thickness', type=int, default=2, help='bounding box thickness')
parser.add_argument('--track-thickness', type=int, default=2, help='Tracking line thickness')
parser.add_argument('--region-thickness', type=int, default=4, help='Region thickness')
parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-img", action="store_true", help="save results")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--line-thickness", type=int, default=2, help="bounding box thickness")
parser.add_argument("--track-thickness", type=int, default=2, help="Tracking line thickness")
parser.add_argument("--region-thickness", type=int, default=4, help="Region thickness")
return parser.parse_args()
@ -213,6 +220,6 @@ def main(opt):
run(**vars(opt))
if __name__ == '__main__':
if __name__ == "__main__":
opt = parse_opt()
main(opt)

View file

@ -9,7 +9,7 @@ from sahi.utils.yolov8 import download_yolov8s_model
from ultralytics.utils.files import increment_path
def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False, exist_ok=False):
def run(weights="yolov8n.pt", source="test.mp4", view_img=False, save_img=False, exist_ok=False):
"""
Run object detection on a video using YOLOv8 and SAHI.
@ -25,41 +25,41 @@ def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False,
if not Path(source).exists():
raise FileNotFoundError(f"Source path '{source}' does not exist.")
yolov8_model_path = f'models/{weights}'
yolov8_model_path = f"models/{weights}"
download_yolov8s_model(yolov8_model_path)
detection_model = AutoDetectionModel.from_pretrained(model_type='yolov8',
model_path=yolov8_model_path,
confidence_threshold=0.3,
device='cpu')
detection_model = AutoDetectionModel.from_pretrained(
model_type="yolov8", model_path=yolov8_model_path, confidence_threshold=0.3, device="cpu"
)
# Video setup
videocapture = cv2.VideoCapture(source)
frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4))
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*'mp4v')
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*"mp4v")
# Output setup
save_dir = increment_path(Path('ultralytics_results_with_sahi') / 'exp', exist_ok)
save_dir = increment_path(Path("ultralytics_results_with_sahi") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
video_writer = cv2.VideoWriter(str(save_dir / f'{Path(source).stem}.mp4'), fourcc, fps, (frame_width, frame_height))
video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.mp4"), fourcc, fps, (frame_width, frame_height))
while videocapture.isOpened():
success, frame = videocapture.read()
if not success:
break
results = get_sliced_prediction(frame,
detection_model,
slice_height=512,
slice_width=512,
overlap_height_ratio=0.2,
overlap_width_ratio=0.2)
results = get_sliced_prediction(
frame, detection_model, slice_height=512, slice_width=512, overlap_height_ratio=0.2, overlap_width_ratio=0.2
)
object_prediction_list = results.object_prediction_list
boxes_list = []
clss_list = []
for ind, _ in enumerate(object_prediction_list):
boxes = object_prediction_list[ind].bbox.minx, object_prediction_list[ind].bbox.miny, \
object_prediction_list[ind].bbox.maxx, object_prediction_list[ind].bbox.maxy
boxes = (
object_prediction_list[ind].bbox.minx,
object_prediction_list[ind].bbox.miny,
object_prediction_list[ind].bbox.maxx,
object_prediction_list[ind].bbox.maxy,
)
clss = object_prediction_list[ind].category.name
boxes_list.append(boxes)
clss_list.append(clss)
@ -69,21 +69,19 @@ def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False,
cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (56, 56, 255), 2)
label = str(cls)
t_size = cv2.getTextSize(label, 0, fontScale=0.6, thickness=1)[0]
cv2.rectangle(frame, (int(x1), int(y1) - t_size[1] - 3), (int(x1) + t_size[0], int(y1) + 3), (56, 56, 255),
-1)
cv2.putText(frame,
label, (int(x1), int(y1) - 2),
0,
0.6, [255, 255, 255],
thickness=1,
lineType=cv2.LINE_AA)
cv2.rectangle(
frame, (int(x1), int(y1) - t_size[1] - 3), (int(x1) + t_size[0], int(y1) + 3), (56, 56, 255), -1
)
cv2.putText(
frame, label, (int(x1), int(y1) - 2), 0, 0.6, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA
)
if view_img:
cv2.imshow(Path(source).stem, frame)
if save_img:
video_writer.write(frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
video_writer.release()
videocapture.release()
@ -93,11 +91,11 @@ def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False,
def parse_opt():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov8n.pt', help='initial weights path')
parser.add_argument('--source', type=str, required=True, help='video file path')
parser.add_argument('--view-img', action='store_true', help='show results')
parser.add_argument('--save-img', action='store_true', help='save results')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-img", action="store_true", help="save results")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
return parser.parse_args()
@ -106,6 +104,6 @@ def main(opt):
run(**vars(opt))
if __name__ == '__main__':
if __name__ == "__main__":
opt = parse_opt()
main(opt)

View file

@ -21,18 +21,21 @@ class YOLOv8Seg:
"""
# Build Ort session
self.session = ort.InferenceSession(onnx_model,
providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
if ort.get_device() == 'GPU' else ['CPUExecutionProvider'])
self.session = ort.InferenceSession(
onnx_model,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
if ort.get_device() == "GPU"
else ["CPUExecutionProvider"],
)
# Numpy dtype: support both FP32 and FP16 onnx model
self.ndtype = np.half if self.session.get_inputs()[0].type == 'tensor(float16)' else np.single
self.ndtype = np.half if self.session.get_inputs()[0].type == "tensor(float16)" else np.single
# Get model width and height(YOLOv8-seg only has one input)
self.model_height, self.model_width = [x.shape for x in self.session.get_inputs()][0][-2:]
# Load COCO class names
self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
self.classes = yaml_load(check_yaml("coco128.yaml"))["names"]
# Create color palette
self.color_palette = Colors()
@ -60,14 +63,16 @@ class YOLOv8Seg:
preds = self.session.run(None, {self.session.get_inputs()[0].name: im})
# Post-process
boxes, segments, masks = self.postprocess(preds,
im0=im0,
ratio=ratio,
pad_w=pad_w,
pad_h=pad_h,
conf_threshold=conf_threshold,
iou_threshold=iou_threshold,
nm=nm)
boxes, segments, masks = self.postprocess(
preds,
im0=im0,
ratio=ratio,
pad_w=pad_w,
pad_h=pad_h,
conf_threshold=conf_threshold,
iou_threshold=iou_threshold,
nm=nm,
)
return boxes, segments, masks
def preprocess(self, img):
@ -98,7 +103,7 @@ class YOLOv8Seg:
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
# Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional)
img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
img = np.ascontiguousarray(np.einsum("HWC->CHW", img)[::-1], dtype=self.ndtype) / 255.0
img_process = img[None] if len(img.shape) == 3 else img
return img_process, ratio, (pad_w, pad_h)
@ -124,7 +129,7 @@ class YOLOv8Seg:
x, protos = preds[0], preds[1] # Two outputs: predictions and protos
# Transpose the first output: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm)
x = np.einsum('bcn->bnc', x)
x = np.einsum("bcn->bnc", x)
# Predictions filtering by conf-threshold
x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold]
@ -138,7 +143,6 @@ class YOLOv8Seg:
# Decode and return
if len(x) > 0:
# Bounding boxes format change: cxcywh -> xyxy
x[..., [0, 1]] -= x[..., [2, 3]] / 2
x[..., [2, 3]] += x[..., [0, 1]]
@ -173,13 +177,13 @@ class YOLOv8Seg:
segments (List): list of segment masks.
"""
segments = []
for x in masks.astype('uint8'):
for x in masks.astype("uint8"):
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # CHAIN_APPROX_SIMPLE
if c:
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
else:
c = np.zeros((0, 2)) # no segments found
segments.append(c.astype('float32'))
segments.append(c.astype("float32"))
return segments
@staticmethod
@ -219,7 +223,7 @@ class YOLOv8Seg:
masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN
masks = np.ascontiguousarray(masks)
masks = self.scale_mask(masks, im0_shape) # re-scale mask from P3 shape to original input image shape
masks = np.einsum('HWN -> NHW', masks) # HWN -> NHW
masks = np.einsum("HWN -> NHW", masks) # HWN -> NHW
masks = self.crop_mask(masks, bboxes)
return np.greater(masks, 0.5)
@ -250,8 +254,9 @@ class YOLOv8Seg:
if len(masks.shape) < 2:
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
masks = masks[top:bottom, left:right]
masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]),
interpolation=cv2.INTER_LINEAR) # INTER_CUBIC would be better
masks = cv2.resize(
masks, (im0_shape[1], im0_shape[0]), interpolation=cv2.INTER_LINEAR
) # INTER_CUBIC would be better
if len(masks.shape) == 2:
masks = masks[:, :, None]
return masks
@ -279,32 +284,46 @@ class YOLOv8Seg:
cv2.fillPoly(im_canvas, np.int32([segment]), self.color_palette(int(cls_), bgr=True))
# draw bbox rectangle
cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
self.color_palette(int(cls_), bgr=True), 1, cv2.LINE_AA)
cv2.putText(im, f'{self.classes[cls_]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette(int(cls_), bgr=True), 2, cv2.LINE_AA)
cv2.rectangle(
im,
(int(box[0]), int(box[1])),
(int(box[2]), int(box[3])),
self.color_palette(int(cls_), bgr=True),
1,
cv2.LINE_AA,
)
cv2.putText(
im,
f"{self.classes[cls_]}: {conf:.3f}",
(int(box[0]), int(box[1] - 9)),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
self.color_palette(int(cls_), bgr=True),
2,
cv2.LINE_AA,
)
# Mix image
im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0)
# Show image
if vis:
cv2.imshow('demo', im)
cv2.imshow("demo", im)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Save image
if save:
cv2.imwrite('demo.jpg', im)
cv2.imwrite("demo.jpg", im)
if __name__ == '__main__':
if __name__ == "__main__":
# Create an argument parser to handle command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, required=True, help='Path to ONNX model')
parser.add_argument('--source', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image')
parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold')
parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold')
parser.add_argument("--model", type=str, required=True, help="Path to ONNX model")
parser.add_argument("--source", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
args = parser.parse_args()
# Build model