ultralytics 8.0.239 Ultralytics Actions and hub-sdk adoption (#7431)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com>
This commit is contained in:
parent
e795277391
commit
fe27db2f6e
139 changed files with 6870 additions and 5125 deletions
|
|
@ -28,7 +28,7 @@ class YOLOv8:
|
|||
self.iou_thres = iou_thres
|
||||
|
||||
# Load the class names from the COCO dataset
|
||||
self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
|
||||
self.classes = yaml_load(check_yaml("coco128.yaml"))["names"]
|
||||
|
||||
# Generate a color palette for the classes
|
||||
self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
|
||||
|
|
@ -57,7 +57,7 @@ class YOLOv8:
|
|||
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
|
||||
|
||||
# Create the label text with class name and score
|
||||
label = f'{self.classes[class_id]}: {score:.2f}'
|
||||
label = f"{self.classes[class_id]}: {score:.2f}"
|
||||
|
||||
# Calculate the dimensions of the label text
|
||||
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
|
||||
|
|
@ -67,8 +67,9 @@ class YOLOv8:
|
|||
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
|
||||
|
||||
# Draw a filled rectangle as the background for the label text
|
||||
cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
|
||||
cv2.FILLED)
|
||||
cv2.rectangle(
|
||||
img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED
|
||||
)
|
||||
|
||||
# Draw the label text on the image
|
||||
cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
|
||||
|
|
@ -182,7 +183,7 @@ class YOLOv8:
|
|||
output_img: The output image with drawn detections.
|
||||
"""
|
||||
# Create an inference session using the ONNX model and specify execution providers
|
||||
session = ort.InferenceSession(self.onnx_model, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
||||
session = ort.InferenceSession(self.onnx_model, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
|
||||
|
||||
# Get the model inputs
|
||||
model_inputs = session.get_inputs()
|
||||
|
|
@ -202,17 +203,17 @@ class YOLOv8:
|
|||
return self.postprocess(self.img, outputs) # output image
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# Create an argument parser to handle command-line arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--model', type=str, default='yolov8n.onnx', help='Input your ONNX model.')
|
||||
parser.add_argument('--img', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
|
||||
parser.add_argument('--conf-thres', type=float, default=0.5, help='Confidence threshold')
|
||||
parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
|
||||
parser.add_argument("--model", type=str, default="yolov8n.onnx", help="Input your ONNX model.")
|
||||
parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
|
||||
parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
|
||||
parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check the requirements and select the appropriate backend (CPU or GPU)
|
||||
check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime')
|
||||
check_requirements("onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime")
|
||||
|
||||
# Create an instance of the YOLOv8 class with the specified arguments
|
||||
detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)
|
||||
|
|
@ -221,8 +222,8 @@ if __name__ == '__main__':
|
|||
output_image = detection.main()
|
||||
|
||||
# Display the output image in a window
|
||||
cv2.namedWindow('Output', cv2.WINDOW_NORMAL)
|
||||
cv2.imshow('Output', output_image)
|
||||
cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
|
||||
cv2.imshow("Output", output_image)
|
||||
|
||||
# Wait for a key press to exit
|
||||
cv2.waitKey(0)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import numpy as np
|
|||
from ultralytics.utils import ASSETS, yaml_load
|
||||
from ultralytics.utils.checks import check_yaml
|
||||
|
||||
CLASSES = yaml_load(check_yaml('coco128.yaml'))['names']
|
||||
CLASSES = yaml_load(check_yaml("coco128.yaml"))["names"]
|
||||
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))
|
||||
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
|
|||
x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
|
||||
y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
|
||||
"""
|
||||
label = f'{CLASSES[class_id]} ({confidence:.2f})'
|
||||
label = f"{CLASSES[class_id]} ({confidence:.2f})"
|
||||
color = colors[class_id]
|
||||
cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
|
||||
cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
|
||||
|
|
@ -76,8 +76,11 @@ def main(onnx_model, input_image):
|
|||
(minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
|
||||
if maxScore >= 0.25:
|
||||
box = [
|
||||
outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]),
|
||||
outputs[0][i][2], outputs[0][i][3]]
|
||||
outputs[0][i][0] - (0.5 * outputs[0][i][2]),
|
||||
outputs[0][i][1] - (0.5 * outputs[0][i][3]),
|
||||
outputs[0][i][2],
|
||||
outputs[0][i][3],
|
||||
]
|
||||
boxes.append(box)
|
||||
scores.append(maxScore)
|
||||
class_ids.append(maxClassIndex)
|
||||
|
|
@ -92,26 +95,34 @@ def main(onnx_model, input_image):
|
|||
index = result_boxes[i]
|
||||
box = boxes[index]
|
||||
detection = {
|
||||
'class_id': class_ids[index],
|
||||
'class_name': CLASSES[class_ids[index]],
|
||||
'confidence': scores[index],
|
||||
'box': box,
|
||||
'scale': scale}
|
||||
"class_id": class_ids[index],
|
||||
"class_name": CLASSES[class_ids[index]],
|
||||
"confidence": scores[index],
|
||||
"box": box,
|
||||
"scale": scale,
|
||||
}
|
||||
detections.append(detection)
|
||||
draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
|
||||
round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))
|
||||
draw_bounding_box(
|
||||
original_image,
|
||||
class_ids[index],
|
||||
scores[index],
|
||||
round(box[0] * scale),
|
||||
round(box[1] * scale),
|
||||
round((box[0] + box[2]) * scale),
|
||||
round((box[1] + box[3]) * scale),
|
||||
)
|
||||
|
||||
# Display the image with bounding boxes
|
||||
cv2.imshow('image', original_image)
|
||||
cv2.imshow("image", original_image)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
return detections
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.')
|
||||
parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
|
||||
parser.add_argument("--model", default="yolov8n.onnx", help="Input your ONNX model.")
|
||||
parser.add_argument("--img", default=str(ASSETS / "bus.jpg"), help="Path to input image.")
|
||||
args = parser.parse_args()
|
||||
main(args.model, args.img)
|
||||
|
|
|
|||
|
|
@ -13,14 +13,9 @@ img_height = 640
|
|||
|
||||
|
||||
class LetterBox:
|
||||
|
||||
def __init__(self,
|
||||
new_shape=(img_width, img_height),
|
||||
auto=False,
|
||||
scaleFill=False,
|
||||
scaleup=True,
|
||||
center=True,
|
||||
stride=32):
|
||||
def __init__(
|
||||
self, new_shape=(img_width, img_height), auto=False, scaleFill=False, scaleup=True, center=True, stride=32
|
||||
):
|
||||
self.new_shape = new_shape
|
||||
self.auto = auto
|
||||
self.scaleFill = scaleFill
|
||||
|
|
@ -33,9 +28,9 @@ class LetterBox:
|
|||
|
||||
if labels is None:
|
||||
labels = {}
|
||||
img = labels.get('img') if image is None else image
|
||||
img = labels.get("img") if image is None else image
|
||||
shape = img.shape[:2] # current shape [height, width]
|
||||
new_shape = labels.pop('rect_shape', self.new_shape)
|
||||
new_shape = labels.pop("rect_shape", self.new_shape)
|
||||
if isinstance(new_shape, int):
|
||||
new_shape = (new_shape, new_shape)
|
||||
|
||||
|
|
@ -63,15 +58,16 @@ class LetterBox:
|
|||
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
|
||||
top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
|
||||
left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
|
||||
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
|
||||
value=(114, 114, 114)) # add border
|
||||
if labels.get('ratio_pad'):
|
||||
labels['ratio_pad'] = (labels['ratio_pad'], (left, top)) # for evaluation
|
||||
img = cv2.copyMakeBorder(
|
||||
img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
|
||||
) # add border
|
||||
if labels.get("ratio_pad"):
|
||||
labels["ratio_pad"] = (labels["ratio_pad"], (left, top)) # for evaluation
|
||||
|
||||
if len(labels):
|
||||
labels = self._update_labels(labels, ratio, dw, dh)
|
||||
labels['img'] = img
|
||||
labels['resized_shape'] = new_shape
|
||||
labels["img"] = img
|
||||
labels["resized_shape"] = new_shape
|
||||
return labels
|
||||
else:
|
||||
return img
|
||||
|
|
@ -79,15 +75,14 @@ class LetterBox:
|
|||
def _update_labels(self, labels, ratio, padw, padh):
|
||||
"""Update labels."""
|
||||
|
||||
labels['instances'].convert_bbox(format='xyxy')
|
||||
labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
|
||||
labels['instances'].scale(*ratio)
|
||||
labels['instances'].add_padding(padw, padh)
|
||||
labels["instances"].convert_bbox(format="xyxy")
|
||||
labels["instances"].denormalize(*labels["img"].shape[:2][::-1])
|
||||
labels["instances"].scale(*ratio)
|
||||
labels["instances"].add_padding(padw, padh)
|
||||
return labels
|
||||
|
||||
|
||||
class Yolov8TFLite:
|
||||
|
||||
def __init__(self, tflite_model, input_image, confidence_thres, iou_thres):
|
||||
"""
|
||||
Initializes an instance of the Yolov8TFLite class.
|
||||
|
|
@ -105,7 +100,7 @@ class Yolov8TFLite:
|
|||
self.iou_thres = iou_thres
|
||||
|
||||
# Load the class names from the COCO dataset
|
||||
self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
|
||||
self.classes = yaml_load(check_yaml("coco128.yaml"))["names"]
|
||||
|
||||
# Generate a color palette for the classes
|
||||
self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
|
||||
|
|
@ -134,7 +129,7 @@ class Yolov8TFLite:
|
|||
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
|
||||
|
||||
# Create the label text with class name and score
|
||||
label = f'{self.classes[class_id]}: {score:.2f}'
|
||||
label = f"{self.classes[class_id]}: {score:.2f}"
|
||||
|
||||
# Calculate the dimensions of the label text
|
||||
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
|
||||
|
|
@ -144,8 +139,13 @@ class Yolov8TFLite:
|
|||
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
|
||||
|
||||
# Draw a filled rectangle as the background for the label text
|
||||
cv2.rectangle(img, (int(label_x), int(label_y - label_height)),
|
||||
(int(label_x + label_width), int(label_y + label_height)), color, cv2.FILLED)
|
||||
cv2.rectangle(
|
||||
img,
|
||||
(int(label_x), int(label_y - label_height)),
|
||||
(int(label_x + label_width), int(label_y + label_height)),
|
||||
color,
|
||||
cv2.FILLED,
|
||||
)
|
||||
|
||||
# Draw the label text on the image
|
||||
cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
|
||||
|
|
@ -161,7 +161,7 @@ class Yolov8TFLite:
|
|||
# Read the input image using OpenCV
|
||||
self.img = cv2.imread(self.input_image)
|
||||
|
||||
print('image befor', self.img)
|
||||
print("image before", self.img)
|
||||
# Get the height and width of the input image
|
||||
self.img_height, self.img_width = self.img.shape[:2]
|
||||
|
||||
|
|
@ -209,8 +209,10 @@ class Yolov8TFLite:
|
|||
# Get the box, score, and class ID corresponding to the index
|
||||
box = boxes[i]
|
||||
gain = min(img_width / self.img_width, img_height / self.img_height)
|
||||
pad = round((img_width - self.img_width * gain) / 2 -
|
||||
0.1), round((img_height - self.img_height * gain) / 2 - 0.1)
|
||||
pad = (
|
||||
round((img_width - self.img_width * gain) / 2 - 0.1),
|
||||
round((img_height - self.img_height * gain) / 2 - 0.1),
|
||||
)
|
||||
box[0] = (box[0] - pad[0]) / gain
|
||||
box[1] = (box[1] - pad[1]) / gain
|
||||
box[2] = box[2] / gain
|
||||
|
|
@ -242,7 +244,7 @@ class Yolov8TFLite:
|
|||
output_details = interpreter.get_output_details()
|
||||
|
||||
# Store the shape of the input for later use
|
||||
input_shape = input_details[0]['shape']
|
||||
input_shape = input_details[0]["shape"]
|
||||
self.input_width = input_shape[1]
|
||||
self.input_height = input_shape[2]
|
||||
|
||||
|
|
@ -251,19 +253,19 @@ class Yolov8TFLite:
|
|||
img_data = img_data
|
||||
# img_data = img_data.cpu().numpy()
|
||||
# Set the input tensor to the interpreter
|
||||
print(input_details[0]['index'])
|
||||
print(input_details[0]["index"])
|
||||
print(img_data.shape)
|
||||
img_data = img_data.transpose((0, 2, 3, 1))
|
||||
|
||||
scale, zero_point = input_details[0]['quantization']
|
||||
interpreter.set_tensor(input_details[0]['index'], img_data)
|
||||
scale, zero_point = input_details[0]["quantization"]
|
||||
interpreter.set_tensor(input_details[0]["index"], img_data)
|
||||
|
||||
# Run inference
|
||||
interpreter.invoke()
|
||||
|
||||
# Get the output tensor from the interpreter
|
||||
output = interpreter.get_tensor(output_details[0]['index'])
|
||||
scale, zero_point = output_details[0]['quantization']
|
||||
output = interpreter.get_tensor(output_details[0]["index"])
|
||||
scale, zero_point = output_details[0]["quantization"]
|
||||
output = (output.astype(np.float32) - zero_point) * scale
|
||||
|
||||
output[:, [0, 2]] *= img_width
|
||||
|
|
@ -273,16 +275,15 @@ class Yolov8TFLite:
|
|||
return self.postprocess(self.img, output)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# Create an argument parser to handle command-line arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--model',
|
||||
type=str,
|
||||
default='yolov8n_full_integer_quant.tflite',
|
||||
help='Input your TFLite model.')
|
||||
parser.add_argument('--img', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
|
||||
parser.add_argument('--conf-thres', type=float, default=0.5, help='Confidence threshold')
|
||||
parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
|
||||
parser.add_argument(
|
||||
"--model", type=str, default="yolov8n_full_integer_quant.tflite", help="Input your TFLite model."
|
||||
)
|
||||
parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
|
||||
parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
|
||||
parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Create an instance of the Yolov8TFLite class with the specified arguments
|
||||
|
|
@ -292,7 +293,7 @@ if __name__ == '__main__':
|
|||
output_image = detection.main()
|
||||
|
||||
# Display the output image in a window
|
||||
cv2.imshow('Output', output_image)
|
||||
cv2.imshow("Output", output_image)
|
||||
|
||||
# Wait for a key press to exit
|
||||
cv2.waitKey(0)
|
||||
|
|
|
|||
|
|
@ -16,21 +16,22 @@ track_history = defaultdict(list)
|
|||
current_region = None
|
||||
counting_regions = [
|
||||
{
|
||||
'name': 'YOLOv8 Polygon Region',
|
||||
'polygon': Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)]), # Polygon points
|
||||
'counts': 0,
|
||||
'dragging': False,
|
||||
'region_color': (255, 42, 4), # BGR Value
|
||||
'text_color': (255, 255, 255) # Region Text Color
|
||||
"name": "YOLOv8 Polygon Region",
|
||||
"polygon": Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)]), # Polygon points
|
||||
"counts": 0,
|
||||
"dragging": False,
|
||||
"region_color": (255, 42, 4), # BGR Value
|
||||
"text_color": (255, 255, 255), # Region Text Color
|
||||
},
|
||||
{
|
||||
'name': 'YOLOv8 Rectangle Region',
|
||||
'polygon': Polygon([(200, 250), (440, 250), (440, 550), (200, 550)]), # Polygon points
|
||||
'counts': 0,
|
||||
'dragging': False,
|
||||
'region_color': (37, 255, 225), # BGR Value
|
||||
'text_color': (0, 0, 0), # Region Text Color
|
||||
}, ]
|
||||
"name": "YOLOv8 Rectangle Region",
|
||||
"polygon": Polygon([(200, 250), (440, 250), (440, 550), (200, 550)]), # Polygon points
|
||||
"counts": 0,
|
||||
"dragging": False,
|
||||
"region_color": (37, 255, 225), # BGR Value
|
||||
"text_color": (0, 0, 0), # Region Text Color
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def mouse_callback(event, x, y, flags, param):
|
||||
|
|
@ -40,32 +41,33 @@ def mouse_callback(event, x, y, flags, param):
|
|||
# Mouse left button down event
|
||||
if event == cv2.EVENT_LBUTTONDOWN:
|
||||
for region in counting_regions:
|
||||
if region['polygon'].contains(Point((x, y))):
|
||||
if region["polygon"].contains(Point((x, y))):
|
||||
current_region = region
|
||||
current_region['dragging'] = True
|
||||
current_region['offset_x'] = x
|
||||
current_region['offset_y'] = y
|
||||
current_region["dragging"] = True
|
||||
current_region["offset_x"] = x
|
||||
current_region["offset_y"] = y
|
||||
|
||||
# Mouse move event
|
||||
elif event == cv2.EVENT_MOUSEMOVE:
|
||||
if current_region is not None and current_region['dragging']:
|
||||
dx = x - current_region['offset_x']
|
||||
dy = y - current_region['offset_y']
|
||||
current_region['polygon'] = Polygon([
|
||||
(p[0] + dx, p[1] + dy) for p in current_region['polygon'].exterior.coords])
|
||||
current_region['offset_x'] = x
|
||||
current_region['offset_y'] = y
|
||||
if current_region is not None and current_region["dragging"]:
|
||||
dx = x - current_region["offset_x"]
|
||||
dy = y - current_region["offset_y"]
|
||||
current_region["polygon"] = Polygon(
|
||||
[(p[0] + dx, p[1] + dy) for p in current_region["polygon"].exterior.coords]
|
||||
)
|
||||
current_region["offset_x"] = x
|
||||
current_region["offset_y"] = y
|
||||
|
||||
# Mouse left button up event
|
||||
elif event == cv2.EVENT_LBUTTONUP:
|
||||
if current_region is not None and current_region['dragging']:
|
||||
current_region['dragging'] = False
|
||||
if current_region is not None and current_region["dragging"]:
|
||||
current_region["dragging"] = False
|
||||
|
||||
|
||||
def run(
|
||||
weights='yolov8n.pt',
|
||||
weights="yolov8n.pt",
|
||||
source=None,
|
||||
device='cpu',
|
||||
device="cpu",
|
||||
view_img=False,
|
||||
save_img=False,
|
||||
exist_ok=False,
|
||||
|
|
@ -100,8 +102,8 @@ def run(
|
|||
raise FileNotFoundError(f"Source path '{source}' does not exist.")
|
||||
|
||||
# Setup Model
|
||||
model = YOLO(f'{weights}')
|
||||
model.to('cuda') if device == '0' else model.to('cpu')
|
||||
model = YOLO(f"{weights}")
|
||||
model.to("cuda") if device == "0" else model.to("cpu")
|
||||
|
||||
# Extract classes names
|
||||
names = model.model.names
|
||||
|
|
@ -109,12 +111,12 @@ def run(
|
|||
# Video setup
|
||||
videocapture = cv2.VideoCapture(source)
|
||||
frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4))
|
||||
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*'mp4v')
|
||||
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*"mp4v")
|
||||
|
||||
# Output setup
|
||||
save_dir = increment_path(Path('ultralytics_rc_output') / 'exp', exist_ok)
|
||||
save_dir = increment_path(Path("ultralytics_rc_output") / "exp", exist_ok)
|
||||
save_dir.mkdir(parents=True, exist_ok=True)
|
||||
video_writer = cv2.VideoWriter(str(save_dir / f'{Path(source).stem}.mp4'), fourcc, fps, (frame_width, frame_height))
|
||||
video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.mp4"), fourcc, fps, (frame_width, frame_height))
|
||||
|
||||
# Iterate over video frames
|
||||
while videocapture.isOpened():
|
||||
|
|
@ -146,43 +148,48 @@ def run(
|
|||
|
||||
# Check if detection inside region
|
||||
for region in counting_regions:
|
||||
if region['polygon'].contains(Point((bbox_center[0], bbox_center[1]))):
|
||||
region['counts'] += 1
|
||||
if region["polygon"].contains(Point((bbox_center[0], bbox_center[1]))):
|
||||
region["counts"] += 1
|
||||
|
||||
# Draw regions (Polygons/Rectangles)
|
||||
for region in counting_regions:
|
||||
region_label = str(region['counts'])
|
||||
region_color = region['region_color']
|
||||
region_text_color = region['text_color']
|
||||
region_label = str(region["counts"])
|
||||
region_color = region["region_color"]
|
||||
region_text_color = region["text_color"]
|
||||
|
||||
polygon_coords = np.array(region['polygon'].exterior.coords, dtype=np.int32)
|
||||
centroid_x, centroid_y = int(region['polygon'].centroid.x), int(region['polygon'].centroid.y)
|
||||
polygon_coords = np.array(region["polygon"].exterior.coords, dtype=np.int32)
|
||||
centroid_x, centroid_y = int(region["polygon"].centroid.x), int(region["polygon"].centroid.y)
|
||||
|
||||
text_size, _ = cv2.getTextSize(region_label,
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
fontScale=0.7,
|
||||
thickness=line_thickness)
|
||||
text_size, _ = cv2.getTextSize(
|
||||
region_label, cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.7, thickness=line_thickness
|
||||
)
|
||||
text_x = centroid_x - text_size[0] // 2
|
||||
text_y = centroid_y + text_size[1] // 2
|
||||
cv2.rectangle(frame, (text_x - 5, text_y - text_size[1] - 5), (text_x + text_size[0] + 5, text_y + 5),
|
||||
region_color, -1)
|
||||
cv2.putText(frame, region_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, region_text_color,
|
||||
line_thickness)
|
||||
cv2.rectangle(
|
||||
frame,
|
||||
(text_x - 5, text_y - text_size[1] - 5),
|
||||
(text_x + text_size[0] + 5, text_y + 5),
|
||||
region_color,
|
||||
-1,
|
||||
)
|
||||
cv2.putText(
|
||||
frame, region_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, region_text_color, line_thickness
|
||||
)
|
||||
cv2.polylines(frame, [polygon_coords], isClosed=True, color=region_color, thickness=region_thickness)
|
||||
|
||||
if view_img:
|
||||
if vid_frame_count == 1:
|
||||
cv2.namedWindow('Ultralytics YOLOv8 Region Counter Movable')
|
||||
cv2.setMouseCallback('Ultralytics YOLOv8 Region Counter Movable', mouse_callback)
|
||||
cv2.imshow('Ultralytics YOLOv8 Region Counter Movable', frame)
|
||||
cv2.namedWindow("Ultralytics YOLOv8 Region Counter Movable")
|
||||
cv2.setMouseCallback("Ultralytics YOLOv8 Region Counter Movable", mouse_callback)
|
||||
cv2.imshow("Ultralytics YOLOv8 Region Counter Movable", frame)
|
||||
|
||||
if save_img:
|
||||
video_writer.write(frame)
|
||||
|
||||
for region in counting_regions: # Reinitialize count for each region
|
||||
region['counts'] = 0
|
||||
region["counts"] = 0
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
del vid_frame_count
|
||||
|
|
@ -194,16 +201,16 @@ def run(
|
|||
def parse_opt():
|
||||
"""Parse command line arguments."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--weights', type=str, default='yolov8n.pt', help='initial weights path')
|
||||
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
||||
parser.add_argument('--source', type=str, required=True, help='video file path')
|
||||
parser.add_argument('--view-img', action='store_true', help='show results')
|
||||
parser.add_argument('--save-img', action='store_true', help='save results')
|
||||
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
|
||||
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
|
||||
parser.add_argument('--line-thickness', type=int, default=2, help='bounding box thickness')
|
||||
parser.add_argument('--track-thickness', type=int, default=2, help='Tracking line thickness')
|
||||
parser.add_argument('--region-thickness', type=int, default=4, help='Region thickness')
|
||||
parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
|
||||
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
|
||||
parser.add_argument("--source", type=str, required=True, help="video file path")
|
||||
parser.add_argument("--view-img", action="store_true", help="show results")
|
||||
parser.add_argument("--save-img", action="store_true", help="save results")
|
||||
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
|
||||
parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
|
||||
parser.add_argument("--line-thickness", type=int, default=2, help="bounding box thickness")
|
||||
parser.add_argument("--track-thickness", type=int, default=2, help="Tracking line thickness")
|
||||
parser.add_argument("--region-thickness", type=int, default=4, help="Region thickness")
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
|
@ -213,6 +220,6 @@ def main(opt):
|
|||
run(**vars(opt))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
opt = parse_opt()
|
||||
main(opt)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from sahi.utils.yolov8 import download_yolov8s_model
|
|||
from ultralytics.utils.files import increment_path
|
||||
|
||||
|
||||
def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False, exist_ok=False):
|
||||
def run(weights="yolov8n.pt", source="test.mp4", view_img=False, save_img=False, exist_ok=False):
|
||||
"""
|
||||
Run object detection on a video using YOLOv8 and SAHI.
|
||||
|
||||
|
|
@ -25,41 +25,41 @@ def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False,
|
|||
if not Path(source).exists():
|
||||
raise FileNotFoundError(f"Source path '{source}' does not exist.")
|
||||
|
||||
yolov8_model_path = f'models/{weights}'
|
||||
yolov8_model_path = f"models/{weights}"
|
||||
download_yolov8s_model(yolov8_model_path)
|
||||
detection_model = AutoDetectionModel.from_pretrained(model_type='yolov8',
|
||||
model_path=yolov8_model_path,
|
||||
confidence_threshold=0.3,
|
||||
device='cpu')
|
||||
detection_model = AutoDetectionModel.from_pretrained(
|
||||
model_type="yolov8", model_path=yolov8_model_path, confidence_threshold=0.3, device="cpu"
|
||||
)
|
||||
|
||||
# Video setup
|
||||
videocapture = cv2.VideoCapture(source)
|
||||
frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4))
|
||||
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*'mp4v')
|
||||
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*"mp4v")
|
||||
|
||||
# Output setup
|
||||
save_dir = increment_path(Path('ultralytics_results_with_sahi') / 'exp', exist_ok)
|
||||
save_dir = increment_path(Path("ultralytics_results_with_sahi") / "exp", exist_ok)
|
||||
save_dir.mkdir(parents=True, exist_ok=True)
|
||||
video_writer = cv2.VideoWriter(str(save_dir / f'{Path(source).stem}.mp4'), fourcc, fps, (frame_width, frame_height))
|
||||
video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.mp4"), fourcc, fps, (frame_width, frame_height))
|
||||
|
||||
while videocapture.isOpened():
|
||||
success, frame = videocapture.read()
|
||||
if not success:
|
||||
break
|
||||
|
||||
results = get_sliced_prediction(frame,
|
||||
detection_model,
|
||||
slice_height=512,
|
||||
slice_width=512,
|
||||
overlap_height_ratio=0.2,
|
||||
overlap_width_ratio=0.2)
|
||||
results = get_sliced_prediction(
|
||||
frame, detection_model, slice_height=512, slice_width=512, overlap_height_ratio=0.2, overlap_width_ratio=0.2
|
||||
)
|
||||
object_prediction_list = results.object_prediction_list
|
||||
|
||||
boxes_list = []
|
||||
clss_list = []
|
||||
for ind, _ in enumerate(object_prediction_list):
|
||||
boxes = object_prediction_list[ind].bbox.minx, object_prediction_list[ind].bbox.miny, \
|
||||
object_prediction_list[ind].bbox.maxx, object_prediction_list[ind].bbox.maxy
|
||||
boxes = (
|
||||
object_prediction_list[ind].bbox.minx,
|
||||
object_prediction_list[ind].bbox.miny,
|
||||
object_prediction_list[ind].bbox.maxx,
|
||||
object_prediction_list[ind].bbox.maxy,
|
||||
)
|
||||
clss = object_prediction_list[ind].category.name
|
||||
boxes_list.append(boxes)
|
||||
clss_list.append(clss)
|
||||
|
|
@ -69,21 +69,19 @@ def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False,
|
|||
cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (56, 56, 255), 2)
|
||||
label = str(cls)
|
||||
t_size = cv2.getTextSize(label, 0, fontScale=0.6, thickness=1)[0]
|
||||
cv2.rectangle(frame, (int(x1), int(y1) - t_size[1] - 3), (int(x1) + t_size[0], int(y1) + 3), (56, 56, 255),
|
||||
-1)
|
||||
cv2.putText(frame,
|
||||
label, (int(x1), int(y1) - 2),
|
||||
0,
|
||||
0.6, [255, 255, 255],
|
||||
thickness=1,
|
||||
lineType=cv2.LINE_AA)
|
||||
cv2.rectangle(
|
||||
frame, (int(x1), int(y1) - t_size[1] - 3), (int(x1) + t_size[0], int(y1) + 3), (56, 56, 255), -1
|
||||
)
|
||||
cv2.putText(
|
||||
frame, label, (int(x1), int(y1) - 2), 0, 0.6, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA
|
||||
)
|
||||
|
||||
if view_img:
|
||||
cv2.imshow(Path(source).stem, frame)
|
||||
if save_img:
|
||||
video_writer.write(frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
video_writer.release()
|
||||
videocapture.release()
|
||||
|
|
@ -93,11 +91,11 @@ def run(weights='yolov8n.pt', source='test.mp4', view_img=False, save_img=False,
|
|||
def parse_opt():
|
||||
"""Parse command line arguments."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--weights', type=str, default='yolov8n.pt', help='initial weights path')
|
||||
parser.add_argument('--source', type=str, required=True, help='video file path')
|
||||
parser.add_argument('--view-img', action='store_true', help='show results')
|
||||
parser.add_argument('--save-img', action='store_true', help='save results')
|
||||
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
|
||||
parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
|
||||
parser.add_argument("--source", type=str, required=True, help="video file path")
|
||||
parser.add_argument("--view-img", action="store_true", help="show results")
|
||||
parser.add_argument("--save-img", action="store_true", help="save results")
|
||||
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
|
@ -106,6 +104,6 @@ def main(opt):
|
|||
run(**vars(opt))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
opt = parse_opt()
|
||||
main(opt)
|
||||
|
|
|
|||
|
|
@ -21,18 +21,21 @@ class YOLOv8Seg:
|
|||
"""
|
||||
|
||||
# Build Ort session
|
||||
self.session = ort.InferenceSession(onnx_model,
|
||||
providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
if ort.get_device() == 'GPU' else ['CPUExecutionProvider'])
|
||||
self.session = ort.InferenceSession(
|
||||
onnx_model,
|
||||
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
|
||||
if ort.get_device() == "GPU"
|
||||
else ["CPUExecutionProvider"],
|
||||
)
|
||||
|
||||
# Numpy dtype: support both FP32 and FP16 onnx model
|
||||
self.ndtype = np.half if self.session.get_inputs()[0].type == 'tensor(float16)' else np.single
|
||||
self.ndtype = np.half if self.session.get_inputs()[0].type == "tensor(float16)" else np.single
|
||||
|
||||
# Get model width and height(YOLOv8-seg only has one input)
|
||||
self.model_height, self.model_width = [x.shape for x in self.session.get_inputs()][0][-2:]
|
||||
|
||||
# Load COCO class names
|
||||
self.classes = yaml_load(check_yaml('coco128.yaml'))['names']
|
||||
self.classes = yaml_load(check_yaml("coco128.yaml"))["names"]
|
||||
|
||||
# Create color palette
|
||||
self.color_palette = Colors()
|
||||
|
|
@ -60,14 +63,16 @@ class YOLOv8Seg:
|
|||
preds = self.session.run(None, {self.session.get_inputs()[0].name: im})
|
||||
|
||||
# Post-process
|
||||
boxes, segments, masks = self.postprocess(preds,
|
||||
im0=im0,
|
||||
ratio=ratio,
|
||||
pad_w=pad_w,
|
||||
pad_h=pad_h,
|
||||
conf_threshold=conf_threshold,
|
||||
iou_threshold=iou_threshold,
|
||||
nm=nm)
|
||||
boxes, segments, masks = self.postprocess(
|
||||
preds,
|
||||
im0=im0,
|
||||
ratio=ratio,
|
||||
pad_w=pad_w,
|
||||
pad_h=pad_h,
|
||||
conf_threshold=conf_threshold,
|
||||
iou_threshold=iou_threshold,
|
||||
nm=nm,
|
||||
)
|
||||
return boxes, segments, masks
|
||||
|
||||
def preprocess(self, img):
|
||||
|
|
@ -98,7 +103,7 @@ class YOLOv8Seg:
|
|||
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
|
||||
|
||||
# Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional)
|
||||
img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
|
||||
img = np.ascontiguousarray(np.einsum("HWC->CHW", img)[::-1], dtype=self.ndtype) / 255.0
|
||||
img_process = img[None] if len(img.shape) == 3 else img
|
||||
return img_process, ratio, (pad_w, pad_h)
|
||||
|
||||
|
|
@ -124,7 +129,7 @@ class YOLOv8Seg:
|
|||
x, protos = preds[0], preds[1] # Two outputs: predictions and protos
|
||||
|
||||
# Transpose the first output: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm)
|
||||
x = np.einsum('bcn->bnc', x)
|
||||
x = np.einsum("bcn->bnc", x)
|
||||
|
||||
# Predictions filtering by conf-threshold
|
||||
x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold]
|
||||
|
|
@ -138,7 +143,6 @@ class YOLOv8Seg:
|
|||
|
||||
# Decode and return
|
||||
if len(x) > 0:
|
||||
|
||||
# Bounding boxes format change: cxcywh -> xyxy
|
||||
x[..., [0, 1]] -= x[..., [2, 3]] / 2
|
||||
x[..., [2, 3]] += x[..., [0, 1]]
|
||||
|
|
@ -173,13 +177,13 @@ class YOLOv8Seg:
|
|||
segments (List): list of segment masks.
|
||||
"""
|
||||
segments = []
|
||||
for x in masks.astype('uint8'):
|
||||
for x in masks.astype("uint8"):
|
||||
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # CHAIN_APPROX_SIMPLE
|
||||
if c:
|
||||
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
|
||||
else:
|
||||
c = np.zeros((0, 2)) # no segments found
|
||||
segments.append(c.astype('float32'))
|
||||
segments.append(c.astype("float32"))
|
||||
return segments
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -219,7 +223,7 @@ class YOLOv8Seg:
|
|||
masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN
|
||||
masks = np.ascontiguousarray(masks)
|
||||
masks = self.scale_mask(masks, im0_shape) # re-scale mask from P3 shape to original input image shape
|
||||
masks = np.einsum('HWN -> NHW', masks) # HWN -> NHW
|
||||
masks = np.einsum("HWN -> NHW", masks) # HWN -> NHW
|
||||
masks = self.crop_mask(masks, bboxes)
|
||||
return np.greater(masks, 0.5)
|
||||
|
||||
|
|
@ -250,8 +254,9 @@ class YOLOv8Seg:
|
|||
if len(masks.shape) < 2:
|
||||
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
|
||||
masks = masks[top:bottom, left:right]
|
||||
masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]),
|
||||
interpolation=cv2.INTER_LINEAR) # INTER_CUBIC would be better
|
||||
masks = cv2.resize(
|
||||
masks, (im0_shape[1], im0_shape[0]), interpolation=cv2.INTER_LINEAR
|
||||
) # INTER_CUBIC would be better
|
||||
if len(masks.shape) == 2:
|
||||
masks = masks[:, :, None]
|
||||
return masks
|
||||
|
|
@ -279,32 +284,46 @@ class YOLOv8Seg:
|
|||
cv2.fillPoly(im_canvas, np.int32([segment]), self.color_palette(int(cls_), bgr=True))
|
||||
|
||||
# draw bbox rectangle
|
||||
cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
|
||||
self.color_palette(int(cls_), bgr=True), 1, cv2.LINE_AA)
|
||||
cv2.putText(im, f'{self.classes[cls_]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette(int(cls_), bgr=True), 2, cv2.LINE_AA)
|
||||
cv2.rectangle(
|
||||
im,
|
||||
(int(box[0]), int(box[1])),
|
||||
(int(box[2]), int(box[3])),
|
||||
self.color_palette(int(cls_), bgr=True),
|
||||
1,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
cv2.putText(
|
||||
im,
|
||||
f"{self.classes[cls_]}: {conf:.3f}",
|
||||
(int(box[0]), int(box[1] - 9)),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
self.color_palette(int(cls_), bgr=True),
|
||||
2,
|
||||
cv2.LINE_AA,
|
||||
)
|
||||
|
||||
# Mix image
|
||||
im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0)
|
||||
|
||||
# Show image
|
||||
if vis:
|
||||
cv2.imshow('demo', im)
|
||||
cv2.imshow("demo", im)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
# Save image
|
||||
if save:
|
||||
cv2.imwrite('demo.jpg', im)
|
||||
cv2.imwrite("demo.jpg", im)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# Create an argument parser to handle command-line arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--model', type=str, required=True, help='Path to ONNX model')
|
||||
parser.add_argument('--source', type=str, default=str(ASSETS / 'bus.jpg'), help='Path to input image')
|
||||
parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold')
|
||||
parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold')
|
||||
parser.add_argument("--model", type=str, required=True, help="Path to ONNX model")
|
||||
parser.add_argument("--source", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
|
||||
parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
|
||||
parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Build model
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue