Fixed OpenVINO int8 dynamic export and other minor changes (#14872)
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent 9ec8e9acbf
commit 0f105f4ea2
1 changed file with 9 additions and 45 deletions
@@ -138,7 +138,7 @@ def try_export(inner_func):
             LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{f}' ({file_size(f):.1f} MB)")
             return f, model
         except Exception as e:
-            LOGGER.info(f"{prefix} export failure ❌ {dt.t:.1f}s: {e}")
+            LOGGER.error(f"{prefix} export failure ❌ {dt.t:.1f}s: {e}")
             raise e

     return outer_func
@@ -204,9 +204,8 @@ class Exporter:
             self.args.half = False
             assert not self.args.dynamic, "half=True not compatible with dynamic=True, i.e. use only one."
         self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2)  # check image size
-        if self.args.int8 and not self.args.dynamic and (engine or xml):
-            self.args.dynamic = True  # enforce dynamic to export TensorRT INT8; ensures ONNX is dynamic
-            LOGGER.warning("WARNING ⚠️ INT8 export requires dynamic image sizes, setting dynamic=True.")
+        if self.args.int8 and engine:
+            self.args.dynamic = True  # enforce dynamic to export TensorRT INT8
         if self.args.optimize:
            assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False"
            assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'"
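
Context for this hunk (not part of the diff): after the change, only TensorRT INT8 exports force dynamic=True; OpenVINO INT8 exports keep the requested static input shape, which is the fix named in the commit title. A minimal usage sketch, assuming the public YOLO.export() API and a local yolov8n.pt checkpoint:

    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")  # example checkpoint (assumption)

    # OpenVINO INT8: dynamic is no longer forced by the Exporter
    model.export(format="openvino", int8=True, data="coco8.yaml", imgsz=640)

    # TensorRT INT8: the Exporter still enforces dynamic=True
    model.export(format="engine", int8=True, data="coco8.yaml", imgsz=640, device=0)
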
@@ -355,18 +354,20 @@ class Exporter:
         """Build and return a dataloader suitable for calibration of INT8 models."""
         LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
         data = (check_cls_dataset if self.model.task == "classify" else check_det_dataset)(self.args.data)
+        # TensorRT INT8 calibration should use 2x batch size
+        batch = self.args.batch * (2 if self.args.format == "engine" else 1)
         dataset = YOLODataset(
             data[self.args.split or "val"],
             data=data,
             task=self.model.task,
             imgsz=self.imgsz[0],
             augment=False,
-            batch_size=self.args.batch * 2,  # NOTE TensorRT INT8 calibration should use 2x batch size
+            batch_size=batch,
         )
         n = len(dataset)
         if n < 300:
             LOGGER.warning(f"{prefix} WARNING ⚠️ >300 images recommended for INT8 calibration, found {n} images.")
-        return build_dataloader(dataset, batch=self.args.batch * 2, workers=0)  # required for batch loading
+        return build_dataloader(dataset, batch=batch, workers=0)  # required for batch loading

     @try_export
     def export_torchscript(self, prefix=colorstr("TorchScript:")):
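
For illustration (not in the commit): the new batch variable applies the 2x rule only when calibrating for TensorRT. A standalone sketch of the same arithmetic, using a hypothetical free function in place of self.args:

    def calibration_batch(batch: int, export_format: str) -> int:
        """Return the INT8 calibration batch size: doubled for TensorRT 'engine', unchanged otherwise."""
        return batch * (2 if export_format == "engine" else 1)

    assert calibration_batch(8, "engine") == 16   # TensorRT calibration loads double-size batches
    assert calibration_batch(8, "openvino") == 8  # other formats keep the configured batch
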
@@ -422,7 +423,6 @@ class Exporter:

         # Checks
         model_onnx = onnx.load(f)  # load onnx model
-        # onnx.checker.check_model(model_onnx)  # check onnx model

         # Simplify
         if self.args.simplify:
@@ -432,10 +432,6 @@ class Exporter:
                 LOGGER.info(f"{prefix} slimming with onnxslim {onnxslim.__version__}...")
                 model_onnx = onnxslim.slim(model_onnx)

-                # ONNX Simplifier (deprecated as must be compiled with 'cmake' in aarch64 and Conda CI environments)
-                # import onnxsim
-                # model_onnx, check = onnxsim.simplify(model_onnx)
-                # assert check, "Simplified ONNX model could not be validated"
             except Exception as e:
                 LOGGER.warning(f"{prefix} simplifier failure: {e}")

@@ -679,7 +675,6 @@ class Exporter:
     def export_engine(self, prefix=colorstr("TensorRT:")):
         """YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt."""
         assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
-        # self.args.simplify = True
         f_onnx, _ = self.export_onnx()  # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016

         try:
@@ -786,7 +781,7 @@ class Exporter:
             # Load dataset w/ builder (for batching) and calibrate
             config.int8_calibrator = EngineCalibrator(
                 dataset=self.get_int8_calibration_dataloader(prefix),
-                batch=2 * self.args.batch,
+                batch=2 * self.args.batch,  # TensorRT INT8 calibration should use 2x batch size
                 cache=str(self.file.with_suffix(".cache")),
             )

@@ -869,8 +864,6 @@ class Exporter:
                 f.mkdir()
                 images = [batch["img"].permute(0, 2, 3, 1) for batch in self.get_int8_calibration_dataloader(prefix)]
                 images = torch.cat(images, 0).float()
-                # mean = images.view(-1, 3).mean(0)  # imagenet mean [123.675, 116.28, 103.53]
-                # std = images.view(-1, 3).std(0)  # imagenet std [58.395, 57.12, 57.375]
                 np.save(str(tmp_file), images.numpy().astype(np.float32))  # BHWC
                 np_data = [["images", tmp_file, [[[[0, 0, 0]]]], [[[[255, 255, 255]]]]]]
         else:
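
Side note (outside the diff): the block above stores the calibration images channels-last (BHWC) as float32 for the TensorFlow INT8 path. A standalone sketch of that conversion, where loader and dump_calibration_images are hypothetical names and loader yields dicts with a BCHW "img" tensor, like the dataloader built earlier:

    import numpy as np
    import torch

    def dump_calibration_images(loader, out_file="calib_images.npy"):
        # Hypothetical helper: concatenate BCHW batches, convert to BHWC, save as float32.
        images = [batch["img"].permute(0, 2, 3, 1) for batch in loader]  # BCHW -> BHWC
        images = torch.cat(images, 0).float()
        np.save(out_file, images.numpy().astype(np.float32))  # channels-last calibration tensor
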
@@ -998,20 +991,7 @@ class Exporter:
         if " " in f:
             LOGGER.warning(f"{prefix} WARNING ⚠️ your model may not work correctly with spaces in path '{f}'.")

-        # f_json = Path(f) / 'model.json'  # *.json path
-        # with open(f_json, 'w') as j:  # sort JSON Identity_* in ascending order
-        # subst = re.sub(
-        # r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
-        # r'"Identity.?.?": {"name": "Identity.?.?"}, '
-        # r'"Identity.?.?": {"name": "Identity.?.?"}, '
-        # r'"Identity.?.?": {"name": "Identity.?.?"}}}',
-        # r'{"outputs": {"Identity": {"name": "Identity"}, '
-        # r'"Identity_1": {"name": "Identity_1"}, '
-        # r'"Identity_2": {"name": "Identity_2"}, '
-        # r'"Identity_3": {"name": "Identity_3"}}}',
-        # f_json.read_text(),
-        # )
-        # j.write(subst)
+        # Add metadata
         yaml_save(Path(f) / "metadata.yaml", self.metadata)  # add metadata.yaml
         return f, None

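
For reference (outside the diff): the metadata.yaml written above is what AutoBackend reads back (names, stride, imgsz, task) when the exported folder is loaded. A minimal sketch of the same call outside the Exporter, assuming the ultralytics.utils.yaml_save helper and illustrative metadata values:

    from pathlib import Path

    from ultralytics.utils import yaml_save

    metadata = {"stride": 32, "names": {0: "person"}}  # illustrative values only (assumption)
    yaml_save(Path("yolov8n_web_model") / "metadata.yaml", metadata)  # hypothetical output folder
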
@@ -1104,27 +1084,11 @@ class Exporter:
         names = self.metadata["names"]
         nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height
         _, nc = out0_shape  # number of anchors, number of classes
-        # _, nc = out0.type.multiArrayType.shape
         assert len(names) == nc, f"{len(names)} names found for nc={nc}"  # check

         # Define output shapes (missing)
         out0.type.multiArrayType.shape[:] = out0_shape  # (3780, 80)
         out1.type.multiArrayType.shape[:] = out1_shape  # (3780, 4)
-        # spec.neuralNetwork.preprocessing[0].featureName = '0'
-
-        # Flexible input shapes
-        # from coremltools.models.neural_network import flexible_shape_utils
-        # s = []  # shapes
-        # s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192))
-        # s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384))  # (height, width)
-        # flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s)
-        # r = flexible_shape_utils.NeuralNetworkImageSizeRange()  # shape ranges
-        # r.add_height_range((192, 640))
-        # r.add_width_range((192, 640))
-        # flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r)
-
-        # Print
-        # print(spec.description)

         # Model from spec
         model = ct.models.MLModel(spec, weights_dir=weights_dir)
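
A quick end-to-end sanity check of the OpenVINO INT8 path this commit fixes (not part of the diff); a sketch assuming a local yolov8n.pt checkpoint and the sample image URL from the Ultralytics docs:

    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")  # example checkpoint (assumption)
    ov_dir = model.export(format="openvino", int8=True, data="coco8.yaml", imgsz=640)  # export() returns the output path

    ov_model = YOLO(ov_dir)  # reload the INT8 OpenVINO folder for inference
    results = ov_model("https://ultralytics.com/images/bus.jpg", imgsz=640)
    print(len(results[0].boxes))  # number of detections from the INT8 OpenVINO model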