JetPack4.6.0とJetson NanoでPytorchとOnnxでyolov7の物体検出の環境設定(2023/02)

Jetson NanoでPytorchとOnnxで機械学習の物体検出とその環境設定をしたのでその備忘録。

ちなみに2023年2月時点で、Jetson NanoのJetPackの最新版は「4.6.0」

今回は「4.6.0」で環境構築してみた。

目次
1. versionと必要用語
2. 必要なライブラリのinstall
3. Pytorch(1.10.0)のインストール
4. OnnxRuntime(1.11.0)のインストール
5. yolov7で物体検出してみる

1. versionと必要用語

JetPack 4.6.0の各ライブラリバージョン
・ubuntu 18.04
・Python3 3.6
・cuda 10.2
・cuDNN 8.2.1

Wheelファイルについて

Wheelファイル(.whl)はビルド済みのPythonパッケージの配布形式のこと。Jetson向けのものはNVIDIAがすでにビルドしたファイルを提供している。
downloadして使う。
それ以外は直接ソースからビルドしてinstallしたりする。

2. 必要なライブラリのinstall

JetPackをinstallしてから行う。

# Refresh the package index before installing anything.
sudo apt update 
# Basic command-line tools plus pip for Python 3.
sudo apt install curl git unzip tree vim python3-pip
# OpenBLAS / OpenMPI / OpenMP packages (the same packages are installed again in the PyTorch step).
sudo apt-get install libopenblas-base libopenmpi-dev libomp-dev
# Upgrade pip itself.
pip3 install --upgrade pip
# Reboot before continuing with the following sections.
sudo reboot

3. Pytorch(1.10.0)のインストール

# PyTorch 1.10.0 from the prebuilt wheel
# Dependencies of the wheel (apt-get needs root, hence sudo)
sudo apt-get install libopenblas-base libopenmpi-dev libomp-dev
pip3 install Cython
# The wheel file must already be present in the current directory.
pip3 install numpy torch-1.10.0-cp36-cp36m-linux_aarch64.whl

torchvisionのインストール

# torchvision 0.11
# JPEG and zlib development packages, installed before building torchvision.
sudo apt-get install libjpeg-dev zlib1g-dev
# Clone the release/0.11 branch into ./torchvision.
git clone --branch release/0.11 https://github.com/pytorch/vision torchvision
cd torchvision
# Build and install torchvision from the cloned source tree.
sudo python3 setup.py install
# Install a Pillow release older than 7.
pip3 install 'pillow<7'

# Versionチェック（python3 の対話シェルで実行）
>>> import torch, torchvision
>>> torch.__version__
# 1.10.0
>>> torchvision.__version__
# 0.11.0

Pytorch とtorchVisionのversionの対応表

Pytorchのバージョン	対応するtorchVisionのバージョン
1.9.0	0.10.0
1.9.1	0.10.1
1.10.0	0.11.0
1.10.1	0.11.2
1.11.0	0.12.0
1.12.1	0.13.1

4. OnnxRuntime(1.11.0)のインストール

# onnxruntime 1.11.0
# Download the prebuilt wheel and save it under the wheel file name given to -O.
wget https://nvidia.box.com/shared/static/pmsqsiaw4pg9qrbeckcbymho6c01jj4z.whl -O onnxruntime_gpu-1.11.0-cp36-cp36m-linux_aarch64.whl
# Install the downloaded wheel.
pip3 install onnxruntime_gpu-1.11.0-cp36-cp36m-linux_aarch64.whl

onnxruntime.__version__
'1.11.0'

5. yolov7で物体検出してみる

import cv2
import random
import math
import numpy as np
import onnxruntime

def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    """Resize an image with unchanged aspect ratio and pad it to the target.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target size as (height, width); a single int means a
            square target.
        color: Constant border colour used for the padding.
        auto: When true, only the remainder of the padding modulo ``stride``
            is applied instead of padding all the way to ``new_shape``.
        scaleup: When false, the image is never enlarged, only shrunk.
        stride: Modulus used for the padding when ``auto`` is true.

    Returns:
        A tuple ``(image, ratio, (dw, dh))`` with the padded image, the
        scale factor that was applied, and the per-side horizontal and
        vertical padding (possibly fractional).
    """
    dims = im.shape[:2]  # (height, width) of the input
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    target_h, target_w = new_shape[0], new_shape[1]
    src_h, src_w = dims[0], dims[1]

    # One common scale factor so that the image fits inside the target.
    scale = min(target_h / src_h, target_w / src_w)
    if not scaleup:
        scale = min(scale, 1.0)

    # Size after scaling, in the (width, height) order cv2.resize expects.
    scaled_size = (int(round(src_w * scale)), int(round(src_h * scale)))
    pad_w = target_w - scaled_size[0]
    pad_h = target_h - scaled_size[1]
    if auto:
        pad_w = np.mod(pad_w, stride)
        pad_h = np.mod(pad_h, stride)

    # Split the padding between the two opposite sides.
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    if (src_w, src_h) != scaled_size:
        im = cv2.resize(im, scaled_size, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets hand the odd pixel of an odd total padding
    # to the bottom / right side.
    pad_top = int(round(pad_h - 0.1))
    pad_bottom = int(round(pad_h + 0.1))
    pad_left = int(round(pad_w - 0.1))
    pad_right = int(round(pad_w + 0.1))
    im = cv2.copyMakeBorder(
        im, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=color
    )
    return im, scale, (pad_w, pad_h)

def preprocess(img):
    # Scale input pixel values to 0 to 1
    image = img.transpose((2, 0, 1))
    image = np.expand_dims(image, 0)
    image = np.ascontiguousarray(image)
    return image.astype(np.float32) / 255

def onnx_inference(session, input_tensor):
    """Run the session on one input tensor and return its first output.

    Every output declared by the session is requested, the tensor is bound
    to the name of the session's first input, and only element 0 of the
    returned list is handed back to the caller.
    """
    requested = [node.name for node in session.get_outputs()]
    first_input_name = session.get_inputs()[0].name
    results = session.run(requested, {first_input_name: input_tensor})
    return results[0]

def post_process(outputs, ori_images, ratio, dwdh, conf_thres):
    """Draw the detections onto the original images and return those images.

    Args:
        outputs: Iterable of 7-element rows
            ``(batch_id, x0, y0, x1, y1, cls_id, score)``.
        ori_images: Sequence of images indexed by ``batch_id``; the images
            are drawn on in place.
        ratio: Scale factor that the box coordinates are divided by.
        dwdh: ``(dw, dh)`` padding that is subtracted from both box corners.
        conf_thres: Detections whose rounded score is below this value are
            skipped.

    Returns:
        The same ``ori_images`` object that was passed in.

    Note:
        Relies on the module-level ``names`` and ``colors`` lookups, which
        are not defined in the visible part of this file.
    """
    for detection in outputs:
        batch_id, x0, y0, x1, y1, cls_id, score = detection
        canvas = ori_images[int(batch_id)]

        # Undo the letterbox: remove the padding, then the scaling.
        corners = np.array([x0, y0, x1, y1])
        corners -= np.array(dwdh * 2)  # (dw, dh) repeated for both corners
        corners /= ratio
        left, top, right, bottom = corners.round().astype(np.int32).tolist()

        class_index = int(cls_id)
        confidence = round(float(score), 3)
        if confidence < conf_thres:
            continue

        class_name = names[class_index]
        box_color = colors[class_name]
        label = class_name + ' ' + str(confidence)
        cv2.rectangle(canvas, (left, top), (right, bottom), box_color, 2)
        cv2.putText(
            canvas,
            label,
            (left, top - 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.75,
            [225, 255, 255],
            thickness=2,
        )
    return ori_images

def onnx_setup(opt):
    """Create the ONNX Runtime session and read the model's input size.

    Args:
        opt: Options object providing ``cpu`` (the string ``'True'`` or the
            boolean ``True`` forces CPU execution; anything else enables
            CUDA with a CPU fallback) and ``onnx_path`` (path of the model).

    Returns:
        A tuple ``(session, new_shape)`` where ``new_shape`` is
        ``(height, width)`` taken from dimensions 2 and 3 of the first model
        input, i.e. the order in which ``letterbox`` indexes its
        ``new_shape`` argument.
    """
    # str() lets a boolean True behave the same as the string 'True'.
    use_cpu = str(opt.cpu) == 'True'
    if use_cpu:
        providers = ['CPUExecutionProvider']
    else:
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    session = onnxruntime.InferenceSession(opt.onnx_path, providers=providers)

    # Dimensions 2 and 3 of the first input are treated as height and width.
    input_shape = session.get_inputs()[0].shape
    in_image_h = input_shape[2]
    in_image_w = input_shape[3]
    # (height, width): letterbox divides new_shape[0] by the image height,
    # so a (width, height) tuple would transpose non-square targets.
    new_shape = (in_image_h, in_image_w)
    return session, new_shape

def inference_(frame, session, new_shape, conf_thres):
    """Detect objects in one frame and return the annotated copy of it.

    Args:
        frame: Input image array. NOTE(review): assumed to be a BGR frame
            from OpenCV (the visible caller reads it from a camera and shows
            the result with cv2.imshow) - confirm.
        session: Inference session passed on to ``onnx_inference``.
        new_shape: Target size passed on to ``letterbox``.
        conf_thres: Score threshold passed on to ``post_process``.

    Returns:
        The list returned by ``post_process``; it holds one image, the copy
        of ``frame`` with the detections drawn on it.
    """
    # Draw on a copy so that the caller's frame is left untouched.
    ori_images = [frame.copy()]
    # The upstream YOLOv7 ONNX example feeds RGB images to the model, while
    # OpenCV delivers BGR, so swap the channel order for the model input
    # only. The drawing copy stays in the original order for display.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    resized_image, ratio, dwdh = letterbox(rgb_frame, new_shape=new_shape, auto=False)
    input_tensor = preprocess(resized_image)
    outputs = onnx_inference(session, input_tensor)
    pred_output = post_process(outputs, ori_images, ratio, dwdh, conf_thres)
    return pred_output

# Build the inference session once, before grabbing frames.
session, new_shape = onnx_setup(opt)

# NOTE(review): left_camera, opt and window_title come from code that is
# omitted from this listing.
_, left_image = left_camera.read()

# inference_ returns a single list of annotated images, so it is bound to
# one name. NOTE(review): the original line was cut off after "opt.conf_";
# "conf_thres" is assumed from the parameter name of inference_ - confirm.
Routput = inference_(left_image, session, new_shape, opt.conf_thres)
camera_images = Routput[0]
# ... (omitted) ...
cv2.imshow(window_title, camera_images)
〜〜〜〜