yolov5と、DeepSortというtrackingアルゴリズムを使って、物体検出とtrackingをしてみた。
最終的にpythonのGUIツールtkinterでマルチスレッド化して動かした。
全体像
備忘録として使った技術をまとめてく。
目次
1.yolov5
2.trackingアルゴリズム「DeepSort」
3.GUIツールの「tkinter」
4.pythonでマルチスレッド
5.出力結果
1.yolov5
yolov5はDarknetを使ってない。
今回はmac上で動かして、Pytorch のyolov5を使った。
yolov5は
Small(YOLOv5s) => Medium(YOLOv5m) => Large(YOLOv5l)=> Xlarge(YOLOv5x)
の順で大きさと精度が増してくらしい。
2.trackingアルゴリズム「DeepSort」
DeepSORTのアーキテクチャは物体検出とトラッキングに分かれてて、yolov5の推論後に、物体検出のbboxに番号をつけて検出物をtrackingする。ここのgithubを参考にした
・Yolov5_DeepSort_Pytorch
・yolov4-deepsort
DeepSortとyolov5のtrackingコードの一部。
yolov5_tracking.py(の一部)
import os
import sys
import time
from multiprocessing import Queue
from pathlib import Path

import cv2
import numpy as np
import PIL.Image
import PIL.ImageTk
import torch
import torch.backends.cudnn as cudnn

# NOTE(review): this listing is an excerpt.  get_config, DeepSort,
# attempt_load, initialize, check_suffix, LoadStreams, LoadImages, time_sync,
# preprocess_img, non_max_suppression, Annotator and deepsort_detection are
# not defined here -- presumably they come from the YOLOv5 / DeepSort
# packages and from the omitted part of this file.


def load_model(weights, device, half=False):
    """Create the module-global DeepSort tracker and load the YOLOv5 model.

    Args:
        weights: Weights path(s), passed unchanged to ``attempt_load``.
        device: Device the model is mapped onto (``map_location``).
        half: When true, the loaded model is converted to FP16.

    Returns:
        The loaded YOLOv5 model.

    Side effects:
        Rebinds the module-level name ``deepsort`` to a new ``DeepSort``
        instance configured from ``Deepsort/configs/deep_sort.yaml``.
    """
    global deepsort
    cfg = get_config()
    cfg.merge_from_file('Deepsort/configs/deep_sort.yaml')

    # Only request CUDA when it exists and a non-CPU device was selected;
    # the original hard-coded True even for CPU-only runs.
    use_cuda = torch.cuda.is_available() and str(device) != 'cpu'
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=use_cuda)

    yolov5 = attempt_load(weights, map_location=device)  # load FP32 model
    if half:
        yolov5.half()  # to FP16
    return yolov5


def yolov5_detection(q: Queue, opt, save_vid=False, show_vid=False, tkinter_is=False):
    """Run YOLOv5 + DeepSort over ``opt.source`` and publish annotated frames.

    Exactly one output mode is used per frame, checked in this order:
    ``show_vid`` (OpenCV window), ``save_vid`` (video file), ``tkinter_is``
    (frame pushed onto ``q``).

    Args:
        q: Queue that receives 1280x720 RGB frames when ``tkinter_is`` is set.
        opt: Parsed options; the attributes read here are weights, source,
            imgsz, conf_thres, iou_thres, save_txt, classes, agnostic_nms,
            augment, nosave and exist_ok.
        save_vid: Write annotated frames to a video file.
        show_vid: Show annotated frames with ``cv2.imshow``; pressing ``q``
            stops the run.
        tkinter_is: Send annotated frames to ``q`` for the tkinter GUI.

    Raises:
        StopIteration: When ``q`` is pressed in the ``show_vid`` window
            (kept from the original so existing callers are unaffected).
    """
    initialize()
    weights, source, imgsz, conf_thres, iou_thres = opt.weights, opt.source, opt.imgsz, opt.conf_thres, opt.iou_thres
    save_txt, classes, agnostic_nms, augment = opt.save_txt, opt.classes, opt.agnostic_nms, opt.augment
    nosave, exist_ok = opt.nosave, opt.exist_ok
    # ... (part omitted in the article) ...
    # NOTE(review): device, webcam, half, project and ascii are used below but
    # not assigned in the visible code -- presumably set in the omitted part.

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    check_suffix(w, suffixes)  # check weights have acceptable suffix
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    # Use the same precision flag as preprocess_img() below; the original
    # passed a literal False, which leaves an FP32 model fed with FP16 input
    # whenever half is enabled.
    yolov5 = load_model(weights, device, half=half)
    stride = int(yolov5.stride.max())  # model stride
    names = yolov5.module.names if hasattr(yolov5, 'module') else yolov5.names  # get class names

    # Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)
    bs = 1  # batch_size

    # Run inference
    if pt and device.type != 'cpu':
        yolov5(torch.zeros(1, 3, *imgsz).to(device).type_as(next(yolov5.parameters())))  # run once
    dt, seen = [0.0, 0.0, 0.0], 0
    t0 = time.time()
    try:
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            # Inference
            t1 = time_sync()
            img = preprocess_img(img, device, half=half)
            pred = yolov5(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(
                pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
            t2 = time_sync()

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                p, s, im0 = path, '', im0s
                s += '%gx%g ' % img.shape[2:]  # print string
                save_path = str(Path(project) / Path(p).name)

                annotator = Annotator(im0, line_width=2, pil=not ascii)
                deepsort, annotator, s = deepsort_detection(annotator, det, img, im0, names, s)

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))

                # Stream results
                im0 = annotator.result()
                if show_vid:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                elif save_vid:
                    # Open a writer only when the output file changes.  The
                    # original reopened it for every frame and never wrote.
                    if vid_path != save_path:
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            frame_w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            frame_h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, frame_w, frame_h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_w, frame_h))
                    vid_writer.write(im0)

                elif tkinter_is:
                    # The annotated frame is BGR (OpenCV); convert to RGB and
                    # resize to the fixed 1280x720 size sent to the GUI.
                    im_rgb = cv2.cvtColor(im0, cv2.COLOR_BGR2RGB)
                    im_rgb = cv2.resize(im_rgb, (1280, 720))
                    q.put(im_rgb)
    finally:
        # Finalise the output file on normal completion and on early exit.
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()

    print('Done. (%.3fs)' % (time.time() - t0))
3.GUIツールの「tkinter」
PythonのGUIツールはたくさんあるけど、tkinterが1, 2位の人気があるっぽい。(2021/09)Google Trendsで一年間の推移を見ても全ての国で人気が衰えてないし、グラフ化してもはっきりわかる。
yolov5_tracking.pyで取得した画像をtkinterで表示。
tkinter_app.py
import queue
import time
import tkinter as tk
from multiprocessing import Queue
from tkinter import font
from tkinter import ttk

import cv2
import PIL.Image
import PIL.ImageTk


class Application(tk.Frame):
    """Tkinter window that displays frames received through a queue."""

    def __init__(self, master, q: Queue, video_source=0):
        """Build the window and start polling ``q`` for frames.

        Args:
            master: Root Tk window that hosts the widgets.
            q: Queue delivering image arrays to display.
            video_source: OpenCV capture source; it is opened only to read
                the frame width and height used for the widget sizes.
        """
        super().__init__(master)
        self.master.geometry("1280x768")
        self.master.title("Tkinter with Video Streaming and Capture")
        self.q = q
        self.font_setup()

        self.vcap = cv2.VideoCapture(video_source)
        # VideoCapture.get() returns floats; widget sizes are whole pixels.
        self.width = int(self.vcap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.vcap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        self.create_widgets()
        self.create_frame_button(self.master)

        self.photo = None      # reference kept so Tk does not drop the image
        self._image_id = None  # canvas item reused for every frame
        self._after_id = None  # pending after() callback, cancelled on close
        # Closing via the window manager runs the same cleanup as the button.
        self.master.protocol("WM_DELETE_WINDOW", self.press_close_button)

        self.delay = 15  # [ms]
        self.update()

    def font_setup(self):
        """Create the fonts used by the frames, buttons and labels."""
        self.font_frame = font.Font(family="Meiryo UI", size=15, weight="normal")
        self.font_btn_big = font.Font(family="Meiryo UI", size=20, weight="bold")
        self.font_btn_small = font.Font(family="Meiryo UI", size=15, weight="bold")
        self.font_lbl_bigger = font.Font(family="Meiryo UI", size=45, weight="bold")
        self.font_lbl_big = font.Font(family="Meiryo UI", size=30, weight="bold")
        self.font_lbl_middle = font.Font(family="Meiryo UI", size=15, weight="bold")
        self.font_lbl_small = font.Font(family="Meiryo UI", size=12, weight="normal")

    def create_widgets(self):
        """Create the 'Camera' frame and the canvas the frames are drawn on."""
        # Frame_Camera
        self.frame_cam = tk.LabelFrame(self.master, text='Camera', font=self.font_frame)
        self.frame_cam.place(x=10, y=10)
        self.frame_cam.configure(width=self.width + 30, height=self.height + 50)
        self.frame_cam.grid_propagate(0)

        # Canvas
        self.canvas1 = tk.Canvas(self.frame_cam)
        self.canvas1.configure(width=self.width, height=self.height)
        self.canvas1.grid(column=0, row=0, padx=10, pady=10)

    def create_frame_button(self, root):
        """Create the 'Control' frame holding the Close button."""
        # Frame_Button
        self.frame_btn = tk.LabelFrame(root, text='Control', font=self.font_frame)
        self.frame_btn.place(x=10, y=650)
        self.frame_btn.configure(width=self.width, height=120)
        self.frame_btn.grid_propagate(0)

        # Close
        self.btn_close = tk.Button(self.frame_btn, text='Close', font=self.font_btn_big)
        self.btn_close.configure(width=15, height=1, command=self.press_close_button)
        self.btn_close.grid(column=1, row=0, padx=20, pady=10)

    def update(self):
        """Draw the next queued frame, if any, then reschedule itself.

        The queue is read without blocking so the Tk event loop stays
        responsive while no frame is ready.

        NOTE: this name shadows ``tkinter.Misc.update``; it is kept so
        existing callers keep working.
        """
        try:
            frame = self.q.get_nowait()
        except queue.Empty:
            frame = None

        if frame is not None:
            self.photo = PIL.ImageTk.PhotoImage(image=PIL.Image.fromarray(frame))
            # self.photo -> Canvas.  One image item is reused so the canvas
            # does not gain a new item per frame.
            if self._image_id is None:
                self._image_id = self.canvas1.create_image(0, 0, image=self.photo, anchor=tk.NW)
            else:
                self.canvas1.itemconfigure(self._image_id, image=self.photo)

        self._after_id = self.master.after(self.delay, self.update)

    def press_close_button(self):
        """Stop polling, release the capture device and close the window."""
        if self._after_id is not None:
            self.master.after_cancel(self._after_id)
            self._after_id = None
        self.vcap.release()
        # Destroy last: widgets cannot be used afterwards.  The original also
        # called delete() on a non-existent ``self.canvas`` after destroy().
        self.master.destroy()
4.pythonでマルチスレッド
C++でマルチスレッドをしたことがあるけど、今回はPythonでしてみた。yolov5で取得したimgをQueue経由でtkinterに渡して出力する。
yolov5_tracking.pyのProcess部分
elif tkinter_is:
    # Excerpt from yolov5_detection(): hand the annotated frame to the GUI.
    # The frame is converted from BGR to RGB, resized to a fixed 1280x720,
    # and put on the queue that was passed in as ``q``.
    im_rgb = cv2.cvtColor(im0, cv2.COLOR_BGR2RGB)
    im_rgb = cv2.resize(im_rgb, (1280, 720))
    q.put(im_rgb)
tkinter_app.pyで、yolov5_tracking.pyがQueueに入れた画像を取得して、canvasに描画する部分
# Excerpt of two Application methods (the class header is not part of this
# listing, and __init__ is shortened to the queue-related lines).
def __init__(self,master, q:Queue, video_source=0):
    """Store the frame queue and set the window geometry and title."""
    super().__init__(master)
    self.master.geometry("1280x768")
    self.master.title("Tkinter with Video Streaming and Capture")
    self.q = q  # queue the frames are read from in update()

def update(self):
    """Take one frame from the queue, draw it, and reschedule itself."""
    # NOTE(review): q is annotated as multiprocessing.Queue, whose get()
    # waits until an item is available -- this presumably stalls the Tk
    # event loop while no frame is queued; confirm.
    frame = self.q.get()
    # Keep the PhotoImage on self so a reference to it stays alive.
    self.photo = PIL.ImageTk.PhotoImage(image = PIL.Image.fromarray(frame))
    #self.photo -> Canvas
    self.canvas1.create_image(0,0, image= self.photo, anchor = tk.NW)
    # Run update() again after self.delay milliseconds.
    self.master.after(self.delay, self.update)
最終的にtkinter_app.py とyolov5_tracking.pyを並列に動かす(実装はmultiprocessingのProcessを使うので、正確にはマルチスレッドではなくマルチプロセス)
main.py
# https://stackoverflow.com/questions/23599087/multiprocessing-python-core-foundation-error/23982497
from multiprocessing import Process, Queue


def tkapp_thread(q):
    """Run the tkinter GUI; intended as the target of a separate process.

    tkinter and the GUI module are imported inside the function, so they
    are loaded only in the process that runs it (see the link above).

    Args:
        q: Queue from which the GUI reads the frames to display.
    """
    import tkinter as tk
    # The Application class lives in tkinter_app.py.
    from tkinter_app import Application

    root = tk.Tk()
    app = Application(root, q, video_source=0)
    app.mainloop()


def yolov5_thread(q):
    """Run YOLOv5 + DeepSort detection and push annotated frames to ``q``.

    Args:
        q: Queue that receives the frames for the GUI process.
    """
    # yolov5_detection is defined in yolov5_tracking.py.
    from yolov5_tracking import yolov5_detection
    from option_parser import get_parser

    opt = get_parser()
    yolov5_detection(q, opt, save_vid=False, show_vid=False, tkinter_is=True)


def main():
    """Start the GUI and detection processes and wait for the GUI to exit."""
    q = Queue()
    p1 = Process(target=tkapp_thread, args=(q,))
    p2 = Process(target=yolov5_thread, args=(q,))
    p1.start()
    p2.start()

    p1.join()
    # With the GUI gone nothing consumes the frames, so stop the detector
    # instead of waiting on it indefinitely.
    if p2.is_alive():
        p2.terminate()
    p2.join()


if __name__ == '__main__':
    main()
5.出力結果
Yolov5 + DeepSort + multi-thread + tkinter + 動画で出力した結果はこちら
出力1
出力2
CPUのMac上だとかなり遅いけど、精度はかなりいい。
DeepSort + multi-thread + Tkinter
の組み合わせは面白かった。