TensorRT-classify

环境

caffe, Ubuntu16.04, python2.7

编译caffe

这里需要注意一点:训练数据直接用 python 读取图片,而不是官方推荐的 lmdb 数据格式,这样更容易定位问题

定义数据读取层,类名为 classifyInput(文件名需与下文 prototxt 中的 module: 'classify-input' 对应,即 classify-input.py):


import caffe
import numpy as np
import yaml
import os
import cPickle
import cv2

class classifyInput(caffe.Layer):
    """Caffe Python data layer producing image batches and class labels.

    The layer parameter string is YAML with a single key, ``cfg_path``, that
    points to a text file of ``key:value`` lines.  The keys read are
    ``batch_size``, ``num_classes``, ``train_path``, ``test_path``,
    ``size_w``, ``size_h``, ``flip`` and ``mean_value``.  ``train_path`` and
    ``test_path`` are list files with one ``<label> <image path>`` entry per
    line.

    Tops: ``top[0]`` is the image blob (N, 3, size_h, size_w) and ``top[1]``
    the label blob (N,).
    """

    def load_data_annotations(self, index):
        """Parse one list-file line ``<label> <image path>`` into a record.

        Only the first space separates the fields, so image paths that
        themselves contain spaces are kept intact.
        """
        label, img_path = index.split(' ', 1)
        return {'label': label, 'image': img_path, 'flipped': False}

    def get_rand_idx(self):
        """Draw a new random order over the training records and rewind.

        A plain permutation is used; the previous pair-wise shuffle reshaped
        the indices to (-1, 2) and therefore raised for an odd number of
        training records.
        """
        self._perm = np.random.permutation(len(self._train_roidb))
        self._cur = 0

    def _read_list_file(self, path):
        """Return the non-empty, stripped lines of a list file."""
        with open(path) as fp:
            stripped = (line.strip() for line in fp)
            return [line for line in stripped if line]

    def setup(self, bottom, top):
        """Read the configuration and list files and shape the top blobs."""
        # Depending on the Caffe build the attribute is named `param_str_`
        # or `param_str`; accept either.
        raw_params = getattr(self, 'param_str_', None)
        if raw_params is None:
            raw_params = self.param_str
        # safe_load: the parameter string is plain data, no need to allow
        # arbitrary object construction.
        layer_params = yaml.safe_load(raw_params)
        self._cfg_path = layer_params['cfg_path']

        cfg_dict = {}
        with open(self._cfg_path) as fp:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                # Split on the first ':' only so values may contain ':'.
                key, value = line.split(':', 1)
                cfg_dict[key.strip()] = value.strip()
        self._batch_size = int(cfg_dict['batch_size'])
        self._num_classes = int(cfg_dict['num_classes'])
        self._train_path = cfg_dict['train_path']
        self._test_path = cfg_dict['test_path']
        self._size_w = int(cfg_dict['size_w'])
        self._size_h = int(cfg_dict['size_h'])
        self._flip = int(cfg_dict['flip'])
        self._mean_value = int(cfg_dict['mean_value'])

        self._train_image_index = self._read_list_file(self._train_path)
        self._test_image_index = self._read_list_file(self._test_path)

        self._test_roidb = [self.load_data_annotations(index)
                            for index in self._test_image_index]
        self._train_roidb = [self.load_data_annotations(index)
                             for index in self._train_image_index]
        if self._flip == 1:
            # Append one horizontally flipped copy of every training record.
            flipped = [{'label': entry['label'],
                        'image': entry['image'],
                        'flipped': True}
                       for entry in self._train_roidb]
            self._train_roidb.extend(flipped)
            # Doubled exactly once (it used to be doubled once per record).
            self._train_image_index = self._train_image_index * 2

        self._name_to_top_map = {'data': 0, 'label': 1}
        # Placeholder shapes; forward() reshapes to the real batch size.
        top[0].reshape(1, 3, self._size_h, self._size_w)
        top[1].reshape(1,)
        self.get_rand_idx()
        self._test_cur = 0

    def _next_minibatch(self):
        """Return the records of the next batch for the current phase."""
        if self.phase == caffe.TRAIN:
            if self._cur + self._batch_size > len(self._train_roidb):
                # Not enough unseen records left: start a new shuffled pass.
                self.get_rand_idx()
            db_inds = self._perm[self._cur:self._cur + self._batch_size]
            self._cur += self._batch_size
            return [self._train_roidb[i] for i in db_inds]

        test_image_num = len(self._test_roidb)
        if test_image_num == 0:
            raise ValueError('no test samples listed in %s' % self._test_path)
        # Walk the test list cyclically so every batch is full, even when
        # batch_size exceeds the number of test records.
        batch = [self._test_roidb[(self._test_cur + k) % test_image_num]
                 for k in range(self._batch_size)]
        self._test_cur = (self._test_cur + self._batch_size) % test_image_num
        return batch

    def _load_image(self, entry):
        """Load one record's image as a float32 (3, size_h, size_w) array."""
        im = cv2.imread(entry['image'])
        if im is None:
            raise IOError('cannot read image: %s' % entry['image'])
        im = cv2.resize(im, (self._size_w, self._size_h),
                        interpolation=cv2.INTER_LINEAR)
        if entry['flipped']:
            im = im[:, ::-1, :]
        im = im.astype(np.float32, copy=False)
        if self._mean_value == 1:
            # Per-channel means, subtracted before the channel reversal below.
            pixel_means = np.array([[[103.52, 116.28, 123.675]]])
            im -= pixel_means
        else:
            im = im / 255.0
        # Reverse the channel axis, then reorder HWC -> CHW.
        return im[:, :, ::-1].transpose([2, 0, 1])

    def forward(self, bottom, top):
        """Load the next batch and copy images and labels into the tops."""
        minibatch_db = self._next_minibatch()

        im_blob = np.zeros((len(minibatch_db), 3, self._size_h, self._size_w),
                           dtype=np.float32)
        im_labels = np.zeros((len(minibatch_db),), dtype=np.float32)
        for i, entry in enumerate(minibatch_db):
            im_blob[i, :] = self._load_image(entry)
            im_labels[i] = float(entry['label'])

        for blob_name, blob in (('data', im_blob), ('label', im_labels)):
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
继承 caffe.Layer,重写 setup() 和 forward() 方法;定义完成后将文件放到 `path/caffe-master/python` 文件夹下

#### 添加环境变量

export LD_LIBRARY_PATH=/usr/local/cuda-8.0/lib64:/usr/local/lib:/home/tongbei/software/anaconda2/lib/

#### 制作数据

将数据标注成pascal voc的格式,再使用如下脚本随机切取目标

import os
import cv2
import numpy.random as npr
import numpy as np
import xml.dom.minidom

def IoU(box, boxes):
    """Intersection-over-union of one box against an array of boxes.

    Boxes are in centre format: ``box`` is ``(cx, cy, w, h)``; each row of
    ``boxes`` holds ``(cx, cy, w, h)`` in columns 1..4 (column 0 is not
    read).

    Returns a 1-D float array with one IoU value per row of ``boxes``.
    """
    # Work in floating point so integer inputs are not truncated by `/ 2`.
    box = np.asarray(box, dtype=np.float64)
    boxes = np.asarray(boxes, dtype=np.float64)

    box_area = box[2] * box[3]
    area = boxes[:, 3] * boxes[:, 4]

    # Corners of the intersection rectangle.
    xx1 = np.maximum(box[0] - box[2] / 2, boxes[:, 1] - boxes[:, 3] / 2)
    yy1 = np.maximum(box[1] - box[3] / 2, boxes[:, 2] - boxes[:, 4] / 2)
    xx2 = np.minimum(box[0] + box[2] / 2, boxes[:, 1] + boxes[:, 3] / 2)
    yy2 = np.minimum(box[1] + box[3] / 2, boxes[:, 2] + boxes[:, 4] / 2)

    # Clamp at zero: non-overlapping boxes have an empty intersection.
    w = np.maximum(0, xx2 - xx1)
    h = np.maximum(0, yy2 - yy1)

    inter = w * h
    ovr = inter / (box_area + area - inter)
    return ovr

def iou(box1, box2):
    """Intersection-over-union of two corner-format boxes.

    Each box is an indexable ``(x1, y1, x2, y2)``.  Returns 0 when the boxes
    do not overlap or when their union has no area; otherwise returns the
    ratio as a float.
    """
    s1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    s2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    x_left = max(box1[0], box2[0])
    y_left = max(box1[1], box2[1])
    x_right = min(box1[2], box2[2])
    y_right = min(box1[3], box2[3])
    if x_left > x_right or y_left > y_right:
        return 0

    s_delta = (x_right - x_left) * (y_right - y_left)
    union = s1 + s2 - s_delta
    if union <= 0:
        # Degenerate (zero-area) boxes: avoid dividing by zero.
        return 0
    # float() forces true division for integer inputs under Python 2.
    return float(s_delta) / union

# Root directory that is walked for source images.
filepath = "F:/data/classify/晚上数据/image/"

# Output directories for the training split, one per class plus negatives.
green_light_path_train = "F:/data/VOCdevkit/light_9_3/train/green/"
red_light_path_train = "F:/data/VOCdevkit/light_9_3/train/red/"
yellow_light_path_train = "F:/data/VOCdevkit/light_9_3/train/yellow/"
black_light_path_train = "F:/data/VOCdevkit/light_9_3/train/black/"
negative_path_train = "F:/data/VOCdevkit/light_9_3/train/neg/"

# Output directories for the validation split.
green_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/green/"
red_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/red/"
yellow_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/yellow/"
black_light_path_valid = "F:/data/VOCdevkit/light_9_3/valid/black/"
negative_path_valid = "F:/data/VOCdevkit/light_9_3/valid/neg/"

#label:4(open) label:5(close)

min_w = 20
min_h = 20
# Running counters: negative crops written, positive crops written, and
# images processed.
n_idx = 0
p_idx = 0
img_num = 0

# Crop random positive and negative samples around every annotated object.
#
# For each .jpg under `filepath` the annotation XML is located by replacing
# 'image' with 'label' in the path and swapping the extension for '.xml'.
# Up to three negative crops (IoU < 0.3 with every annotated box) and up to
# three positive crops (IoU >= 0.65 with the object) are written per object.

# Output directories per annotated class name: (train_dir, valid_dir).
class_dirs = {
    "red_light": (red_light_path_train, red_light_path_valid),
    "yellow_light": (yellow_light_path_train, yellow_light_path_valid),
    "green_light": (green_light_path_train, green_light_path_valid),
    "black_light": (black_light_path_train, black_light_path_valid),
}
negative_dirs = (negative_path_train, negative_path_valid)
samples_per_object = 3
# Upper bound on random attempts per object, so a box for which no valid
# crop exists cannot stall the script.
max_tries = 1000

# Create the output directories up front so no crop is written to a
# directory that does not exist.
for dir_pair in list(class_dirs.values()) + [negative_dirs]:
    for out_dir in dir_pair:
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

for root, dirs, files in os.walk(filepath):
    valid_count = 0
    for filename in files:
        valid_count += 1
        if os.path.splitext(filename)[1].lower() != ".jpg":
            continue
        # Every 10th file of a directory goes to the validation split
        # (index 1 of each directory pair), the rest to training (index 0).
        split = 1 if valid_count % 10 == 0 else 0

        imgpath = os.path.join(root, filename)
        # Swap only the extension; a blanket replace('jpg', 'xml') would also
        # rewrite 'jpg' elsewhere in the path and miss upper-case extensions.
        annopath = os.path.splitext(imgpath.replace('image', 'label'))[0] + '.xml'
        if not os.path.isfile(annopath):
            print("skip %s: annotation %s not found" % (imgpath, annopath))
            continue

        # Each entry: [xmin, ymin, xmax, ymax, class_name], all as text.
        door_list = []
        annotation = xml.dom.minidom.parse(annopath).documentElement
        for obj in annotation.getElementsByTagName("object"):
            name_data = obj.getElementsByTagName("name")[0].childNodes[0].data
            bndbox = obj.getElementsByTagName("bndbox")[0]
            coords = [bndbox.getElementsByTagName(tag)[0].childNodes[0].data
                      for tag in ("xmin", "ymin", "xmax", "ymax")]
            door_list.append(coords + [name_data])
        img_num += 1

        if door_list:
            # fromfile + imdecode instead of imread, as in the original script.
            img = cv2.imdecode(np.fromfile(imgpath, dtype=np.uint8), -1)
            if img is None:
                print("skip %s: cannot decode image" % imgpath)
                continue
            w, h = img.shape[1], img.shape[0]
            gt_boxes = [[float(v) for v in entry[:4]] for entry in door_list]

            for i, entry in enumerate(door_list):
                x_left, y_top, x_right, y_bottom = [int(v) for v in gt_boxes[i]]
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                if crop_w < 1 or crop_h < 1:
                    # Malformed annotation (max < min): nothing to crop.
                    continue

                # Crop sizes are drawn from [0.8, 1.25) times the object size;
                # bounds are forced to valid, non-empty integer ranges.
                w_lo = max(1, int(crop_w * 0.8))
                w_hi = max(w_lo + 1, int(np.ceil(1.25 * crop_w)))
                h_lo = max(1, int(crop_h * 0.8))
                h_hi = max(h_lo + 1, int(np.ceil(1.25 * crop_h)))

                neg_num = 0
                tries = 0
                while neg_num < samples_per_object and tries < max_tries:
                    tries += 1
                    neg_w = npr.randint(w_lo, w_hi)
                    neg_h = npr.randint(h_lo, h_hi)
                    if neg_w >= w or neg_h >= h:
                        # Crop would not fit inside the image.
                        continue
                    neg_x = min(w - neg_w - 1, npr.randint(0, w))
                    neg_y = min(h - neg_h - 1, npr.randint(0, h))
                    crop_box = [neg_x, neg_y, neg_x + neg_w, neg_y + neg_h]
                    # A negative must stay clear of every annotated object,
                    # not only the one currently being processed.
                    if max(iou(crop_box, gt) for gt in gt_boxes) < 0.3:
                        save_file = os.path.join(negative_dirs[split], "%s.jpg" % n_idx)
                        cropped_im = img[neg_y:neg_y + neg_h, neg_x:neg_x + neg_w]
                        cv2.imwrite(save_file, cropped_im)
                        n_idx += 1
                        neg_num += 1
                if neg_num < samples_per_object:
                    print("%s: only %s negative crops for object %s" % (imgpath, neg_num, i))

                label_dirs = class_dirs.get(entry[-1])
                if label_dirs is None:
                    # Unknown class name: there is no output directory for it.
                    print("%s: unknown class %s, no positive crops" % (imgpath, entry[-1]))
                    continue

                # Maximum shift of the crop centre: 20% of the object size.
                dx_max = int(crop_w * 0.2)
                dy_max = int(crop_h * 0.2)
                pos_num = 0
                tries = 0
                while pos_num < samples_per_object and tries < max_tries:
                    tries += 1
                    pos_w = npr.randint(w_lo, w_hi)
                    pos_h = npr.randint(h_lo, h_hi)
                    delta_x = npr.randint(-dx_max, dx_max + 1)
                    delta_y = npr.randint(-dy_max, dy_max + 1)
                    # Integer pixel coordinates, so the IoU is computed on
                    # exactly the region that is cropped.
                    pos_x = max(x_left + crop_w // 2 + delta_x - pos_w // 2, 0)
                    pos_y = max(y_top + crop_h // 2 + delta_y - pos_h // 2, 0)
                    if pos_x + pos_w > w or pos_y + pos_h > h:
                        continue
                    crop_box = [pos_x, pos_y, pos_x + pos_w, pos_y + pos_h]
                    if iou(crop_box, gt_boxes[i]) >= 0.65:
                        save_file = os.path.join(label_dirs[split], "%s.jpg" % p_idx)
                        cropped_im = img[pos_y:pos_y + pos_h, pos_x:pos_x + pos_w]
                        cv2.imwrite(save_file, cropped_im)
                        p_idx += 1
                        pos_num += 1
                if pos_num < samples_per_object:
                    print("%s: only %s positive crops for object %s" % (imgpath, pos_num, i))

        print("%s images done, pos: %s neg: %s" % (img_num, p_idx, n_idx))
每个目标在扩大20%的范围内随机裁剪3个正样本和3个负样本

#### cfg文件

定义配置文件,描述数据路径,训练的batch,类别数量,图片resize高宽

batch_size:256
num_classes:5
train_path:/home/huanghanqing/data/light_9_3/light_9_3_train.txt
test_path:/home/huanghanqing/data/light_9_3/light_9_3_valid.txt
size_w:96
size_h:96
flip:0
mean_value:0

#### solver文件

net: "door-classify-train-ohem.prototxt"

#test_net: "door-classify-train-ohem.prototxt"
test_iter: 13
test_interval: 20

#test_initialization: false
display: 20
average_loss: 20
lr_policy: "multifixed"

#stepsize: 1000

#gamma: 0.1

#base_lr: 0.0001
stagelr: 0.0001
stagelr: 0.001
stagelr: 0.0001
stagelr: 0.00001
stageiter: 500
stageiter: 1500
stageiter: 3000
stageiter: 4500
max_iter: 5000
iter_size: 2
momentum: 0.9
weight_decay: 0.0005
snapshot: 1000
snapshot_prefix: "../output/light_9_3/door-classifyxx"
solver_mode: GPU

#### 模型文件

自定义的数据读取层如下:

name: "YOLOV3-TINY"
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'label'
  include {
    phase: TRAIN
  }
  python_param {
    module: 'classify-input'
    layer: 'classifyInput'
    param_str: "'cfg_path': ./cfg.txt"
  }
}

layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'
  top: 'label'
  include {
    phase: TEST
  }
  python_param {
    module: 'classify-input'
    layer: 'classifyInput'
    param_str: "'cfg_path': ./cfg.txt"
  }
}

除了最后一层 loss 之外,网络的其余部分与推理时使用的模型相同

#### 训练

cd path/light_classify/cfg
sh train-door-classify.sh

#### 测试

python测试:

cd path/light_classify
python test-classify-light.py

TensorRT

使用docker,运行TensorRT-caffe项目