YOLO是一个基于深度学习的end-to-end、real-time目标检测方法,至今已经有YOLO、YOLOv2、YOLO9000、YOLOv3 4个版本。YOLO网络由作者用C和CUDA语言写的一个卷积神经网络框架darknet实现,目前github也有tensorflow,pytorch等开源框架的复现,本文主要基于darknet源码修改实现对一个目录下的图片进行检测并保存检测结果图片和标签信息。
源码修改版本已上传至github:https://github.com/bichenghu/YOLO
YOLO测试图片有两种方式
对于单张图片:
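# 不指定输出路径
./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg
# 指定输出路径(只写文件名前缀)
./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg -out data/dog_result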
其中不指定输出路径的命令会在darknet/目录下生成predictions.jpg;指定输出路径时只需要给出文件名前缀,程序会自动加上.jpg后缀。
事实上,单张图片的测试也可以用如下通用方式:
./darknet detector test cfg/coco.data cfg/yolov3.cfg yolov3.weights /home/username/data/xxx.jpg
对于多张图片:
如果上述通用命令不指定图片路径,就能实现多张图片测试:作者写的多张图片测试是在加载一次模型后,再一次一次地输入图片路径进行测试。这样的方式似乎不太实用,一般情况下我们想对一个文件夹下的所有图片进行检测,并保存其标签信息,这就需要通过修改源码来实现。需要修改到的文件主要有:include/darknet.h、src/image.c、examples/detector.c、examples/darknet.c
examples/darknet.c
整个检测的入口为examples/darknet.c,其main函数用于判断参数,从而选择对应函数。对于detect参数,将直接调用detector.c中的test_detector函数。test_detector函数原来的声明如下:
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen);
在darknet.c第437行处,原代码 char *filename = (argc > 4) ? argv[4]: 0; 说明当参数多于4个时,默认第5个参数为测试图片路径filename,显然这样我们就无法检测文件夹下的图片了。因此为filename加一个输入标识 -input,这样后面就可以再添加输入路径idir和输出路径odir两个参数了(这里输出路径odir指存放images和labels两个子文件夹的目录)。相应地,上面test_detector的声明也要在末尾加上char *idir, char *odir两个参数,否则新的调用无法通过编译。修改后的main函数如下:
108int main(int argc, char **argv)
{
//test_resize("data/bad.jpg");
//test_box();
//test_convolutional_layer();
if(argc < 2){
fprintf(stderr, "usage: %s <function>\n", argv[0]);
return 0;
}
gpu_index = find_int_arg(argc, argv, "-i", 0);
if(find_arg(argc, argv, "-nogpu")) {
gpu_index = -1;
}
#ifndef GPU
gpu_index = -1;
#else
if(gpu_index >= 0){
cuda_set_device(gpu_index);
}
#endif
if (0 == strcmp(argv[1], "average")){
average(argc, argv);
} else if (0 == strcmp(argv[1], "yolo")){
run_yolo(argc, argv);
} else if (0 == strcmp(argv[1], "super")){
run_super(argc, argv);
} else if (0 == strcmp(argv[1], "lsd")){
run_lsd(argc, argv);
} else if (0 == strcmp(argv[1], "detector")){
run_detector(argc, argv);
} else if (0 == strcmp(argv[1], "detect")){
float thresh = find_float_arg(argc, argv, "-thresh", .5);
//char *filename = (argc > 4) ? argv[4]: 0;
char *filename = find_char_arg(argc, argv, "-input",0);
char *outfile = find_char_arg(argc, argv, "-out", 0);
char *idir = find_char_arg(argc, argv, "-idir",0);
char *odir = find_char_arg(argc,argv,"-odir",0);
int fullscreen = find_arg(argc, argv, "-fullscreen");
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen, idir, odir);
//********modified 0612********//
} else if (0 == strcmp(argv[1], "cifar")){
run_cifar(argc, argv);
} else if (0 == strcmp(argv[1], "go")){
run_go(argc, argv);
} else if (0 == strcmp(argv[1], "rnn")){
run_char_rnn(argc, argv);
} else if (0 == strcmp(argv[1], "coco")){
run_coco(argc, argv);
} else if (0 == strcmp(argv[1], "classify")){
predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
} else if (0 == strcmp(argv[1], "classifier")){
run_classifier(argc, argv);
} else if (0 == strcmp(argv[1], "regressor")){
run_regressor(argc, argv);
} else if (0 == strcmp(argv[1], "segmenter")){
run_segmenter(argc, argv);
} else if (0 == strcmp(argv[1], "art")){
run_art(argc, argv);
} else if (0 == strcmp(argv[1], "tag")){
run_tag(argc, argv);
} else if (0 == strcmp(argv[1], "3d")){
composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
} else if (0 == strcmp(argv[1], "test")){
test_resize(argv[2]);
} else if (0 == strcmp(argv[1], "captcha")){
run_captcha(argc, argv);
} else if (0 == strcmp(argv[1], "nightmare")){
run_nightmare(argc, argv);
} else if (0 == strcmp(argv[1], "rgbgr")){
rgbgr_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "reset")){
reset_normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "denormalize")){
denormalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "statistics")){
statistics_net(argv[2], argv[3]);
} else if (0 == strcmp(argv[1], "normalize")){
normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "rescale")){
rescale_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "ops")){
operations(argv[2]);
} else if (0 == strcmp(argv[1], "speed")){
speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
} else if (0 == strcmp(argv[1], "oneoff")){
oneoff(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "oneoff2")){
oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "print")){
print_weights(argv[2], argv[3], atoi(argv[4]));
} else if (0 == strcmp(argv[1], "partial")){
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "average")){
average(argc, argv);
} else if (0 == strcmp(argv[1], "visualize")){
visualize(argv[2], (argc > 3) ? argv[3] : 0);
} else if (0 == strcmp(argv[1], "mkimg")){
mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
} else if (0 == strcmp(argv[1], "imtest")){
test_resize(argv[2]);
} else {
fprintf(stderr, "Not an option: %s\n", argv[1]);
}
return 0;
}
examples/detector.c
detector.c主要修改test_detector函数,其中draw_detections_person函数由draw_detections函数修改而来,draw_detections函数定义在src/image.c中,用于对每张图片进行画框处理。在这里我只需要行人信息,因此对于draw_detections_person函数,需要用到图片路径信息和输出文件夹路径。对于单张图片,图片路径信息就是前面的filename;对于多张图片,图片路径信息为输入文件夹加上在该文件夹内遍历得到的图片名,所以修改后的test_detector函数需要添加idir、odir两个参数。
139void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen, char *idir, char *odir)
{
list *options = read_data_cfg(datacfg);
char *name_list = option_find_str(options, "names", "data/names.list");
char **names = get_labels(name_list);
image **alphabet = load_alphabet();
network *net = load_network(cfgfile, weightfile, 0);
set_batch_network(net, 1);
srand(2222222);
double time;
char buff[256];
char *input = buff;
float nms=.45;
while(1)
{
if(filename) //判断是否有filename参数
{
strncpy(input, filename, 256);
}
else
{
if(!idir || !odir) //原版测试多张图片
{
printf("Enter Image Path: ");
fflush(stdout);
input = fgets(input, 256, stdin);
if(!input)
return;
strtok(input, "\n");
}
else
{
//带indir 和 odir参数
//idir && odir
char imagepath[512];
char savedir[512];
struct dirent *imagename; //readdir return
DIR *dir;
dir = opendir(idir);
//遍历输入文件夹
while((imagename=readdir(dir))!= NULL)
{
//忽略 ./ ../目录
if(!strcmp(imagename->d_name,".")||!strcmp(imagename->d_name,".."))
continue;
sprintf(imagepath,"%s%s",idir,imagename->d_name);
image im = load_image_color(imagepath, 0, 0);
image sized = letterbox_image(im, net->w, net->h);
layer l = net->layers[net->n-1];
float *X = sized.data;
time=what_time_is_it_now();
network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", imagepath, what_time_is_it_now()-time);
int nboxes = 0;
detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
if (nms)
do_nms_sort(dets, nboxes, l.classes, nms);
//****modified0612******//
//draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
draw_detections_person(imagename->d_name, odir, im, dets, nboxes, thresh, names, alphabet, l.classes);
free_detections(dets, nboxes);
char imagesdir[512];
sprintf(imagesdir,"%s%s",odir,"images/");
sprintf(savedir,"%s%s",imagesdir,imagename->d_name);
//strcat(odir, imagename->d_name);
int k = 0;
for (k = strlen(savedir)-1; k>=0; k--)
{
if((savedir[k]!='j')&&(savedir[k]!='p')&&(savedir[k]!='g')&&(savedir[k]!='.'))
{
break;
}
else
{
savedir[k] = '\0';
}
}
save_image(im, savedir);
printf("image saved success!\n");
free_image(im);
free_image(sized);
}
closedir(dir);
break;
}
}
image im = load_image_color(input,0,0);
image sized = letterbox_image(im, net->w, net->h);
//image sized = resize_image(im, net->w, net->h);
//image sized2 = resize_max(im, net->w);
//image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
//resize_network(net, sized.w, sized.h);
layer l = net->layers[net->n-1];
float *X = sized.data;
time=what_time_is_it_now();
network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
int nboxes = 0;
detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
//printf("%d\n", nboxes);
//if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
//draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
//person detection and save labels
draw_detections_person(input, odir, im, dets, nboxes, thresh, names, alphabet, l.classes);
free_detections(dets, nboxes);
if(outfile){
save_image(im, outfile);
printf("image saved success!\n");
}
else{
save_image(im, "predictions");
#ifdef OPENCV
cvNamedWindow("predictions", CV_WINDOW_NORMAL);
if(fullscreen){
cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
}
show_image(im, "predictions");
cvWaitKey(0);
cvDestroyAllWindows();
#endif
}
free_image(im);
free_image(sized);
if (filename) break;
}
}
src/image.c
image.c主要修改draw_detections_person函数,用于对图片进行画框处理,并保存标签信息。
143void draw_detections_person(char *imagename, char *odir, image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes)
{
int i,j;
int count=0;
char *output=0;
char outdir[512];
if(!odir)
{
// 单张
output = imagename;
//xxx.jpg to xxx.txt
int k=0;
for (k = strlen(imagename)-1; k>=0; k--)
{
if((imagename[k]!='j')&&(imagename[k]!='p')&&(imagename[k]!='g')&&(imagename[k]!='.'))
{
break;
}
else
{
output[k] = '\0';
}
}
output = strcat(imagename, ".txt");
}
else
{
//多张
//output = strcat(odir,imagename);
char labelsdir[512];
sprintf(labelsdir,"%s%s", odir, "labels/");
sprintf(outdir,"%s%s", labelsdir, imagename);
int k=0;
for (k = strlen(outdir)-1; k>=0; k--)
{
if((outdir[k]!='j')&&(outdir[k]!='p')&&(outdir[k]!='g')&&(outdir[k]!='.'))
{
break;
}
else
{
outdir[k] = '\0';
}
}
output = strcat(outdir, ".txt");
}
//new xxx.txt
FILE *fp;
if ( (fp = fopen(output, "w+")) == NULL )
{
printf("wrong:\n");
}
for(i = 0; i < num; ++i)
{
char labelstr[4096] = {0};
int class = -1; //class id
for(j=0; j<classes; ++j)
{
//person filter
if(strcmp(names[j],"person") != 0)
{
continue;
}
//thresh filter
if(dets[i].prob[j]>thresh)
{
strcat(labelstr, names[j]);
class = j;
++count;
printf("%s %d:%0.f%%\n",names[j],count,dets[i].prob[j]*100);
}
else
{
strcat(labelstr, ", ");
strcat(labelstr, names[j]);
}
}
if(class >= 0)
{
//boxes width
int width = im.h * .006;
/*
if(0){
width = pow(prob, 1./2.)*10+1;
alphabet = 0;
}
*/
//printf("%d %s: %.0f%%\n", i, names[class], prob*100);
//printf("%s: %.0f%%\n", names[class], prob*100);
int offset = class*123457 % classes;
float red = get_color(2,offset,classes);
float green = get_color(1,offset,classes);
float blue = get_color(0,offset,classes);
float rgb[3];
//width = prob*20+2;
rgb[0] = red;
rgb[1] = green;
rgb[2] = blue;
box b = dets[i].bbox;
int left = (b.x-b.w/2.)*im.w;
int right = (b.x+b.w/2.)*im.w;
int top = (b.y-b.h/2.)*im.h;
int bot = (b.y+b.h/2.)*im.h;
//printf("box_axis:%f,%f,%f,%f.\n",b.x,b.y,b.w,b.h);
//printf("img_box:%d,%d,%d,%d.\n",left,top,right,bot);
if(left < 0) left = 0;
if(right > im.w-1) right = im.w-1;
if(top < 0) top = 0;
if(bot > im.h-1) bot = im.h-1;
//写入txt坐标框
printf("saved box in:%s \n",output);
fprintf(fp, "%d %d %d %d\n", left, top, right, bot);
draw_box_width(im, left, top, right, bot, width, red, green, blue);
if (alphabet)
{
image label = get_label(alphabet, labelstr, (im.h*.03)/10);
draw_label(im, top + width, left, label, rgb);
free_image(label);
}
if (dets[i].mask)
{
image mask = float_to_image(14, 14, 1, dets[i].mask);
image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h);
image tmask = threshold_image(resized_mask, .5);
embed_image(tmask, im, left, top);
free_image(mask);
free_image(resized_mask);
free_image(tmask);
}
}
}
//关闭txt文件
fclose(fp);
}
include/darknet.h
添加draw_detections_person函数声明:
void draw_detections_person(char *imagename, char *odir, image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes);
测试命令
对于单张图片,可以用如下命令:
./darknet detect cfg/yolov3.cfg yolov3.weights -input /home/username/data/xxx.jpg -out test
对于文件夹内多张图片,可以用如下命令:
./darknet detect cfg/yolov3.cfg yolov3.weights -idir /home/username/data/imagedir/ -odir /home/username/data/results/
当然,在results目录下要提前建好images和labels文件夹。