DeepLearning: 五月 2018

2018年5月25日星期五

keras-yolo3训练自己数据集

1）报错TypeError: 'range' object is not callable ：需要重启kernel
2）报错No such file or directory:'/Users/sisyphus/kerasyolo3/VOCdevkit/VOC2007/JPEGImages/.DS_S.jpgg'：删除JPEGImages和Annotations文件夹中的.DS_Store文件

2018年5月22日星期二

#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Run a YOLO_v3 style detection model on test images.
"""
import cv2
import json
import colorsys
import os
import random
from timeit import time
from timeit import default_timer as timer ### to calculate FPS

import numpy as np
from keras import backend as K
from keras.models import load_model
from PIL import Image, ImageFont, ImageDraw

from yolo3.model import yolo_eval
from yolo3.utils import letterbox_image

class YOLO(object):
    """YOLOv3 detector that draws detections on images and dumps them as JSON.

    Paths, thresholds and the network input size are fixed in ``__init__``.
    ``generate`` loads the Keras model and builds the output tensors that
    ``detect_image`` and ``detect_image_video`` evaluate in the Keras
    backend session.
    """

    # Root directory under which the 'out' and 'video_out' result folders live.
    output_root = "/Users/sisyphus/keras-yolo3/"

    def __init__(self):
        self.model_path = 'model_data/yolo.h5'
        self.anchors_path = 'model_data/yolo_anchors.txt'
        # self.classes_path = 'model_data/coco_classes.txt'
        self.classes_path = 'model_data/voc_6c_classes.txt'
        self.score = 0.3  # forwarded to yolo_eval as score_threshold
        self.iou = 0.5  # forwarded to yolo_eval as iou_threshold
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.model_image_size = (416, 416)  # fixed size or (None, None)
        self.is_fixed_size = self.model_image_size != (None, None)
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        """Return the class names, one per line of ``self.classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    def _get_anchors(self):
        """Return the anchors from the first line of ``self.anchors_path``.

        The line is a comma-separated list of numbers; the result is a
        NumPy array reshaped to two columns.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape(-1, 2)

    def generate(self):
        """Load the model, build the colour table and create the output tensors.

        Returns:
            The ``(boxes, scores, classes)`` tensors produced by ``yolo_eval``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'

        self.yolo_model = load_model(model_path, compile=False)
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
        random.seed(10101)  # Fixed seed for consistent colors across runs.
        random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
        random.seed(None)  # Reset seed to default.

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2, ))
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                                           len(self.class_names), self.input_image_shape,
                                           score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    def _predict(self, image):
        """Letterbox ``image``, run the network and return (boxes, scores, classes)."""
        if self.is_fixed_size:
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        return out_boxes, out_scores, out_classes

    def _annotate(self, image, out_boxes, out_scores, out_classes):
        """Draw every detection on ``image`` and return the JSON-ready records."""
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        results = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            # NOTE(review): ImageDraw.textsize is not available in recent Pillow
            # releases - confirm the Pillow version this project pins.
            label_size = draw.textsize(label, font)

            # Round the box to integer pixels and clip it to the image.
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            area = (bottom - top) * (right - left)
            results.append({"label": predicted_class,
                            "confidence": float('%.2f' % score),
                            "topleft": {"x": float(left), "y": float(top)},
                            "bottomright": {"x": float(right), "y": float(bottom)},
                            "area": float(area)})

            # Put the label above the box when it fits, otherwise just inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            # A thick outline is drawn as `thickness` nested 1-pixel rectangles.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return results

    @staticmethod
    def _write_json(results, json_path):
        """Write ``results`` to ``json_path``; an empty list becomes one 'Normal' record."""
        nothing_found = not results
        if nothing_found:
            payload = [{"label": "Normal",
                        "confidence": float(0),
                        "topleft": {"x": float(0), "y": float(0)},
                        "bottomright": {"x": float(0), "y": float(0)},
                        "area": float(0)}]
        else:
            payload = results
            print(json_path)
        print('\n')
        with open(json_path, 'w') as f:
            f.write(json.dumps(payload))
        if nothing_found:
            print('Normal\n')

    def detect_image(self, pathall):
        """Detect objects in the image file ``pathall``.

        The detections are drawn on the image and written as JSON to
        ``<output_root>/out/<image stem>.json``.

        Args:
            pathall: path of the image file to open.

        Returns:
            The PIL image with the detections drawn on it.
        """
        image = Image.open(pathall)
        start = time.time()
        # imgcv = cv2.imread(image)

        out_boxes, out_scores, out_classes = self._predict(image)
        results = self._annotate(image, out_boxes, out_scores, out_classes)

        end = time.time()
        print(end - start)

        print(results)
        outfolder = os.path.join(self.output_root, 'out')
        os.makedirs(outfolder, exist_ok=True)  # open() below fails if the folder is missing
        img_name = os.path.join(outfolder, os.path.basename(pathall))
        # cv2.imwrite(img_name, image)
        self._write_json(results, os.path.splitext(img_name)[0] + ".json")
        return image

    def detect_image_video(self, image, curr_fps):
        """Detect objects in one already-loaded frame.

        The detections are written to ``<output_root>/video_out/<curr_fps>.json``
        and the annotated frame is saved next to it as ``<curr_fps>.jpg``.

        Args:
            image: PIL image of the frame; it is drawn on in place.
            curr_fps: value used (via ``str``) as the output file stem.

        Returns:
            The same PIL image with the detections drawn on it.
        """
        start = time.time()

        out_boxes, out_scores, out_classes = self._predict(image)
        results = self._annotate(image, out_boxes, out_scores, out_classes)

        end = time.time()
        print(end - start)

        print(results)
        outfolder = os.path.join(self.output_root, 'video_out/')
        os.makedirs(outfolder, exist_ok=True)  # open()/save() below fail if it is missing
        file_stem = outfolder + str(curr_fps)
        self._write_json(results, file_stem + ".json")
        image.save(file_stem + ".jpg")
        return image

    def close_session(self):
        """Close the Keras backend session held by this detector."""
        self.sess.close()

def detect_video(yolo, video_path):
    """Run ``yolo.detect_image_video`` on every frame of a video and show the result.

    Frames are read until the stream yields no frame or 'q' is pressed in
    the preview window. The capture, the preview windows and the detector
    session are released on the way out, including when a frame fails.

    Args:
        yolo: detector providing ``detect_image_video(image, index)`` and
            ``close_session()``.
        video_path: source handed to ``cv2.VideoCapture``.

    Raises:
        IOError: if the capture cannot be opened.
    """
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    accum_time = 0
    # Never reset below, so this is a running frame counter; it is also the
    # index passed to detect_image_video.
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    try:
        while True:
            return_value, frame = vid.read()
            if not return_value:
                # No frame was returned (e.g. end of stream): stop instead of
                # handing None to Image.fromarray.
                break
            # frame = np.asarray(frame)
            # NOTE(review): OpenCV normally delivers BGR frames while PIL assumes
            # RGB; no conversion is done here - confirm that is intended.
            image = Image.fromarray(frame)
            image = yolo.detect_image_video(image, curr_fps)
            result = np.asarray(image)
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            print(str(curr_fps) + "########")
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                # curr_fps = 0
            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_NORMAL)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        vid.release()
        cv2.destroyAllWindows()
        yolo.close_session()

#def detect_img(yolo):
# while True:
# img = input('Input image filename:')
## img = "/Users/sisyphus/keras-yolo3/sample_img_test30/000271.jpg"
# try:
# image = Image.open(img)
# except:
# print('Open Error! Try again!')
# continue
# else:
# r_image = yolo.detect_image(image)
# r_image.show()
# yolo.close_session()

#####################################单张图片预测
#def detect_img(yolo):
# img = "/Users/sisyphus/keras-yolo3/sample_img_test30/000271.jpg"
# image = Image.open(img)
# r_image = yolo.detect_image(image)
# r_image.show()

####################################文件夹内所有图片预测
def detect_img(yolo, path="/Users/sisyphus/keras-yolo3/sample_img_test1", outfolder=None):
    """Run ``yolo.detect_image`` on every file in ``path`` and save the results.

    Hidden entries (such as macOS ``.DS_Store``) and sub-directories are
    skipped so they are never handed to the detector.

    Args:
        yolo: detector whose ``detect_image(file_path)`` returns an object
            with a ``save(path)`` method.
        path: folder containing the input images.
        outfolder: folder the annotated images are saved to; defaults to
            ``/Users/sisyphus/keras-yolo3/out``. It is created if missing.
    """
    if outfolder is None:
        imgdir = "/Users/sisyphus/keras-yolo3/"
        outfolder = os.path.join(imgdir, 'out')
    os.makedirs(outfolder, exist_ok=True)
    for xd in sorted(os.listdir(path)):
        pathall = os.path.join(path, xd)
        if xd.startswith('.') or not os.path.isfile(pathall):
            continue
        print(xd)
        print(pathall)
        r_image = yolo.detect_image(pathall)
        # r_image.show()
        img_name = os.path.join(outfolder, os.path.basename(pathall))
        r_image.save(img_name)

if __name__ == '__main__':
    # Script entry point: build a detector and run it over the sample folder.
    detect_img(YOLO())

2018年5月17日星期四

Keras-retinanet训练自己数据集

VOC文件夹ImageSets/Main中txt文件只有图片名称，没有路径没有后缀。

keras-retinanet/keras_retinanet/preprocessing/pascal_voc.py中修改：

#voc_classes = {
# 'aeroplane' : 0,
# 'bicycle' : 1,
# 'bird' : 2,
# 'boat' : 3,
# 'bottle' : 4,
# 'bus' : 5,
# 'car' : 6,
# 'cat' : 7,
# 'chair' : 8,
# 'cow' : 9,
# 'diningtable' : 10,
# 'dog' : 11,
# 'horse' : 12,
# 'motorbike' : 13,
# 'person' : 14,
# 'pottedplant' : 15,
# 'sheep' : 16,
# 'sofa' : 17,
# 'train' : 18,
# 'tvmonitor' : 19
#}

# Class name -> integer label for the six classes of the custom dataset,
# numbered from 0 in the order listed.
voc_classes = {
    class_name: label
    for label, class_name in enumerate((
        'crazing',
        'inclusion',
        'patches',
        'pitted_surface',
        'rolled-in_scale',
        'scratches',
    ))
}

运行命令

keras_retinanet/bin/train.py pascal （绝对路径）/VOCdevkit/VOC2007

最好采用csv格式训练


 python3 keras_retinanet/bin/train.py csv ./CSV/annotations.csv ./CSV/classes.csv

yolov3 darknet GPU训练makefile配置

makefile修改

GPU=1

CUDNN=1

OPENCV=1

NVCC=/usr/local/cuda-8.0/bin/nvcc

编译命令：

make -j16

2018年5月16日星期三

删除Mac文件夹中的.DS_Store文件

import os, sys;

def walk(path):
    """Recursively delete every ``.DS_Store`` file found under ``path``.

    Prints one line per directory entered and per entry inspected.

    Args:
        path: directory to clean.

    Returns:
        int: number of ``.DS_Store`` files removed under ``path``.
    """
    print("cd directory:" + path)

    removed = 0
    for item in os.listdir(path):
        full_path = os.path.join(path, item)
        if item == '.DS_Store':
            print("find file .Ds_Store")
            os.remove(full_path)
            removed += 1
        elif os.path.isdir(full_path):
            print(" " + full_path + " is directory")
            removed += walk(full_path)
        else:
            print(" " + full_path + " is file")
    return removed


if __name__ == '__main__':
    # Other folders this script has been pointed at:
    #   /Users/sisyphus/darkflow/VOCtest2018/testJPEG/
    #   /Users/sisyphus/darkflow/VOCtest2018/Annotations270AugCon
    #   /Users/sisyphus/SSD-Tensorflow/VOCtest2018/Annotations270Enh
    #   /Users/sisyphus/darkflow/VOCtest2018/AnnotationTrainAugEnh
    #   /Users/sisyphus/Fromhuanxian/NEU-DET 2/IMG_2
    target_dir = "/Users/sisyphus/darkflow/VOCtest2018/JPEGImages270AugCon"

    count = walk(target_dir)
    print("\ntotal number:" + str(count))

2018年5月15日星期二

Yolov2计算自己数据集上anchors

1. 首先生成训练集xml转txt文件：
import os
import random

# Fraction of all annotations that goes to trainval, and fraction of trainval
# that goes to train; the remainder of each goes to test / val respectively.
# NOTE(review): with trainval_percent = 0.0 the trainval sample is empty, so
# every image path ends up in _test.txt - confirm these are the intended values.
trainval_percent = 0.0
train_percent = 1

# Class names of the dataset (not used by this script).
classes = ['crazing', 'inclusion', 'patches', 'pitted_surface',
           'rolled-in_scale', 'scratches']

xmlfilepath = '/Users/sisyphus/darkflow/VOC2018/Annotations/'
txtsavepath = '/Users/sisyphus/darkflow/VOC2018/ImageSets/Main/'
jpegpath = '/Users/sisyphus/darkflow/VOC2018/JPEGImages/'

# Keep only annotation files: a stray entry such as .DS_Store would otherwise
# be written out as a bogus image path ('.DS_S.jpg').
total_xml = [entry for entry in os.listdir(xmlfilepath) if entry.endswith('.xml')]

num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)
# Sets make the per-image membership tests below O(1).
trainval_set = set(trainval)
train_set = set(train)

# NOTE(review): the list files are written with a leading underscore
# (e.g. _train.txt); confirm that matches the file name consumers expect.
with open(txtsavepath + '_trainval.txt', 'w') as ftrainval, \
        open(txtsavepath + '_test.txt', 'w') as ftest, \
        open(txtsavepath + '_train.txt', 'w') as ftrain, \
        open(txtsavepath + '_val.txt', 'w') as fval:
    for i in indices:
        # One absolute .jpg path per annotation file, newline-terminated.
        name = jpegpath + os.path.splitext(total_xml[i])[0] + '.jpg' + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)

txt文件中类似：
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000191.jpg
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000185.jpg
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000813.jpg
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000807.jpg
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000152.jpg
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000634.jpg
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000620.jpg
/Users/sisyphus/darkflow/VOC2018/JPEGImages/000146.jpg
...

2. 这样就可以通过kmeans方法计算anchors：
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import numpy as np
import os
import random
from tqdm import tqdm
import sklearn.cluster as cluster

def iou(x, centroids):
    """Return the IoU of one (w, h) box against every (w, h) centroid.

    The formulas compare widths and heights only, i.e. as if the box and
    the centroid shared a corner.

    Args:
        x: a ``(w, h)`` pair.
        centroids: iterable of ``(w, h)`` pairs.

    Returns:
        numpy.ndarray: one IoU value per centroid, in order.
    """
    w, h = x  # loop-invariant, so unpack once
    dists = []
    for centroid in centroids:
        c_w, c_h = centroid
        if c_w >= w and c_h >= h:
            # box lies inside the centroid
            dist = w * h / (c_w * c_h)
        elif c_w >= w and c_h <= h:
            # centroid is wider but not taller
            dist = w * c_h / (w * h + (c_w - w) * c_h)
        elif c_w <= w and c_h >= h:
            # centroid is taller but not wider
            dist = c_w * h / (w * h + c_w * (c_h - h))
        else:  # means both w,h are bigger than c_w and c_h respectively
            dist = (c_w * c_h) / (w * h)
        dists.append(dist)
    return np.array(dists)

def avg_iou(x, centroids):
    """Return the mean, over all rows of ``x``, of each row's best IoU with any centroid.

    Args:
        x: 2-D array whose rows are passed to ``iou`` as boxes.
        centroids: centroids passed to ``iou``.
    """
    n, _ = x.shape
    # iou() returns one value per centroid; keep the best match for each box.
    sums = 0.
    for box in x:
        sums += max(iou(box, centroids))
    return sums / n

def write_anchors_to_file(centroids, distance, anchor_file, input_size=416, stride=32):
    """Scale the centroids, print a summary and write the anchors to a file.

    The file receives the scaled anchors as one comma-separated line,
    followed by ``distance`` formatted with ``%f`` on its own line.

    Args:
        centroids: array of cluster centres.
        distance: average IoU reported next to the anchors.
        anchor_file: path of the file to write.
        input_size: numerator of the scale factor (default 416).
        stride: denominator of the scale factor (default 32).
    """
    # 416 / 32 = 13. NOTE(review): presumably this converts box sizes that are
    # normalized to the image into grid-cell units for a 416-pixel input with
    # a 32x downsampling network - confirm against the training config.
    anchors = centroids * input_size / stride
    anchors = [str(i) for i in anchors.ravel()]
    print(
        "\n",
        "Cluster Result:\n",
        "Clusters:", len(centroids), "\n",
        "Average IoU:", distance, "\n",
        "Anchors:\n",
        ", ".join(anchors)
    )

    with open(anchor_file, 'w') as f:
        f.write(", ".join(anchors))
        f.write('\n%f\n' % distance)

def k_means(x, n_clusters, eps):
    """Cluster the rows of ``x`` with k-means, using ``1 - iou`` as the distance.

    Args:
        x: 2-D array of samples.
        n_clusters: number of centroids; the initial ones are rows of ``x``
            picked at random (possibly with repeats).
        eps: stop when the summed absolute change of the distance matrix
            between two iterations drops below this value.

    Returns:
        numpy.ndarray: the centroids once converged or after more than
        1000 iterations.
    """
    init_index = [random.randrange(x.shape[0]) for _ in range(n_clusters)]
    centroids = x[init_index]

    old_d = []
    iterations = 0
    diff = 1e10
    c, dim = centroids.shape

    while True:
        iterations += 1
        d = np.array([1 - iou(i, centroids) for i in x])
        if len(old_d) > 0:
            diff = np.sum(np.abs(d - old_d))

        print('diff = %f' % diff)

        if diff < eps or iterations > 1000:
            print("Number of iterations took = %d" % iterations)
            print("Centroids = ", centroids)
            return centroids

        # assign samples to centroids
        belonging_centroids = np.argmin(d, axis=1)

        # calculate the new centroids
        # (builtin float: the np.float alias no longer exists in current NumPy)
        centroid_sums = np.zeros((c, dim), float)
        for i in range(belonging_centroids.shape[0]):
            centroid_sums[belonging_centroids[i]] += x[i]

        for j in range(c):
            members = np.sum(belonging_centroids == j)
            # An empty cluster keeps its previous centroid instead of becoming 0/0 = NaN.
            if members > 0:
                centroids[j] = centroid_sums[j] / members

        old_d = d.copy()

def get_file_content(fnm):
    """Return the lines of the text file ``fnm`` with surrounding whitespace stripped.

    Blank lines are kept (as empty strings).
    """
    with open(fnm) as f:
        return [line.strip() for line in f]

def main(args):
    """Cluster the box sizes found in the label files and write the anchors.

    Each entry of the list files in ``args.file_list`` is an image path; it is
    mapped to a label path by string replacement. Every non-blank label line
    is split into five whitespace-separated fields, of which the last two are
    collected as (w, h).

    Args:
        args: namespace with ``file_list``, ``engine``, ``num_clusters``,
            ``tol`` and ``output`` attributes.

    Raises:
        ValueError: if no boxes are found or ``args.engine`` is not recognised.
    """
    print("Reading Data ...")

    file_list = []
    for f in args.file_list:
        file_list.extend(get_file_content(f))

    data = []
    for one_file in tqdm(file_list):
        if not one_file:
            continue  # blank line in the list file
        one_file = one_file.replace('images', 'labels') \
            .replace('JPEGImages', 'labels') \
            .replace('.png', '.txt') \
            .replace('.jpg', '.txt')
        for line in get_file_content(one_file):
            if not line:
                continue  # a blank line would break the 5-way unpack below
            _, _, _, w, h = line.split()
            data.append([float(w), float(h)])

    if not data:
        raise ValueError("no bounding boxes found in the given file lists")

    data = np.array(data)
    if args.engine == "sklearn":
        km = cluster.KMeans(n_clusters=args.num_clusters, tol=args.tol, verbose=True)
    elif args.engine == "sklearn-mini":
        km = cluster.MiniBatchKMeans(n_clusters=args.num_clusters, tol=args.tol, verbose=True)
    elif args.engine.startswith("sklearn"):
        raise ValueError("unknown engine: %s" % args.engine)
    else:
        km = None  # use the IoU-based k_means below

    if km is not None:
        km.fit(data)
        result = km.cluster_centers_
        # distance = km.inertia_ / data.shape[0]
    else:
        result = k_means(data, args.num_clusters, args.tol)
    distance = avg_iou(data, result)

    write_anchors_to_file(result, distance, args.output)

if "__main__" == __name__:
    # Command-line entry point: one or more list files plus clustering options.
    parser = argparse.ArgumentParser()
    parser.add_argument('file_list', nargs='+', help='TrainList')
    parser.add_argument('--num_clusters', '-n', default=5, type=int, help='Number of Clusters')
    parser.add_argument('--output', '-o', default='/Users/sisyphus/darkflow/VOC2018/anchor.txt', type=str, help='Result Output File')
    parser.add_argument('--tol', '-t', default=0.005, type=float, help='Tolerate')
    parser.add_argument('--engine', '-m', default='sklearn', type=str,
                        choices=['original', 'sklearn', 'sklearn-mini'], help='Method to use')

    args = parser.parse_args()

    main(args)

cd到darkflow文件夹，终端命令行：

python anchors.py /Users/sisyphus/darkflow/VOC2018/ImageSets/Main/train.txt

得到结果：

Cluster Result:

Clusters: 5

Average IoU: 0.5959804798056495

Anchors:

2.838930446194225, 11.596443569553825, 9.612902298850576, 3.9339942528735645, 4.220859872611465, 6.363582802547771, 10.27776450511945, 11.884573378839587, 2.344780763790667, 3.2156152758132945

2018年5月14日星期一

SSD VOC评估与训练自己的数据集

在验证VOC2007测试集时
1. 首先将数据集转换为tfrecord格式：

DATASET_DIR=./VOC2007/test/
OUTPUT_DIR=./tfrecords
python tf_convert_data.py \
    --dataset_name=pascalvoc \
    --dataset_dir=${DATASET_DIR} \
    --output_name=voc_2007_test \
    --output_dir=${OUTPUT_DIR}

调用tf_convert_data.py将test set转化成tfrecords：（注意：这里直接运行会碰到无法读取图片，UTF-8无法decode的Error，解决办法是打开SSD工程—>datasets—>pascalvoc_to_tfrecords.py 。。。然后更改文件的83行读取方式为’rb’）

注意将voc_2007_train改为voc_2007_test。

2. 进行模型评估：

DATASET_DIR=./tfrecords

EVAL_DIR=./logs/
CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt
python eval_ssd_network.py \
    --eval_dir=${EVAL_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=test \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --batch_size=1

运行以上代码报错：


TypeError: Can not convert a tuple into a Tensor or Operation.

解决方法：

打开eval_ssd_network.py文件，然后加入以下代码：


def flatten(x):
    """Return a flat list of the items of ``x``, expanding nested tuples recursively.

    Only tuples are expanded; any other item (lists included) is kept as is.
    """
    flat = []
    for item in x:
        flat += flatten(item) if isinstance(item, tuple) else [item]
    return flat

下面两处地方调用：

# Standard evaluation loop.  
            start = time.time()  
            slim.evaluation.evaluate_once(  
                master=FLAGS.master,  
                checkpoint_path=checkpoint_path,  
                logdir=FLAGS.eval_dir,  
                num_evals=num_batches,  
                eval_op=flatten(list(names_to_updates.values())), #这里也调用flatten  
                variables_to_restore=variables_to_restore,  
                session_config=config)


  


# Waiting loop.  
            slim.evaluation.evaluation_loop(  
                master=FLAGS.master,  
                checkpoint_dir=checkpoint_path,  
                logdir=FLAGS.eval_dir,  
                num_evals=num_batches,  
                eval_op=flatten(list(names_to_updates.values())), #这里调用flatten  
                variables_to_restore=variables_to_restore,  
                eval_interval_secs=60,  
                max_number_of_evaluations=np.inf,  
                session_config=config,  
                timeout=None

2 训练自己的数据集：

voc格式的数据集制作好以后，转换成tfrecords。需要修改一下源码，

datasets\pascalvoc_common.py：

#VOC_LABELS = {
#    'none': (0, 'Background'),
#    'aeroplane': (1, 'Vehicle'),
#    'bicycle': (2, 'Vehicle'),
#    'bird': (3, 'Animal'),
#    'boat': (4, 'Vehicle'),
#    'bottle': (5, 'Indoor'),
#    'bus': (6, 'Vehicle'),
#    'car': (7, 'Vehicle'),
#    'cat': (8, 'Animal'),
#    'chair': (9, 'Indoor'),
#    'cow': (10, 'Animal'),
#    'diningtable': (11, 'Indoor'),
#    'dog': (12, 'Animal'),
#    'horse': (13, 'Animal'),
#    'motorbike': (14, 'Vehicle'),
#    'person': (15, 'Person'),
#    'pottedplant': (16, 'Indoor'),
#    'sheep': (17, 'Animal'),
#    'sofa': (18, 'Indoor'),
#    'train': (19, 'Vehicle'),
#    'tvmonitor': (20, 'Indoor'),
#}

# Label table: class name -> (numeric id, category). Id 0 is the background
# entry; each custom class uses its own name as its category.
VOC_LABELS = {'none': (0, 'Background')}
VOC_LABELS.update(
    (class_name, (class_id, class_name))
    for class_id, class_name in enumerate(
        ('crazing', 'inclusion', 'patches', 'pitted_surface', 'rolled-in_scale', 'scratches'),
        start=1,
    )
)

接着跳转到SSD-tensorflow目录下，进行tfrecords操作，我的运行命令如下：


DATASET_DIR=VOCtest2018/
OUTPUT_DIR=tfrecords/
python3 tf_convert_data.py \
    --dataset_name=pascalvoc \
    --dataset_dir=${DATASET_DIR} \
    --output_name=voc_2007_train \
    --output_dir=${OUTPUT_DIR}

这样就可以进行训练了，运行的命令为：


DATASET_DIR=tfrecords
TRAIN_DIR=logs/
CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt
python3 train_ssd_network.py \
    --train_dir=${TRAIN_DIR} \
    --dataset_dir=${DATASET_DIR} \
    --dataset_name=pascalvoc_2007 \
    --dataset_split_name=train \
    --model_name=ssd_300_vgg \
    --checkpoint_path=${CHECKPOINT_PATH} \
    --save_summaries_secs=60 \
    --save_interval_secs=600 \
    --weight_decay=0.0005 \
    --optimizer=adam \
    --learning_rate=0.001 \
    --batch_size=16
# 如果在cpu机器上跑，在上面命令的末尾追加以下参数：
#     --device=cpu
#     --data_format=NHWC（针对cpu，或者在train_ssd_network中第27行代码改）

订阅：博文 (Atom)

DeepLearning