# 1. Algorithm source program (Darknet YOLOv3 cfg)

[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64
# Number of training samples per mini-batch chunk: net->batch /= subdivisions
subdivisions=16
width=416
height=416
channels=3
# Momentum
momentum=0.9
# Weight-decay regularization
decay=0.0005
# Data augmentation: rotation angle
angle=0
# Data augmentation: saturation adjustment
saturation = 1.5
# Data augmentation: exposure adjustment
exposure = 1.5
# Data augmentation: hue adjustment
hue=.1

# The learning rate determines the speed of weight updates.
learning_rate=0.001
# While the iteration count is below burn_in, the learning rate is ramped up
# one way; once it exceeds burn_in, the configured policy takes over.
burn_in=1000
# Number of iterations at which training stops
max_batches = 50200
# Learning-rate update strategy
policy=steps
# Iterations at which the "steps" policy rescales the learning rate
steps=40000,45000
scales=.1,.1

# A 3x3 kernel with pad and stride 1 keeps the feature-map size unchanged;
# stride 2 halves the original size.
[convolutional]
# Whether to apply batch normalization
batch_normalize=1
# Number of convolution kernels (output channels)
filters=32
# Convolution kernel size
size=3
stride=1
pad=1
# Layer activation function
activation=leaky

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
# from=-3 references the layer three back (a ResNet-style skip connection)
from=-3
# Activation function of the shortcut layer
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

# ... repeated convolutional/shortcut blocks omitted here ...

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
# filters = num*(classes+5); the 5 means 4 box coordinates + 1 confidence.
# num is the number of boxes predicted per cell at this yolo scale (3).
# The VOC dataset has 20 classes, the COCO dataset has 80.
filters=75
activation=linear

[yolo]
# Indices of the anchors used at this scale
mask = 6,7,8
# Anchor sizes
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=20
# Total number of predicted boxes per grid cell across all scales;
# must match the number of anchors.
num=9
# Data augmentation: jitter is the range of random aspect-ratio adjustment.
jitter=.3
# IOU threshold used in the loss. NOTE(review): in darknet, a predicted box
# whose IOU with the ground truth exceeds ignore_thresh is EXCLUDED from the
# no-objectness loss (it is not penalized); boxes below the threshold do
# contribute to it.
ignore_thresh = .5
truth_thresh = 1
random=1

# A route layer's "layers" attribute holds one or two values. With a single
# value (-4 here) it outputs the feature map of the layer four back from the
# route layer.
[route]
layers = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

# With two values, the route layer outputs the feature maps of the indexed
# layers concatenated along the depth dimension — here the previous layer
# (-1) and layer 61.

[route]
layers = -1, 61

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=75
activation=linear

[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=20
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1

[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 36

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=75
activation=linear

# There are 9 anchors in total, but only those selected by the mask are used.
# Here mask = 0,1,2 selects the first three anchors, so each cell predicts
# 3 boxes. In total the detection network has 3 scales and 9 anchors.

[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=20
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1