How can I use a pre-trained ResNet50 in TensorFlow? I want to design a network in TensorFlow that builds on a pre-trained network, taking ResNet50 as the example. However, I don't know how to build my model on top of it from its checkpoint. The definition of the ResNet is in resnet.py. Can anyone help me? Thank you very much!

def inference(x, is_training,
              num_classes=1000,
              num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
              use_bias=False,           # defaults to using batch norm
              bottleneck=True):
    c = Config()
    c['bottleneck'] = bottleneck
    c['is_training'] = tf.convert_to_tensor(is_training,
                                            dtype='bool',
                                            name='is_training')
    c['ksize'] = 3
    c['stride'] = 1
    c['use_bias'] = use_bias
    c['fc_units_out'] = num_classes
    c['num_blocks'] = num_blocks
    c['stack_stride'] = 2

    with tf.variable_scope('scale1'):
        c['conv_filters_out'] = 64
        c['ksize'] = 7
        c['stride'] = 2
        x = conv(x, c)
        x = bn(x, c)
        x = activation(x)

    with tf.variable_scope('scale2'):
        x = _max_pool(x, ksize=3, stride=2)
        c['num_blocks'] = num_blocks[0]
        c['stack_stride'] = 1
        c['block_filters_internal'] = 64
        x = stack(x, c)

    with tf.variable_scope('scale3'):
        c['num_blocks'] = num_blocks[1]
        c['block_filters_internal'] = 128
        assert c['stack_stride'] == 2
        x = stack(x, c)

    with tf.variable_scope('scale4'):
        c['num_blocks'] = num_blocks[2]
        c['block_filters_internal'] = 256
        x = stack(x, c)

    with tf.variable_scope('scale5'):
        c['num_blocks'] = num_blocks[3]
        c['block_filters_internal'] = 512
        x = stack(x, c)

    # post-net
    x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool")

    if num_classes is not None:
        with tf.variable_scope('fc'):
            x = fc(x, c)

    return x
def stack(x, c):
    for n in range(c['num_blocks']):
        s = c['stack_stride'] if n == 0 else 1
        c['block_stride'] = s
        with tf.variable_scope('block%d' % (n + 1)):
            x = block(x, c)
    return x


def block(x, c):
    filters_in = x.get_shape()[-1]

    m = 4 if c['bottleneck'] else 1
    filters_out = m * c['block_filters_internal']

    shortcut = x  # branch 1

    c['conv_filters_out'] = c['block_filters_internal']

    if c['bottleneck']:
        with tf.variable_scope('a'):
            c['ksize'] = 1
            c['stride'] = c['block_stride']
            x = conv(x, c)
            x = bn(x, c)
            x = activation(x)

        with tf.variable_scope('b'):
            x = conv(x, c)
            x = bn(x, c)
            x = activation(x)

        with tf.variable_scope('c'):
            c['conv_filters_out'] = filters_out
            c['ksize'] = 1
            assert c['stride'] == 1
            x = conv(x, c)
            x = bn(x, c)
    else:
        with tf.variable_scope('A'):
            c['stride'] = c['block_stride']
            assert c['ksize'] == 3
            x = conv(x, c)
            x = bn(x, c)
            x = activation(x)

        with tf.variable_scope('B'):
            c['conv_filters_out'] = filters_out
            assert c['ksize'] == 3
            assert c['stride'] == 1
            x = conv(x, c)
            x = bn(x, c)

    with tf.variable_scope('shortcut'):
        if filters_out != filters_in or c['block_stride'] != 1:
            c['ksize'] = 1
            c['stride'] = c['block_stride']
            c['conv_filters_out'] = filters_out
            shortcut = conv(shortcut, c)
            shortcut = bn(shortcut, c)

    return activation(x + shortcut)


def bn(x, c):
    x_shape = x.get_shape()
    params_shape = x_shape[-1:]

    if c['use_bias']:
        bias = _get_variable('bias', params_shape,
                             initializer=tf.zeros_initializer)
        return x + bias

    axis = list(range(len(x_shape) - 1))

    beta = _get_variable('beta',
                         params_shape,
                         initializer=tf.zeros_initializer)
    gamma = _get_variable('gamma',
                          params_shape,
                          initializer=tf.ones_initializer)

    moving_mean = _get_variable('moving_mean',
                                params_shape,
                                initializer=tf.zeros_initializer,
                                trainable=False)
    moving_variance = _get_variable('moving_variance',
                                    params_shape,
                                    initializer=tf.ones_initializer,
                                    trainable=False)

    # These ops will only be performed when training.
    mean, variance = tf.nn.moments(x, axis)
    update_moving_mean = moving_averages.assign_moving_average(moving_mean,
                                                               mean, BN_DECAY)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, BN_DECAY)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)

    mean, variance = control_flow_ops.cond(
        c['is_training'], lambda: (mean, variance),
        lambda: (moving_mean, moving_variance))

    x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
    # x.set_shape(inputs.get_shape()) ??

    return x


def fc(x, c):
    num_units_in = x.get_shape()[1]
    num_units_out = c['fc_units_out']
    weights_initializer = tf.truncated_normal_initializer(
        stddev=FC_WEIGHT_STDDEV)

    weights = _get_variable('weights',
                            shape=[num_units_in, num_units_out],
                            initializer=weights_initializer,
                            weight_decay=FC_WEIGHT_STDDEV)
    biases = _get_variable('biases',
                           shape=[num_units_out],
                           initializer=tf.zeros_initializer)
    x = tf.nn.xw_plus_b(x, weights, biases)
    return x


def _get_variable(name,
                  shape,
                  initializer,
                  weight_decay=0.0,
                  dtype='float',
                  trainable=True):
    """A little wrapper around tf.get_variable to do weight decay and add to
    resnet collection."""
    if weight_decay > 0:
        regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        regularizer = None
    collections = [tf.GraphKeys.VARIABLES, RESNET_VARIABLES]
    return tf.get_variable(name,
                           shape=shape,
                           initializer=initializer,
                           dtype=dtype,
                           regularizer=regularizer,
                           collections=collections,
                           trainable=trainable)


def conv(x, c):
    ksize = c['ksize']
    stride = c['stride']
    filters_out = c['conv_filters_out']

    filters_in = x.get_shape()[-1]
    shape = [ksize, ksize, filters_in, filters_out]
    initializer = tf.truncated_normal_initializer(stddev=CONV_WEIGHT_STDDEV)
    weights = _get_variable('weights',
                            shape=shape,
                            dtype='float',
                            initializer=initializer,
                            weight_decay=CONV_WEIGHT_DECAY)
    return tf.nn.conv2d(x, weights, [1, stride, stride, 1], padding='SAME')


def _max_pool(x, ksize=3, stride=2):
    return tf.nn.max_pool(x,
                          ksize=[1, ksize, ksize, 1],
                          strides=[1, stride, stride, 1],
                          padding='SAME')

Answers


Basically, you should use the code that is provided for the model. You build the graph with it and then restore from the checkpoint file. Here is how to do that for ResNet50:

from tensorflow.contrib.slim.nets import resnet_v1 
import tensorflow as tf 
import tensorflow.contrib.slim as slim 

# Create graph (batch_size, height, width and channels must be defined beforehand,
# e.g. 1, 224, 224, 3 for a single RGB image)
inputs = tf.placeholder(tf.float32, shape=[batch_size, height, width, channels]) 
with slim.arg_scope(resnet_v1.resnet_arg_scope()): 
    net, end_points = resnet_v1.resnet_v1_50(inputs, is_training=False) 

saver = tf.train.Saver() 

with tf.Session() as sess: 
    saver.restore(sess, '.resnet_v1_50.ckpt') 
    # If you don't know tensor names like this one, refer to the corresponding model file
    # or generate a .pbtxt file as mentioned in @civilman628's answer in the link below
    representation_tensor = sess.graph.get_tensor_by_name('resnet_v1_50/pool5:0') 
    img = ...  # load an image here with shape [1, 224, 224, 3] 
    features = sess.run(representation_tensor, {inputs: img}) 

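If you are unsure which tensor or op names exist in the graph, the short sketch below (not part of the original answer, only standard TF 1.x calls) shows two ways to inspect them:

# Hedged sketch: discover tensor/op names in the graph built above.
graph = tf.get_default_graph()

# Option 1: dump the graph definition to a human-readable .pbtxt file
tf.train.write_graph(graph.as_graph_def(), '/tmp', 'resnet_v1_50.pbtxt', as_text=True)

# Option 2: print every operation name that mentions 'pool5'
for op in graph.get_operations():
    if 'pool5' in op.name:
        print(op.name)
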
For more details, see my (@parthg) answer on the related TensorFlow GitHub issue: #7172
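
Since the original question is about building your own network on top of the pre-trained weights rather than just extracting features, here is a minimal sketch of one way to do that with the same slim model. It is not from the original answer; the scope name 'my_new_head', the 10-class head, and the checkpoint path are illustrative assumptions:

# Hedged sketch: new classification head on top of the pre-trained backbone,
# restoring only the ResNet weights from the downloaded checkpoint.
from tensorflow.contrib.slim.nets import resnet_v1
import tensorflow as tf
import tensorflow.contrib.slim as slim

inputs = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    # num_classes=None returns the pooled features instead of ImageNet logits
    net, end_points = resnet_v1.resnet_v1_50(inputs, num_classes=None, is_training=False)

with tf.variable_scope('my_new_head'):          # hypothetical scope name
    net = slim.flatten(net)
    logits = slim.fully_connected(net, 10, activation_fn=None)  # 10 classes as an example

# Restore only the backbone variables; the new head stays randomly initialized.
variables_to_restore = slim.get_variables_to_restore(exclude=['my_new_head'])
saver = tf.train.Saver(variables_to_restore)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, 'resnet_v1_50.ckpt')    # assumed path to the downloaded checkpoint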


I use Keras, which runs on top of TensorFlow. Here is an example that feeds in one image at a time:

import numpy as np 

from keras.preprocessing import image 
from keras.applications import resnet50 

# Load Keras' ResNet50 model that was pre-trained against the ImageNet database 
model = resnet50.ResNet50() 

# Load the image file, resizing it to 224x224 pixels (required by this model) 
img = image.load_img("path_to_image.jpg", target_size=(224, 224)) 

# Convert the image to a numpy array 
x = image.img_to_array(img) 

# Add a fourth dimension since Keras expects a list of images 
x = np.expand_dims(x, axis=0) 

# Scale the input image to the range used in the trained network 
x = resnet50.preprocess_input(x) 

# Run the image through the deep neural network to make a prediction 
predictions = model.predict(x) 

# Look up the names of the predicted classes. Index zero is the results for the first image. 
predicted_classes = resnet50.decode_predictions(predictions, top=9) 

print("This is an image of:") 

for imagenet_id, name, likelihood in predicted_classes[0]: 
    print(" - {}: {:2f} likelihood".format(name, likelihood)) 