In [1]:
import tensorflow as tf
from vision.data.proto.annotated_image_pb2 import AnnotatedImage, MultipleImages
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def generate_decoding_func():
    def f(ai_str):
        ai = AnnotatedImage()
        ai.ParseFromString(ai_str)
        assert(ai.HasExtension(MultipleImages.ext))
        multiple_images_ext=ai.Extensions[MultipleImages.ext]

        result = []
        #result = {'image_0': cv2.imdecode(np.fromstring(ai.image_data, dtype=np.uint8), cv2.IMREAD_COLOR)}
        #result = {"a":tf.random_uniform([4])}
        for i,image_data in enumerate(multiple_images_ext.image_data):
            image = cv2.imdecode(np.fromstring(image_data, dtype=np.uint8), cv2.IMREAD_COLOR)
            result.append(((image-128.0) / 255.0).astype(np.float32))
            break

        labels = set()
        for gt in ai.groundtruth:
            for attribute in gt.region.attribute:
                for dbin in attribute.distribution.bin:
                    if dbin.probability < 0.99:
                        continue
                    label = dbin.label
                    if label < 3001 or label > 3009:
                        continue
                    labels.add(label)

        label_array = np.zeros(9,)
        for label in labels:
            label_array[label - 3001] = 1
         
        #return result, label_array.astype(np.float32)
        return result + [label_array.astype(np.float32)]
    return f


In [13]:
def parse_tf_record(serialized_record):
    features = tf.parse_single_example(
        serialized_record,
        features={
            'key': tf.FixedLenFeature([], tf.string),
            'value': tf.FixedLenFeature([], tf.string),
        },
    )
    key = features['key']
    value = features['value']
    
    decoding_func = generate_decoding_func()
    images_and_labels = tf.py_func(decoding_func,
      [value], [tf.float32] * 2)
    images = images_and_labels[0:len(images_and_labels)-1]
    d = {}
    for i,image in enumerate(images):
        d['image_%d' % i] = image
    d['labels'] = images_and_labels[-1]
    return d

In [14]:
filenames = ['/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/%d.1.tfrecords' % light_id for light_id in range(3000,3010)]

In [15]:
filenames

['/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3000.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3001.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3002.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3003.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3004.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3005.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3006.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3007.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3008.1.tfrecords',
 '/mnt/sun-pcs01/praveen/tl/tf_records/tfrecords/3009.1.tfrecords']

In [16]:
base_training_dataset = (tf.data.Dataset.from_tensor_slices(filenames)
           .interleave(lambda x:
               tf.data.TFRecordDataset(x).map(parse_tf_record, num_parallel_calls=12).repeat(),
               cycle_length=10, block_length=1))

In [17]:
training_dataset = base_training_dataset.batch(10)

In [18]:
mini_training_eval_dataset = base_training_dataset.batch(100)

In [19]:
validation_filenames = ['/mnt/sun-pcs01/praveen/tl/tf_records_test/tfrecords/%d.1.tfrecords' % light_id for light_id in range(3000,3010)]

In [20]:
validation_dataset = (tf.data.Dataset.from_tensor_slices(validation_filenames)
           .interleave(lambda x:
               tf.data.TFRecordDataset(x).map(parse_tf_record, num_parallel_calls=12).repeat(),
               cycle_length=10, block_length=1)).batch(100)

In [21]:
elems = []
with tf.Session() as sess:
    it = validation_dataset.make_one_shot_iterator()
    batch = []
    for i in range(0,1):
        elems.append(sess.run(it.get_next()))

In [22]:
elems[0]['image_0'].shape

(100, 100, 100, 3)

In [None]:
plt.imshow(elems[0]['image_0'][3,:,:,::-1] + 0.5)
plt.show()

In [24]:
elems[11]['labels']

IndexError: list index out of range

In [None]:
plt.imshow((elems[0]['image_0'] + 0.5)[1,:,:,::-1], interpolation='nearest')
plt.show()

In [26]:
iterator_handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    iterator_handle, training_dataset.output_types, training_dataset.output_shapes)
next_element = iterator.get_next()

In [27]:
def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)

def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)
def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')


In [28]:
W_conv1a = weight_variable((5,5,3,16))
b_conv1a = bias_variable((16,))

image_keys = filter(lambda x: 'image_0' in x, next_element.keys())

h_conv1a = [tf.nn.relu(conv2d(next_element[k], W_conv1a) + b_conv1a) for k in image_keys]

W_conv1b = weight_variable((5,5,16,16))
b_conv1b = bias_variable((16,))

h_conv1b =  [tf.nn.relu(conv2d(h, W_conv1b) + b_conv1b) for h in h_conv1a]

pool1 = [max_pool_2x2(h) for h in h_conv1b]

W_conv2a = weight_variable((5,5,16,32))
b_conv2a = bias_variable((32,))

h_conv2a = [tf.nn.relu(conv2d(h, W_conv2a) + b_conv2a) for h in pool1]

W_conv2b = weight_variable((5,5,32,32))
b_conv2b = bias_variable((32,))


h_conv2b = [tf.nn.relu(conv2d(h, W_conv2b) + b_conv2b) for h in h_conv2a]

keep_prob = tf.placeholder(tf.float32)
h_dropout2 = [tf.nn.dropout(h, keep_prob) for h in h_conv2b]

pool2 = [max_pool_2x2(h) for h in h_dropout2]

if False:
    W_conv3a = weight_variable((5,5,32,32))
    b_conv3a = bias_variable((32,))
    h_conv3a = [tf.nn.relu(conv2d(h, W_conv3a) + b_conv3a) for h in pool2]

    W_conv3b = weight_variable((9,9,32,9))
    b_conv3b = bias_variable((9,))
    h_conv3b = [conv2d(h, W_conv3b) + b_conv3b for h in h_conv3a]

    max_per_scale = [tf.squeeze(tf.reduce_max(h,axis=[1,2], keep_dims=True),1) for h in h_conv3b]
    logits = tf.reduce_max(tf.concat(max_per_scale, 1),axis=[1])
else:
    W_fc1 = weight_variable([25*25*32, 32])
    b_fc1 = bias_variable([32])
    h_fc1 = tf.nn.relu(tf.matmul(tf.reshape(pool2[0], [-1, 25*25*32]), W_fc1) + b_fc1)
    
    W_fc2 = weight_variable([32, 9])
    b_fc2 = bias_variable([9])
    logits = tf.matmul(tf.reshape(h_fc1, [-1, 32]), W_fc2) + b_fc2
    
#h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
#h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
   # W_ip0 = weight_variable([]
# max_over_all_scales - shape is batch_size by 9. Threshold by 0 and compare with labels

loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=next_element['labels'], logits = logits))
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(tf.greater_equal(logits,0), tf.float32), 
                                   next_element['labels']),tf.float32))

In [29]:
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

In [30]:
sess = tf.Session()
train_iter = -1
sess.run(tf.global_variables_initializer())

In [31]:
training_dataset_iterator = training_dataset.make_one_shot_iterator()
training_dataset_iterator_handle = sess.run(training_dataset_iterator.string_handle())

In [32]:
sess.run(pool2,feed_dict={keep_prob: 0.5, iterator_handle: training_dataset_iterator_handle})[0].shape

(10, 25, 25, 32)

In [35]:
with sess.as_default():
    while train_iter < 100000:
        train_iter+=1
        
        _, loss_val = sess.run([train_step, loss],
                           feed_dict={keep_prob: 0.5, iterator_handle: training_dataset_iterator_handle})

        if train_iter % 1000 == 0:
            validation_iterator = validation_dataset.make_one_shot_iterator()
            validation_iterator_handle = sess.run(validation_iterator.string_handle())
            
            validation_accuracy, validation_loss = sess.run([accuracy, loss], 
                                                            feed_dict={keep_prob: 1.0, 
                                                                       iterator_handle: validation_iterator_handle})
            mini_training_iterator = mini_training_eval_dataset.make_one_shot_iterator()
            mini_training_iterator_handle = sess.run(mini_training_iterator.string_handle())
            
            mini_training_accuracy, mini_training_loss = sess.run([accuracy, loss], 
                                                            feed_dict={keep_prob: 1.0, 
                                                                       iterator_handle: mini_training_iterator_handle})
            print 'On iter %d, validation accuracy/loss: %f/%f, mini training accuracy/loss: %f/%f' % (train_iter, 
                                                                                                       validation_accuracy,
                                                                                                       validation_loss,
                                                                                                       mini_training_accuracy,
                                                                                                      mini_training_loss)
            
        

On iter 20000, validation accuracy/loss: 0.945556/0.164636, mini training accuracy/loss: 0.988889/0.058106
On iter 21000, validation accuracy/loss: 0.952222/0.158970, mini training accuracy/loss: 0.987778/0.059500
On iter 22000, validation accuracy/loss: 0.944444/0.174153, mini training accuracy/loss: 0.986667/0.062454
On iter 23000, validation accuracy/loss: 0.944444/0.162874, mini training accuracy/loss: 0.986667/0.055394
On iter 24000, validation accuracy/loss: 0.944444/0.162493, mini training accuracy/loss: 0.983333/0.054094
On iter 25000, validation accuracy/loss: 0.945556/0.164865, mini training accuracy/loss: 0.987778/0.052476
On iter 26000, validation accuracy/loss: 0.945556/0.160633, mini training accuracy/loss: 0.987778/0.051190
On iter 27000, validation accuracy/loss: 0.943333/0.166311, mini training accuracy/loss: 0.988889/0.050422
On iter 28000, validation accuracy/loss: 0.948889/0.167086, mini training accuracy/loss: 0.987778/0.047654
On iter 29000, validation accuracy/lo

On iter 97000, validation accuracy/loss: 0.952222/0.244806, mini training accuracy/loss: 0.993333/0.022286
On iter 98000, validation accuracy/loss: 0.953333/0.244898, mini training accuracy/loss: 0.991111/0.025469
On iter 99000, validation accuracy/loss: 0.947778/0.249978, mini training accuracy/loss: 0.992222/0.021668
On iter 100000, validation accuracy/loss: 0.948889/0.254633, mini training accuracy/loss: 0.993333/0.020931


In [None]:
plt.imshow(W_conv2b.eval(sess)[:,:,1,0])

In [36]:
images, these_logits= sess.run([next_element, logits], 
                                                            feed_dict={keep_prob: 1.0, 
                                                                       iterator_handle: validation_iterator_handle})

In [44]:
images['labels'][3,:]

array([ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [52]:
these_logits[4,:]

array([ -3.64982963,  -4.98215008, -10.91512489,   1.71288323,
        -6.46317244, -12.66110229,  -6.9718523 ,  -5.13114166,  -7.15203094], dtype=float32)

In [None]:
plt.imshow(images['image_0'][4,:,:,::-1] + 0.5)

In [39]:
plt.imshow(these_logits)

array([[ -1.99582505,  -2.4556427 ,  -5.18372917,  -3.07179952,
         -5.20456982,  -7.48662472,  -4.32516146,  -6.09419107,
         -8.42727852],
       [  0.57584977,  -3.25740027,  -7.17012215,  -1.03444958,
         -5.63783455, -11.92416668,  -4.57273626,  -5.93011093,  -8.0736866 ],
       [ -0.40752053,  -1.58861709,  -1.01039743,  -2.1605022 ,
         -5.43729067,  -6.50182962,  -3.02869034,  -6.28620338,
         -5.74144316],
       [ -3.93032384,  -4.00665569,  -4.76316309,   1.07567024,
         -5.84726477,  -4.4604001 ,  -8.06663132, -13.18647861,
        -19.02998543],
       [ -3.64982963,  -4.98215008, -10.91512489,   1.71288323,
         -6.46317244, -12.66110229,  -6.9718523 ,  -5.13114166,
         -7.15203094],
       [-10.56683254,  -6.03184271, -13.29619312,  -8.47851372,
         12.07903767,  -9.82136345, -14.94621086, -19.14881706,
        -25.77438354],
       [ -4.33030081,  -4.74963665,  -5.29449749,   1.43469679,
         -2.40223908,  -6.25775194,  -

In [80]:
[np.max(h.flatten()) for h in W_conv3b]

TypeError: 'Variable' object is not iterable.