mardi 4 août 2015

A weird error about `updates` in Theano

I designed a variable net, but it ran into some problems with Theano. The general idea is that different inputs produce different nets with the same parameters, something like a recursive neural network with an auto-encoder. There are two cases in my code: one case runs combine_feat_gt1_1() if c > 1, the other case runs combine_feat_gt1_0().

It is weird that the code runs without errors if I comment out updates=updates, which is not what I expected (the train_test Theano function in the code). However, if I leave updates=updates uncommented, an error occurs (the train_test_bug Theano function in the code). The latter is what I'd like to implement.

I have already spent several days on this bug. Can anyone help me? I would appreciate it.

import os
import sys
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.ifelse import ifelse

class Test(object):
    """Toy recursive / auto-encoder-style net used to reproduce a Theano bug.

    Different inputs select different computation graphs (via ``ifelse``)
    that share the same parameters ``W_r`` and ``b_r``.  The reported bug:
    compiling with ``updates=updates`` fails, while compiling without
    updates works.
    """

    def __init__(
        self,
        numpy_rng,
        input=None,
        output=None,
        n_output=6,
        n_input=3,
        n_group=2,
        W_r=None,
        b_r=None
    ):
        """Initialize shared parameters and symbolic input/output.

        :param numpy_rng: numpy RandomState used to initialize W_r.
        :param input: symbolic 4-D input tensor; created if None.
        :param output: symbolic 2-D target matrix; created if None.
        :param n_output: target dimensionality (stored, not used below).
        :param n_input: feature dimensionality of each item.
        :param n_group: number of groups (stored, not used below).
        :param W_r: optional pre-built shared recurrent weight matrix.
        :param b_r: optional pre-built shared bias vector.
        """
        self.n_output = n_output
        self.n_input = n_input
        self.n_group = n_group

        # Compare against None explicitly: truth-testing a Theano shared
        # variable (`if not W_r:`) is not a reliable "was it supplied" check.
        if W_r is None:
            initial_W_r = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_input + n_input)),
                    high=4 * numpy.sqrt(6. / (n_input + n_input)),
                    size=(n_input, n_input)
                ),
                dtype=theano.config.floatX
            )
            W_r = theano.shared(value=initial_W_r, name='W_r', borrow=True)

        if b_r is None:
            b_r = theano.shared(
                value=numpy.zeros(
                    n_input,
                    dtype=theano.config.floatX
                ),
                name='b_r',  # named for easier graph debugging
                borrow=True
            )

        self.W_r = W_r
        self.b_r = b_r

        if input is None:
            self.x = T.tensor4(name='input', dtype=theano.config.floatX)
        else:
            self.x = input

        if output is None:
            self.y = T.matrix(name='output', dtype=theano.config.floatX)
        else:
            self.y = output

        self.params = [self.W_r, self.b_r]

    def get_output_values(self, input):
        """Build the symbolic feature expression for `input`.

        Assumes `input` is a 4-D tensor (a, b, c, d); when the third
        dimension c > 1 each group is folded recursively, otherwise the
        single item is used as-is.  Returns a (1, -1)-shaped row vector.
        """
        a, b, c, d = input.shape

        def recusive(x_t, h_tm1, wr, hr):
            # One recurrence step.  Note W_r multiplies BOTH h_tm1 and x_t
            # (tied weights) — presumably intentional in this toy model.
            h_t = T.dot(h_tm1, wr) + T.dot(x_t, wr) + hr
            return h_t

        def combine_recusive(data):
            # Fold the items of one group with scan, seeded by the first item.
            hidden, _ = theano.scan(fn=recusive,
                                    sequences=data[1:],
                                    outputs_info=data[0],
                                    non_sequences=[self.W_r, self.b_r],
                                    n_steps=data[1:].shape[0],
                                    strict=True)
            return hidden[-1]

        def combine_feat_gt1_1(input):
            # c > 1: recursively combine each group, then concatenate.
            # NOTE(review): an outer scan whose step itself calls scan with
            # strict=True — without threading the inner non_sequences through
            # the outer scan — looks like the likely trigger of the reported
            # `updates` compile error.  TODO: confirm against Theano scan docs.
            feats, _ = theano.scan(fn=combine_recusive,
                                   sequences=input[0],
                                   outputs_info=None,
                                   n_steps=input[0].shape[0])
            return T.reshape(feats, (1, -1))  # concatenation

        def combine_feat_gt1_0(input):
            # c == 1: nothing to recurse over, just flatten.
            return T.reshape(input[0], (1, -1))  # concatenation

        # Both branch graphs are built; ifelse picks one lazily at run time.
        feat = ifelse(T.gt(c, 1), combine_feat_gt1_1(input), combine_feat_gt1_0(input))

        # Debug helpers kept from the original bug report.
        self.debug_ifelse = theano.function([input], T.gt(c, 1))
        self.debug_1_0 = theano.function([input], ifelse(T.gt(c, 1), 1, 0))

        return feat

    def get_cost_updates(self):
        """Return (cost, updates) for one SGD step on the squared error."""
        learning_rate = 0.1
        self.y_given_x = self.get_output_values(self.x)
        cost = T.sum((self.y_given_x - self.y) ** 2)

        gparams = T.grad(cost, self.params)
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)


if __name__ == "__main__":

    # Toy fixtures: one sample whose groups hold 2 items each (c == 2) and
    # one whose groups hold a single item (c == 1), plus matching targets.
    toy_data = numpy.array([[[[1, 1, 1], [2, 2, 2]], [[3, 4, 5], [4, 5, 6]]]],
                           dtype=theano.config.floatX)
    label = numpy.array([[1, 2, 3, 4, 5, 6]], dtype=theano.config.floatX)
    toy_data2 = numpy.array([[[[1, 1, 1]], [[3, 4, 5]]]],
                            dtype=theano.config.floatX)
    label2 = numpy.array([[6, 5, 4, 3, 2, 1]], dtype=theano.config.floatX)

    x = T.tensor4('x', dtype=theano.config.floatX)
    y = T.matrix('y', dtype=theano.config.floatX)
    newX = T.tensor4(dtype=x.dtype)
    newY = T.matrix(dtype=y.dtype)

    rng = numpy.random.RandomState(123)
    test = Test(
        numpy_rng=rng,
        input=x,
        output=y,
        n_group=2,
        n_input=3,
        n_output=6
    )

    cost, updates = test.get_cost_updates()

    # Compiles and runs fine WITHOUT the parameter updates...
    train_test = theano.function(
        [newX, newY],
        cost,
        # updates=updates,
        givens={
            x: newX,
            y: newY
        }
    )

    # ...but fails to compile WITH them (the bug under investigation).
    train_test_bug = theano.function(
        [newX, newY],
        cost,
        updates=updates,
        givens={
            x: newX,
            y: newY
        }
    )

    # Single-argument print(...) is valid in both Python 2 and Python 3.
    print(train_test(toy_data, label))
    print(train_test(toy_data2, label2))

    # code with bug
    # print(train_test_bug(toy_data, label))
    # print(train_test_bug(toy_data2, label2))

Aucun commentaire:

Enregistrer un commentaire