This page collects typical usage examples of the Python function util.init_weight. If you are wondering what exactly init_weight does, how to call it, or what real-world uses look like, the curated code examples below may help.
A total of 20 code examples of the init_weight function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
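The init_weight helper itself is not reproduced on this page. In the machine_learning_examples projects below it is called as init_weight(Mi, Mo) and returns a single weight matrix, while the lstm project (Examples 2 and 5) calls util.init_weight(rng, n_in, n_out) and expects a (weight, bias) pair back. The following is only a plausible sketch of both variants for orientation, not the projects' actual code; the name init_weight_pair and the exact initialization scales are assumptions.

import numpy as np

def init_weight(Mi, Mo):
    # single-matrix variant: scaled Gaussian so activations stay roughly unit-variance
    return np.random.randn(Mi, Mo) / np.sqrt(Mi + Mo)

def init_weight_pair(rng, n_in, n_out):
    # (weight, bias) variant taking an explicit numpy RandomState
    bound = np.sqrt(6.0 / (n_in + n_out))
    w = rng.uniform(low=-bound, high=bound, size=(n_in, n_out)).astype(np.float32)
    b = np.zeros(n_out, dtype=np.float32)
    return w, b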
Example 1: __init__
def __init__(self, V, D, K, activation):
self.D = D
self.f = activation
# word embedding
We = init_weight(V, D)
# linear terms
W1 = init_weight(D, D)
W2 = init_weight(D, D)
# bias
bh = np.zeros(D)
# output layer
Wo = init_weight(D, K)
bo = np.zeros(K)
# make them tensorflow variables
self.We = tf.Variable(We.astype(np.float32))
self.W1 = tf.Variable(W1.astype(np.float32))
self.W2 = tf.Variable(W2.astype(np.float32))
self.bh = tf.Variable(bh.astype(np.float32))
self.Wo = tf.Variable(Wo.astype(np.float32))
self.bo = tf.Variable(bo.astype(np.float32))
self.params = [self.We, self.W1, self.W2, self.Wo]
Developer ID: renjinghai, Project: machine_learning_examples, Lines of code: 26, Source file: recursive_tensorflow.py
Example 2: setUp
def setUp(self):
rng = np.random.RandomState(0)
init_w_e, init_b_e = util.init_weight(rng, self.n_in, self.n_hidden)
init_w_d, init_b_d = util.init_weight(rng, self.n_hidden, self.n_in)
self.w_e.set_value(init_w_e, borrow=True)
self.b_e.set_value(init_b_e, borrow=True)
self.w_d.set_value(init_w_d, borrow=True)
self.b_d.set_value(init_b_d, borrow=True)
Developer ID: yuichiro-s, Project: lstm, Lines of code: 8, Source file: test_optim.py
Example 3: fit
def fit(self, X, epochs=500, show_fig=False):
N = len(X)
D = self.D
M = self.M
V = self.V
# initial weights
We = init_weight(V, D).astype(np.float32)
Wx = init_weight(D, M).astype(np.float32)
Wh = init_weight(M, M).astype(np.float32)
bh = np.zeros(M).astype(np.float32)
h0 = np.zeros(M).astype(np.float32)
Wo = init_weight(M, V).astype(np.float32)
bo = np.zeros(V).astype(np.float32)
# build tensorflow functions
self.build(We, Wx, Wh, bh, h0, Wo, bo)
# sentence input:
# [START, w1, w2, ..., wn]
# sentence target:
# [w1, w2, w3, ..., END]
costs = []
n_total = sum((len(sentence)+1) for sentence in X)
for i in range(epochs):
X = shuffle(X)
n_correct = 0
cost = 0
for j in range(N):
# problem! many words --> END token are overrepresented
# result: generated lines will be very short
# we will try to fix in a later iteration
# BAD! magic numbers 0 and 1...
input_sequence = [0] + X[j]
output_sequence = X[j] + [1]
# we set 0 to start and 1 to end
_, c, p = self.session.run(
(self.train_op, self.cost, self.predict_op),
feed_dict={self.tfX: input_sequence, self.tfY: output_sequence}
)
# print "p:", p
cost += c
# print "j:", j, "c:", c/len(X[j]+1)
for pj, xj in zip(p, output_sequence):
if pj == xj:
n_correct += 1
print("i:", i, "cost:", cost, "correct rate:", (float(n_correct)/n_total))
costs.append(cost)
if show_fig:
plt.plot(costs)
plt.show()
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 54, Source file: srn_language_tf.py
Example 4: __init__
def __init__(self, M1, M2, an_id):
self.id = an_id
self.M1 = M1
self.M2 = M2
W = init_weight(M1, M2)
b = np.zeros(M2)
self.W = theano.shared(W, 'W_%s' % self.id)
self.b = theano.shared(b, 'b_%s' % self.id)
self.params = [self.W, self.b]
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 9, Source file: mlp_parity.py
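The HiddenLayer class above only creates and stores its parameters; the forward pass lives elsewhere in mlp_parity.py and is not shown on this page. A minimal companion method, assuming a ReLU hidden activation, might look like this:

def forward(self, X):
    # affine transform followed by the nonlinearity (ReLU assumed here)
    return T.nnet.relu(X.dot(self.W) + self.b)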
Example 5: get_param
def get_param(name, n_in, n_out, params, rng):
w_name = "w_" + name
b_name = "b_" + name
if params is not None and w_name in params:
assert b_name in params
init_w = params[w_name]
init_b = params[b_name]
else:
init_w, init_b = util.init_weight(rng, n_in, n_out)
w = theano.shared(name=w_name, borrow=True, value=init_w)
b = theano.shared(name=b_name, borrow=True, value=init_b)
return w, b
Developer ID: yuichiro-s, Project: lstm, Lines of code: 12, Source file: lstm.py
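A hypothetical call site for get_param, where saved_params is either None (forcing a fresh initialization through util.init_weight) or a dict of previously saved arrays; the layer name 'encoder' and the sizes here are made up for illustration:

rng = np.random.RandomState(0)
# saved_params could be a dict like {"w_encoder": ..., "b_encoder": ...} loaded from disk
w_enc, b_enc = get_param('encoder', n_in=300, n_out=128, params=saved_params, rng=rng)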
Example 6: __init__
def __init__(self, Mi, Mo, activation):
self.Mi = Mi
self.Mo = Mo
self.f = activation
# numpy init
Wxr = init_weight(Mi, Mo)
Whr = init_weight(Mo, Mo)
br = np.zeros(Mo)
Wxz = init_weight(Mi, Mo)
Whz = init_weight(Mo, Mo)
bz = np.zeros(Mo)
Wxh = init_weight(Mi, Mo)
Whh = init_weight(Mo, Mo)
bh = np.zeros(Mo)
h0 = np.zeros(Mo)
# theano vars
self.Wxr = theano.shared(Wxr)
self.Whr = theano.shared(Whr)
self.br = theano.shared(br)
self.Wxz = theano.shared(Wxz)
self.Whz = theano.shared(Whz)
self.bz = theano.shared(bz)
self.Wxh = theano.shared(Wxh)
self.Whh = theano.shared(Whh)
self.bh = theano.shared(bh)
self.h0 = theano.shared(h0)
self.params = [self.Wxr, self.Whr, self.br, self.Wxz, self.Whz, self.bz, self.Wxh, self.Whh, self.bh, self.h0]
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 29, Source file: batch_gru.py
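Later examples call output() on this GRU unit (Examples 10, 13 and 15), but that method is not reproduced here. Below is only a sketch of a matching forward pass, assuming the standard reset/update-gate equations; the batched variant used in Example 10 additionally takes a start_points vector, which is omitted in this sketch.

def recurrence(self, x_t, h_t1):
    r = T.nnet.sigmoid(x_t.dot(self.Wxr) + h_t1.dot(self.Whr) + self.br)   # reset gate
    z = T.nnet.sigmoid(x_t.dot(self.Wxz) + h_t1.dot(self.Whz) + self.bz)   # update gate
    hhat = self.f(x_t.dot(self.Wxh) + (r * h_t1).dot(self.Whh) + self.bh)  # candidate state
    return (1 - z) * h_t1 + z * hhat

def output(self, x):
    # x has shape (T, Mi); scan over time and return all hidden states, shape (T, Mo)
    h, _ = theano.scan(
        fn=self.recurrence,
        sequences=x,
        outputs_info=[self.h0],
        n_steps=x.shape[0],
    )
    return h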
Example 7: fit
def fit(self, X, Y, batch_sz=20, learning_rate=10e-1, mu=0.99, activation=tf.nn.sigmoid, epochs=100, show_fig=False):
N, T, D = X.shape # X is of size N x T(n) x D
K = len(set(Y.flatten()))
M = self.M
self.f = activation
# initial weights
# note: Wx, Wh, bh are all part of the RNN unit and will be created
# by BasicRNNCell
Wo = init_weight(M, K).astype(np.float32)
bo = np.zeros(K, dtype=np.float32)
# make them tf variables
self.Wo = tf.Variable(Wo)
self.bo = tf.Variable(bo)
# tf Graph input
tfX = tf.placeholder(tf.float32, shape=(batch_sz, T, D), name='inputs')
tfY = tf.placeholder(tf.int64, shape=(batch_sz, T), name='targets')
# turn tfX into a sequence, e.g. T tensors all of size (batch_sz, D)
sequenceX = x2sequence(tfX, T, D, batch_sz)
# create the simple rnn unit
rnn_unit = BasicRNNCell(num_units=self.M, activation=self.f)
# Get rnn cell output
# outputs, states = rnn_module.rnn(rnn_unit, sequenceX, dtype=tf.float32)
outputs, states = get_rnn_output(rnn_unit, sequenceX, dtype=tf.float32)
# outputs are now of size (T, batch_sz, M)
# so make it (batch_sz, T, M)
outputs = tf.transpose(outputs, (1, 0, 2))
outputs = tf.reshape(outputs, (T*batch_sz, M))
# Linear activation, using rnn inner loop last output
logits = tf.matmul(outputs, self.Wo) + self.bo
predict_op = tf.argmax(logits, 1)
targets = tf.reshape(tfY, (T*batch_sz,))
cost_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, targets))
train_op = tf.train.MomentumOptimizer(learning_rate, momentum=mu).minimize(cost_op)
costs = []
n_batches = N / batch_sz
init = tf.initialize_all_variables()
with tf.Session() as session:
session.run(init)
for i in xrange(epochs):
X, Y = shuffle(X, Y)
n_correct = 0
cost = 0
for j in xrange(n_batches):
Xbatch = X[j*batch_sz:(j+1)*batch_sz]
Ybatch = Y[j*batch_sz:(j+1)*batch_sz]
_, c, p = session.run([train_op, cost_op, predict_op], feed_dict={tfX: Xbatch, tfY: Ybatch})
cost += c
for b in xrange(batch_sz):
idx = (b + 1)*T - 1
n_correct += (p[idx] == Ybatch[b][-1])
if i % 10 == 0:
print "i:", i, "cost:", cost, "classification rate:", (float(n_correct)/N)
if n_correct == N:
print "i:", i, "cost:", cost, "classification rate:", (float(n_correct)/N)
break
costs.append(cost)
if show_fig:
plt.plot(costs)
plt.show()
Developer ID: YongHoJung, Project: NLP_DeepLearning, Lines of code: 72, Source file: tf_parity.py
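The x2sequence helper used above is not defined on this page. A plausible version for this example's TensorFlow 1.x-style graph code, written as an assumption (very old releases used the reversed argument order tf.split(0, T, x)):

def x2sequence(x, T, D, batch_sz):
    # (batch_sz, T, D) -> a list of T tensors, each of shape (batch_sz, D)
    x = tf.transpose(x, (1, 0, 2))        # (T, batch_sz, D)
    x = tf.reshape(x, (T * batch_sz, D))  # stack time-major
    x = tf.split(x, T)                    # split along axis 0 into T pieces
    return x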
Example 8: fit
def fit(self, X, Y, batch_sz=20, learning_rate=1.0, mu=0.99, reg=1.0, activation=T.tanh, epochs=100, show_fig=False):
D = X[0].shape[1] # X is of size N x T(n) x D
K = len(set(Y.flatten()))
N = len(Y)
M = self.M
self.f = activation
# initial weights
Wx = init_weight(D, M)
Wh = init_weight(M, M)
bh = np.zeros(M)
h0 = np.zeros(M)
Wo = init_weight(M, K)
bo = np.zeros(K)
# make them theano shared
self.Wx = theano.shared(Wx)
self.Wh = theano.shared(Wh)
self.bh = theano.shared(bh)
self.h0 = theano.shared(h0)
self.Wo = theano.shared(Wo)
self.bo = theano.shared(bo)
self.params = [self.Wx, self.Wh, self.bh, self.h0, self.Wo, self.bo]
thX = T.fmatrix('X') # will represent multiple batches concatenated
thY = T.ivector('Y')
thStartPoints = T.ivector('start_points')
XW = thX.dot(self.Wx)
# startPoints will contain 1 where a sequence starts and 0 otherwise
# Ex. if I have 3 sequences: [[1,2,3], [4,5], [6,7,8]]
# Then I will concatenate these into one X: [1,2,3,4,5,6,7,8]
# And startPoints will be [1,0,0,1,0,1,0,0]
# One possible solution: loop through index
# def recurrence(t, h_t1, XW, h0, startPoints):
# # returns h(t)
# # if at a boundary, state should be h0
# h_t = T.switch(
# T.eq(startPoints[t], 1),
# self.f(XW[t] + h0.dot(self.Wh) + self.bh),
# self.f(XW[t] + h_t1.dot(self.Wh) + self.bh)
# )
# return h_t
# h, _ = theano.scan(
# fn=recurrence,
# outputs_info=[self.h0],
# sequences=T.arange(XW.shape[0]),
# non_sequences=[XW, self.h0, thStartPoints],
# n_steps=XW.shape[0],
# )
# other solution - loop through all sequences simultaneously
def recurrence(xw_t, is_start, h_t1, h0):
# if at a boundary, state should be h0
h_t = T.switch(
T.eq(is_start, 1),
self.f(xw_t + h0.dot(self.Wh) + self.bh),
self.f(xw_t + h_t1.dot(self.Wh) + self.bh)
)
return h_t
h, _ = theano.scan(
fn=recurrence,
outputs_info=[self.h0],
sequences=[XW, thStartPoints],
non_sequences=[self.h0],
n_steps=XW.shape[0],
)
# h is of shape (T*batch_sz, M)
py_x = T.nnet.softmax(h.dot(self.Wo) + self.bo)
prediction = T.argmax(py_x, axis=1)
cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
updates = [
(p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
]
# self.predict_op = theano.function(inputs=[thX, thStartPoints], outputs=prediction)
self.train_op = theano.function(
inputs=[thX, thY, thStartPoints],
outputs=[cost, prediction, py_x],
updates=updates
)
costs = []
n_batches = N // batch_sz
sequenceLength = X.shape[1]
# if each sequence was of variable length, we would need to
# initialize this inside the loop for every new batch
#......... part of this example's code is omitted here .........
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 101, Source file: batch_parity.py
Example 9: fit
def fit(self, X, Y, learning_rate=10e-1, mu=0.99, reg=1.0, activation=T.tanh, epochs=500, show_fig=False):
M = self.M
V = self.V
K = len(set(Y))
print "V:", V
X, Y = shuffle(X, Y)
Nvalid = 10
Xvalid, Yvalid = X[-Nvalid:], Y[-Nvalid:]
X, Y = X[:-Nvalid], Y[:-Nvalid]
N = len(X)
# initial weights
Wx = init_weight(V, M)
Wh = init_weight(M, M)
bh = np.zeros(M)
h0 = np.zeros(M)
Wo = init_weight(M, K)
bo = np.zeros(K)
thX, thY, py_x, prediction = self.set(Wx, Wh, bh, h0, Wo, bo, activation)
cost = -T.mean(T.log(py_x[thY]))
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
lr = T.scalar('learning_rate')
updates = [
(p, p + mu*dp - lr*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - lr*g) for dp, g in zip(dparams, grads)
]
self.train_op = theano.function(
inputs=[thX, thY, lr],
outputs=[cost, prediction],
updates=updates,
allow_input_downcast=True,
)
costs = []
for i in xrange(epochs):
X, Y = shuffle(X, Y)
n_correct = 0
cost = 0
for j in xrange(N):
c, p = self.train_op(X[j], Y[j], learning_rate)
cost += c
if p == Y[j]:
n_correct += 1
learning_rate *= 0.9999
n_correct_valid = 0
for j in xrange(Nvalid):
p = self.predict_op(Xvalid[j])
if p == Yvalid[j]:
n_correct_valid += 1
print "i:", i, "cost:", cost, "correct rate:", (float(n_correct)/N),
print "validation correct rate:", (float(n_correct_valid)/Nvalid)
costs.append(cost)
if show_fig:
plt.plot(costs)
plt.show()
Developer ID: ashishlal, Project: machine_learning_examples, Lines of code: 65, Source file: poetry_classifier.py
Example 10: fit
def fit(self, X, learning_rate=10e-5, mu=0.99, epochs=10, batch_sz=100, show_fig=True, activation=T.nnet.relu, RecurrentUnit=GRU):
D = self.D
V = self.V
N = len(X)
We = init_weight(V, D)
self.hidden_layers = []
Mi = D
for Mo in self.hidden_layer_sizes:
ru = RecurrentUnit(Mi, Mo, activation)
self.hidden_layers.append(ru)
Mi = Mo
Wo = init_weight(Mi, V)
bo = np.zeros(V)
self.We = theano.shared(We)
self.Wo = theano.shared(Wo)
self.bo = theano.shared(bo)
self.params = [self.We, self.Wo, self.bo]
for ru in self.hidden_layers:
self.params += ru.params
thX = T.ivector('X') # will represent multiple batches concatenated
thY = T.ivector('Y') # represents next word
thStartPoints = T.ivector('start_points')
Z = self.We[thX]
for ru in self.hidden_layers:
Z = ru.output(Z, thStartPoints)
py_x = T.nnet.softmax(Z.dot(self.Wo) + self.bo)
prediction = T.argmax(py_x, axis=1)
cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
updates = [
(p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
]
# self.predict_op = theano.function(inputs=[thX, thStartPoints], outputs=prediction)
self.train_op = theano.function(
inputs=[thX, thY, thStartPoints],
outputs=[cost, prediction],
updates=updates
)
costs = []
n_batches = N / batch_sz
for i in xrange(epochs):
t0 = datetime.now()
X = shuffle(X)
n_correct = 0
n_total = 0
cost = 0
for j in xrange(n_batches):
# construct input sequence and output sequence as
# concatenatation of multiple input sequences and output sequences
# input X should be a list of 2-D arrays or one 3-D array
# N x T(n) x D - batch size x sequence length x num features
# sequence length can be variable
sequenceLengths = []
input_sequence = []
output_sequence = []
for k in xrange(j*batch_sz, (j+1)*batch_sz):
# don't always add the end token
if np.random.random() < 0.01 or len(X[k]) <= 1:
input_sequence += [0] + X[k]
output_sequence += X[k] + [1]
sequenceLengths.append(len(X[k]) + 1)
else:
input_sequence += [0] + X[k][:-1]
output_sequence += X[k]
sequenceLengths.append(len(X[k]))
n_total += len(output_sequence)
startPoints = np.zeros(len(output_sequence), dtype=np.int32)
last = 0
for length in sequenceLengths:
startPoints[last] = 1
last += length
c, p = self.train_op(input_sequence, output_sequence, startPoints)
cost += c
for pj, xj in zip(p, output_sequence):
if pj == xj:
n_correct += 1
if j % 1 == 0:
sys.stdout.write("j/n_batches: %d/%d correct rate so far: %f\r" % (j, n_batches, float(n_correct)/n_total))
sys.stdout.flush()
print "i:", i, "cost:", cost, "correct rate:", (float(n_correct)/n_total), "time for epoch:", (datetime.now() - t0)
costs.append(cost)
if show_fig:
plt.plot(costs)
plt.show()
Developer ID: vivianduan, Project: machine_learning_examples, Lines of code: 100, Source file: batch_wiki.py
Example 11: fit
def fit(self, trees, learning_rate=3*10e-4, mu=0.99, reg=10e-5, epochs=15, activation=T.nnet.relu, train_inner_nodes=False):
D = self.D
V = self.V
K = self.K
self.f = activation
N = len(trees)
We = init_weight(V, D)
Wh = np.random.randn(2, D, D) / np.sqrt(2 + D + D)
bh = np.zeros(D)
Wo = init_weight(D, K)
bo = np.zeros(K)
self.We = theano.shared(We)
self.Wh = theano.shared(Wh)
self.bh = theano.shared(bh)
self.Wo = theano.shared(Wo)
self.bo = theano.shared(bo)
self.params = [self.We, self.Wh, self.bh, self.Wo, self.bo]
words = T.ivector('words')
parents = T.ivector('parents')
relations = T.ivector('relations')
labels = T.ivector('labels')
def recurrence(n, hiddens, words, parents, relations):
w = words[n]
# any non-word will have index -1
# if T.ge(w, 0):
# hiddens = T.set_subtensor(hiddens[n], self.We[w])
# else:
# hiddens = T.set_subtensor(hiddens[n], self.f(hiddens[n] + self.bh))
hiddens = T.switch(
T.ge(w, 0),
T.set_subtensor(hiddens[n], self.We[w]),
T.set_subtensor(hiddens[n], self.f(hiddens[n] + self.bh))
)
r = relations[n] # 0 = is_left, 1 = is_right
p = parents[n] # parent idx
# if T.ge(p, 0):
# # root will have parent -1
# hiddens = T.set_subtensor(hiddens[p], hiddens[p] + hiddens[n].dot(self.Wh[r]))
hiddens = T.switch(
T.ge(p, 0),
T.set_subtensor(hiddens[p], hiddens[p] + hiddens[n].dot(self.Wh[r])),
hiddens
)
return hiddens
hiddens = T.zeros((words.shape[0], D))
h, _ = theano.scan(
fn=recurrence,
outputs_info=[hiddens],
n_steps=words.shape[0],
sequences=T.arange(words.shape[0]),
non_sequences=[words, parents, relations],
)
# shape of h that is returned by scan is TxTxD
# because hiddens is TxD, and it does the recurrence T times
# technically this stores T times too much data
py_x = T.nnet.softmax(h[-1].dot(self.Wo) + self.bo)
prediction = T.argmax(py_x, axis=1)
rcost = reg*T.mean([(p*p).sum() for p in self.params])
if train_inner_nodes:
# won't work for binary classification
cost = -T.mean(T.log(py_x[T.arange(labels.shape[0]), labels])) + rcost
else:
# print "K is:", K
# premean = T.log(py_x[-1])
# target = T.zeros(K)
# target = T.set_subtensor(target[labels[-1]], 1)
# cost = -T.mean(target * premean)
cost = -T.mean(T.log(py_x[-1, labels[-1]])) + rcost
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
updates = [
(p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
]
self.cost_predict_op = theano.function(
inputs=[words, parents, relations, labels],
outputs=[cost, prediction],
allow_input_downcast=True,
)
self.train_op = theano.function(
inputs=[words, parents, relations, labels],
outputs=[h, cost, prediction],
updates=updates
)
#......... part of this example's code is omitted here .........
Developer ID: renjinghai, Project: machine_learning_examples, Lines of code: 101, Source file: recursive_theano.py
Example 12: fit
def fit(self, X, Y, learning_rate=10e-1, mu=0.99, reg=1.0, activation=T.tanh, epochs=100, show_fig=False):
D = X[0].shape[1]
K = len(set(Y.flatten()))
N = len(Y)
M = self.M
self.f = activation
# initialize weights
Wx = init_weight(D, M)
Wh = init_weight(M, M)
bh = np.zeros(M)
h0 = np.zeros(M)
Wo = init_weight(M, K)
bo = np.zeros(K)
self.Wx = theano.shared(Wx)
self.Wh = theano.shared(Wh)
self.bh = theano.shared(bh)
self.h0 = theano.shared(h0)
self.Wo = theano.shared(Wo)
self.bo = theano.shared(bo)
self.params = [self.Wx, self.Wh, self.bh, self.h0, self.Wo, self.bo]
thX = T.fmatrix('X')
thY = T.ivector('Y')
def recurrence(x_t, h_t1):
# returns h(t), y(t)
h_t = self.f(x_t.dot(self.Wx) + h_t1.dot(self.Wh) + self.bh)
y_t = T.nnet.softmax(h_t.dot(self.Wo) + self.bo)
return h_t, y_t
[h, y], _ = theano.scan(
fn=recurrence,
outputs_info=[self.h0, None],
sequences=thX,
n_steps=thX.shape[0],
)
py_x = y[:, 0, :]
prediction = T.argmax(py_x, axis=1)
cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
updates = [
(p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
]
self.predict_op = theano.function(inputs=[thX], outputs=prediction)
self.train_op = theano.function(
inputs=[thX, thY],
outputs=[cost, prediction, y],
updates=updates,
)
costs = []
for i in xrange(epochs):
X, Y = shuffle(X, Y)
n_correct = 0
cost = 0
for j in xrange(N):
c, p, rout = self.train_op(X[j], Y[j])
cost += c
if p[-1] == Y[j,-1]:
n_correct += 1
print "shape y:", rout.shape
print "i:", i, "cost:", cost, "classification rate:", (float(n_correct) / N)
costs.append(cost)
if show_fig:
plt.plot(costs)
plt.show()
Developer ID: ashishlal, Project: machine_learning_examples, Lines of code: 76, Source file: srn_parity.py
Example 13: fit
def fit(self, X, Y, learning_rate=1e-4, mu=0.99, epochs=30, show_fig=True, activation=T.nnet.relu, RecurrentUnit=GRU, normalize=False):
D = self.D
V = self.V
N = len(X)
We = init_weight(V, D)
self.hidden_layers = []
Mi = D
for Mo in self.hidden_layer_sizes:
ru = RecurrentUnit(Mi, Mo, activation)
self.hidden_layers.append(ru)
Mi = Mo
Wo = init_weight(Mi, self.K)
bo = np.zeros(self.K)
self.We = theano.shared(We)
self.Wo = theano.shared(Wo)
self.bo = theano.shared(bo)
self.params = [self.Wo, self.bo]
for ru in self.hidden_layers:
self.params += ru.params
thX = T.ivector('X')
thY = T.ivector('Y')
Z = self.We[thX]
for ru in self.hidden_layers:
Z = ru.output(Z)
py_x = T.nnet.softmax(Z.dot(self.Wo) + self.bo)
testf = theano.function(
inputs=[thX],
outputs=py_x,
)
testout = testf(X[0])
print("py_x.shape:", testout.shape)
prediction = T.argmax(py_x, axis=1)
cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
dWe = theano.shared(self.We.get_value()*0)
gWe = T.grad(cost, self.We)
dWe_update = mu*dWe - learning_rate*gWe
We_update = self.We + dWe_update
if normalize:
We_update /= We_update.norm(2)
updates = [
(p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
] + [
(self.We, We_update), (dWe, dWe_update)
]
self.cost_predict_op = theano.function(
inputs=[thX, thY],
outputs=[cost, prediction],
allow_input_downcast=True,
)
self.train_op = theano.function(
inputs=[thX, thY],
outputs=[cost, prediction],
updates=updates
)
costs = []
sequence_indexes = range(N)
n_total = sum(len(y) for y in Y)
for i in range(epochs):
t0 = datetime.now()
sequence_indexes = shuffle(sequence_indexes)
n_correct = 0
cost = 0
it = 0
for j in sequence_indexes:
c, p = self.train_op(X[j], Y[j])
cost += c
n_correct += np.sum(p == Y[j])
it += 1
if it % 200 == 0:
sys.stdout.write(
"j/N: %d/%d correct rate so far: %f, cost so far: %f\r" %
(it, N, float(n_correct)/n_total, cost)
)
sys.stdout.flush()
print(
"i:", i, "cost:", cost,
"correct rate:", (float(n_correct)/n_total),
"time for epoch:", (datetime.now() - t0)
)
costs.append(cost)
if show_fig:
plt.plot(costs)
#......... part of this example's code is omitted here .........
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 101, Source file: pos_rnn.py
Example 14: __init__
def __init__(self, Mi, Mo, activation):
self.Mi = Mi
self.Mo = Mo
self.f = activation
# numpy init
Wxi = init_weight(Mi, Mo)
Whi = init_weight(Mo, Mo)
Wci = init_weight(Mo, Mo)
bi = np.zeros(Mo)
Wxf = init_weight(Mi, Mo)
Whf = init_weight(Mo, Mo)
Wcf = init_weight(Mo, Mo)
bf = np.zeros(Mo)
Wxc = init_weight(Mi, Mo)
Whc = init_weight(Mo, Mo)
bc = np.zeros(Mo)
Wxo = init_weight(Mi, Mo)
Who = init_weight(Mo, Mo)
Wco = init_weight(Mo, Mo)
bo = np.zeros(Mo)
c0 = np.zeros(Mo)
h0 = np.zeros(Mo)
# theano vars
self.Wxi = theano.shared(Wxi)
self.Whi = theano.shared(Whi)
self.Wci = theano.shared(Wci)
self.bi = theano.shared(bi)
self.Wxf = theano.shared(Wxf)
self.Whf = theano.shared(Whf)
self.Wcf = theano.shared(Wcf)
self.bf = theano.shared(bf)
self.Wxc = theano.shared(Wxc)
self.Whc = theano.shared(Whc)
self.bc = theano.shared(bc)
self.Wxo = theano.shared(Wxo)
self.Who = theano.shared(Who)
self.Wco = theano.shared(Wco)
self.bo = theano.shared(bo)
self.c0 = theano.shared(c0)
self.h0 = theano.shared(h0)
self.params = [
self.Wxi,
self.Whi,
self.Wci,
self.bi,
self.Wxf,
self.Whf,
self.Wcf,
self.bf,
self.Wxc,
self.Whc,
self.bc,
self.Wxo,
self.Who,
self.Wco,
self.bo,
self.c0,
self.h0,
]
Developer ID: ShuvenduBikash, Project: machine_learning_examples, Lines of code: 61, Source file: lstm_wiki.py
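The peephole connections above (Wci, Wcf, Wco) imply a peephole LSTM. The rest of lstm_wiki.py is not shown, so the following recurrence is only a sketch consistent with that parameterization, not the file's actual code:

def recurrence(self, x_t, h_t1, c_t1):
    i_t = T.nnet.sigmoid(x_t.dot(self.Wxi) + h_t1.dot(self.Whi) + c_t1.dot(self.Wci) + self.bi)  # input gate
    f_t = T.nnet.sigmoid(x_t.dot(self.Wxf) + h_t1.dot(self.Whf) + c_t1.dot(self.Wcf) + self.bf)  # forget gate
    c_t = f_t * c_t1 + i_t * T.tanh(x_t.dot(self.Wxc) + h_t1.dot(self.Whc) + self.bc)            # new cell state
    o_t = T.nnet.sigmoid(x_t.dot(self.Wxo) + h_t1.dot(self.Who) + c_t.dot(self.Wco) + self.bo)   # output gate
    return o_t * T.tanh(c_t), c_t

def output(self, x):
    # scan over time, carrying both hidden and cell state; return the hidden states
    [h, c], _ = theano.scan(
        fn=self.recurrence,
        sequences=x,
        outputs_info=[self.h0, self.c0],
        n_steps=x.shape[0],
    )
    return h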
Example 15: fit
def fit(self, X, learning_rate=1e-5, mu=0.99, epochs=10, show_fig=True, activation=T.nnet.relu, RecurrentUnit=GRU, normalize=True):
D = self.D
V = self.V
N = len(X)
We = init_weight(V, D)
self.hidden_layers = []
Mi = D
for Mo in self.hidden_layer_sizes:
ru = RecurrentUnit(Mi, Mo, activation)
self.hidden_layers.append(ru)
Mi = Mo
Wo = init_weight(Mi, V)
bo = np.zeros(V)
self.We = theano.shared(We)
self.Wo = theano.shared(Wo)
self.bo = theano.shared(bo)
self.params = [self.Wo, self.bo]
for ru in self.hidden_layers:
self.params += ru.params
thX = T.ivector('X')
thY = T.ivector('Y')
Z = self.We[thX]
for ru in self.hidden_layers:
Z = ru.output(Z)
py_x = T.nnet.softmax(Z.dot(self.Wo) + self.bo)
prediction = T.argmax(py_x, axis=1)
# let's return py_x too so we can draw a sample instead
self.predict_op = theano.function(
inputs=[thX],
outputs=[py_x, prediction],
allow_input_downcast=True,
)
cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
dWe = theano.shared(self.We.get_value()*0)
gWe = T.grad(cost, self.We)
dWe_update = mu*dWe - learning_rate*gWe
We_update = self.We + dWe_update
if normalize:
We_update /= We_update.norm(2)
updates = [
(p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
] + [
(self.We, We_update), (dWe, dWe_update)
]
self.train_op = theano.function(
inputs=[thX, thY],
outputs=[cost, prediction],
updates=updates
)
costs = []
for i in range(epochs):
t0 = datetime.now()
X = shuffle(X)
n_correct = 0
n_total = 0
cost = 0
for j in range(N):
if np.random.random() < 0.01 or len(X[j]) <= 1:
input_sequence = [0] + X[j]
output_sequence = X[j] + [1]
else:
input_sequence = [0] + X[j][:-1]
output_sequence = X[j]
n_total += len(output_sequence)
# test:
try:
# we set 0 to start and 1 to end
c, p = self.train_op(input_sequence, output_sequence)
except Exception as e:
PYX, pred = self.predict_op(input_sequence)
print("input_sequence len:", len(input_sequence))
print("PYX.shape:",PYX.shape)
print("pred.shape:", pred.shape)
raise e
# print "p:", p
cost += c
# print "j:", j, "c:", c/len(X[j]+1)
for pj, xj in zip(p, output_sequence):
if pj == xj:
n_correct += 1
if j % 200 == 0:
sys.stdout.write("j/N: %d/%d correct rate so far: %f\r" % (j, N, float(n_correct)/n_total))
sys.stdout.flush()
#......... part of this example's code is omitted here .........
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 101, Source file: wiki.py
Example 16: fit
def fit(self, trees, test_trees, reg=1e-3, epochs=8, train_inner_nodes=False):
D = self.D
V = self.V
K = self.K
N = len(trees)
We = init_weight(V, D)
W11 = np.random.randn(D, D, D) / np.sqrt(3*D)
W22 = np.random.randn(D, D, D) / np.sqrt(3*D)
W12 = np.random.randn(D, D, D) / np.sqrt(3*D)
W1 = init_weight(D, D)
W2 = init_weight(D, D)
bh = np.zeros(D)
Wo = init_weight(D, K)
bo = np.zeros(K)
self.We = tf.Variable(We.astype(np.float32))
self.W11 = tf.Variable(W11.astype(np.float32))
self.W22 = tf.Variable(W22.astype(np.float32))
self.W12 = tf.Variable(W12.astype(np.float32))
self.W1 = tf.Variable(W1.astype(np.float32))
self.W2 = tf.Variable(W2.astype(np.float32))
self.bh = tf.Variable(bh.astype(np.float32))
self.Wo = tf.Variable(Wo.astype(np.float32))
self.bo = tf.Variable(bo.astype(np.float32))
self.weights = [self.We, self.W11, self.W22, self.W12, self.W1, self.W2, self.Wo]
words = tf.placeholder(tf.int32, shape=(None,), name='words')
left_children = tf.placeholder(tf.int32, shape=(None,), name='left_children')
right_children = tf.placeholder(tf.int32, shape=(None,), name='right_children')
labels = tf.placeholder(tf.int32, shape=(None,), name='labels')
# save for later
self.words = words
self.left = left_children
self.right = right_children
self.labels = labels
def dot1(a, B):
return tf.tensordot(a, B, axes=[[0], [1]])
def dot2(B, a):
return tf.tensordot(B, a, axes=[[1], [0]])
def recursive_net_transform(hiddens, n):
h_left = hiddens.read(left_children[n])
h_right = hiddens.read(right_children[n])
return self.f(
dot1(h_left, dot2(self.W11, h_left)) +
dot1(h_right, dot2(self.W22, h_right)) +
dot1(h_left, dot2(self.W12, h_right)) +
dot1(h_left, self.W1) +
dot1(h_right, self.W2) +
self.bh
)
def recurrence(hiddens, n):
w = words[n]
# any non-word will have index -1
h_n = tf.cond(
w >= 0,
lambda: tf.nn.embedding_lookup(self.We, w),
lambda: recursive_net_transform(hiddens, n)
)
hiddens = hiddens.write(n, h_n)
n = tf.add(n, 1)
return hiddens, n
def condition(hiddens, n):
# loop should continue while n < len(words)
return tf.less(n, tf.shape(words)[0])
hiddens = tf.TensorArray(
tf.float32,
size=0,
dynamic_size=True,
clear_after_read=False,
infer_shape=False
)
hiddens, _ = tf.while_loop(
condition,
recurrence,
[hiddens, tf.constant(0)],
parallel_iterations=1
)
h = hiddens.stack()
logits = tf.matmul(h, self.Wo) + self.bo
prediction_op = tf.argmax(logits, axis=1)
self.prediction_op = prediction_op
rcost = reg*sum(tf.nn.l2_loss(p) for p in self.weights)
if train_inner_nodes:
# filter out -1s
#......... part of this example's code is omitted here .........
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 101, Source file: rntn_tensorflow_rnn.py
Example 17: fit
def fit(self, X, Y, learning_rate=1.0, mu=0.99, reg=1.0, activation=tf.tanh, epochs=100, show_fig=False):
N, T, D = X.shape
K = len(set(Y.flatten()))
M = self.M
self.f = activation
# initial weights
Wx = init_weight(D, M).astype(np.float32)
Wh = init_weight(M, M).astype(np.float32)
bh = np.zeros(M, dtype=np.float32)
h0 = np.zeros(M, dtype=np.float32)
Wo = init_weight(M, K).astype(np.float32)
bo = np.zeros(K, dtype=np.float32)
# make them tf Variables
self.Wx = tf.Variable(Wx)
self.Wh = tf.Variable(Wh)
self.bh = tf.Variable(bh)
self.h0 = tf.Variable(h0)
self.Wo = tf.Variable(Wo)
self.bo = tf.Variable(bo)
tfX = tf.placeholder(tf.float32, shape=(T, D), name='X')
tfY = tf.placeholder(tf.int32, shape=(T,), name='Y')
XWx = tf.matmul(tfX, self.Wx)
def recurrence(h_t1, xw_t):
# matmul() only works with 2-D objects
# we want to return a 1-D object of size M
# so that the final result is T x M
# not T x 1 x M
h_t = self.f(xw_t + tf.matmul(tf.reshape(h_t1, (1, M)), self.Wh) + self.bh)
return tf.reshape(h_t, (M,))
h = tf.scan(
fn=recurrence,
elems=XWx,
initializer=self.h0,
)
logits = tf.matmul(h, self.Wo) + self.bo
cost = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=tfY,
logits=logits,
)
)
predict_op = tf.argmax(logits, 1)
train_op = tf.train.AdamOptimizer(1e-2).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as session:
session.run(init)
costs = []
for i in range(epochs):
X, Y = shuffle(X, Y)
n_correct = 0
batch_cost = 0
for j in range(N):
_, c, p = session.run([train_op, cost, predict_op], feed_dict={tfX: X[j].reshape(T, D), tfY: Y[j]})
batch_cost += c
if p[-1] == Y[j,-1]:
n_correct += 1
print("i:", i, "cost:", batch_cost, "classification rate:", (float(n_correct)/N))
costs.append(batch_cost)
if n_correct == N:
break
if show_fig:
plt.plot(costs)
plt.show()
Developer ID: cmagnusb, Project: machine_learning_examples, Lines of code: 75, Source file: srn_parity_tf.py
Example 18: fit
def fit(self, X, learning_rate=10e-1, mu=0.99, reg=1.0, activation=T.tanh, epochs=500, show_fig=False):
N = len(X)
D = self.D
M = self.M
V = self.V
# initial weights
We = init_weight(V, D)
Wx = init_weight(D, M)
Wh = init_weight(M, M)
bh = np.zeros(M)
h0 = np.zeros(M)
# z = np.ones(M)
Wxz = init_weight(D, M)
Whz = init_weight(M, M)
bz = np.zeros(M)
Wo = init_weight(M, V)
bo = np.zeros(V)
thX, thY, py_x, prediction = self.set(We, Wx, Wh, bh, h0, Wxz, Whz, bz, Wo, bo, activation)
lr = T.scalar('lr')
cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
grads = T.grad(cost, self.params)
dparams = [theano.shared(p.get_value()*0) for p in self.params]
updates = [
(p, p + mu*dp - lr*g) for p, dp, g in zip(self.params, dparams, grads)
] + [
(dp, mu*dp - lr*g) for dp, g in zip(dparams, grads)
]
self.predict_op = theano.function(inputs=[thX], outputs=prediction)
self.train_op = theano.function(
inputs=[thX, thY, lr],
outputs=[cost, prediction],
updates=updates
)
costs = []
for i in xrange(epochs):
X = shuffle(X)
n_correct = 0
n_total = 0
cost = 0
for j in xrange(N):
if np.random.random() < 0.1:
input_sequence = [0] + X[j]
output_sequence = X[j] + [1]
else:
input_sequence = [0] + X[j][:-1]
output_sequence = X[j]
#......... part of this example's code is omitted here .........