Make codebase Python 2.7-3.4 compatible
parent 46d18f1767
commit 1629f7f35b
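The hunks below all apply the same small set of Python 2/3 compatibility idioms: __future__ imports for print() and absolute imports, six.moves for renamed modules and lazy builtins, and list(...) around dict views before in-place sorting. A minimal standalone sketch of those idioms (illustrative only, not part of the commit; assumes the six package is installed):

    # Sketch only: the idioms this commit applies, runnable on 2.7 and 3.4+.
    from __future__ import absolute_import
    from __future__ import print_function

    import six.moves.cPickle            # resolves to cPickle on Python 2, pickle on Python 3
    from six.moves import range, zip    # iterator versions on both interpreters

    print('same print() call on both interpreters')

    counts = {'the': 3, 'cat': 1}
    items = list(counts.items())        # dict views need list() before in-place .sort()
    items.sort(key=lambda x: x[1], reverse=True)
    print(items)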
@@ -1,3 +1,5 @@
+from __future__ import absolute_import
+from __future__ import print_function
 from keras.datasets import cifar10
 from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential
@@ -5,6 +7,7 @@ from keras.layers.core import Dense, Dropout, Activation, Flatten
 from keras.layers.convolutional import Convolution2D, MaxPooling2D
 from keras.optimizers import SGD, Adadelta, Adagrad
 from keras.utils import np_utils, generic_utils
+from six.moves import range

 '''
 Train a (fairly simple) deep CNN on the CIFAR10 small images dataset.
@@ -14,6 +17,10 @@ from keras.utils import np_utils, generic_utils

 It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
 (it's still underfitting at that point, though).
+
+Note: the data was pickled with Python 2, and some encoding issues might prevent you
+from loading it in Python 3. You might have to load it in Python 2,
+save it in a different format, load it in Python 3 and repickle it.
 '''

 batch_size = 32
@@ -23,8 +30,8 @@ data_augmentation = True

 # the data, shuffled and split between tran and test sets
 (X_train, y_train), (X_test, y_test) = cifar10.load_data(test_split=0.1)
-print X_train.shape[0], 'train samples'
-print X_test.shape[0], 'test samples'
+print(X_train.shape[0], 'train samples')
+print(X_test.shape[0], 'test samples')

 # convert class vectors to binary class matrices
 Y_train = np_utils.to_categorical(y_train, nb_classes)
@@ -59,7 +66,7 @@ sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
 model.compile(loss='categorical_crossentropy', optimizer=sgd)

 if not data_augmentation:
-print "Not using data augmentation or normalization"
+print("Not using data augmentation or normalization")

 X_train = X_train.astype("float32")
 X_test = X_test.astype("float32")
@@ -67,10 +74,10 @@ if not data_augmentation:
 X_test /= 255
 model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=10)
 score = model.evaluate(X_test, Y_test, batch_size=batch_size)
-print 'Test score:', score
+print('Test score:', score)

 else:
-print "Using real time data augmentation"
+print("Using real time data augmentation")

 # this will do preprocessing and realtime data augmentation
 datagen = ImageDataGenerator(
@@ -90,17 +97,17 @@ else:
 datagen.fit(X_train)

 for e in range(nb_epoch):
-print '-'*40
-print 'Epoch', e
-print '-'*40
-print "Training..."
+print('-'*40)
+print('Epoch', e)
+print('-'*40)
+print("Training...")
 # batch train with realtime data augmentation
 progbar = generic_utils.Progbar(X_train.shape[0])
 for X_batch, Y_batch in datagen.flow(X_train, Y_train):
 loss = model.train(X_batch, Y_batch)
 progbar.add(X_batch.shape[0], values=[("train loss", loss)])

-print "Testing..."
+print("Testing...")
 # test time!
 progbar = generic_utils.Progbar(X_test.shape[0])
 for X_batch, Y_batch in datagen.flow(X_test, Y_test):
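On the docstring note above about CIFAR10 data pickled under Python 2: it can usually be read directly from Python 3 by passing an encoding to pickle.load and re-saving, rather than round-tripping through Python 2. A hedged sketch, with an illustrative batch filename that is not part of this commit:

    # Illustrative only: read a Python 2 pickle from Python 3 and re-save it.
    import pickle

    with open('data_batch_1', 'rb') as f:
        batch = pickle.load(f, encoding='latin1')   # encoding keyword exists on Python 3 only

    with open('data_batch_1.repkl', 'wb') as f:
        pickle.dump(batch, f, protocol=2)           # protocol 2 also loads on Python 2.7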
@ -1,3 +1,5 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
|
||||
from keras.preprocessing import sequence
|
||||
@ -28,25 +30,25 @@ from keras.datasets import imdb
|
||||
GPU command:
|
||||
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_lstm.py
|
||||
|
||||
180s/epoch on GPU (GT 650M), vs. 400s/epoch on CPU (2.4Ghz Core i7).
|
||||
250s/epoch on GPU (GT 650M), vs. 400s/epoch on CPU (2.4Ghz Core i7).
|
||||
'''
|
||||
|
||||
max_features=20000
|
||||
maxlen = 100 # cut texts after this number of words (among top max_features most common words)
|
||||
batch_size = 16
|
||||
|
||||
print "Loading data..."
|
||||
print("Loading data...")
|
||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2)
|
||||
print len(X_train), 'train sequences'
|
||||
print len(X_test), 'test sequences'
|
||||
print(len(X_train), 'train sequences')
|
||||
print(len(X_test), 'test sequences')
|
||||
|
||||
print "Pad sequences (samples x time)"
|
||||
print("Pad sequences (samples x time)")
|
||||
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
|
||||
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
|
||||
print 'X_train shape:', X_train.shape
|
||||
print 'X_test shape:', X_test.shape
|
||||
print('X_train shape:', X_train.shape)
|
||||
print('X_test shape:', X_test.shape)
|
||||
|
||||
print 'Build model...'
|
||||
print('Build model...')
|
||||
model = Sequential()
|
||||
model.add(Embedding(max_features, 256))
|
||||
model.add(LSTM(256, 128)) # try using a GRU instead, for fun
|
||||
@ -55,14 +57,14 @@ model.add(Dense(128, 1))
|
||||
model.add(Activation('sigmoid'))
|
||||
|
||||
# try using different optimizers and different optimizer configs
|
||||
model.compile(loss='binary_crossentropy', optimizer='adam')
|
||||
model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")
|
||||
|
||||
print "Train..."
|
||||
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=5, verbose=1)
|
||||
print("Train...")
|
||||
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=5, validation_split=0.1, show_accuracy=True)
|
||||
score = model.evaluate(X_test, y_test, batch_size=batch_size)
|
||||
print 'Test score:', score
|
||||
print('Test score:', score)
|
||||
|
||||
classes = model.predict_classes(X_test, batch_size=batch_size)
|
||||
acc = np_utils.accuracy(classes, y_test)
|
||||
print 'Test accuracy:', acc
|
||||
print('Test accuracy:', acc)
|
||||
|
||||
|
@@ -1,3 +1,5 @@
+from __future__ import absolute_import
+from __future__ import print_function
 import numpy as np

 from keras.datasets import reuters
@@ -20,28 +22,28 @@ from keras.preprocessing.text import Tokenizer
 max_words = 10000
 batch_size = 16

-print "Loading data..."
+print("Loading data...")
 (X_train, y_train), (X_test, y_test) = reuters.load_data(nb_words=max_words, test_split=0.2)
-print len(X_train), 'train sequences'
-print len(X_test), 'test sequences'
+print(len(X_train), 'train sequences')
+print(len(X_test), 'test sequences')

 nb_classes = np.max(y_train)+1
-print nb_classes, 'classes'
+print(nb_classes, 'classes')

-print "Vectorizing sequence data..."
+print("Vectorizing sequence data...")
 tokenizer = Tokenizer(nb_words=max_words)
 X_train = tokenizer.sequences_to_matrix(X_train, mode="binary")
 X_test = tokenizer.sequences_to_matrix(X_test, mode="binary")
-print 'X_train shape:', X_train.shape
-print 'X_test shape:', X_test.shape
+print('X_train shape:', X_train.shape)
+print('X_test shape:', X_test.shape)

-print "Convert class vector to binary class matrix (for use with categorical_crossentropy)"
+print("Convert class vector to binary class matrix (for use with categorical_crossentropy)")
 Y_train = np_utils.to_categorical(y_train, nb_classes)
 Y_test = np_utils.to_categorical(y_test, nb_classes)
-print 'Y_train shape:', Y_train.shape
-print 'Y_test shape:', Y_test.shape
+print('Y_train shape:', Y_train.shape)
+print('Y_test shape:', Y_test.shape)

-print "Building model..."
+print("Building model...")
 model = Sequential()
 model.add(Dense(max_words, 256, init='normal'))
 model.add(Activation('relu'))
@@ -50,14 +52,16 @@ model.add(Dropout(0.5))
 model.add(Dense(256, nb_classes, init='normal'))
 model.add(Activation('softmax'))

-model.compile(loss='categorical_crossentropy', optimizer='adadelta')
-
-print "Training..."
+model.compile(loss='categorical_crossentropy', optimizer='adam')
+print(model.optimizer)
+
+print("Training...")
 model.fit(X_train, Y_train, nb_epoch=5, batch_size=batch_size)
 score = model.evaluate(X_test, Y_test, batch_size=batch_size)
-print 'Test score:', score
+print('Test score:', score)

 classes = model.predict_classes(X_test, batch_size=batch_size)
 acc = np_utils.accuracy(classes, y_test)
-print 'Test accuracy:', acc
+print('Test accuracy:', acc)
@@ -27,10 +27,12 @@
 https://mega.co.nz/#F!YohlwD7R!wec0yNO86SeaNGIYQBOR0A
 (HNCommentsAll.1perline.json.bz2)
 '''
+from __future__ import absolute_import
+from __future__ import print_function

 import numpy as np
 import theano
-import cPickle
+import six.moves.cPickle
 import os, re, json

 from keras.preprocessing import sequence, text
@@ -38,6 +40,8 @@ from keras.optimizers import SGD, RMSprop, Adagrad
 from keras.utils import np_utils, generic_utils
 from keras.models import Sequential
 from keras.layers.embeddings import WordContextProduct, Embedding
+from six.moves import range
+from six.moves import zip

 max_features = 50000 # vocabulary size: top 50,000 most common words in data
 skip_top = 100 # ignore top 100 most common words
@@ -74,30 +78,30 @@ def text_generator(path=data_path):
 comment_text = comment_data["comment_text"]
 comment_text = clean_comment(comment_text)
 if i % 10000 == 0:
-print i
+print(i)
 yield comment_text
 f.close()

 # model management
 if load:
-print 'Load tokenizer...'
-tokenizer = cPickle.load(open(os.path.join(save_dir, tokenizer_fname)))
-print 'Load model...'
-model = cPickle.load(open(os.path.join(save_dir, model_load_fname)))
+print('Load tokenizer...')
+tokenizer = six.moves.cPickle.load(open(os.path.join(save_dir, tokenizer_fname)))
+print('Load model...')
+model = six.moves.cPickle.load(open(os.path.join(save_dir, model_load_fname)))
 else:
-print "Fit tokenizer..."
+print("Fit tokenizer...")
 tokenizer = text.Tokenizer(nb_words=max_features)
 tokenizer.fit_on_texts(text_generator())
 if save:
-print "Save tokenizer..."
+print("Save tokenizer...")
 if not os.path.exists(save_dir):
 os.makedirs(save_dir)
-cPickle.dump(tokenizer, open(os.path.join(save_dir, tokenizer_fname), "w"))
+six.moves.cPickle.dump(tokenizer, open(os.path.join(save_dir, tokenizer_fname), "w"))

 # training process
 if train_model:
 if not load:
-print 'Build model...'
+print('Build model...')
 model = Sequential()
 model.add(WordContextProduct(max_features, proj_dim=dim_proj, init="normal"))
 model.compile(loss='mse', optimizer='rmsprop')
@@ -105,9 +109,9 @@ if train_model:
 sampling_table = sequence.make_sampling_table(max_features)

 for e in range(nb_epoch):
-print '-'*40
-print 'Epoch', e
-print '-'*40
+print('-'*40)
+print('Epoch', e)
+print('-'*40)

 progbar = generic_utils.Progbar(tokenizer.document_count)
 samples_seen = 0
@@ -125,17 +129,17 @@ if train_model:
 progbar.update(i, values=[("loss", np.mean(losses))])
 losses = []
 samples_seen += len(labels)
-print 'Samples seen:', samples_seen
-print "Training completed!"
+print('Samples seen:', samples_seen)
+print("Training completed!")

 if save:
-print "Saving model..."
+print("Saving model...")
 if not os.path.exists(save_dir):
 os.makedirs(save_dir)
-cPickle.dump(model, open(os.path.join(save_dir, model_save_fname), "w"))
+six.moves.cPickle.dump(model, open(os.path.join(save_dir, model_save_fname), "w"))


-print "It's test time!"
+print("It's test time!")

 # recover the embedding weights trained with skipgram:
 weights = model.layers[0].get_weights()[0]
@@ -147,7 +151,7 @@ weights[:skip_top] = np.zeros((skip_top, dim_proj))
 norm_weights = np_utils.normalize(weights)

 word_index = tokenizer.word_index
-reverse_word_index = dict([(v, k) for k, v in word_index.items()])
+reverse_word_index = dict([(v, k) for k, v in list(word_index.items())])
 word_index = tokenizer.word_index

 def embed_word(w):
@@ -158,7 +162,7 @@ def embed_word(w):

 def closest_to_point(point, nb_closest=10):
 proximities = np.dot(norm_weights, point)
-tups = zip(range(len(proximities)), proximities)
+tups = list(zip(list(range(len(proximities))), proximities))
 tups.sort(key=lambda x: x[1], reverse=True)
 return [(reverse_word_index.get(t[0]), t[1]) for t in tups[:nb_closest]]

@@ -203,7 +207,7 @@ words = ["article", # post, story, hn, read, comments

 for w in words:
 res = closest_to_word(w)
-print '====', w
+print('====', w)
 for r in res:
-print r
+print(r)
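The cPickle to six.moves.cPickle swap above resolves to cPickle on Python 2 and the plain pickle module on Python 3. One caveat worth noting: Python 3 only pickles to files opened in binary mode, so the 'w'/'r' modes kept in this example would need to become 'wb'/'rb' there. A small illustrative round trip (path and payload are made up, not from the repo):

    # Sketch only: portable pickle round trip via six.
    import os
    import six.moves.cPickle

    payload = {'max_features': 50000, 'dim_proj': 256}   # illustrative payload
    path = os.path.join('/tmp', 'demo_tokenizer.pkl')    # illustrative path

    with open(path, 'wb') as f:                 # binary mode works on both 2 and 3
        six.moves.cPickle.dump(payload, f, -1)  # -1 = highest available protocol

    with open(path, 'rb') as f:
        assert six.moves.cPickle.load(f) == payload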
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 import theano
 import theano.tensor as T
 import types
@@ -28,6 +29,6 @@ def hard_sigmoid(x):
 def linear(x):
 return x

-from utils.generic_utils import get_from_module
+from .utils.generic_utils import get_from_module
 def get(identifier):
 return get_from_module(identifier, globals(), 'activation function')
@@ -1,8 +1,10 @@
-from data_utils import get_file
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from .data_utils import get_file
 import random
-import cPickle
 import numpy as np
 from PIL import Image
+import six.moves.cPickle
+from six.moves import range

 def load_data(test_split=0.1, seed=113):
 dirname = "cifar-10-batches-py"
@@ -15,7 +17,7 @@ def load_data(test_split=0.1, seed=113):
 for i in range(1, 6):
 fpath = path + '/data_batch_' + str(i)
 f = open(fpath, 'rb')
-d = cPickle.load(f)
+d = six.moves.cPickle.load(f)
 f.close()
 data = d["data"]
 labels = d["labels"]
@@ -1,3 +1,5 @@
+from __future__ import absolute_import
+from __future__ import print_function
 import urllib, tarfile
 import inspect, os
 from ..utils.generic_utils import Progbar
@@ -16,7 +18,7 @@ def get_file(fname, origin, untar=False):
 try:
 f = open(fpath)
 except:
-print 'Downloading data from', origin
+print('Downloading data from', origin)

 global progbar
 progbar = None
@@ -32,7 +34,7 @@ def get_file(fname, origin, untar=False):

 if untar:
 if not os.path.exists(untar_fpath):
-print 'Untaring file...'
+print('Untaring file...')
 tfile = tarfile.open(fpath, 'r:gz')
 tfile.extractall(path=datadir)
 tfile.close()
@@ -1,7 +1,9 @@
-import cPickle
+from __future__ import absolute_import
+import six.moves.cPickle
 import gzip
-from data_utils import get_file
+from .data_utils import get_file
 import random
+from six.moves import zip

 def load_data(path="imdb.pkl", nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113):
 path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/imdb.pkl")
@@ -11,7 +13,7 @@ def load_data(path="imdb.pkl", nb_words=None, skip_top=0, maxlen=None, test_spli
 else:
 f = open(path, 'rb')

-X, labels = cPickle.load(f)
+X, labels = six.moves.cPickle.load(f)
 f.close()

 random.seed(seed)
@@ -1,8 +1,11 @@
 # -*- coding: utf-8 -*-
-from data_utils import get_file
+from __future__ import absolute_import
+from __future__ import print_function
+from .data_utils import get_file
 import string
 import random
-import cPickle
+import six.moves.cPickle
+from six.moves import zip

 def make_reuters_dataset(path='datasets/temp/reuters21578/', min_samples_per_topic=15):
 import os
@@ -34,15 +37,15 @@ def make_reuters_dataset(path='datasets/temp/reuters21578/', min_samples_per_top
 wire_bodies.append(body)

 # only keep most common topics
-items = topic_counts.items()
+items = list(topic_counts.items())
 items.sort(key = lambda x: x[1])
 kept_topics = set()
 for x in items:
-print x[0] + ': ' + str(x[1])
+print(x[0] + ': ' + str(x[1]))
 if x[1] >= min_samples_per_topic:
 kept_topics.add(x[0])
-print '-'
-print 'Kept topics:', len(kept_topics)
+print('-')
+print('Kept topics:', len(kept_topics))

 # filter wires with rare topics
 kept_wires = []
@@ -64,15 +67,15 @@ def make_reuters_dataset(path='datasets/temp/reuters21578/', min_samples_per_top
 tokenizer.fit_on_texts(kept_wires)
 X = tokenizer.texts_to_sequences(kept_wires)

-print 'Sanity check:'
+print('Sanity check:')
 for w in ["banana", "oil", "chocolate", "the", "dsft"]:
-print '...index of', w, ':', tokenizer.word_index.get(w)
+print('...index of', w, ':', tokenizer.word_index.get(w))

 dataset = (X, labels)
-print '-'
-print 'Saving...'
-cPickle.dump(dataset, open('datasets/data/reuters.pkl', 'w'))
-cPickle.dump(tokenizer.word_index, open('datasets/data/reuters_word_index.pkl', 'w'))
+print('-')
+print('Saving...')
+six.moves.cPickle.dump(dataset, open('datasets/data/reuters.pkl', 'w'))
+six.moves.cPickle.dump(tokenizer.word_index, open('datasets/data/reuters_word_index.pkl', 'w'))

@@ -80,7 +83,7 @@ def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_s
 path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters.pkl")
 f = open(path, 'rb')

-X, labels = cPickle.load(f)
+X, labels = six.moves.cPickle.load(f)
 f.close()
 random.seed(seed)
 random.shuffle(X)
@@ -113,7 +116,7 @@ def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_s
 def get_word_index(path="reuters_word_index.pkl"):
 path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl")
 f = open(path, 'rb')
-return cPickle.load(f)
+return six.moves.cPickle.load(f)


 if __name__ == "__main__":
@@ -1,8 +1,9 @@
+from __future__ import absolute_import
 import theano
 import theano.tensor as T
 import numpy as np

-from utils.theano_utils import sharedX, shared_zeros
+from .utils.theano_utils import sharedX, shared_zeros

 def uniform(shape, scale=0.05):
 return sharedX(np.random.uniform(low=-scale, high=scale, size=shape))
@@ -49,6 +50,6 @@ def zero(shape):
 return shared_zeros(shape)


-from utils.generic_utils import get_from_module
+from .utils.generic_utils import get_from_module
 def get(identifier):
 return get_from_module(identifier, globals(), 'initialization')
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
 import theano
 import theano.tensor as T
 from theano.tensor.signal import downsample
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
 import theano
 import theano.tensor as T

@@ -7,6 +9,7 @@ from ..utils.theano_utils import shared_zeros, floatX
 from ..utils.generic_utils import make_tuple

 from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
+from six.moves import zip
 srng = RandomStreams()

 class Layer(object):
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 import theano
 import theano.tensor as T

@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import
 import theano
 import theano.tensor as T
 import numpy as np
@@ -6,6 +7,7 @@ import numpy as np
 from .. import activations, initializations
 from ..utils.theano_utils import shared_zeros, alloc_zeros_matrix
 from ..layers.core import Layer
+from six.moves import range

 class SimpleRNN(Layer):
 '''
@@ -1,11 +1,14 @@
+from __future__ import absolute_import
+from __future__ import print_function
 import theano
 import theano.tensor as T
 import numpy as np

-import optimizers
-import objectives
+from . import optimizers
+from . import objectives
 import time, copy
-from utils.generic_utils import Progbar
+from .utils.generic_utils import Progbar
+from six.moves import range

 def standardize_y(y):
 if not hasattr(y, 'shape'):
@@ -97,7 +100,7 @@ class Sequential(object):
 do_validation = True
 y_val = standardize_y(y_val)
 if verbose:
-print "Train on %d samples, validate on %d samples" % (len(y), len(y_val))
+print("Train on %d samples, validate on %d samples" % (len(y), len(y_val)))
 else:
 if 0 < validation_split < 1:
 # If a validation split size is given (e.g. validation_split=0.2)
@@ -108,12 +111,12 @@ class Sequential(object):
 (X, X_val) = (X[0:split_at], X[split_at:])
 (y, y_val) = (y[0:split_at], y[split_at:])
 if verbose:
-print "Train on %d samples, validate on %d samples" % (len(y), len(y_val))
+print("Train on %d samples, validate on %d samples" % (len(y), len(y_val)))

 index_array = np.arange(len(X))
 for epoch in range(nb_epoch):
 if verbose:
-print 'Epoch', epoch
+print('Epoch', epoch)
 if shuffle:
 np.random.shuffle(index_array)

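The hunk above sits in Sequential.fit's validation_split branch, which carves the tail of the training arrays off as a validation set before printing the "Train on ... validate on ..." line. A tiny self-contained illustration of that slicing (array sizes and the 0.2 split are invented for the example):

    # Illustration only of the validation_split slicing shown above.
    import numpy as np

    X = np.arange(10).reshape(10, 1)
    y = np.arange(10)
    validation_split = 0.2

    split_at = int(len(X) * (1 - validation_split))
    (X, X_val) = (X[0:split_at], X[split_at:])
    (y, y_val) = (y[0:split_at], y[split_at:])
    print(len(y), len(y_val))   # 8 2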
@@ -1,6 +1,8 @@
+from __future__ import absolute_import
 import theano
 import theano.tensor as T
 import numpy as np
+from six.moves import range

 epsilon = 1.0e-15

@@ -32,7 +34,7 @@ def binary_crossentropy(y_true, y_pred):
 mse = MSE = mean_squared_error
 mae = MAE = mean_absolute_error

-from utils.generic_utils import get_from_module
+from .utils.generic_utils import get_from_module
 def get(identifier):
 return get_from_module(identifier, globals(), 'objective')

@@ -1,8 +1,10 @@
+from __future__ import absolute_import
 import theano
 import theano.tensor as T
 import numpy as np

-from utils.theano_utils import shared_zeros, shared_scalar
+from .utils.theano_utils import shared_zeros, shared_scalar
+from six.moves import zip

 def clip_norm(g, c, n):
 if c > 0:
@@ -174,6 +176,6 @@ adagrad = Adagrad
 adadelta = Adadelta
 adam = Adam

-from utils.generic_utils import get_from_module
+from .utils.generic_utils import get_from_module
 def get(identifier):
 return get_from_module(identifier, globals(), 'optimizer', instantiate=True)
@@ -1,4 +1,5 @@
-from PIL import Image
+from __future__ import absolute_import
+
 import numpy as np
 from scipy import ndimage
 from scipy import linalg
@@ -6,6 +7,7 @@ from scipy import linalg
 from os import listdir
 from os.path import isfile, join
 import random, math
+from six.moves import range

 '''
 Fairly basic set of tools for realtime data augmentation on image data.
@@ -74,6 +76,7 @@ def random_zoom(x, rg, fill_mode="nearest", cval=0.):


 def array_to_img(x, scale=True):
+from PIL import Image
 x = x.transpose(1, 2, 0)
 if scale:
 x += max(-np.min(x), 0)
@@ -93,6 +96,7 @@ def img_to_array(img):


 def load_img(path, grayscale=False):
+from PIL import Image
 img = Image.open(open(path))
 if grayscale:
 img = img.convert('L')
@@ -1,6 +1,8 @@
+from __future__ import absolute_import
 # -*- coding: utf-8 -*-
 import numpy as np
 import random
+from six.moves import range

 def pad_sequences(sequences, maxlen=None, dtype='int32'):
 """
@@ -37,7 +39,7 @@ def make_sampling_table(size, sampling_factor=1e-5):
 where gamma is the Euler–Mascheroni constant.
 '''
 gamma = 0.577
-rank = np.array(range(size))
+rank = np.array(list(range(size)))
 rank[0] = 1
 inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1./(12.*rank)
 f = sampling_factor * inv_fq
@@ -3,9 +3,12 @@
 These preprocessing utils would greatly benefit
 from a fast Cython rewrite.
 '''
+from __future__ import absolute_import

 import string
 import numpy as np
+from six.moves import range
+from six.moves import zip

 def base_filter():
 f = string.punctuation
@@ -20,7 +23,7 @@ def text_to_word_sequence(text, filters=base_filter(), lower=True, split=" "):
 text = text.lower()
 text = text.translate(string.maketrans(filters, split*len(filters)))
 seq = text.split(split)
-return filter(None, seq)
+return [_f for _f in seq if _f]


 def one_hot(text, n, filters=base_filter(), lower=True, split=" "):
@@ -58,13 +61,13 @@ class Tokenizer(object):
 else:
 self.word_docs[w] = 1

-wcounts = self.word_counts.items()
+wcounts = list(self.word_counts.items())
 wcounts.sort(key = lambda x: x[1], reverse=True)
 sorted_voc = [wc[0] for wc in wcounts]
-self.word_index = dict(zip(sorted_voc, range(1, len(sorted_voc)+1)))
+self.word_index = dict(list(zip(sorted_voc, list(range(1, len(sorted_voc)+1)))))

 self.index_docs = {}
-for w, c in self.word_docs.items():
+for w, c in list(self.word_docs.items()):
 self.index_docs[self.word_index[w]] = c

@@ -153,7 +156,7 @@ class Tokenizer(object):
 counts[j] = 1.
 else:
 counts[j] += 1
-for j, c in counts.items():
+for j, c in list(counts.items()):
 if mode == "count":
 X[i][j] = c
 elif mode == "freq":
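The list(...) wrappers added above are needed because Python 3 turns dict.items(), zip() and range() into lazy views or iterators, while Python 2 returned real lists; anything that sorts or indexes them has to materialize them first. A short illustration (not taken from the repo):

    word_counts = {'the': 5, 'cat': 2, 'sat': 1}

    wcounts = list(word_counts.items())              # a view on Python 3, a list on Python 2
    wcounts.sort(key=lambda x: x[1], reverse=True)   # in-place .sort() needs a real list

    ranks = list(zip(range(1, len(wcounts) + 1), wcounts))  # zip() is lazy on Python 3
    print(ranks[0])                                          # indexing needs the list too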
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 import numpy as np
 import time
 import sys
@@ -1,5 +1,8 @@
+from __future__ import absolute_import
 import numpy as np
 import scipy as sp
+from six.moves import range
+from six.moves import zip

 def to_categorical(y, nb_classes=None):
 '''Convert class vector (integers from 0 to nb_classes)
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 import numpy as np
 import theano
 import theano.tensor as T
setup.py (13 changed lines)
@@ -1,6 +1,5 @@
-#!/usr/bin/env python
-
-from distutils.core import setup
+from setuptools import setup
+from setuptools import find_packages

 setup(name='Keras',
 version='0.0.1',
@@ -9,12 +8,6 @@ setup(name='Keras',
 author_email='francois.chollet@gmail.com',
 url='https://github.com/fchollet/keras',
 license='MIT',
-packages=[
-'keras',
-'keras.layers',
-'keras.preprocessing',
-'keras.datasets',
-'keras.utils',
-],
+packages=find_packages(),
 # TODO: dependencies
 )
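Replacing the hand-maintained packages list with setuptools' find_packages() means any new subpackage (a directory containing __init__.py) is picked up automatically at build time. A minimal sketch of the resulting setup.py shape; the metadata here is abbreviated, not the full file:

    # Sketch only: abbreviated setuptools-based setup.py.
    from setuptools import setup
    from setuptools import find_packages

    setup(name='Keras',
          version='0.0.1',
          packages=find_packages())   # finds keras, keras.layers, keras.datasets, ...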