This article collects typical usage examples of the `util.load_data` function in Python. If you have been wondering how the Python `load_data` function is used, how to call it, or what real-world `load_data` code looks like, the hand-picked samples here should help.
A total of 20 `load_data` code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code samples.
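Note that `util.load_data` is a project-specific helper, not a standard-library function: as the examples below show, each project defines its own signature (a directory plus image parameters in Example 1, no arguments in Example 2, a filename in Example 8, and so on). For orientation only, here is a minimal hypothetical sketch of what such a helper often looks like. The pickle-based storage and the return-False-on-missing behavior are assumptions for illustration (the latter mirrors the check in Example 10); this is not the API of any project listed below.

import pickle

# util.py -- a hypothetical, minimal load_data helper (illustration only;
# every project below defines its own version with a different signature).
def load_data(path):
    """Load a previously saved Python object from `path`.

    Returns False if the file does not exist, so callers can test
    `if data is False:` as in Example 10.
    """
    try:
        with open(path, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        return False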
Example 1: main
def main():
    # img_width, img_height = 48, 48
    img_width, img_height = 200, 60
    img_channels = 1
    # batch_size = 1024
    batch_size = 32
    nb_epoch = 1000
    post_correction = False
    save_dir = 'save_model/' + str(datetime.now()).split('.')[0].split()[0] + '/'  # model is saved in a directory named for the current date
    train_data_dir = 'train_data/ip_train/'
    # train_data_dir = 'train_data/single_1000000/'
    val_data_dir = 'train_data/ip_val/'
    test_data_dir = 'test_data//'
    weights_file_path = 'save_model/2016-10-27/weights.11-1.58.hdf5'
    char_set, char2idx = get_char_set(train_data_dir)
    nb_classes = len(char_set)
    max_nb_char = get_maxnb_char(train_data_dir)
    label_set = get_label_set(train_data_dir)
    # print('char_set:', char_set)
    print('nb_classes:', nb_classes)
    print('max_nb_char:', max_nb_char)
    print('size_label_set:', len(label_set))
    model = build_shallow(img_channels, img_width, img_height, max_nb_char, nb_classes)  # build CNN architecture
    # model.load_weights(weights_file_path)  # load trained model
    val_data = load_data(val_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    # val_data = None
    train_data = load_data(train_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    train(model, batch_size, nb_epoch, save_dir, train_data, val_data, char_set)
Developer ID: testanull, Project: DeepLearning-OCR, Lines of code: 30, Source file: train.py
Example 2: test_stacker
def test_stacker():
    comments, dates, labels = load_data()
    clf = LogisticRegression(tol=1e-8, C=0.01, penalty='l2')
    countvect_char = TfidfVectorizer(ngram_range=(1, 5),
                                     analyzer="char", binary=False)
    countvect_word = TfidfVectorizer(ngram_range=(1, 3),
                                     analyzer="word", binary=False)
    badwords = BadWordCounter()
    select = SelectPercentile(score_func=chi2)
    char_select = Pipeline([('char_count', countvect_char),
                            ('select', select)])
    words_select = Pipeline([('word_count', countvect_word),
                             ('select', select)])
    badwords_select = Pipeline([('badwords', badwords), ('select', select)])
    stack = FeatureStacker([("badwords", badwords_select),
                            ("chars", char_select),
                            ("words", words_select)])
    #stack.fit(comments)
    #features = stack.transform(comments)
    #print("training and transforming for linear model")
    print("training grid search")
    pipeline = Pipeline([("features", stack), ("clf", clf)])
    param_grid = dict(clf__C=[0.31, 0.42, 0.54],
                      features__words__select__percentile=[5, 7])
    grid = GridSearchCV(pipeline, cv=5, param_grid=param_grid, verbose=4,
                        n_jobs=1, score_func=auc_score)
    grid.fit(comments, labels)
    tracer()
Developer ID: ANB2, Project: kaggle_insults, Lines of code: 30, Source file: old.py
Example 3: plot_conformity
def plot_conformity(name, log_dir, ax=None, legend=True):
    if ax is None:
        ax = plt.gca()
    r, actual, pred, a_err, p_err = util.load_data(name, log_dir)
    ax.errorbar(r, actual[0] - a_err[0], actual[0] + a_err[0], color=red_col,
                label='Red centrals')
    ax.errorbar(r, actual[1] - a_err[1], actual[1] + a_err[1], color=blue_col,
                label='Blue centrals')
    ax.errorbar(r, actual[2] - a_err[2], actual[2] + a_err[2], color='k',
                label='All centrals')
    ax.errorbar(r, pred[0] - p_err[0], pred[0] + p_err[0], color=red_col,
                linestyle='--', alpha=0.3)
    ax.errorbar(r, pred[1] - p_err[1], pred[1] + p_err[1], color=blue_col,
                linestyle='--', alpha=0.3)
    ax.errorbar(r, pred[2] - p_err[2], pred[2] + p_err[2], color='k',
                linestyle='--', alpha=0.3)
    ax.set_xscale('log')
    ax.set_xlabel('r [Mpc/h]')
    ax.set_ylabel('Quenched Fraction')
    ax.set_ylim(0.0, 1.1)
    ax.set_xlim(0.1, 20)
    if legend:
        ax.legend(loc='best')
    return style_plots(ax)
Developer ID: vipasu, Project: addseds, Lines of code: 25, Source file: plotting.py
Example 4: main
def main():
    global k_out
    k_out = 0
    x, y = load_data(k=2)
    kf = cross_validation.KFold(len(x), n_fold)
    scaler = preprocessing.StandardScaler()
    acc, prec, recall = [], [], []
    for train, test in kf:
        x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
        c_star, gamma_star = choose_c_gamma(x_train, y_train)
        print('=========c*:{} g*:{}'.format(c_star, gamma_star))
        scaler.fit(x_train)
        clf = svm.SVC(C=c_star, gamma=gamma_star)
        clf.fit(scaler.transform(x_train), y_train)
        y_pred = clf.predict(scaler.transform(x_test))
        acc.append(accuracy_score(y_test, y_pred))
        prec.append(precision_score(y_test, y_pred))
        recall.append(recall_score(y_test, y_pred))
        print(acc)
        k_out += 1
    a = np.mean(acc)
    p = np.mean(prec)
    r = np.mean(recall)
    f = 2 * p * r / (p + r)  # F1 is the harmonic mean of precision and recall
    print('precision: {}'.format(p))
    print("recall: {}".format(r))
    print("f1: {}".format(f))
    print("accuracy: {}".format(a))
Developer ID: harrylclc, Project: ist557, Lines of code: 29, Source file: svm_5.py
Example 5: main
def main(stat, stat_name):
    cats = util.load_all_cats()
    all_r_values = []
    names = cats.keys()
    names = ['HW', 'Becker', 'Lu', 'Henriques', 'Illustris', 'EAGLE', 'MB-II'][::-1]
    proxies = ['s1', 's2', 's5', 's10', 'd1', 'd2', 'd5', 'd10', 'rhill', 'rhillmass']
    proxies_formatted = [r'$\Sigma_1$', r'$\Sigma_2$', r'$\Sigma_5$', r'$\Sigma_{10}$',
                         r'$D_1$', r'$D_2$', r'$D_5$', r'$D_{10}$',
                         r'R$_\mathrm{hill}$', r'R$_\mathrm{hill-mass}$']
    for name in names:
        cat = cats[name]
        stat_dict = util.load_data('statistics.pckl', cat['dir'])
        r_values = []
        for p in proxies:
            try:
                print('std of', stat, 'for', p, '=', np.std(stat_dict[stat][p]))
                r_values.append(np.mean(stat_dict[stat][p]))
            except:
                print('no statistics found for', p)
                r_values.append(0)
        all_r_values.append(r_values)
    df = pd.DataFrame(columns=proxies_formatted, index=names)
    for name, r_values in zip(names, all_r_values):
        df.loc[name] = pd.Series({p: v for p, v in zip(proxies_formatted, r_values)})
    #plt.imshow(all_r_values)
    #plt.show()
    df = df[df.columns].astype(float)
    #sns.heatmap(df, vmin=0, vmax=0.71, cmap='Blues', annot=True, fmt='.2f')
    #plots.style_plots()
    #plt.show()
    print(df.values)
    plot_heatmap(df, proxies_formatted, names, stat_name)
Developer ID: vipasu, Project: addseds, Lines of code: 30, Source file: correlation_heatmap.py
Example 6: main
def main():
    global k_out
    k_out = 0
    x, y = load_data(k=2)
    kf_out = cross_validation.KFold(len(x), n_fold)
    a_score, p_score, r_score = [], [], []
    for train_out, test_out in kf_out:
        x_train_out, x_test_out, y_train_out, y_test_out = x[train_out], x[test_out], y[train_out], y[test_out]
        kf = cross_validation.KFold(len(x_train_out), n_fold)
        m_opt = pruning_cross_validation(x_train_out, y_train_out, kf)
        clf = DecisionTreeClassifier(criterion='entropy', max_leaf_nodes=m_opt + 1)
        print('=========m_opt:{}'.format(m_opt))
        clf.fit(x_train_out, y_train_out)
        y_pred = clf.predict(x_test_out)
        a_score.append(accuracy_score(y_test_out, y_pred))
        p_score.append(precision_score(y_test_out, y_pred))
        r_score.append(recall_score(y_test_out, y_pred))
        k_out += 1
    a = np.mean(a_score)
    p = np.mean(p_score)
    r = np.mean(r_score)
    f = 2 * p * r / (p + r)
    print('precision: {}'.format(p))
    print("recall: {}".format(r))
    print("f1: {}".format(f))
    print("accuracy: {}".format(a))
Developer ID: harrylclc, Project: ist557, Lines of code: 26, Source file: pruning_eval.py
Example 7: bagging
def bagging():
    from sklearn.feature_selection import SelectPercentile, chi2
    comments, dates, labels = load_data()
    select = SelectPercentile(score_func=chi2, percentile=4)
    clf = LogisticRegression(tol=1e-8, penalty='l2', C=7)
    #clf = BaggingClassifier(logr, n_estimators=50)
    countvect_char = TfidfVectorizer(ngram_range=(1, 5),
                                     analyzer="char", binary=False)
    countvect_word = TfidfVectorizer(ngram_range=(1, 3),
                                     analyzer="word", binary=False)
    badwords = BadWordCounter()
    ft = FeatureStacker([("badwords", badwords), ("chars", countvect_char),
                         ("words", countvect_word)])
    #ft = TextFeatureTransformer()
    pipeline = Pipeline([('vect', ft), ('select', select), ('logr', clf)])
    cv = ShuffleSplit(len(comments), n_iterations=20, test_size=0.2,
                      indices=True)
    scores = []
    for train, test in cv:
        X_train, y_train = comments[train], labels[train]
        X_test, y_test = comments[test], labels[test]
        pipeline.fit(X_train, y_train)
        probs = pipeline.predict_proba(X_test)
        scores.append(auc_score(y_test, probs[:, 1]))
        print("score: %f" % scores[-1])
    print(np.mean(scores), np.std(scores))
Developer ID: ANB2, Project: kaggle_insults, Lines of code: 30, Source file: old.py
Example 8: main
def main():
    window_size = 100
    threshold = calc_threshold(exp_moving_average, window_size)
    print(threshold)
    filename = sys.argv[1]
    data_in = load_data(filename)
    # Uncomment for more realistic first values. First window_size/4 values
    # should not be taken into account in the output data and plots.
    # data_in[:0] = [sum(data_in[:(window_size/4)])/(window_size/4)]
    filtered_ma = average_diff(data_in, moving_average, window_size)
    filtered_ema = average_diff(data_in, exp_moving_average, window_size)
    plot([0] * len(data_in),
         filtered_ma,
         filtered_ema,
         [threshold] * len(data_in),
         [-threshold] * len(data_in),
         )
    mean_ma = mean_value_detector(filtered_ma, threshold)
    mean_ema = mean_value_detector(filtered_ema, threshold)
    plot(mean_ema)
    plot(mean_ma)
    write_data(mean_ema, filename + ".out")
Developer ID: AntonKozlov, Project: fault-car, Lines of code: 30, Source file: fault_detect.py
Example 9: loadText
def loadText(self):
    login, password, dbname = load_data()
    self.ui.loginEdit.setText(login)
    self.ui.passwordEdit.setText(password)
    self.ui.dbEdit.setText(dbname)
    self.ui.rememberPassword.setChecked(bool(password))
    if login:
        self.ui.passwordEdit.setFocus()
Developer ID: vnetserg, Project: LadaDetail, Lines of code: 8, Source file: logindialog.py
Example 10: get_visitorid
def get_visitorid():
    visitor_id = util.load_data(addon, VISITOR_FILE)
    if visitor_id is False:
        from random import randint
        visitor_id = str(randint(0, 0x7fffffff))
        util.save_data(addon, VISITOR_FILE, visitor_id)
    return visitor_id
Developer ID: davejm, Project: plugin.video.lynda, Lines of code: 8, Source file: addon.py
Example 11: main
def main():
    x, y = load_data(k=2)
    kf = cross_validation.KFold(len(x), n_fold)
    a, p, r, f = classify(x, y, kf, n_estimator=50)
    print("precision: {}".format(p))
    print("recall: {}".format(r))
    print("f1: {}".format(f))
    print("accuracy: {}".format(a))
Developer ID: harrylclc, Project: ist557, Lines of code: 8, Source file: boosting.py
Example 12: __init__
def __init__(self, problem_path):
    A, b, N, block_sizes, x_true, nz, f = util.load_data(problem_path)
    self._A = A
    self._b = b
    self._U = util.U(block_sizes)
    self._x_true = x_true
    self._f = f
    self._N = N
    self._x0 = util.block_sizes_to_x0(block_sizes)
Developer ID: ion599, Project: optimization, Lines of code: 10, Source file: experiment.py
Example 13: main
def main():
    x, y = load_data(k=2)
    kf = cross_validation.KFold(len(x), n_fold)
    max_m = min(2500 - 1, int(len(x) * (n_fold - 1) / n_fold) - 1)
    acc_score = [[] for i in range(max_m)]
    p_score = [[] for i in range(max_m)]
    r_score = [[] for i in range(max_m)]
    for train, test in kf:
        print(len(train))
        x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
        m = 1
        while True:
            print("iter: {}".format(m))
            clf = DecisionTreeClassifier(criterion='entropy', max_leaf_nodes=m + 1)
            clf.fit(x_train, y_train)
            y_pred = clf.predict(x_test)
            acc = accuracy_score(y_test, y_pred)
            acc_score[m - 1].append(acc)
            p_score[m - 1].append(precision_score(y_test, y_pred))
            r_score[m - 1].append(recall_score(y_test, y_pred))
            print('accuracy: {}'.format(acc))
            m += 1
            if m > max_m:
                break
        # break
    # average the per-fold scores and locate the best leaf count
    max_val, max_id = -1, -1
    for i in range(len(acc_score)):
        acc = np.mean(acc_score[i])
        if acc > max_val:
            max_val = acc
            max_id = i
        acc_score[i] = acc
        p_score[i] = np.mean(p_score[i])
        r_score[i] = np.mean(r_score[i])
    print(acc_score[:10])
    with open('res/effect_of_leaves', 'w') as out:
        out.write(str(acc_score) + '\n')
        out.write(str(p_score) + '\n')
        out.write(str(r_score) + '\n')
    print('splits:{}'.format(max_id + 1))
    print('accuracy:{}'.format(max_val))
    print('p:{} r:{}'.format(p_score[max_id], r_score[max_id]))
    plt.clf()
    m_idx = np.arange(2, len(acc_score) + 2)
    max_leaf = max_id + 2
    plt.plot(m_idx, acc_score, label='cross_validation')
    plt.plot(max_leaf, max_val, linestyle='none', marker='o', markeredgecolor='r',
             markeredgewidth=1, markersize=12, markerfacecolor='none', label='best choice')
    plt.plot((max_leaf, max_leaf), (0, max_val), 'k--')
    plt.ylim(ymin=0.88, ymax=0.96)
    plt.xlabel("Number of leaf nodes")
    plt.ylabel("Cross validation score")
    plt.legend(numpoints=1, loc=4)
    plt.savefig('figs/effect_of_leaves.png')
Developer ID: harrylclc, Project: ist557, Lines of code: 55, Source file: pruning.py
Example 14: ge_cmd_predict
def ge_cmd_predict():
    args = parse_arg_predict()
    # prepare input to GE_learn
    data = util.load_data(args.data)
    model = util.load_model(args.model)
    pred_path = args.output
    pred = GE_predict(data, model)
    util.write_prediction(pred, pred_path)
    return
Developer ID: ShiyanYan, Project: gelearn, Lines of code: 11, Source file: ge_cmd.py
Example 15: setup_ts
def setup_ts(self):
    cube, self.time, flux, radii, unc = load_data(self.setup['data_dir'],
                                                  self.aor)
    pixels = get_pix(cube, geom=self.geom)
    self.t = binned(self.time, binsize=self.bs)
    self.pix = binned(pixels, binsize=self.bs)
    i = self.select_radius(flux)
    print("using radius: {}".format(radii[i]))
    self.radius = radii[i]
    self.f = binned(flux[i], binsize=self.bs)
    self.unc = binned(unc[i], binsize=self.bs) / np.sqrt(self.bs)
    self.pld = [0] * pixels.shape[1] + [0] * 2
Developer ID: john-livingston, Project: etp, Lines of code: 12, Source file: pld.py
Example 16: main
def main():
    # The original data set.
    data = util.load_data()
    # Fill in missing values with the average for that course.
    data.fill_missing_with_feature_means()
    # Count successful and probation students as one group (s).
    # Comment this out to try to distinguish all 3 groups (s, p, f).
    data.combine_labels(["s", "p"], "s")
    binning_exploration(data)
    plot_tests(data)
Developer ID: drusk, Project: pml-applications, Lines of code: 13, Source file: decision_tree_analysis.py
Example 17: main
def main():
    # The original data set.
    data = util.load_data()
    # Fill in missing values with the average for that course.
    data.fill_missing_with_feature_means()
    # Count successful and probation students as one group (s).
    # Comment this out to try to distinguish all 3 groups (s, p, f).
    data.combine_labels(["s", "p"], "s")
    examine_principal_components(data)
    pca_find_important_features(data)
Developer ID: drusk, Project: pml-applications, Lines of code: 14, Source file: pca_exploration.py
Example 18: obfuscate_keystrokes
def obfuscate_keystrokes(name, strategy, param):
    """Mask the keystroke timings of dataset `name` using the given
    strategy ('delay' or 'interval') with parameter `param`."""
    df = load_data(name)
    df = df.groupby(level=[0, 1]).apply(keystrokes2events).reset_index(level=[2, 3], drop=True)
    if strategy == 'delay':
        df = df.groupby(level=[0, 1]).apply(lambda x: delay_mix(x, param))
    elif strategy == 'interval':
        df = df.groupby(level=[0, 1]).apply(lambda x: interval_mix(x, param))
    else:
        raise Exception('Unknown masking strategy')
    df = df.groupby(level=[0, 1]).apply(events2keystrokes).reset_index(level=[2, 3], drop=True)
    save_data(df, name, masking=(strategy, param))
    return
Developer ID: vmonaco, Project: keystroke-obfuscation, Lines of code: 17, Source file: obfuscation.py
Example 19: main
def main():
    # The original data set.
    data = util.load_data()
    # Fill in missing values with the average for that course.
    data.fill_missing_with_feature_means()
    cluster_3_groups(data.copy())
    cluster_pass_fail(data.copy())
    cluster_success_struggle(data.copy())
    util.print_line_break()
    print("Now with PCA:")
    cluster_3_groups_with_pca(data.copy())
    cluster_pass_fail_with_pca(data.copy())
    cluster_success_struggle_with_pca(data.copy())
Developer ID: drusk, Project: pml-applications, Lines of code: 17, Source file: clustering_analysis.py
Example 20: main
def main():
    window_size = 150
    threshold = 3000
    filename = sys.argv[1]
    data_in = load_data(filename)
    # second arg - maximum size of the window of interest
    # third arg - some threshold
    data_filtered = adaptive_window_avg(data_in, 100, 10)
    abs_data = data_abs(data_filtered)
    out_data = filtered_derivative_detector(abs_data, window_size, 0, 0)
    tline = [threshold] * len(out_data)
    plot(data_in)
    plot(data_filtered)
    plot(out_data, tline)
Developer ID: AntonKozlov, Project: fault-car, Lines of code: 17, Source file: filtered_derivative_detector.py
Note: the util.load_data function examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by many programmers, and copyright of the source code remains with the original authors. Consult the corresponding project's License before distributing or using the code; do not reproduce without permission.