I'm trying to load tfrecords to boost training speed. My tfrecord contains extracted features of audio files and it's corresponding label. To reproduce the same, I have generated a random NumPy array of size (50,50) and label(0 or 1) and saved as tfrecord and read them for training.
tensorflow version: 2.3.0
here is my sample code,
Generate TFrecord
from pathlib import Path
import tensorflow as tf
import numpy as np
import os
import shutil
from tensorflow.keras.optimizers import SGD, Adadelta,Adam, Nadam, RMSprop
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.layers import Conv1D,Conv2D, Dense, Flatten, MaxPool1D,MaxPool2D, Dropout, BatchNormalization, Input, MaxPooling1D, Activation, Concatenate, SeparableConv1D
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, Bidirectional, GRU, LSTM, TimeDistributed, ConvLSTM2D, SimpleRNN, AveragePooling1D
from tensorflow.keras.layers import Reshape, Lambda, Dot, Softmax, LocallyConnected1D, LayerNormalization, add
from tensorflow.keras.models import Model, Sequential, load_model
def _bytes_feature(value):
"""Returns a bytes_list from a string / byte."""
# If the value is an eager tensor BytesList won't unpack a string from an EagerTensor.
if isinstance(value, type(tf.constant(0))):
value = value.numpy()
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _float_feature(value):
"""Returns a float_list from a float / double."""
return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
def _int64_feature(value):
"""Returns an int64_list from a bool / enum / int / uint."""
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def create_example(label, audio_feature):
feature = {
'label': _int64_feature(label),
'audio_feature': _bytes_feature(audio_feature),
}
# Create a Features message using tf.train.Example.
example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
return example_proto
def serialize_example(audio_feature, label):
feature = {
'audio_feature': _bytes_feature(audio_feature),
'label': _int64_feature(label),
}
# Create a Features message using tf.train.Example.
example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
return example_proto.SerializeToString()
def genrate_tf_records(tf_records_dir, length):
cnt = 1
if os.path.exists(tf_records_dir):
shutil.rmtree(tf_records_dir)
print(f"Removed old directory...({tf_records_dir})")
print("Creating new tf_record directory...")
Path(tf_records_dir).mkdir(parents = True, exist_ok = True)
file_path = os.path.join(tf_records_dir, "tfdata.tfrecord")
with tf.io.TFRecordWriter(file_path) as writer:
for fn in range(length):
audio_feature, label = np.random.randn(50,50), np.random.choice([0,1])
serialized_example = serialize_example(tf.io.serialize_tensor(audio_feature), label)
writer.write(serialized_example)
tf_records_train_dir ="./tf_features_aug_train/"
tf_records_val_dir ="./tf_features_aug_val/"
genrate_tf_records(tf_records_train_dir, length=1000)
genrate_tf_records(tf_records_val_dir, length=100)
Read TFrecord
AUTOTUNE = tf.data.experimental.AUTOTUNE
def _parse_batch(record_batch):
feature_description = {
'audio_feature': tf.io.FixedLenFeature((), tf.string),
'label': tf.io.FixedLenFeature((), tf.int64),
}
example = tf.io.parse_example(record_batch, feature_description)
audio_feature = tf.io.parse_tensor(example['audio_feature'], out_type = tf.float64)
#audio_feature = tf.reshape(audio_feature, (input_shape[0],input_shape[1]))
label = example['label']
return audio_feature, label
def get_dataset_from_tfrecords(tfrecords_dir='tfrecords', mode='train', n_epochs=1):
# List all *.tfrecord files for the selected split
files_ds = tf.data.Dataset.list_files(str(tfrecords_dir)+"*.tfrecord")
# Disregard data order in favor of reading speed
ignore_order = tf.data.Options()
ignore_order.experimental_deterministic = False
files_ds = files_ds.with_options(ignore_order)
ds = tf.data.TFRecordDataset(files_ds, num_parallel_reads=AUTOTUNE)
# Parse a batch into a dataset of [audio, label] pairs
ds = ds.map(lambda x: _parse_batch(x))
for data, label in ds.take(1):
print(np.shape(data.numpy()))
print(label.numpy())
return ds.prefetch(buffer_size=AUTOTUNE)
train_ds = get_dataset_from_tfrecords(tfrecords_dir=tf_records_train_dir, mode='train')
val_ds = get_dataset_from_tfrecords(tfrecords_dir=tf_records_val_dir, mode='val')
I'm able to load the saved tfrecords and can parse the data. But when I pass the TensorFlow object to model.fit() we get an error.
Create model
class Model_Creator():
def getmodel(self, model_name, input_shape, numclass):
#import pdb;pdb.set_trace()
if model_name in dir(self) and callable(getattr(self, model_name)):
print(model_name, 'from ACK.py')
model = getattr(self, model_name)(input_shape, numclass)
else:
print(model_name, 'from ign_utils/models_audio_ign.py')
model = Model_Creator2().getmodel( model_name, input_shape, numclass)
print('Created ',model_name)
return model
def cnn_model(self, input_shape, numclass):
datainp = Input(shape=input_shape)
x=datainp
#x = Dropout(0.1)(x)
x = Conv1D(filters=32, kernel_size=3, strides=1, activation='relu', padding='same')(x)
x = LayerNormalization(axis=2)(x)
x = MaxPool1D(strides=2)(x)
#x = Dropout(0.1)(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(0.1)(x)
x = Dense(numclass, activation='softmax')(x)
output_model = Model(inputs=datainp, outputs=x)
return output_model
modelcreator = Model_Creator()
model = modelcreator.getmodel(model_name = 'cnn_model', input_shape=(50,50), numclass=2)
model.summary()
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
Run model
model.fit(train_ds, validation_data = val_ds,epochs=10)
When I run this, I get the error,
ValueError: slice index 0 of dimension 0 out of bounds. for '{{node
strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_INT32,
begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0,
shrink_axis_mask=1](Shape, strided_slice/stack, strided_slice/stack_1,
strided_slice/stack_2)' with input shapes: [0], [1], [1], [1] and with
computed input tensors: input[1] = <0>, input[2] = <1>, input[3] =
<1>.
I don't understand why is this error. I can read and parse the data from tfrecords, but can't able to use it for training(model.fit()).
P.S: I have written the code to be reproduced easily. I'm sticking with it for the past two days. Looking forward for some help. Thanks in advance.