Hi, I am using a TensorFlow Keras deep learning model to train on my data.
My GPU instance has 16 GB of memory, and after the train/validation split the data itself is about 16 GB, so I am not able to train because of memory limitations.
Here is my estimator code and my Python training script.

Estimator:

Python training script:
import argparse, os
import numpy as np
import json
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import multi_gpu_model
from sklearn.metrics import classification_report, accuracy_score

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--learning-rate', type=float, default=0.01)
    parser.add_argument('--batch-size', type=int, default=128)
    parser.add_argument('--sequence-length', type=int, default=60)
    parser.add_argument('--class-weight', type=str, default='{0:1,1:1}')
    parser.add_argument('--gpu-count', type=int, default=os.environ['SM_NUM_GPUS'])
    parser.add_argument("--model_dir", type=str)
    parser.add_argument("--sm-model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
    parser.add_argument('--val', type=str, default=os.environ['SM_CHANNEL_VAL'])
    parser.add_argument("--current-host", type=str, default=os.environ.get("SM_CURRENT_HOST"))

    args, _ = parser.parse_known_args()

    epochs = args.epochs
    lr = args.learning_rate
    batch_size = args.batch_size
    class_weight = eval(args.class_weight)
    gpu_count = args.gpu_count
    model_dir = args.sm_model_dir
    training_dir = args.train
    validation_dir = args.val
    sequence_length = args.sequence_length

    # load data
    X_train = np.load(os.path.join(training_dir, 'train.npz'))['X']
    y_train = np.load(os.path.join(training_dir, 'train.npz'))['y']
    X_val = np.load(os.path.join(validation_dir, 'val.npz'))['X']
    y_val = np.load(os.path.join(validation_dir, 'val.npz'))['y']

    # create model
    model = Sequential()
    model.add(LSTM(32, input_shape=(X_train.shape[1:]), return_sequences=True))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(LSTM(32))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))

    # if gpu_count > 1:
    #     model = multi_gpu_model(model, gpus=gpu_count)

    METRICS = [
        tf.keras.metrics.TruePositives(name='tp'),
        tf.keras.metrics.FalsePositives(name='fp'),
        tf.keras.metrics.TrueNegatives(name='tn'),
        tf.keras.metrics.FalseNegatives(name='fn'),
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.AUC(name='prc', curve='PR'),  # precision-recall curve
    ]

    # compile model
    model.compile(loss=tf.keras.losses.binary_crossentropy,
                  optimizer=Adam(lr=lr, decay=1e-6),
                  metrics=METRICS)

    # Slicing using tensorflow apis
    tf_trainX_dataset = tf.data.Dataset.from_tensor_slices(X_train)
    tf_trainY_dataset = tf.data.Dataset.from_tensor_slices(y_train)

    # Train model
    model.fit(tf_trainX_dataset, tf_trainY_dataset,
              batch_size=batch_size,
              epochs=epochs,
              class_weight=class_weight,
              validation_data=(X_val, y_val),
              verbose=2)
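I am also not sure the final model.fit call is right. From my reading of the tf.data docs, fit() expects a single dataset that yields (features, labels) pairs and ignores batch_size when a dataset is passed, so I think the two slices should be zipped together and batched. This is a rough sketch of what I mean (train_dataset and val_dataset are just names I made up, the rest are the variables from my script above) -- please correct me if this is wrong:

    # Rough sketch: combine features and labels into one dataset of (X, y)
    # pairs and batch it, so only one batch at a time has to go to the GPU.
    train_dataset = (
        tf.data.Dataset.from_tensor_slices((X_train, y_train))
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)  # overlap input prep with training
    )

    val_dataset = (
        tf.data.Dataset.from_tensor_slices((X_val, y_val))
        .batch(batch_size)
    )

    # When a tf.data.Dataset is passed, fit() takes the batch size from the
    # dataset itself, so no batch_size argument is given here.
    model.fit(train_dataset,
              epochs=epochs,
              class_weight=class_weight,
              validation_data=val_dataset,
              verbose=2)

My worry is that this still loads the full NumPy arrays into host memory first, so it may only help with GPU memory and not with the overall 16 GB limit, which is why I am asking about reading the data in chunks.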
I am new to the TensorFlow data APIs. Could you please explain how I can train on my data in chunks so that the GPU instance's memory is used efficiently?
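One idea I had, but have not tried, is to save the arrays as plain .npy files instead of the .npz archives so they can be memory-mapped, and then stream samples with tf.data.Dataset.from_generator (I believe output_signature needs TF 2.4+). The file names and the float32 dtypes below are just placeholders for my actual data:

    # Hypothetical layout: X and y saved separately with np.save() as .npy
    # files, so np.load(..., mmap_mode='r') maps them from disk instead of
    # reading everything into RAM at once.
    X_train = np.load(os.path.join(training_dir, 'X_train.npy'), mmap_mode='r')
    y_train = np.load(os.path.join(training_dir, 'y_train.npy'), mmap_mode='r')

    def train_generator():
        # Yield one (sample, label) pair at a time; only the slices that are
        # touched get read from disk.
        for i in range(len(X_train)):
            yield X_train[i], y_train[i]

    train_dataset = (
        tf.data.Dataset.from_generator(
            train_generator,
            output_signature=(
                tf.TensorSpec(shape=X_train.shape[1:], dtype=tf.float32),  # assuming float32 features
                tf.TensorSpec(shape=(), dtype=tf.float32),                 # assuming scalar labels
            ),
        )
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    model.fit(train_dataset, epochs=epochs, class_weight=class_weight, verbose=2)

Is this a reasonable direction, or is there a more standard way to feed data that does not fit in memory?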
Thanks in advance.