I’m curious about the effect of zero-padding on layers downstream of an LSTM. I have observed that when sequences are zero-padded and then masked, the LSTM handles the padding correctly.
My question is: if I add another layer after the LSTM, such as self-attention, will the zero-padding still have no effect on the attention weights, and therefore no effect on the model’s performance?
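To make the question concrete, here is a minimal sketch of the setup I have in mind (assuming TF 2.x; the shapes, layer sizes, and the explicit attention_mask construction are illustrative, not my actual model). Masking creates a boolean per-timestep mask that an LSTM with return_sequences=True propagates, but as far as I can tell, self-attention only ignores padding if that mask actually reaches the attention weights, so here it is rebuilt from the raw input and passed explicitly:

import tensorflow as tf

# Sketch (assumes TF 2.x; shapes and layer sizes are illustrative).
inputs = tf.keras.Input(shape=(None, 1))
masked = tf.keras.layers.Masking(mask_value=0.0)(inputs)
seq = tf.keras.layers.LSTM(8, return_sequences=True)(masked)

# True where a timestep has at least one non-zero feature: shape (batch, T).
step_is_real = tf.reduce_any(tf.not_equal(inputs, 0.0), axis=-1)
# (batch, T, T) boolean mask: padded steps can neither attend nor be attended to.
attn_mask = tf.logical_and(step_is_real[:, tf.newaxis, :],
                           step_is_real[:, :, tf.newaxis])

attended = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=8)(
    query=seq, value=seq, attention_mask=attn_mask)

(I believe recent TF versions can also combine the incoming Keras masks inside MultiHeadAttention automatically, but I’d rather pass the mask explicitly than rely on that.)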
Example of one input sequence fed into the model (zero-padded to a fixed length):
[ 2 3.2 0.2 4.2 1.7 3 1.0 3.2 1.0 2.3 1 0.3 3.1 0.4 3.8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Here is the code:
import os
import tensorflow as tf
import tensorflow_addons as tfa


def LSTM_model(input_shape, total_classes):
    input_layer = tf.keras.Input(shape=input_shape, name="time_series_activity")
    # Mask zero-padded timesteps; the mask is propagated to the layers below.
    input_mask = tf.keras.layers.Masking(mask_value=0.0)(input_layer)
    lstm_l5 = tf.keras.layers.LSTM(
        128,
        activation='tanh',
        recurrent_initializer=tf.keras.initializers.Orthogonal(),
        dropout=0.5,
        recurrent_dropout=0.5,
        return_sequences=True)(input_mask)
    # return_sequences=False: this LSTM consumes the mask and returns the
    # state at the last real (unmasked) timestep.
    lstm_l6 = tf.keras.layers.LSTM(
        128,
        activation='tanh',
        recurrent_initializer=tf.keras.initializers.Orthogonal(),
        dropout=0.9,
        recurrent_dropout=0.5)(lstm_l5)
    output_layer = tf.keras.layers.Dense(total_classes, activation="softmax")(lstm_l6)
    return tf.keras.models.Model(inputs=input_layer, outputs=output_layer)


# train_x, train_y, test_x, test_y, Generator, total_classes, total_epoch,
# filepath, dataset, and the `callbacks` entry are defined elsewhere.
train_generator = Generator(train_x, train_y, shuffle_input_segments=True)
test_generator = Generator(test_x, test_y)

input_shape = (None, train_x[0].shape[1])
model = LSTM_model(input_shape, total_classes)
model.summary()

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=[tfa.metrics.F1Score(num_classes=total_classes, average='macro')])

path_checkpoint = os.path.join(filepath, dataset, "best_model_checkpoint")
callback_model_saving = tf.keras.callbacks.ModelCheckpoint(
    filepath=path_checkpoint,
    monitor='val_f1_score',
    mode='max',
    verbose=1,
    save_best_only=True)

# Model.fit accepts generators directly in TF 2.x (fit_generator is deprecated).
history = model.fit(
    train_generator,
    epochs=total_epoch,
    steps_per_epoch=len(train_generator),
    callbacks=[callbacks, callback_model_saving],
    validation_data=test_generator,
    validation_steps=len(test_generator))
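To check this end to end, I also ran a small sanity test (a sketch; total_classes=3 and the feature dimension of 1 are arbitrary, chosen only to match the example input above): if the masking works all the way through, appending extra zero padding must not change the prediction for the same underlying sequence.

import numpy as np
import tensorflow as tf

# Sanity-check sketch: with correct masking, extra zero padding must not
# change the prediction for the same underlying sequence.
model = LSTM_model(input_shape=(None, 1), total_classes=3)  # 3 is arbitrary

seq = np.random.rand(1, 15, 1).astype("float32")  # 15 real timesteps
padded = np.concatenate([seq, np.zeros((1, 25, 1), dtype="float32")], axis=1)

out_short = model(seq, training=False)     # training=False disables dropout
out_padded = model(padded, training=False)
print(np.allclose(out_short.numpy(), out_padded.numpy(), atol=1e-6))  # expect True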