LSTM history length vs prediction error
I use an LSTM to predict the next-step voltage value in a voltage time-series signal, and I have a question:
Why does training the LSTM on longer sequences (5 or 10 time steps) not improve the prediction and reduce the prediction error? It actually degrades it; see the figures, e.g. the results for sequence_length=5 are better than for sequence_length=10:
[figure] epochs: 10, ratio: 1, sequence_length: 10, mean error: 0.00116802704509
[figure] epochs: 10, ratio: 1, sequence_length: 5, mean error: 0.000495359163296
(predicted signal in green, real in red)
```python
import os
import time
import csv

import matplotlib.pyplot as plt
import numpy as np
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

np.random.seed(1234)


def data_power_consumption(path_to_dataset, sequence_length=50, ratio=1.0):
    """Load the series from CSV and build overlapping windows of
    length `sequence_length`; the last column is the target value."""
    max_values = ratio * 2049280
    with open(path_to_dataset) as f:
        data = csv.reader(f, delimiter=",")
        power = []
        nb_of_values = 0
        for line in data:
            try:
                power.append(float(line[4]))
                nb_of_values += 1
            except ValueError:
                pass
            # 2049280.0 is the total number of valid values, i.e. ratio = 1.0
            if nb_of_values >= max_values:
                print "max value", nb_of_values
                break

    print "data loaded from csv. formatting..."

    result = []
    for index in range(len(power) - sequence_length):
        result.append(power[index: index + sequence_length])
    result = np.array(result)  # shape (2049230, 50)

    # center the data around zero
    result_mean = result.mean()
    result -= result_mean
    print "shift : ", result_mean
    print "data : ", result.shape

    # 90/10 train/test split; shuffle only the training windows
    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    # reshape to (samples, timesteps, features) as the LSTM expects
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]


def build_model():
    model = Sequential()
    layers = [1, 50, 100, 1]

    model.add(LSTM(
        input_dim=layers[0],
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="adam")
    print "compilation time : ", time.time() - start
    return model


def run_network(model=None, data=None):
    global_start_time = time.time()
    epochs = 10
    ratio = 1
    sequence_length = 3
    path_to_dataset = 'timber_data_1.csv'

    if data is None:
        print 'loading data... '
        x_train, y_train, x_test, y_test = data_power_consumption(
            path_to_dataset, sequence_length, ratio)
    else:
        x_train, y_train, x_test, y_test = data

    print '\ndata loaded. compiling...\n'

    if model is None:
        model = build_model()

    try:
        model.fit(
            x_train, y_train,
            batch_size=512, nb_epoch=epochs, validation_split=0.05)
        predicted = model.predict(x_test)
        predicted = np.reshape(predicted, (predicted.size,))
        print "done"
    except KeyboardInterrupt:
        print 'training duration (s) : ', time.time() - global_start_time
        return model, y_test, 0

    try:
        fig, ax = plt.subplots()
        txt = ("epochs: " + str(epochs),
               "ratio: " + str(ratio),
               "sequence_length: " + str(sequence_length))
        # calculate the error: shift predicted by "sequence_length - 1"
        # and take the mean of the absolute differences
        y_test_mean = y_test - np.mean(y_test)
        y_test_mean_shifted = y_test_mean[:-1 * (sequence_length - 1)]
        predicted_mean = predicted - np.mean(predicted)
        predicted_mean_shifted = predicted_mean[(sequence_length - 1):]
        prediction_error = np.mean(abs(y_test_mean_shifted - predicted_mean_shifted))
        text_mean = ("mean error: ", str(prediction_error))
        txt = txt + text_mean
        ax.plot(y_test_mean_shifted[900:1000], 'r--', label='real data')
        ax.plot(predicted_mean_shifted[900:1000], 'g:', label='predicted')
        # add the legend after plotting so the labels are picked up
        legend = ax.legend(loc='upper center', shadow=True)
        fig.text(0.4, 0.2, txt, horizontalalignment='center',
                 verticalalignment='center', transform=ax.transAxes)
        plt.savefig(os.path.join('cern_figures', 'testplot' + str(txt) + '.png'))
        plt.show()
    except Exception as e:
        print str(e)

    print 'training duration (s) : ', time.time() - global_start_time

    return model, y_test, predicted


# main
if __name__ == "__main__":
    _, y_test_out, predicted_out = run_network()
    # y_test_out_mean = y_test_out - np.mean(y_test_out)
    # predicted_out_mean = predicted_out - np.mean(predicted_out)
```
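To make the comparison from the figures reproducible, a small driver can train one model per history length and report the same shifted mean-absolute error. This is a hypothetical helper (compare_sequence_lengths is not part of the original code); it reuses data_power_consumption and build_model from above:

```python
# Hypothetical comparison loop: trains one model per history length and
# reports the same shifted mean-absolute error used in run_network above.
def compare_sequence_lengths(path, lengths=(3, 5, 10), epochs=10, ratio=1.0):
    errors = {}
    for seq_len in lengths:
        x_train, y_train, x_test, y_test = data_power_consumption(
            path, seq_len, ratio)
        model = build_model()
        model.fit(x_train, y_train,
                  batch_size=512, nb_epoch=epochs, validation_split=0.05)
        predicted = model.predict(x_test).flatten()
        # same alignment as above: shift predictions by seq_len - 1
        y_shifted = (y_test - np.mean(y_test))[:-(seq_len - 1)]
        p_shifted = (predicted - np.mean(predicted))[(seq_len - 1):]
        errors[seq_len] = np.mean(np.abs(y_shifted - p_shifted))
    return errors

# usage: print compare_sequence_lengths('timber_data_1.csv')
```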
Maybe it is because your time series at time t does not depend on your time series at time t-10. If you have a time series (x1, ..., xn) and there is no link between xn and xn-p, then there is no reason to use a history of p steps.
For example, if you want to predict the weather one hour ahead, you would not use a history of 2 weeks. Why? Because the weather 2 weeks in the past has no influence on the weather right now. You would instead use the weather of the last hour (or the last day).
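One way to test this intuition on the voltage signal itself is to measure its autocorrelation at the candidate lags: if the correlation has already decayed by lag 10, the extra history mostly adds noise. A minimal sketch, assuming the raw values are available as the `power` list built in data_power_consumption:

```python
import numpy as np

def autocorrelation(x, lag):
    # Pearson-style correlation between the series and a lagged copy of itself
    x = np.asarray(x, dtype=float)
    x = x - x.mean()
    return np.dot(x[:-lag], x[lag:]) / np.dot(x, x)

# usage, e.g. with the `power` list from data_power_consumption:
# for lag in (1, 2, 5, 10):
#     print lag, autocorrelation(power, lag)
```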
PS: I used the example of weather forecasting because, to me, there is no link between the weather 2 weeks in the past and the weather now. But maybe an expert in weather forecasting would prove me wrong!
Cheers!