In [1]: import keras from keras import preprocessing, models, layers, datasets, callbacks from keras.preprocessing import sequence from keras.models import Sequential from keras.layers import Dense, Embedding, Bidirectional, Dropout, LSTM from keras.datasets import reuters Using TensorFlow backend. In [2]: caracters_max= 8982 long_max=100 batch_size =32 In [3]: print('Loading data...') (features_train,target_train),(features_test,target_test)=reuters.load_data(nu m_words=caracters_max) print("Datos de entrenamiento: {0}, identificadores: {1}".format(len(features_ train),len(target_train))) print("Datos de test: {0}, identificadores: {1}".format(len(features_test),len (target_test))) Loading data... Datos de entrenamiento: 8982, identificadores: 8982 Datos de test: 2246, identificadores: 2246 In [4]: indice_palabra= reuters.get_word_index() indice_palabra= {i:(j+3) for i,j in indice_palabra.items()} indice_palabra["<PAD>"] = 0 indice_palabra["<START>"]=1 indice_palabra["<UNK>"]=2 #palabra desconocida indice_palabra["<UNUSED>"] = 3 indice_palabra_retornada= dict([(valor, clave) for (clave,valor) in indice_pal abra.items()]) def decodificar_revision(texto): return ' '.join([indice_palabra_retornada.get(x, '?')for x in texto]) In [5]: z=20 print('Cantidad de palabras en la revision:', len(features_train[z]),'palabra s') print(decodificar_revision(features_train[z])) Cantidad de palabras en la revision: 231 palabras <START> leading u s farm state senators are seeking to <UNK> into the <UNK> <UNK> trade bill a provision that would broaden <UNK> requirements under the u s agriculture department's export enhancement program eep to include tradi tional buyers of u s farm products including the soviet union senate staff s aid under existing criteria usda can offer eep subsidies to <UNK> export mar kets lost to competing nations' unfair trading practices senate agriculture committee chairman patrick leahy d <UNK> is leading a group of farm state se nators in an effort to broaden the criteria in such a way as to enable mosco w to be eligible for the subsidies sources said the senators including senat e finance committee chairman lloyd bentsen d tex <UNK> <UNK> d <UNK> david < UNK> d <UNK> john <UNK> d <UNK> and <UNK> <UNK> r miss also may <UNK> into t he trade bill a measure to shield pork producers and processors from canadia n imports the measure sponsored by sen charles <UNK> r iowa would clarify th e definition of industry in determining whether or not imports were causing injury to u s producers <UNK> bill stems from a 1985 decision by the interna tional trade commission that imports from canada of live <UNK> but not fresh chilled and frozen pork were <UNK> u s producers the bill's proponents have argued canada has simply replaced shipments of live hogs with fresh pork reu ter 3 In [6]: print('añadiendo Padding a las secuencias') padding_train=keras.preprocessing.sequence.pad_sequences(features_train,value= indice_palabra['<PAD>'],padding='post',maxlen=long_max) padding_test=keras.preprocessing.sequence.pad_sequences(features_test,value=in dice_palabra['<PAD>'],padding='post',maxlen=long_max) añadiendo Padding a las secuencias In [7]: X=8981 print('Padding añadido a las revisiones') print('Cantidad de palabras en la revision:', len(padding_train[X]),'palabras' ) print(decodificar_revision(padding_train[X])) Padding añadido a las revisiones Cantidad de palabras en la revision: 100 palabras their influence on the bullion market in the near future bullion bankers sam uel montagu and co ltd said in a market report but the firm said silver may lag behind gold in any <UNK> to movements on foreign exchanges opec's failur e to address the recent decline in oil prices remains a worrying factor howe ver and on balance it appears that the market should be approached cautiousl y montagu said the bank said the us economy has shown no <UNK> long term imp rovement and that both latin american debt and the iranian arms affair could undermine confidence in the dollar reuter 3 In [8]: print('dimensiones de los datos de entrenamiento:', padding_train.shape) print('dimensiones de los datos de prueba:',padding_test.shape) dimensiones de los datos de entrenamiento: (8982, 100) dimensiones de los datos de prueba: (2246, 100) In [9]: #transformacion de etiquetas a categorias labels_train = keras.utils.to_categorical(target_train) labels_test = keras.utils.to_categorical(target_test) In [10]: print('Build model...') earlystopping=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, m ode='auto')] LSTM_model = Sequential() LSTM_model.add(Embedding(input_dim=caracters_max,output_dim=128,input_length=l ong_max)) LSTM_model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2)) LSTM_model.add(Dense(units=46, activation="sigmoid")) Build model... WARNING:tensorflow:From C:\Users\riosm\Anaconda3\lib\site-packages\tensorflo w\python\framework\op_def_library.py:263: colocate_with (from tensorflow.pyt hon.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer. WARNING:tensorflow:From C:\Users\riosm\Anaconda3\lib\site-packages\keras\bac kend\tensorflow_backend.py:3445: calling dropout (from tensorflow.python.op s.nn_ops) with keep_prob is deprecated and will be removed in a future versi on. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 keep_prob`. In [11]: LSTM_model.summary() _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= embedding_1 (Embedding) (None, 100, 128) 1149696 _________________________________________________________________ lstm_1 (LSTM) (None, 128) 131584 _________________________________________________________________ dense_1 (Dense) (None, 46) 5934 ================================================================= Total params: 1,287,214 Trainable params: 1,287,214 Non-trainable params: 0 _________________________________________________________________ In [12]: LSTM_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) In [13]: savemodel=LSTM_model.fit(padding_train,labels_train, batch_size=batch_size, epochs=8, validation_data=(padding_test,labels_test ),callbacks=earlystopping) WARNING:tensorflow:From C:\Users\riosm\Anaconda3\lib\site-packages\tensorflo w\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_op s) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. Train on 8982 samples, validate on 2246 samples Epoch 1/8 8982/8982 [==============================] - 49s 5ms/step - loss: 0.1016 - a cc: 0.9731 - val_loss: 0.0709 - val_acc: 0.9783 Epoch 2/8 8982/8982 [==============================] - 47s 5ms/step - loss: 0.0669 - a cc: 0.9816 - val_loss: 0.0604 - val_acc: 0.9846 Epoch 3/8 8982/8982 [==============================] - 48s 5ms/step - loss: 0.0590 - a cc: 0.9845 - val_loss: 0.0582 - val_acc: 0.9848 Epoch 4/8 8982/8982 [==============================] - 47s 5ms/step - loss: 0.0567 - a cc: 0.9849 - val_loss: 0.0574 - val_acc: 0.9848 Epoch 5/8 8982/8982 [==============================] - 47s 5ms/step - loss: 0.0547 - a cc: 0.9865 - val_loss: 0.0554 - val_acc: 0.9871 Epoch 6/8 8982/8982 [==============================] - 47s 5ms/step - loss: 0.0542 - a cc: 0.9872 - val_loss: 0.0544 - val_acc: 0.9873 Epoch 7/8 8982/8982 [==============================] - 47s 5ms/step - loss: 0.0504 - a cc: 0.9883 - val_loss: 0.0520 - val_acc: 0.9876 Epoch 8/8 8982/8982 [==============================] - 47s 5ms/step - loss: 0.0470 - a cc: 0.9890 - val_loss: 0.0507 - val_acc: 0.9876 In [14]: test_loss, test_acc = LSTM_model.evaluate(x=padding_test, y=labels_test, batch _size=batch_size) print('accuracy en el conjunto de datos de test:', test_acc, 'Perdida de datos en el dataset de test:', test_loss) 2246/2246 [==============================] - 3s 1ms/step accuracy en el conjunto de datos de test: 0.9875527866058655 Perdida de dato s en el dataset de test: 0.05070327855281711 In [15]: import matplotlib from matplotlib import pyplot %matplotlib inline acc=savemodel.history['acc'] val_acc=savemodel.history['acc'] loss=savemodel.history['loss'] val_loss=savemodel.history['val_loss'] epochs= range(1,len(acc)+1) pyplot.plot(epochs,loss, 'bo',label='Training_loss') pyplot.plot(epochs,val_loss,'b',label='Validation loss') pyplot.title('Training and validation loss') pyplot.xlabel('Epochs') pyplot.ylabel('Loss') pyplot.legend() pyplot.show() In [16]: pyplot.clf() acc_values=savemodel.history['acc'] val_acc_values=savemodel.history['val_acc'] pyplot.plot(epochs,acc_values, 'bo',label='Training_accuracy') pyplot.plot(epochs,val_acc_values,'b',label='Validation accuracy') pyplot.title('Training and validation accuracy') pyplot.xlabel('Epochs') pyplot.ylabel('Accuracy') pyplot.show()