# IFRAME SYNC  (web-page residue from the original post; not part of the script)
# ]
import random

import nltk
import numpy as np
from nltk.corpus import reuters
# Fetch the NLTK data packages requested by this script.
# NOTE(review): 'punkt' is downloaded here, but nothing visible in this script
# tokenizes text -- confirm it is still needed.
nltk.download('reuters')
nltk.download('punkt')

# Read the whole Reuters corpus as a single string and fold it to lower case,
# so upper- and lower-case letters share one entry in the character vocabulary.
corpus = reuters.raw().lower()
# Character vocabulary: every distinct character of the corpus, in sorted
# order, plus lookup tables in both directions (character <-> integer index).
chars = sorted(set(corpus))
char_indices = {symbol: position for position, symbol in enumerate(chars)}
indices_char = dict(enumerate(chars))
# Prepare input and target data.
# Slide a window of `max_len` characters over the corpus, advancing `step`
# characters at a time. Each window is one training input and the character
# immediately after it is the target to predict.
max_len = 40
step = 3
sentences = []
next_chars = []
# The range stops at len(corpus) - max_len so that corpus[start + max_len]
# (the target character) is always a valid index.
for start in range(0, len(corpus) - max_len, step):
    sentences.append(corpus[start:start + max_len])
    next_chars.append(corpus[start + max_len])
# Create one-hot encoded input data and target labels.
#   x[i, t, c] is True when window i holds character index c at position t.
#   y[i, c]    is True when character index c follows window i.
# The builtin `bool` is used as the dtype because the `np.bool` alias was
# removed from NumPy (1.24+), where it raises AttributeError.
x = np.zeros((len(sentences), max_len, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = True
    # One target per window, so this is set once per outer iteration.
    y[i, char_indices[next_chars[i]]] = True
# Character-level language model: one 128-unit LSTM layer reads a window of
# `max_len` one-hot encoded characters, and a softmax layer scores every
# character of the vocabulary as the candidate next character.
from keras.models import Sequential
from keras.layers import LSTM, Dense

vocab_size = len(chars)
model = Sequential([
    LSTM(128, input_shape=(max_len, vocab_size)),
    Dense(vocab_size, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Fit on the one-hot windows (x) and their next-character targets (y).
model.fit(x, y, epochs=30, batch_size=128)
# Generate text using the trained model
def generate_text(seed_text, length=400, temperature=0.6):
    """Extend a seed string one character at a time with the trained model.

    At every step the last ``max_len`` characters of the text produced so far
    are one-hot encoded, ``model`` predicts a distribution over ``chars``, and
    ``sample`` draws the next character index from it.

    Args:
        seed_text: Prompt to continue. It is lower-cased before use.
        length: Number of characters to append to the seed.
        temperature: Passed through to ``sample`` to control randomness.

    Returns:
        The lower-cased seed followed by the ``length`` generated characters.
    """
    generated_text = seed_text.lower()
    # Only the trailing max_len characters ever feed the model, so keep just
    # that window and collect new characters in a list instead of growing one
    # string with repeated concatenation.
    window = generated_text[-max_len:]
    new_chars = []
    for _ in range(length):
        # A window shorter than max_len leaves the trailing time steps all-zero.
        x_pred = np.zeros((1, max_len, len(chars)))
        for t, char in enumerate(window):
            index = char_indices.get(char)
            # Characters the vocabulary does not contain (possible only in the
            # seed) are left as an all-zero time step instead of raising
            # KeyError.
            if index is not None:
                x_pred[0, t, index] = 1
        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_char = indices_char[next_index]
        new_chars.append(next_char)
        window = (window + next_char)[-max_len:]
    return generated_text + "".join(new_chars)
# Helper function to sample an index from a probability array
def sample(preds, temperature=1.0):
    """Draw one index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised, then a single index is drawn from the result. Temperatures
    below 1 sharpen the distribution towards its largest entries; temperatures
    above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Scaling factor, expected to be positive. A value of 0
            selects the most probable index deterministically instead of
            dividing by zero.

    Returns:
        The sampled index as an ``int``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the scaling as temperature -> 0: all mass on the maximum.
        return int(np.argmax(preds))
    # Zero probabilities legitimately map to -inf (and back to 0 after exp),
    # so silence the divide-by-zero warning numpy would otherwise emit.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest value is 0 before exponentiating. This leaves the
    # normalised result unchanged but stops every entry underflowing to 0
    # (giving 0/0 = NaN) when the temperature is very small.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    probas = exp_preds / np.sum(exp_preds)
    draw = np.random.multinomial(1, probas, 1)
    return int(np.argmax(draw))
# Run the generator on a fixed prompt and print the 500-character continuation.
seed_text = "The small text generator is"
generated_text = generate_text(
    seed_text,
    length=500,
    temperature=0.5,
)
print(generated_text)
# IFRAME SYNC  (web-page residue from the original post; not part of the script)
# Comments
# Post a Comment