SMALL TEXT GENERATOR

# Character-level LSTM text generator trained on the NLTK Reuters corpus.
#
# Run as a script to download the corpus, train the model and print a
# generated sample.  The training pipeline lives under the ``__main__`` guard
# so that the NumPy-only helpers can be imported without starting a training
# run.

import random  # imported by the original script; not used below

import numpy as np

# Number of characters in each input window, and the stride between the
# starts of consecutive training windows.
max_len = 40
step = 3


def load_corpus():
    """Download the NLTK resources and return the lower-cased Reuters text."""
    # Imported lazily so the NumPy-only helpers in this file stay importable
    # on machines without NLTK.
    import nltk
    from nltk.corpus import reuters

    nltk.download('reuters')
    nltk.download('punkt')
    return reuters.raw().lower()


def build_vocab(text):
    """Return ``(chars, char_indices, indices_char)`` for *text*.

    ``chars`` is the sorted list of distinct characters, ``char_indices``
    maps each character to its position in that list and ``indices_char``
    is the inverse mapping.
    """
    vocab = sorted(set(text))
    to_index = {c: i for i, c in enumerate(vocab)}
    to_char = dict(enumerate(vocab))
    return vocab, to_index, to_char


def vectorize(text, char_indices, window, stride):
    """One-hot encode sliding windows of *text* and their next characters.

    A window of ``window`` characters is taken every ``stride`` characters;
    the character that follows each window is its target.

    Returns:
        ``(x, y)`` boolean arrays of shape ``(n, window, vocab)`` and
        ``(n, vocab)``, where ``n`` is the number of windows and ``vocab``
        is ``len(char_indices)``.

    Raises:
        KeyError: if *text* contains a character missing from *char_indices*.
    """
    starts = range(0, len(text) - window, stride)
    n_chars = len(char_indices)
    # Builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24.
    x = np.zeros((len(starts), window, n_chars), dtype=bool)
    y = np.zeros((len(starts), n_chars), dtype=bool)
    for row, start in enumerate(starts):
        for t, char in enumerate(text[start:start + window]):
            x[row, t, char_indices[char]] = True
        y[row, char_indices[text[start + window]]] = True
    return x, y


def build_model(window, n_chars):
    """Build and compile the single-layer LSTM next-character classifier."""
    # Imported lazily: Keras is only needed when a model is actually built.
    from keras.models import Sequential
    from keras.layers import LSTM, Dense

    net = Sequential()
    net.add(LSTM(128, input_shape=(window, n_chars)))
    net.add(Dense(n_chars, activation='softmax'))
    net.compile(optimizer='adam', loss='categorical_crossentropy')
    return net


def sample(preds, temperature=1.0):
    """Draw one index from the probability array *preds*.

    The distribution is re-weighted as ``p ** (1 / temperature)`` and
    renormalised before sampling, so temperatures below 1 sharpen it and
    temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: positive re-weighting factor.

    Returns:
        The sampled index as an ``int``.

    Raises:
        ValueError: if *temperature* is not positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")
    probs = np.asarray(preds, dtype='float64')
    # log(0) is -inf, which correctly becomes weight 0 after exp(); silence
    # the divide-by-zero warning NumPy would otherwise emit.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature
    # Shift so the largest logit is 0.  Without this, small temperatures make
    # every exp() underflow to 0 and the normalisation turns into 0/0 = NaN.
    logits -= np.max(logits)
    weights = np.exp(logits)
    weights /= np.sum(weights)
    draw = np.random.multinomial(1, weights, 1)
    return int(np.argmax(draw))


def generate_text(seed_text, length=400, temperature=0.6):
    """Extend *seed_text* by *length* characters sampled from the model.

    Uses the module-level ``model``, ``chars``, ``char_indices``,
    ``indices_char`` and ``max_len`` set up by the training section below.

    Args:
        seed_text: starting text; it is lower-cased before use.
        length: number of characters to append.
        temperature: sampling temperature passed to ``sample``.

    Returns:
        The lower-cased seed followed by the generated characters.
    """
    generated = list(seed_text.lower())
    for _ in range(length):
        x_pred = np.zeros((1, max_len, len(chars)))
        for t, char in enumerate(generated[-max_len:]):
            index = char_indices.get(char)
            # Characters that never occurred in the training text have no
            # one-hot slot; leave their row all-zero instead of raising.
            if index is not None:
                x_pred[0, t, index] = 1
        preds = model.predict(x_pred, verbose=0)[0]
        generated.append(indices_char[sample(preds, temperature)])
    return ''.join(generated)


if __name__ == "__main__":
    # Load and preprocess the Reuters corpus.
    corpus = load_corpus()

    # Character <-> index mappings.
    chars, char_indices, indices_char = build_vocab(corpus)

    # One-hot encoded input windows and next-character targets.
    x, y = vectorize(corpus, char_indices, max_len, step)

    # Build and train the character-level LSTM model.
    model = build_model(max_len, len(chars))
    model.fit(x, y, epochs=30, batch_size=128)

    # Generate text starting from a seed.
    seed_text = "The small text generator is"
    generated_text = generate_text(seed_text, length=500, temperature=0.5)
    print(generated_text)

Comments