generate.py
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the pre-trained model
model_name = "mlabonne/phixtral-4x2_8"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
# Define the prompt
prompt = "Generate a simple question and answer regarding astrology:"
# Generate 1000 question-and-answer pairs
qa_pairs = []
for i in range(1000):
    input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
    # do_sample=True is required for top_k/top_p/temperature to take effect;
    # without it, generate() decodes greedily and every iteration returns the same text
    with torch.no_grad():
        output = model.generate(input_ids, max_length=100, num_return_sequences=1, do_sample=True, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, pad_token_id=tokenizer.eos_token_id)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded output begins with the prompt text, so strip it before saving
    qa_pairs.append({"question": f"Question {i+1}", "answer": generated_text[len(prompt):].strip()})
# Save the QA pairs to a JSON file
with open("questions_and_answers.json", "w") as json_file:
    json.dump(qa_pairs, json_file, indent=4)
print("Questions and answers saved to 'questions_and_answers.json'")
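As a quick sanity check once the script finishes, a minimal sketch (assuming the file was written by the script above) that reloads the JSON and spot-checks the first entry:

import json

# Reload the saved pairs and inspect the first one
with open("questions_and_answers.json") as json_file:
    qa_pairs = json.load(json_file)

print(f"Loaded {len(qa_pairs)} pairs")
print(qa_pairs[0])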