# Install the required libraries first:
#   pip install transformers torch

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the OpenAssistant Falcon-7B SFT checkpoint and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained("OpenAssistant/falcon-7b-sft-mix-2000")
model = AutoModelForCausalLM.from_pretrained("OpenAssistant/falcon-7b-sft-mix-2000")

# Run on GPU if one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Simple interactive chat loop; type "exit" to quit.
while True:
    user_input = input("User: ")
    if user_input.lower() == "exit":
        break

    # Format the prompt in the User/Assistant style the model was fine-tuned on.
    input_text = f"User: {user_input}\nAssistant: "
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=100,      # cap the length of the reply rather than the whole sequence
            num_return_sequences=1,
        )

    # Decode only the newly generated tokens so the prompt is not echoed back.
    response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
    print("Assistant:", response)
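
Note that a 7B-parameter model in the default float32 precision needs on the order of 28 GB of memory for the weights alone, which is more than most single consumer GPUs offer. Below is a minimal sketch of loading the same checkpoint in half precision instead, which roughly halves the memory footprint; it assumes a CUDA GPU with enough VRAM and a transformers version that accepts the standard torch_dtype argument to from_pretrained.

# Half-precision loading sketch (assumption: CUDA GPU with enough VRAM,
# recent transformers release that supports the torch_dtype argument).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("OpenAssistant/falcon-7b-sft-mix-2000")
model = AutoModelForCausalLM.from_pretrained(
    "OpenAssistant/falcon-7b-sft-mix-2000",
    torch_dtype=torch.float16,  # store weights as float16 instead of float32
)
model.to("cuda")
# The chat loop above works unchanged with this model object.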