from transformers import AutoModelForCausalLM, LlamaTokenizer
import torch


def generate_response(model, tokenizer, instruction, input_text,
                      temperature, top_p, top_k, repeat_penalty):
    # Build the Alpaca-style prompt from the instruction and input text.
    PROMPT = f'''### Instruction:
{instruction}

### Input:
{input_text}

### Response:'''

    input_ids = tokenizer.encode(PROMPT, return_tensors='pt')

    # Set generation parameters within the given ranges.
    # do_sample=True is required for temperature/top_p/top_k to take effect;
    # otherwise generate() falls back to greedy decoding and ignores them.
    gen_parameters = {
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'repetition_penalty': repeat_penalty,
        'max_new_tokens': 50  # Example; set your preferred value
    }

    with torch.no_grad():
        output = model.generate(input_ids, **gen_parameters)

    # Decode only the newly generated tokens so the prompt is not echoed back.
    generated_tokens = output[0][input_ids.shape[1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response


def main():
    MODEL_NAME = 'Yoko-7B-Japanese-v1'  # Replace with your model's file path or name

    # Load the pre-trained model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)

    instruction = '次の問題を回答してください。'
    # instruction = 'Please answer the following question.'
    input_text = '東京は何国の都市ですか?'
    # input_text = 'Which country is Tokyo a city of?'

    # Example parameter values; modify these within your preferred ranges
    temperature = 0.6
    top_p = 0.7
    top_k = 40
    repeat_penalty = 1.1

    response = generate_response(model, tokenizer, instruction, input_text,
                                 temperature, top_p, top_k, repeat_penalty)
    print('response: ' + response)


if __name__ == '__main__':
    main()
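If a CUDA GPU is available, the model can optionally be loaded in half precision to reduce memory use and speed up generation. The following is a minimal sketch, assuming a CUDA device with enough VRAM for a 7B model; the MODEL_NAME placeholder and generation settings mirror the script above.

# Optional: load the model in fp16 on a CUDA GPU (assumes a CUDA device is available).
import torch
from transformers import AutoModelForCausalLM, LlamaTokenizer

MODEL_NAME = 'Yoko-7B-Japanese-v1'  # same placeholder as above

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16).to('cuda')
tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)

# Inputs must be moved to the same device as the model before calling generate().
prompt = '### Instruction:\n次の問題を回答してください。\n\n### Input:\n東京は何国の都市ですか?\n\n### Response:'
input_ids = tokenizer.encode(prompt, return_tensors='pt').to('cuda')
output = model.generate(input_ids, do_sample=True, temperature=0.6, top_p=0.7,
                        top_k=40, repetition_penalty=1.1, max_new_tokens=50)
print(tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True))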