import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. The access token is read from the
# HF_TOKEN environment variable rather than being hard-coded in the script.
login(token=os.environ.get("HF_TOKEN"))

# Load the Gemma model and tokenizer once at startup rather than on every
# request. Gemma is a decoder-only model, so AutoModelForCausalLM is the
# correct auto class (not AutoModelForSeq2SeqLM).
model_name = "google/gemma-1.1-7b-it"
try:
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except Exception as e:
    print(f"An error occurred while loading the model: {e}")
    raise


def predict(input, history=None):
    """Generates a chatbot reply to the user's input with Gemma.

    Args:
        input (str): User's input text.
        history (list, optional): List of previous (user, bot) message pairs
            used as conversational context.

    Returns:
        tuple: The updated history for the chatbot display and the same
        history for the state component.
    """
    if history is None:
        history = []

    # Tokenize the user input and generate a response with Gemma.
    inputs = tokenizer(input, return_tensors="pt")
    generated_ids = model.generate(**inputs, max_new_tokens=256)

    # Decode only the newly generated tokens, skipping the echoed prompt.
    response_ids = generated_ids[0][inputs["input_ids"].shape[-1]:]
    chatbot_response = tokenizer.decode(response_ids, skip_special_tokens=True)

    # Append the new exchange so the chatbot component can render the dialogue.
    history.append((input, chatbot_response))
    return history, history


# Create the Gradio interface. The "state" input/output pair keeps the
# conversation history between calls and can be removed if it is not used.
interface = gr.Interface(
    fn=predict,
    inputs=["textbox", "state"],
    outputs=["chatbot", "state"],
)

# Launch the Gradio interface
interface.launch()
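# Usage sketch (assumes the script above is saved as app.py and that a valid
# Hugging Face access token with permission to download Gemma is available):
#
#   export HF_TOKEN=<your token>
#   python app.py
#
# Gradio then prints a local URL where the chatbot can be tried in a browser;
# passing share=True to interface.launch() also creates a temporary public link.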