import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
# Load the saved English models and tokenizers
model_roberta = AutoModelForSequenceClassification.from_pretrained("./models/roberta-base-offensive")
tokenizer_roberta = AutoTokenizer.from_pretrained("./models/roberta-base-offensive")
model_distilbert = AutoModelForSequenceClassification.from_pretrained("./models/distilbert-base-uncased-offensive")
tokenizer_distilbert = AutoTokenizer.from_pretrained("./models/distilbert-base-uncased-offensive")
model_deberta = AutoModelForSequenceClassification.from_pretrained("./models/deberta-offensive")
tokenizer_deberta = AutoTokenizer.from_pretrained("./models/deberta-offensive")
model_bert = AutoModelForSequenceClassification.from_pretrained("./models/bert-offensive")
tokenizer_bert = AutoTokenizer.from_pretrained("./models/bert-offensive")
# Load the saved Arabic models and tokenizers
model_arbert = AutoModelForSequenceClassification.from_pretrained("./models/UBC-NLP/ARBERT")
tokenizer_arbert = AutoTokenizer.from_pretrained("./models/UBC-NLP/ARBERT")
model_marbert = AutoModelForSequenceClassification.from_pretrained("./models/UBC-NLP/MARBERT")
tokenizer_marbert = AutoTokenizer.from_pretrained("./models/UBC-NLP/MARBERT")
def predict(tweet, model_choice):
    # Pick the model/tokenizer pair that matches the selected dropdown entry.
    if model_choice == "RoBERTa":
        model = model_roberta
        tokenizer = tokenizer_roberta
    elif model_choice == "DistilBERT":
        model = model_distilbert
        tokenizer = tokenizer_distilbert
    elif model_choice == "ARBERT":
        model = model_arbert
        tokenizer = tokenizer_arbert
    elif model_choice == "MARBERT":
        model = model_marbert
        tokenizer = tokenizer_marbert
    elif model_choice == "DeBERTa":
        model = model_deberta
        tokenizer = tokenizer_deberta
    elif model_choice == "BERT":
        model = model_bert
        tokenizer = tokenizer_bert
    else:
        return "Model not selected", "Please select a model."
    # Tokenize the input and run inference without tracking gradients.
    encoded_input = tokenizer.encode(tweet, return_tensors='pt', truncation=True, max_length=512, padding=True)
    with torch.no_grad():
        output = model(encoded_input)
    logits = output.logits
    probabilities = torch.softmax(logits, dim=-1)
    # Convert the highest-probability class index to a human-readable label.
    prediction_index = probabilities.argmax().item()
    prediction_map = {0: "Not Offensive", 1: "Offensive"}
    prediction = prediction_map[prediction_index]
    confidence = probabilities[0, prediction_index].item()
    return prediction, f"Confidence: {confidence:.4f}"
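# Minimal usage sketch: predict() can also be called directly, e.g. for a quick
# sanity check outside the Gradio UI. The sample text and model name below are
# illustrative assumptions, not values from the original app.
# label, confidence = predict("This is a test sentence.", "BERT")
# print(label, confidence)  # e.g. "Not Offensive", "Confidence: 0.9876"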
def app_interface():
    with gr.Blocks() as app:
        gr.Markdown("## Offensive Language Detection")
        gr.Markdown("### Instructions:")
        gr.Markdown("1. Select the language of the text.\n2. Choose a model corresponding to the selected language:\n - For **English**: BERT, DeBERTa, RoBERTa, or DistilBERT\n - For **Tunisian Arabic**: ARBERT or MARBERT")
        with gr.Row():
            language = gr.Radio(["English", "Tunisian Arabic"], label="Choose Language")
        with gr.Row():
            model_choice = gr.Dropdown(["RoBERTa", "DistilBERT", "ARBERT", "MARBERT", "DeBERTa", "BERT"], label="Choose Model")
        with gr.Row():
            tweet = gr.Textbox(lines=4, placeholder="Enter your text here...", label="Text")
            submit_btn = gr.Button("Predict")
        with gr.Row():
            prediction = gr.Textbox(label="Prediction")
            confidence = gr.Textbox(label="Confidence")
        # Wire the Predict button to the inference function.
        submit_btn.click(fn=predict, inputs=[tweet, model_choice], outputs=[prediction, confidence])
    return app
app = app_interface()
app.launch()