### 1. Imports and class names setup ###
import os
from timeit import default_timer as timer
from typing import Tuple, Dict

import cv2
import gradio as gr
import numpy as np
import requests
import torch
from roboflow import Roboflow

from model import create_effnetb2_model

# Roboflow object-detection model (ideally the API key would be read from an
# environment variable instead of being hard-coded)
rf = Roboflow(api_key="gjZE3lykkitagkxHplyJ")
project = rf.workspace().project("rideit")
model = project.version(1).model

file_urls = [
    "https://www.dropbox.com/s/7sjfwncffg8xej2/video_7.mp4?dl=1",
]


def download_file(url, save_name):
    # Download a file once; skip it if it already exists locally
    if not os.path.exists(save_name):
        file = requests.get(url)
        open(save_name, "wb").write(file.content)


for i, url in enumerate(file_urls):
    if "mp4" in url:
        download_file(url, "video.mp4")
    else:
        download_file(url, f"image_{i}.jpg")

video_path = [["video.mp4"]]

# Setup class names
class_names = ["dented", "good"]

### 2. Model and transforms preparation ###

# Create EffNetB2 model
effnetb2, effnetb2_transforms = create_effnetb2_model(
    num_classes=2,  # len(class_names) would also work
)

# Load saved weights
effnetb2.load_state_dict(
    torch.load(
        f="car_model.pth",
        map_location=torch.device("cpu"),  # load to CPU
    )
)

### 3. Predict function ###


def normalize_2d(matrix):
    # Scale a matrix by its 2-norm (np.linalg.norm computes the Frobenius/2-norm by default)
    norm = np.linalg.norm(matrix)
    return matrix / norm


def boxes_from_predictions(predictions):
    # Roboflow reports each box as a centre point (x, y) plus a width and
    # height, so the corners sit at centre +/- half the box size
    boxes = []
    for pred in predictions:
        half_w = pred["width"] / 2
        half_h = pred["height"] / 2
        boxes.append((
            int(pred["x"] - half_w),
            int(pred["y"] - half_h),
            int(pred["x"] + half_w),
            int(pred["y"] + half_h),
        ))
    return boxes


# Detect dents in a single image and draw the bounding boxes
def detect(imagepath):
    pix = model.predict(imagepath, confidence=40, overlap=30).json()
    img = cv2.imread(imagepath)
    for (x1, y1, x2, y2) in boxes_from_predictions(pix.get("predictions", [])):
        cv2.rectangle(
            img,
            (x1, y1),
            (x2, y2),
            color=(0, 0, 255),
            thickness=2,
            lineType=cv2.LINE_AA,
        )
    return img


# Create predict function for the classifier
def predict(img) -> Tuple[Dict, float]:
    start_time = timer()

    # Transform the target image and add a batch dimension
    img1 = effnetb2_transforms(img).unsqueeze(0)

    # Put model into evaluation mode and turn on inference mode
    effnetb2.eval()
    with torch.inference_mode():
        # Pass the transformed image through the model and turn the prediction
        # logits into prediction probabilities
        pred_probs = torch.softmax(effnetb2(img1), dim=1)

    # Create a prediction label and prediction probability dictionary for each
    # prediction class (this is the required format for Gradio's output parameter)
    pred_labels_and_probs = {
        class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))
    }

    # Calculate the prediction time
    pred_time = round(timer() - start_time, 5)

    # Return the prediction dictionary and prediction time
    return pred_labels_and_probs, pred_time


def show_preds_video(video_path):
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_copy = frame.copy()
        pix = model.predict(frame, confidence=40, overlap=30).json()
        for (x1, y1, x2, y2) in boxes_from_predictions(pix.get("predictions", [])):
            cv2.rectangle(
                frame_copy,
                (x1, y1),
                (x2, y2),
                color=(0, 0, 255),
                thickness=2,
                lineType=cv2.LINE_AA,
            )
        # OpenCV decodes frames as BGR; Gradio expects RGB
        yield cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
    cap.release()
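
# For reference, the hosted-inference JSON that boxes_from_predictions()
# consumes is shaped roughly like the sample below. The values are
# illustrative, not real model output, and the actual response also carries
# fields such as "class" and "confidence":
_sample_response = {
    "predictions": [
        {"x": 320.0, "y": 240.0, "width": 80.0, "height": 60.0},
    ],
}
assert boxes_from_predictions(_sample_response["predictions"]) == [(280, 210, 360, 270)]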
### 4. Gradio app ###

# Create title, description and article strings
title = "Dented Car Detector"
description = "An EfficientNetB2 feature extractor computer vision model to classify images of dented or good cars."
article = "Based on https://www.learnpytorch.io/."

# Create examples list from "examples/" directory
example_list = [["examples/" + example] for example in os.listdir("examples")]

inputs_image = [
    gr.components.Image(type="filepath", label="Input Image"),
]
outputs_image = [
    gr.components.Image(type="numpy", label="Output Image"),
]
inputs_video = [
    gr.components.Video(type="filepath", label="Input Video"),
]
outputs_video = [
    gr.components.Image(type="numpy", label="Output Image"),
]

# Create the Gradio demo
app1 = gr.Interface(
    fn=predict,  # mapping function from input to output
    inputs=gr.Image(type="pil"),  # what are the inputs?
    outputs=[
        gr.Label(num_top_classes=2, label="Predictions"),  # what are the outputs?
        gr.Number(label="Prediction time (s)"),
    ],  # our fn has two outputs, therefore we have two outputs
    examples=example_list,
    title=title,
    description=description,
    article=article,
)
app2 = gr.Interface(
    fn=detect,
    inputs=inputs_image,
    outputs=outputs_image,
    title=title,
)
app3 = gr.Interface(
    fn=show_preds_video,
    inputs=inputs_video,
    outputs=outputs_video,
    examples=video_path,
    cache_examples=False,
)
demo = gr.TabbedInterface(
    [app1, app2, app3],
    ["Classify", "Detect", "Video Interface"],
)

# Launch the demo! (queuing is enabled because show_preds_video streams
# frames from a generator, which Gradio only supports with a queue)
demo.queue().launch()
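
# Optional local sanity check for the classifier (assumes "examples/" holds at
# least one image); run it with the launch call above commented out:
#
#   from PIL import Image
#   labels, seconds = predict(Image.open(example_list[0][0]))
#   print(labels, f"({seconds}s)")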