"""Gradio demo: OCR two aviation log PDFs, classify each, and compare them.

For each of the two PDF links the app runs OCR (Azure Computer Vision, via the
project helper ``azure_ocr``), asks an OpenAI completion model for the root
cause and for a problem category, and finally reports the cosine similarity
between sentence embeddings of the two extracted texts.

Requires the ``sentence-transformers`` package
(``pip install -U sentence-transformers``).
"""

import os
from functools import lru_cache

import gradio as gr
import numpy as np
import openai
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.storage.blob import BlobClient
from msrest.authentication import CognitiveServicesCredentials
from numpy.linalg import norm
from sentence_transformers import SentenceTransformer

# import utils functions
from extract_text import azure_ocr
from preprocessing_images import preprocessing_function

# Service configuration is read from the environment; os.getenv returns None
# for any variable that is not set.
my_container = os.getenv("AZURE_CONTAINER")
subscription_key = os.getenv("SUB_KEY")
endpoint = os.getenv("AZURE_ENDPOINT")
connection_string = os.getenv("AZURE_CON_STRING")
openai.api_key = os.getenv("OPENAI_API_KEY")

computervision_client = ComputerVisionClient(
    endpoint, CognitiveServicesCredentials(subscription_key)
)


def ocr_pdf(pdf_url2):
    """Preprocess a PDF, upload it to blob storage, and return its OCR text.

    The file originally defined this function twice with identical bodies
    (parameters ``pdf_url1`` / ``pdf_url2``); the second definition shadowed
    the first, so a single definition with the effective parameter name is
    kept.

    Args:
        pdf_url2: Link to the PDF. Its last ``/``-separated segment is used
            as the blob name.

    Returns:
        The text returned by ``azure_ocr`` for the uploaded blob, stripped of
        leading and trailing whitespace.
    """
    preprocessing_function(pdf_url2)
    blob_name = pdf_url2.split('/')[-1]
    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=blob_name,
    )
    # NOTE(review): "answer_paper.pdf" is presumably written by
    # preprocessing_function -- confirm against that helper.
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)
    text = azure_ocr(blob.url, computervision_client)
    return text.strip()


def _complete(prompt):
    """Send *prompt* to the OpenAI completion endpoint and return the reply.

    Shared by ``classify_cause`` and ``classify_class`` so both use exactly
    the same request parameters.

    NOTE(review): ``openai.Completion`` with ``text-davinci-003`` looks like
    the legacy (pre-1.0 SDK) completions interface -- confirm the installed
    SDK version and model availability.
    """
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=50,
        n=1,
        stop=None,
    )
    return response.choices[0].text.strip()


def classify_cause(incident_description):
    """Ask the completion model for the root cause of an incident.

    Args:
        incident_description: Text of the incident/log report.

    Returns:
        The first completion choice, stripped of surrounding whitespace.
    """
    # NOTE(review): the prompt refers to "the below list" but supplies no
    # list -- confirm whether a list of causes was meant to be included.
    prompt = (
        "Identify the root cause from the below list:\n"
        f"incident_description:{incident_description}\n"
    )
    return _complete(prompt)


def classify_class(incident_description):
    """Ask the completion model to pick a problem category for an incident.

    Args:
        incident_description: Text of the incident/log report.

    Returns:
        The first completion choice, stripped of surrounding whitespace.
    """
    # The class list is reproduced verbatim from the original prompt.
    prompt = (
        "Classify the following incident description into one of the given "
        "classes:Aircraft Autopilot Problem, Auxiliary Power Problem,"
        "Cabin Pressure Problem, Engine Problem,Fuel System Problem,"
        "Avionics Problem,Communications Problem,Electrical System Problem,"
        "Engine Problem,Fire/Smoke Problem,Fuel System Problem,"
        "Ground Service Problem,Hydraulic System Problem,Ice/Frost Problem,"
        "Landing Gear Problem,Maintenance Problem,Oxygen System Problem,"
        "other problem\n"
        f"incident_description:{incident_description}\n"
    )
    return _complete(prompt)


@lru_cache(maxsize=1)
def _embedding_model():
    """Return the sentence-embedding model, constructing it only once."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def text_similarity(pdftext1, pdftext12):
    """Return the cosine similarity between embeddings of two texts.

    The original body embedded the string literals ``"pdftext1"`` and
    ``"pdftext2"`` instead of the arguments, so the result never depended on
    the documents; the arguments are now what gets encoded.

    Args:
        pdftext1: First text.
        pdftext12: Second text (parameter name kept as in the original
            signature).

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))`` for the two embedding vectors.
    """
    sentences = [pdftext1, pdftext12]
    embeddings = _embedding_model().encode(sentences)
    first, second = embeddings[0], embeddings[1]
    cosine = np.dot(first, second) / (norm(first) * norm(second))
    return cosine


def avatiation(pdf_url1, pdf_url2):
    """Run OCR, classification, and similarity for two log-report PDFs.

    Args:
        pdf_url1: Link to the first PDF.
        pdf_url2: Link to the second PDF.

    Returns:
        A 5-tuple ``(main_issue1, defect_class1, main_issue2, defect_class2,
        cosine)`` in the order expected by the Gradio ``outputs`` list.
    """
    pdftext1 = ocr_pdf(pdf_url1)
    pdftext2 = ocr_pdf(pdf_url2)

    defect_class1 = classify_class(pdftext1)
    main_issue1 = classify_cause(pdftext1)

    defect_class2 = classify_class(pdftext2)
    main_issue2 = classify_cause(pdftext2)

    cosine = text_similarity(pdftext1, pdftext2)
    return main_issue1, defect_class1, main_issue2, defect_class2, cosine


# NOTE(review): gr.inputs / gr.outputs look like the older Gradio component
# namespaces -- confirm they exist in the pinned Gradio version.
inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")
outputs = [
    gr.outputs.Textbox(label="Main Issue of the log report"),
    gr.outputs.Textbox(label="category of the log report"),
    gr.outputs.Textbox(label="Main Issue of the log report2"),
    gr.outputs.Textbox(label="category of the log report2"),
    gr.outputs.Textbox(label="log similarity"),
]

demo = gr.Interface(
    fn=avatiation,
    inputs=[inputs1, inputs2],
    outputs=outputs,
    title="ATA Auto classification using OCR and GPT3 ",
)
demo.launch()