anupam210's picture
Update app.py
25d7a4d
raw
history blame
4.14 kB
import os
import openai
import gradio as gr
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
from azure.storage.blob import BlobClient
#import utils functions
from preprocessing_images import preprocessing_function
from extract_text import azure_ocr
from sentence_transformers import SentenceTransformer
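# Requires the "sentence-transformers" package (pip install -U sentence-transformers, or list it in requirements.txt). The "!pip" shell escape only works in notebooks and is a syntax error in a .py file.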
from numpy.linalg import norm
import numpy as np
# Service settings come from environment variables; a variable that is not
# set yields None here.
my_container = os.environ.get("AZURE_CONTAINER")
connection_string = os.environ.get("AZURE_CON_STRING")
subscription_key = os.environ.get("SUB_KEY")
endpoint = os.environ.get("AZURE_ENDPOINT")
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Single Computer Vision client shared by every OCR call in this module.
computervision_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)
def ocr_pdf(pdf_url2):
    """Preprocess a PDF, upload it to Azure Blob Storage and OCR it.

    This function used to be defined twice with identical bodies; the second
    definition silently replaced the first. Only one definition is kept.

    Args:
        pdf_url2: URL of the PDF to process. The name is the one used by the
            previously effective definition, retained so that keyword
            callers keep working.

    Returns:
        The text returned by ``azure_ocr`` with leading and trailing
        whitespace removed.
    """
    preprocessing_function(pdf_url2)

    # The blob is named after the last path segment of the URL.
    blob_name = pdf_url2.split('/')[-1]
    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=blob_name,
    )

    # NOTE(review): presumably preprocessing_function writes its output to
    # "answer_paper.pdf" -- confirm against preprocessing_images.
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    text = azure_ocr(blob.url, computervision_client)
    return text.strip()
def classify_cause(incident_description):
    """Ask the OpenAI completion API for the root cause of an incident.

    Args:
        incident_description: Text of the incident report, interpolated into
            the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): the prompt refers to a "below list" but no list is
    # included in it -- confirm whether a list of causes is missing.
    cause_prompt = f"Identify the root cause from the below list:\nincident_description:{incident_description}\n"
    request_options = {
        "engine": "text-davinci-003",
        "prompt": cause_prompt,
        "temperature": 0,
        "max_tokens": 50,
        "n": 1,
        "stop": None,
    }
    completion = openai.Completion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.text.strip()
def classify_class(incident_description):
    """Ask the OpenAI completion API to pick a problem class for an incident.

    Args:
        incident_description: Text of the incident report, interpolated into
            the prompt after the list of candidate classes.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): "Engine Problem" and "Fuel System Problem" each appear
    # twice in the class list; left untouched because editing the prompt
    # could change the model's answers.
    class_prompt = f"Classify the following incident description into one of the given classes:Aircraft Autopilot Problem, Auxiliary Power Problem,Cabin Pressure Problem, Engine Problem,Fuel System Problem,Avionics Problem,Communications Problem,Electrical System Problem,Engine Problem,Fire/Smoke Problem,Fuel System Problem,Ground Service Problem,Hydraulic System Problem,Ice/Frost Problem,Landing Gear Problem,Maintenance Problem,Oxygen System Problem,other problem\nincident_description:{incident_description}\n"
    request_options = {
        "engine": "text-davinci-003",
        "prompt": class_prompt,
        "temperature": 0,
        "max_tokens": 50,
        "n": 1,
        "stop": None,
    }
    completion = openai.Completion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.text.strip()
# Lazily created embedding model, reused across calls so the weights are not
# reloaded on every request.
_SIMILARITY_MODEL = None


def _get_similarity_model():
    """Return the shared SentenceTransformer, loading it on first use."""
    global _SIMILARITY_MODEL
    if _SIMILARITY_MODEL is None:
        _SIMILARITY_MODEL = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    return _SIMILARITY_MODEL


def text_similarity(pdftext1, pdftext12):
    """Return the cosine similarity between the embeddings of two texts.

    The texts passed in are what gets embedded. Previously the literal
    strings "pdftext1" and "pdftext2" were encoded instead, so the result
    was the same constant for every pair of documents.

    Args:
        pdftext1: First text to compare.
        pdftext12: Second text to compare. The parameter name is kept as-is
            so existing callers are unaffected.

    Returns:
        The dot product of the two embeddings divided by the product of
        their norms.
    """
    sentences = [pdftext1, pdftext12]
    embeddings = _get_similarity_model().encode(sentences)
    first, second = embeddings[0], embeddings[1]
    cosine = np.dot(first, second) / (norm(first) * norm(second))
    return cosine
def avatiation(pdf_url1, pdf_url2):
    """OCR two log reports, classify each one and score their similarity.

    Args:
        pdf_url1: URL of the first report.
        pdf_url2: URL of the second report.

    Returns:
        A 5-tuple: (main issue 1, class 1, main issue 2, class 2,
        similarity of the two extracted texts).
    """
    # Both documents are OCR'd before any classification call is made.
    texts = [ocr_pdf(url) for url in (pdf_url1, pdf_url2)]

    findings = []
    for report_text in texts:
        defect_class = classify_class(report_text)
        main_issue = classify_cause(report_text)
        # Output order per report is (main issue, class).
        findings.extend((main_issue, defect_class))

    similarity = text_similarity(texts[0], texts[1])
    return (*findings, similarity)
# Gradio UI: two URL inputs, five text outputs matching the 5-tuple returned
# by avatiation. The components use gr.Textbox directly because the
# gr.inputs / gr.outputs namespaces are deprecated in Gradio 3 and removed
# in Gradio 4.
inputs1 = gr.Textbox(label="Link for aviation log reports")
inputs2 = gr.Textbox(label="Link for aviation log reports 2")
outputs = [
    gr.Textbox(label="Main Issue of the log report"),
    gr.Textbox(label="category of the log report"),
    gr.Textbox(label="Main Issue of the log report2"),
    gr.Textbox(label="category of the log report2"),
    gr.Textbox(label="log similarity"),
]
demo = gr.Interface(
    fn=avatiation,
    inputs=[inputs1, inputs2],
    outputs=outputs,
    title="ATA Auto classification using OCR and GPT3 ",
)
demo.launch()