File size: 4,143 Bytes
99c2b2d
 
 
 
 
 
 
 
 
0af3940
 
25d7a4d
0af3940
 
 
 
99c2b2d
 
 
 
 
 
 
8fb62ba
 
 
 
 
 
 
 
 
 
 
 
99c2b2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
627401a
0af3940
 
 
 
 
 
 
8fb62ba
 
 
 
 
 
 
 
 
0af3940
 
99c2b2d
 
6234c6a
f9a17d6
8fb62ba
f9a17d6
99c2b2d
 
8fb62ba
 
 
0af3940
4a624cb
0af3940
5adec92
6234c6a
99c2b2d
8fb62ba
99c2b2d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import openai
import gradio as gr
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
from azure.storage.blob import BlobClient
#import utils functions
from preprocessing_images import preprocessing_function
from extract_text import azure_ocr

from sentence_transformers import SentenceTransformer
# Requires the sentence-transformers package: pip install -U sentence-transformers
from numpy.linalg import norm
import numpy as np


# Service settings come from environment variables; a variable that is not
# set yields None.
openai.api_key = os.environ.get("OPENAI_API_KEY")
connection_string = os.environ.get("AZURE_CON_STRING")
my_container = os.environ.get("AZURE_CONTAINER")
endpoint = os.environ.get("AZURE_ENDPOINT")
subscription_key = os.environ.get("SUB_KEY")

# Shared Azure Computer Vision client; ocr_pdf hands it to azure_ocr.
computervision_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)

def ocr_pdf(pdf_url2):
    """Run the OCR pipeline for the PDF at the given URL and return its text.

    The URL is handed to ``preprocessing_function``, the local file
    ``answer_paper.pdf`` is uploaded to Azure Blob Storage under the URL's
    last path segment, and the resulting blob URL is passed to ``azure_ocr``.

    This single definition replaces two byte-for-byte equivalent definitions
    of ``ocr_pdf`` that differed only in parameter name; the second silently
    shadowed the first. The parameter keeps the name of the definition that
    was actually in effect so keyword callers continue to work.

    Args:
        pdf_url2: URL of the PDF to process. Despite the name, it is used
            for every report, not only the second one.

    Returns:
        The text returned by ``azure_ocr`` with surrounding whitespace
        removed.
    """
    # NOTE(review): presumably preprocessing_function writes its result to
    # "answer_paper.pdf", which is what gets uploaded below -- confirm in
    # preprocessing_images.
    preprocessing_function(pdf_url2)

    # The blob is named after the last path segment of the source URL.
    blob_name = pdf_url2.split('/')[-1]
    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=blob_name,
    )
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    text = azure_ocr(blob.url, computervision_client)
    return text.strip()

def classify_cause(incident_description):
    """Ask the OpenAI completion endpoint for the root cause of an incident.

    Args:
        incident_description: Text describing the incident; it is embedded
            verbatim in the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): the prompt refers to "the below list", but no list of
    # candidate causes is included in it.
    prompt_text = f"Identify the root cause from the below list:\nincident_description:{incident_description}\n"
    request_options = {
        "engine": "text-davinci-003",
        "prompt": prompt_text,
        "temperature": 0,
        "max_tokens": 50,
        "n": 1,
        "stop": None,
    }
    completion = openai.Completion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.text.strip()
    
def classify_class(incident_description):
    """Ask the OpenAI completion endpoint to pick a problem class.

    The prompt lists the candidate classes and then the incident text.

    Args:
        incident_description: Text describing the incident; it is embedded
            verbatim in the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    # The literal is only split across lines for readability; the pieces
    # concatenate to exactly the same prompt text as a single-line string.
    # NOTE(review): "Engine Problem" and "Fuel System Problem" each appear
    # twice in the class list.
    prompt_text = (
        "Classify the following incident description into one of the given classes:"
        "Aircraft Autopilot Problem, Auxiliary Power Problem,Cabin Pressure Problem,"
        " Engine Problem,Fuel System Problem,Avionics Problem,Communications Problem,"
        "Electrical System Problem,Engine Problem,Fire/Smoke Problem,"
        "Fuel System Problem,Ground Service Problem,Hydraulic System Problem,"
        "Ice/Frost Problem,Landing Gear Problem,Maintenance Problem,"
        "Oxygen System Problem,other problem\n"
        f"incident_description:{incident_description}\n"
    )
    request_options = {
        "engine": "text-davinci-003",
        "prompt": prompt_text,
        "temperature": 0,
        "max_tokens": 50,
        "n": 1,
        "stop": None,
    }
    completion = openai.Completion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.text.strip()

# Lazily created SentenceTransformer shared by every text_similarity call, so
# the model is not reloaded on each request.
_similarity_model = None


def _load_similarity_model():
    """Return the sentence-embedding model, creating it on first use."""
    global _similarity_model
    if _similarity_model is None:
        _similarity_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    return _similarity_model


def text_similarity(pdftext1, pdftext12):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are encoded with the ``all-MiniLM-L6-v2`` sentence-transformer
    model and the cosine of the angle between the two embeddings is returned.

    Previously the literal strings ``"pdftext1"`` and ``"pdftext2"`` were
    embedded instead of the arguments, so the score was the same constant
    for every pair of documents.

    Args:
        pdftext1: First text to compare.
        pdftext12: Second text to compare (parameter name kept unchanged for
            backward compatibility).

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))`` for the two embeddings.
    """
    embeddings = _load_similarity_model().encode([pdftext1, pdftext12])
    first, second = embeddings[0], embeddings[1]
    return np.dot(first, second) / (norm(first) * norm(second))


def avatiation(pdf_url1, pdf_url2):
    """Analyse two log-report PDFs and compare their contents.

    Both documents are run through ``ocr_pdf`` first; each extracted text is
    then passed to ``classify_class`` and ``classify_cause``, and finally the
    two texts are compared with ``text_similarity``.

    Args:
        pdf_url1: Link to the first report.
        pdf_url2: Link to the second report.

    Returns:
        A 5-tuple ``(main_issue1, defect_class1, main_issue2, defect_class2,
        cosine)``.
    """
    first_text = ocr_pdf(pdf_url1)
    second_text = ocr_pdf(pdf_url2)

    # For each report, the class is requested before the cause; the result
    # order is (cause, class) per report.
    findings = []
    for report_text in (first_text, second_text):
        category = classify_class(report_text)
        root_cause = classify_cause(report_text)
        findings.extend((root_cause, category))

    similarity = text_similarity(first_text, second_text)
    return (*findings, similarity)



# Two free-text inputs, one per report link.
inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")

# One textbox per value returned by avatiation, in the same order.
outputs = [
    gr.outputs.Textbox(label=caption)
    for caption in (
        "Main Issue of the log report",
        "category of the log report",
        "Main Issue of the log report2",
        "category of the log report2",
        "log similarity",
    )
]

# Build the interface around avatiation and start serving it.
demo = gr.Interface(
    fn=avatiation,
    inputs=[inputs1, inputs2],
    outputs=outputs,
    title="ATA Auto classification using OCR and GPT3 ",
)
demo.launch()