Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,13 +7,6 @@ from azure.storage.blob import BlobClient
|
|
7 |
#import utils functions
|
8 |
from preprocessing_images import preprocessing_function
|
9 |
from extract_text import azure_ocr
|
10 |
-
|
11 |
-
from sentence_transformers import SentenceTransformer
|
12 |
-
!pip install -U sentence-transformers
|
13 |
-
from numpy.linalg import norm
|
14 |
-
import numpy as np
|
15 |
-
|
16 |
-
|
17 |
my_container = os.getenv("AZURE_CONTAINER")
|
18 |
subscription_key = os.getenv("SUB_KEY")
|
19 |
endpoint = os.getenv("AZURE_ENDPOINT")
|
@@ -21,23 +14,23 @@ connection_string = os.getenv("AZURE_CON_STRING")
|
|
21 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
22 |
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
|
23 |
|
24 |
-
def ocr_pdf(
|
25 |
-
preprocessing_function(
|
26 |
-
my_blob =
|
27 |
blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
|
28 |
with open("answer_paper.pdf", "rb") as data:
|
29 |
blob.upload_blob(data,overwrite=True)
|
30 |
text = azure_ocr(blob.url,computervision_client)
|
31 |
return text.strip()
|
32 |
|
33 |
-
def ocr_pdf(pdf_url2):
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
|
42 |
def classify_cause(incident_description):
|
43 |
response = openai.Completion.create(
|
@@ -65,42 +58,26 @@ def classify_class(incident_description):
|
|
65 |
classification = response.choices[0].text.strip()
|
66 |
return classification
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
embeddings = model.encode(sentences)
|
72 |
-
cosine = np.dot(embeddings[0],embeddings[1])/(norm(embeddings[0])*norm(embeddings[1]))
|
73 |
-
return cosine
|
74 |
-
|
75 |
-
|
76 |
-
def avatiation(pdf_url1,pdf_url2):
|
77 |
-
pdftext1 = ocr_pdf(pdf_url1)
|
78 |
-
pdftext2 = ocr_pdf(pdf_url2)
|
79 |
-
|
80 |
-
defect_class1 = classify_class(pdftext1)
|
81 |
-
main_issue1 = classify_cause(pdftext1)
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
return
|
87 |
|
88 |
|
89 |
|
90 |
inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
|
91 |
-
inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")
|
92 |
|
93 |
|
94 |
outputs = [gr.outputs.Textbox(label="Main Issue of the log report"),
|
95 |
-
gr.outputs.Textbox(label="category of the log report")
|
96 |
-
|
97 |
-
gr.outputs.Textbox(label="Main Issue of the log report2"),
|
98 |
-
gr.outputs.Textbox(label="category of the log report2"),
|
99 |
-
gr.outputs.Textbox(label="log similarity")
|
100 |
-
|
101 |
]
|
102 |
|
103 |
|
104 |
-
demo = gr.Interface(fn=avatiation,inputs=
|
105 |
demo.launch()
|
106 |
|
|
|
7 |
#import utils functions
|
8 |
from preprocessing_images import preprocessing_function
|
9 |
from extract_text import azure_ocr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
my_container = os.getenv("AZURE_CONTAINER")
|
11 |
subscription_key = os.getenv("SUB_KEY")
|
12 |
endpoint = os.getenv("AZURE_ENDPOINT")
|
|
|
14 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
15 |
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
|
16 |
|
17 |
+
def ocr_pdf(pdf_url):
    """Run OCR on the PDF referenced by ``pdf_url`` and return the text.

    The URL is first handed to ``preprocessing_function``. The local file
    ``answer_paper.pdf`` is then uploaded to the configured Azure container
    under the last path segment of the URL, and the resulting blob URL is
    passed to ``azure_ocr``. The recognised text is returned with leading
    and trailing whitespace removed.
    """
    # NOTE(review): presumably preprocessing_function produces
    # answer_paper.pdf on disk -- confirm against preprocessing_images.
    preprocessing_function(pdf_url)

    # Everything after the final '/' in the URL becomes the blob name.
    blob_name = pdf_url.rsplit('/', 1)[-1]
    target = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=blob_name,
    )

    with open("answer_paper.pdf", "rb") as pdf_file:
        target.upload_blob(pdf_file, overwrite=True)

    recognised = azure_ocr(target.url, computervision_client)
    return recognised.strip()
|
25 |
|
26 |
+
# def ocr_pdf(pdf_url2):
|
27 |
+
# preprocessing_function(pdf_url2)
|
28 |
+
# my_blob = pdf_url2.split('/')[-1]
|
29 |
+
# blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
|
30 |
+
# with open("answer_paper.pdf", "rb") as data:
|
31 |
+
# blob.upload_blob(data,overwrite=True)
|
32 |
+
# text = azure_ocr(blob.url,computervision_client)
|
33 |
+
# return text.strip()
|
34 |
|
35 |
def classify_cause(incident_description):
|
36 |
response = openai.Completion.create(
|
|
|
58 |
classification = response.choices[0].text.strip()
|
59 |
return classification
|
60 |
|
61 |
+
|
62 |
+
def avatiation(pdf_url):
    """Classify the aviation log report found at ``pdf_url``.

    The report is OCR'd via ``ocr_pdf`` and the extracted text is passed to
    ``classify_class`` and then ``classify_cause``.

    Returns:
        A ``(main_issue, defect_class)`` tuple, i.e. the result of
        ``classify_cause`` followed by the result of ``classify_class``.
    """
    report_text = ocr_pdf(pdf_url)

    # The category is requested before the cause; the tuple is returned in
    # the opposite order (cause first).
    category = classify_class(report_text)
    cause = classify_cause(report_text)
    return cause, category
|
69 |
|
70 |
|
71 |
|
72 |
# Single text input: the link to the aviation log report.
inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
#inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")

# Two text outputs, in the order of the tuple returned by avatiation:
# main issue first, then category.
outputs = [
    gr.outputs.Textbox(label="Main Issue of the log report"),
    gr.outputs.Textbox(label="category of the log report"),
]

# Wire the classifier into a Gradio interface and start serving it.
demo = gr.Interface(
    fn=avatiation,
    inputs=inputs1,
    outputs=outputs,
    title="ATA Auto classification using OCR and GPT3 ",
)
demo.launch()
|
83 |
|