anupam210 committed on
Commit
00a7c71
1 Parent(s): 25d7a4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -44
app.py CHANGED
@@ -7,13 +7,6 @@ from azure.storage.blob import BlobClient
7
  #import utils functions
8
  from preprocessing_images import preprocessing_function
9
  from extract_text import azure_ocr
10
-
11
- from sentence_transformers import SentenceTransformer
12
- !pip install -U sentence-transformers
13
- from numpy.linalg import norm
14
- import numpy as np
15
-
16
-
17
  my_container = os.getenv("AZURE_CONTAINER")
18
  subscription_key = os.getenv("SUB_KEY")
19
  endpoint = os.getenv("AZURE_ENDPOINT")
@@ -21,23 +14,23 @@ connection_string = os.getenv("AZURE_CON_STRING")
21
  openai.api_key = os.getenv("OPENAI_API_KEY")
22
  computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
23
 
24
- def ocr_pdf(pdf_url1):
25
- preprocessing_function(pdf_url1)
26
- my_blob = pdf_url1.split('/')[-1]
27
  blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
28
  with open("answer_paper.pdf", "rb") as data:
29
  blob.upload_blob(data,overwrite=True)
30
  text = azure_ocr(blob.url,computervision_client)
31
  return text.strip()
32
 
33
- def ocr_pdf(pdf_url2):
34
- preprocessing_function(pdf_url2)
35
- my_blob = pdf_url2.split('/')[-1]
36
- blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
37
- with open("answer_paper.pdf", "rb") as data:
38
- blob.upload_blob(data,overwrite=True)
39
- text = azure_ocr(blob.url,computervision_client)
40
- return text.strip()
41
 
42
  def classify_cause(incident_description):
43
  response = openai.Completion.create(
@@ -65,42 +58,26 @@ def classify_class(incident_description):
65
  classification = response.choices[0].text.strip()
66
  return classification
67
 
68
- def text_similarity(pdftext1,pdftext12):
69
- sentences =["pdftext1","pdftext2"]
70
- model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
71
- embeddings = model.encode(sentences)
72
- cosine = np.dot(embeddings[0],embeddings[1])/(norm(embeddings[0])*norm(embeddings[1]))
73
- return cosine
74
-
75
-
76
- def avatiation(pdf_url1,pdf_url2):
77
- pdftext1 = ocr_pdf(pdf_url1)
78
- pdftext2 = ocr_pdf(pdf_url2)
79
-
80
- defect_class1 = classify_class(pdftext1)
81
- main_issue1 = classify_cause(pdftext1)
82
 
83
- defect_class2 = classify_class(pdftext2)
84
- main_issue2 = classify_cause(pdftext2)
85
- cosine = text_similarity(pdftext1,pdftext2)
86
- return main_issue1, defect_class1,main_issue2, defect_class2,cosine
87
 
88
 
89
 
90
  inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
91
- inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")
92
 
93
 
94
  outputs = [gr.outputs.Textbox(label="Main Issue of the log report"),
95
- gr.outputs.Textbox(label="category of the log report"),
96
-
97
- gr.outputs.Textbox(label="Main Issue of the log report2"),
98
- gr.outputs.Textbox(label="category of the log report2"),
99
- gr.outputs.Textbox(label="log similarity")
100
-
101
  ]
102
 
103
 
104
- demo = gr.Interface(fn=avatiation,inputs= [inputs1,inputs2],outputs=outputs, title="ATA Auto classification using OCR and GPT3 ")
105
  demo.launch()
106
 
 
7
  #import utils functions
8
  from preprocessing_images import preprocessing_function
9
  from extract_text import azure_ocr
 
 
 
 
 
 
 
10
  my_container = os.getenv("AZURE_CONTAINER")
11
  subscription_key = os.getenv("SUB_KEY")
12
  endpoint = os.getenv("AZURE_ENDPOINT")
 
14
  openai.api_key = os.getenv("OPENAI_API_KEY")
15
  computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
16
 
17
+ def ocr_pdf(pdf_url):
18
+ preprocessing_function(pdf_url)
19
+ my_blob = pdf_url.split('/')[-1]
20
  blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
21
  with open("answer_paper.pdf", "rb") as data:
22
  blob.upload_blob(data,overwrite=True)
23
  text = azure_ocr(blob.url,computervision_client)
24
  return text.strip()
25
 
26
+ # def ocr_pdf(pdf_url2):
27
+ # preprocessing_function(pdf_url2)
28
+ # my_blob = pdf_url2.split('/')[-1]
29
+ # blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
30
+ # with open("answer_paper.pdf", "rb") as data:
31
+ # blob.upload_blob(data,overwrite=True)
32
+ # text = azure_ocr(blob.url,computervision_client)
33
+ # return text.strip()
34
 
35
  def classify_cause(incident_description):
36
  response = openai.Completion.create(
 
58
  classification = response.choices[0].text.strip()
59
  return classification
60
 
61
+
62
+ def avatiation(pdf_url):
63
+ pdftext = ocr_pdf(pdf_url)
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+
66
+ defect_class = classify_class(pdftext)
67
+ main_issue = classify_cause(pdftext)
68
+ return main_issue, defect_class
69
 
70
 
71
 
72
  inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
73
+ #inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")
74
 
75
 
76
  outputs = [gr.outputs.Textbox(label="Main Issue of the log report"),
77
+ gr.outputs.Textbox(label="category of the log report")
 
 
 
 
 
78
  ]
79
 
80
 
81
+ demo = gr.Interface(fn=avatiation,inputs=inputs1,outputs=outputs, title="ATA Auto classification using OCR and GPT3 ")
82
  demo.launch()
83