anupam210 committed on
Commit
0af3940
1 Parent(s): 8fb62ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -7,6 +7,12 @@ from azure.storage.blob import BlobClient
7
  #import utils functions
8
  from preprocessing_images import preprocessing_function
9
  from extract_text import azure_ocr
 
 
 
 
 
 
10
  my_container = os.getenv("AZURE_CONTAINER")
11
  subscription_key = os.getenv("SUB_KEY")
12
  endpoint = os.getenv("AZURE_ENDPOINT")
@@ -58,7 +64,14 @@ def classify_class(incident_description):
58
  classification = response.choices[0].text.strip()
59
  return classification
60
 
61
-
 
 
 
 
 
 
 
62
  def avatiation(pdf_url1,pdf_url2):
63
  pdftext1 = ocr_pdf(pdf_url1)
64
  pdftext2 = ocr_pdf(pdf_url2)
@@ -68,7 +81,8 @@ def avatiation(pdf_url1,pdf_url2):
68
 
69
  defect_class2 = classify_class(pdftext2)
70
  main_issue2 = classify_cause(pdftext2)
71
- return main_issue1, defect_class1,main_issue2, defect_class2,
 
72
 
73
 
74
 
@@ -80,7 +94,9 @@ outputs = [gr.outputs.Textbox(label="Main Issue of the log report"),
80
  gr.outputs.Textbox(label="category of the log report"),
81
 
82
  gr.outputs.Textbox(label="Main Issue of the log report2"),
83
- gr.outputs.Textbox(label="category of the log report2")
 
 
84
  ]
85
 
86
 
 
7
  #import utils functions
8
  from preprocessing_images import preprocessing_function
9
  from extract_text import azure_ocr
10
+
11
+ from sentence_transformers import SentenceTransformer
12
+ from numpy.linalg import norm
13
+ import numpy as np
14
+
15
+
16
  my_container = os.getenv("AZURE_CONTAINER")
17
  subscription_key = os.getenv("SUB_KEY")
18
  endpoint = os.getenv("AZURE_ENDPOINT")
 
64
  classification = response.choices[0].text.strip()
65
  return classification
66
 
67
# Name of the sentence-embedding model used when the caller does not supply one.
_SIMILARITY_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

# Loaded models keyed by name, so the model is built once and then reused.
_similarity_model_cache = {}


def _get_similarity_model(name=_SIMILARITY_MODEL_NAME):
    """Return the SentenceTransformer called *name*, loading it on first use."""
    if name not in _similarity_model_cache:
        _similarity_model_cache[name] = SentenceTransformer(name)
    return _similarity_model_cache[name]


def text_similarity(pdftext1, pdftext2, model=None):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        pdftext1: First text to compare.
        pdftext2: Second text to compare.
        model: Optional object exposing ``encode(list_of_texts)`` that returns
            one embedding vector per text. When omitted, the
            ``all-MiniLM-L6-v2`` SentenceTransformer is loaded once and cached.

    Returns:
        The cosine similarity as a plain ``float``. Returns ``0.0`` when either
        embedding has zero norm, because the cosine is undefined in that case.
    """
    if model is None:
        model = _get_similarity_model()

    # Encode the actual argument values; the earlier version encoded the
    # literal strings "pdftext1"/"pdftext2" and ignored its inputs.
    embeddings = model.encode([pdftext1, pdftext2])
    first, second = embeddings[0], embeddings[1]

    denominator = norm(first) * norm(second)
    if denominator == 0:
        # Avoid a division by zero / NaN result for degenerate embeddings.
        return 0.0
    return float(np.dot(first, second) / denominator)
73
+
74
+
75
  def avatiation(pdf_url1,pdf_url2):
76
  pdftext1 = ocr_pdf(pdf_url1)
77
  pdftext2 = ocr_pdf(pdf_url2)
 
81
 
82
  defect_class2 = classify_class(pdftext2)
83
  main_issue2 = classify_cause(pdftext2)
84
+ cosine = text_similarity(pdftext1,pdftext2)
85
+ return main_issue1, defect_class1,main_issue2, defect_class2,cosine
86
 
87
 
88
 
 
94
  gr.outputs.Textbox(label="category of the log report"),
95
 
96
  gr.outputs.Textbox(label="Main Issue of the log report2"),
97
+ gr.outputs.Textbox(label="category of the log report2"),
98
+ gr.outputs.Textbox(label="log similarity)
99
+
100
  ]
101
 
102