edithram23 committed on
Commit
a9299cc
1 Parent(s): 2d304b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -42,7 +42,7 @@ address_recognizer = PatternRecognizer(supported_entity="ADDRESS", patterns=[add
42
 
43
  # Add the custom address recognizer to the analyzer
44
  analyzer.registry.add_recognizer(address_recognizer)
45
- analyzer.get_recognizers
46
  # Define a function to extract entities
47
 
48
 
@@ -181,18 +181,18 @@ if uploaded_file is not None:
181
  text = pg.get_text()
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
- x = mask_generation(sent)
185
 
186
- sent_n_q_c=[]
187
- sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
188
- for i in sent_n:
189
- for j in i.split(" "):
190
- sent_n_q_c+=j.split(',')
191
- x_q = x.lower().replace('.',' ').split(' ')
192
- e=[]
193
- for i in x_q:
194
- e+=i.split(',')
195
- t5_words=set(sent_n_q_c).difference(set(e))
196
  entities,words_out = extract_entities(sent)
197
  # print("\nwords_out:",words_out)
198
  # print("\nT5",t5_words)
@@ -202,7 +202,7 @@ if uploaded_file is not None:
202
  new=[]
203
  for w in words_out:
204
  new+=w.split('\n')
205
- words_out+=t5_words
206
  new+=bert_words
207
  words_out = [i for i in new if len(i)>3]
208
  # print("\nfinal:",words_out)
 
42
 
43
  # Add the custom address recognizer to the analyzer
44
  analyzer.registry.add_recognizer(address_recognizer)
45
+ # analyzer.get_recognizers
46
  # Define a function to extract entities
47
 
48
 
 
181
  text = pg.get_text()
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
+ # x = mask_generation(sent)
185
 
186
+ # sent_n_q_c=[]
187
+ # sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
188
+ # for i in sent_n:
189
+ # for j in i.split(" "):
190
+ # sent_n_q_c+=j.split(',')
191
+ # x_q = x.lower().replace('.',' ').split(' ')
192
+ # e=[]
193
+ # for i in x_q:
194
+ # e+=i.split(',')
195
+ # t5_words=set(sent_n_q_c).difference(set(e))
196
  entities,words_out = extract_entities(sent)
197
  # print("\nwords_out:",words_out)
198
  # print("\nT5",t5_words)
 
202
  new=[]
203
  for w in words_out:
204
  new+=w.split('\n')
205
+ # words_out+=t5_words
206
  new+=bert_words
207
  words_out = [i for i in new if len(i)>3]
208
  # print("\nfinal:",words_out)