edithram23 committed on
Commit
2d304b6
1 Parent(s): 7f4bb4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -182,25 +182,30 @@ if uploaded_file is not None:
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
  x = mask_generation(sent)
185
- t5=[]
186
- t5_words = list(set(sent.split("\n")))
187
- for i in t5_words:
 
188
  for j in i.split(" "):
189
- t5+=j.split(',')
190
- x1 = x.split(' ')
191
  e=[]
192
- for i in x1:
193
  e+=i.split(',')
194
- t5_words=set(t5).difference(set(e))
195
  entities,words_out = extract_entities(sent)
196
- words_out+=t5_words
 
 
197
  bert_words = words_red_bert(sent)
 
198
  new=[]
199
  for w in words_out:
200
  new+=w.split('\n')
 
201
  new+=bert_words
202
  words_out = [i for i in new if len(i)>3]
203
- # print(words_out)
204
  words_out=sorted(words_out, key=len,reverse=True)
205
 
206
  for i in words_out:
 
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
  x = mask_generation(sent)
185
+
186
+ sent_n_q_c=[]
187
+ sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
188
+ for i in sent_n:
189
  for j in i.split(" "):
190
+ sent_n_q_c+=j.split(',')
191
+ x_q = x.lower().replace('.',' ').split(' ')
192
  e=[]
193
+ for i in x_q:
194
  e+=i.split(',')
195
+ t5_words=set(sent_n_q_c).difference(set(e))
196
  entities,words_out = extract_entities(sent)
197
+ # print("\nwords_out:",words_out)
198
+ # print("\nT5",t5_words)
199
+ # print("X:",x,"\nsent:",sent,"\nx_q:",x_q,"\nsent_n:",sent_n,"\ne:",e,"\nsent_n_q_c:",sent_n_q_c,'\nt5_words',t5_words)
200
  bert_words = words_red_bert(sent)
201
+ # print("\nbert:",bert_words)
202
  new=[]
203
  for w in words_out:
204
  new+=w.split('\n')
205
+ words_out+=t5_words
206
  new+=bert_words
207
  words_out = [i for i in new if len(i)>3]
208
+ # print("\nfinal:",words_out)
209
  words_out=sorted(words_out, key=len,reverse=True)
210
 
211
  for i in words_out: