edithram23 committed on
Commit
09b20e0
1 Parent(s): 9432ed7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -178,8 +178,7 @@ if uploaded_file is not None:
178
  if pdf_document:
179
  redacted_text = []
180
  for pg in pdf_document:
181
- text = pg.get_text('text')
182
- st.text_area(pg.get_text())
183
  sentences = sentence_tokenize(text)
184
  for sent in sentences:
185
  entities,words_out = extract_entities(sent)
@@ -189,13 +188,14 @@ if uploaded_file is not None:
189
  new+=w.split('\n')
190
  words_out+=bert_words
191
  words_out = [i for i in new if len(i)>2]
192
-
193
  # print(words_out)
194
  words_out=sorted(words_out, key=len,reverse=True)
 
195
  print(words_out)
196
  for i in words_out:
197
  redact_text(pg,i)
198
-
 
199
  output_pdf = "output_redacted.pdf"
200
  pdf_document.save(output_pdf)
201
 
 
178
  if pdf_document:
179
  redacted_text = []
180
  for pg in pdf_document:
181
+ text = pg.get_text()
 
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
  entities,words_out = extract_entities(sent)
 
188
  new+=w.split('\n')
189
  words_out+=bert_words
190
  words_out = [i for i in new if len(i)>2]
 
191
  # print(words_out)
192
  words_out=sorted(words_out, key=len,reverse=True)
193
+ redact_text+=words_out
194
  print(words_out)
195
  for i in words_out:
196
  redact_text(pg,i)
197
+ st.text_area(pg.get_text())
198
+
199
  output_pdf = "output_redacted.pdf"
200
  pdf_document.save(output_pdf)
201