beki committed on
Commit
e2c142c
1 Parent(s): d6ba5ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -25
app.py CHANGED
@@ -2,7 +2,7 @@
2
 
3
  import json
4
  from json import JSONEncoder
5
-
6
  import pandas as pd
7
  import streamlit as st
8
  from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
@@ -54,6 +54,24 @@ def anonymize(text, analyze_results):
54
  res = anonymizer_engine().anonymize(text, analyze_results)
55
  return res.text
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  st.set_page_config(page_title="Presidio demo (English)", layout="wide")
59
 
@@ -88,34 +106,35 @@ engine = analyzer_engine()
88
  analyzer_load_state.empty()
89
 
90
 
91
- # Create two columns for before and after
92
- col1, col2 = st.columns(2)
93
-
94
- # Before:
95
- col1.subheader("Input string:")
96
- st_text = col1.text_area(
97
- label="Enter text",
98
- value="Type in some text, "
99
  "like a phone number (212-141-4544) "
100
- "or a name (Lebron James).\n\n"
101
- "{first_name: Moustafa, ip_address: 192.168.2.80, email: mous@gmail.com}\n\n"
102
- "SELECT fullName, secretName WHERE fullName='Nathan Maher' AND secretName='RqJu PZwhjrbcS'\n\n"
103
- "<table><tr><th>nationality</th><td>American</td></tr></table>",
104
- height=400,
105
  )
106
 
107
  # After
108
- col2.subheader("Output:")
109
-
110
- st_analyze_results = analyze(
111
- text=st_text,
112
- entities=st_entities,
113
- language="en",
114
- score_threshold=st_threhsold,
115
- return_decision_process=st_return_decision_process,
116
- )
117
- st_anonymize_results = anonymize(st_text, st_analyze_results)
118
- col2.text_area(label="", value=st_anonymize_results, height=400)
 
 
 
 
 
 
 
 
119
 
120
 
121
  # table result
 
2
 
3
  import json
4
  from json import JSONEncoder
5
+ from annotated_text import annotated_text
6
  import pandas as pd
7
  import streamlit as st
8
  from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
 
54
  res = anonymizer_engine().anonymize(text, analyze_results)
55
  return res.text
56
 
57
def annotate(text, st_analyze_results, st_entities):
    """Split *text* into plain and annotated tokens for ``annotated_text``.

    Args:
        text: The original input string that was analyzed.
        st_analyze_results: Iterable of analyzer results; each item must
            expose ``start``, ``end`` and ``entity_type`` attributes.
        st_entities: Unused; kept so existing callers keep working.

    Returns:
        A list alternating between plain ``str`` segments and
        ``(entity_text, entity_type)`` tuples, in text order. The list
        always ends with the (possibly empty) text after the last entity,
        so with no results at all it is simply ``[text]``.
    """
    # Order by start; on equal starts put the longest span first so it is the
    # one that survives the overlap filter below.
    results = sorted(st_analyze_results, key=lambda res: (res.start, -res.end))

    tokens = []
    cursor = 0  # index of the first character not yet emitted
    for res in results:
        # An entity starting inside one already emitted would duplicate text
        # in the rendered output, so it is skipped.
        if res.start < cursor:
            continue
        # Plain text between the previous entity (or the start) and this one.
        tokens.append(text[cursor:res.start])
        # Entity text together with its entity type.
        tokens.append((text[res.start:res.end], res.entity_type))
        cursor = res.end

    # Remaining text after the last entity; the whole text if none were found.
    tokens.append(text[cursor:])
    return tokens
75
 
76
  st.set_page_config(page_title="Presidio demo (English)", layout="wide")
77
 
 
106
  analyzer_load_state.empty()
107
 
108
 
109
+ st_text = st.text_area(
110
+ label="Type in some text",
111
+ value=
 
 
 
 
 
112
  "like a phone number (212-141-4544) "
113
+ "or a name (Lebron James).",
114
+ height=200,
115
+ # label_visibility="collapsed",
 
 
116
  )
117
 
118
  # After
119
+ st.subheader("Analyzed")
120
+ with st.spinner("Analyzing..."):
121
+ st_analyze_results = analyze(
122
+ text=st_text,
123
+ entities=st_entities,
124
+ language="en",
125
+ score_threshold=st_threshold,
126
+ return_decision_process=st_return_decision_process,
127
+ )
128
+ annotated_tokens = annotate(st_text, st_analyze_results, st_entities)
129
+ # annotated_tokens
130
+ annotated_text(*annotated_tokens)
131
+ st.text("")
132
+
133
+ st.subheader("Anonymized")
134
+
135
+ with st.spinner("Anonymizing..."):
136
+ st_anonymize_results = anonymize(st_text, st_analyze_results)
137
+ st_anonymize_results
138
 
139
 
140
  # table result