Spaces:

beki
/

pii-anonymizer

Running

App Files Community

beki committed on Sep 26, 2022

Commit

e2c142c

•

1 Parent(s): d6ba5ad

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -25

app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 import json
 from json import JSONEncoder
 import pandas as pd
 import streamlit as st
 from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
@@ -54,6 +54,24 @@ def anonymize(text, analyze_results):
     res = anonymizer_engine().anonymize(text, analyze_results)
     return res.text
 st.set_page_config(page_title="Presidio demo (English)", layout="wide")
@@ -88,34 +106,35 @@ engine = analyzer_engine()
 analyzer_load_state.empty()
-# Create two columns for before and after
-col1, col2 = st.columns(2)
-# Before:
-col1.subheader("Input string:")
-st_text = col1.text_area(
-    label="Enter text",
-    value="Type in some text, "
     "like a phone number (212-141-4544) "
-    "or a name (Lebron James).\n\n"
-    "{first_name: Moustafa, ip_address:  192.168.2.80, email: mous@gmail.com}\n\n"
-    "SELECT fullName, secretName WHERE fullName='Nathan Maher' AND secretName='RqJu PZwhjrbcS'\n\n"
-    "<table><tr><th>nationality</th><td>American</td></tr></table>",
-    height=400,
 )
 # After
-col2.subheader("Output:")
-st_analyze_results = analyze(
-    text=st_text,
-    entities=st_entities,
-    language="en",
-    score_threshold=st_threhsold,
-    return_decision_process=st_return_decision_process,
-)
-st_anonymize_results = anonymize(st_text, st_analyze_results)
-col2.text_area(label="", value=st_anonymize_results, height=400)
 # table result

 import json
 from json import JSONEncoder
+from annotated_text import annotated_text
 import pandas as pd
 import streamlit as st
 from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
     res = anonymizer_engine().anonymize(text, analyze_results)
     return res.text
def annotate(text, st_analyze_results, st_entities):
    """Split *text* into plain and labelled tokens for ``annotated_text``.

    The analyzer results are ordered by their ``start`` offset. Each result
    becomes a ``(matched_text, entity_type)`` tuple, and the stretches of
    *text* before, between and after the results are emitted as plain
    strings (possibly empty when two results are adjacent or a result
    touches either end of *text*).

    Args:
        text: The string that was analyzed.
        st_analyze_results: Iterable of results exposing ``start``, ``end``
            and ``entity_type`` attributes.
        st_entities: Unused; kept so existing callers keep working.

    Returns:
        A list mixing ``str`` and ``(str, str)`` tuples. When there are no
        results the list is ``[text]`` so the input is still displayed
        instead of rendering nothing.

    Note:
        Results whose spans overlap are emitted as-is, so the overlapping
        characters appear in more than one token.
    """
    # sort by start index
    results = sorted(st_analyze_results, key=lambda x: x.start)

    # Nothing detected: return the text untouched rather than an empty list,
    # otherwise the caller would have nothing to render.
    if not results:
        return [text]

    # leading plain text before the first entity
    tokens = [text[:results[0].start]]
    for i, res in enumerate(results):
        # entity text together with its entity type
        tokens.append((text[res.start:res.end], res.entity_type))
        # plain text up to the next entity, or to the end of the text when
        # this is the last result (a stop of None slices to the end)
        next_start = results[i + 1].start if i + 1 < len(results) else None
        tokens.append(text[res.end:next_start])
    return tokens
 st.set_page_config(page_title="Presidio demo (English)", layout="wide")
 analyzer_load_state.empty()
+st_text = st.text_area(
+    label="Type in some text",
+    value=
     "like a phone number (212-141-4544) "
+    "or a name (Lebron James).",
+    height=200,
+    # label_visibility="collapsed",
 )
 # After
+st.subheader("Analyzed")
+with st.spinner("Analyzing..."):
+    st_analyze_results = analyze(
+        text=st_text,
+        entities=st_entities,
+        language="en",
+        score_threshold=st_threshold,
+        return_decision_process=st_return_decision_process,
+    )
+    annotated_tokens = annotate(st_text, st_analyze_results, st_entities)
+    # annotated_tokens
+    annotated_text(*annotated_tokens)
+st.text("")
+st.subheader("Anonymized")
+with st.spinner("Anonymizing..."):
+    st_anonymize_results = anonymize(st_text, st_analyze_results)
+    st_anonymize_results
 # table result