naveed-stockmark's picture
Update app.py
0b0ac8b
raw
history blame
No virus
1.71 kB
import streamlit as st
from src.utils import generate_diff_html, load_json
import pandas as pd
def ranked_hits(hits):
    """Return ``(position, hit)`` pairs for every hit whose ``text_rank`` is > -1.

    ``position`` is the hit's 1-based place in the full ``hits`` list, so the
    numbers shown in the UI keep their gaps when some hits are filtered out.
    """
    return [
        (position, hit)
        for position, hit in enumerate(hits, start=1)
        if hit['text_rank'] > -1
    ]


def noise_line_flags(text, text_noise_removed):
    """Pair each line of ``text`` with a flag telling whether it was removed.

    A line counts as removed when it does not occur anywhere among the lines
    of ``text_noise_removed``.
    """
    kept_lines = set(text_noise_removed.split("\n"))
    return [(line, line not in kept_lines) for line in text.split("\n")]


def boundary_line_flags(text, text_boundary_removed):
    """Pair each line of ``text`` with a flag telling whether it was cut off.

    Lines are flagged by position: every line past the number of lines in
    ``text_boundary_removed`` is marked as removed.
    NOTE(review): this presumes boundary removal keeps a leading prefix of the
    document -- confirm against the code that produces the eval file.
    """
    # Count real lines (not unique ones): repeated lines such as blanks must
    # each count, otherwise the boundary is drawn too early.
    kept_count = len(text_boundary_removed.split("\n")) if text_boundary_removed else 0
    return [
        (line, position >= kept_count)
        for position, line in enumerate(text.split("\n"))
    ]


def render_document(doc):
    """Write the rank summary and both line-by-line removal views for ``doc``."""
    # The raw text is left out of the summary; it is rendered line by line below.
    st.write({
        "id": doc["id"],
        "title": doc["title"],
        "text_rank": doc["text_rank"],
        "noise_rank": doc["noise_rank"],
        "boundary_rank": doc["boundary_rank"],
    })

    st.header("Noise removal result")
    flagged = noise_line_flags(doc['text'], doc['text_noise_removed'])
    for number, (line, removed) in enumerate(flagged):
        numbered = str(number) + ' ' + line
        st.write(f":red[{numbered}]" if removed else numbered)

    st.header("Boundary removal result")
    for line, removed in boundary_line_flags(doc['text'], doc['text_boundary_removed']):
        st.write(f":red[{line}]" if removed else line)


def main():
    """Let the user pick a query and one of its ranked hits, then show the hit."""
    resps = load_json('eval/resps_100_w_diff.json')
    query_to_resp = {resp['query']: resp for resp in resps}
    queries = [resp['query'] for resp in resps]

    # ' ' is the placeholder entry meaning "nothing selected yet".
    query_option = st.selectbox('Select query', [' '] + queries)
    # Compare by value: identity checks against a string literal are unreliable.
    if query_option == ' ':
        return

    resp = query_to_resp[query_option]
    ranked = ranked_hits(resp['hits'])
    st.write(resp['metadata'])

    # Select by index into ``ranked`` rather than parsing the label back, so
    # the chosen hit is always the one displayed, even with duplicate titles
    # or hits filtered out of the list.
    choice = st.selectbox(
        'Select document title',
        range(len(ranked)),
        format_func=lambda k: str(ranked[k][0]) + ' ' + str(ranked[k][1]['title']),
    )
    # selectbox yields None when there is nothing to choose from.
    if choice is None:
        return

    render_document(ranked[choice][1])


# Streamlit executes the script as ``__main__``, so the app still renders on
# every rerun while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()