# Spaces: Runtime error  (page-status text captured along with this file; not part of the script)
"""Streamlit viewer for noise-removal and boundary-removal results.

Loads the responses stored in ``eval/resps_100_w_diff.json``, lets the user
pick a query and then one of that query's hits, and renders the hit's text
line by line with the removed lines highlighted in red.
"""
import pandas as pd
import streamlit as st

from src.utils import generate_diff_html, load_json

# First entry of the query selectbox; selecting it means "no query chosen".
NO_QUERY = ' '

resps = load_json('eval/resps_100_w_diff.json')
query_to_resp = {resp['query']: resp for resp in resps}
queries = [resp['query'] for resp in resps]

query_option = st.selectbox('Select query', [NO_QUERY] + queries)

# Compare by value: `is not ' '` tests object identity, which is not
# guaranteed to hold for equal strings and triggers a SyntaxWarning.
if query_option != NO_QUERY:
    resp = query_to_resp[query_option]
    st.write(resp['metadata'])

    # Only hits with text_rank > -1 are offered. Each label starts with the
    # hit's 1-based position in resp['hits'], which makes labels unique even
    # when two hits share a title, so the label itself is a safe lookup key.
    # (Indexing the filtered list with that position picked the wrong hit
    # or ran off the end of the list.)
    label_to_hit = {
        str(i + 1) + ' ' + str(hit['title']): hit
        for i, hit in enumerate(resp['hits'])
        if hit['text_rank'] > -1
    }
    title_option = st.selectbox('Select document title', list(label_to_hit))

    # With no eligible hits the selectbox has no options and yields None,
    # so check before touching the selection.
    if title_option is not None:
        doc = label_to_hit[title_option]
        st.write({
            "id": doc["id"],
            "title": doc["title"],
            "text_rank": doc["text_rank"],
            "noise_rank": doc["noise_rank"],
            "boundary_rank": doc["boundary_rank"],
            # "text": doc["text"]
        })

        text_lines = doc['text'].split("\n")

        # A line counts as noise when it does not appear anywhere in the
        # noise-removed text; a set keeps the membership test cheap.
        noise_kept_lines = set(doc['text_noise_removed'].split("\n"))
        st.header("Noise removal result")
        for i, line in enumerate(text_lines):
            numbered_line = str(i) + ' ' + line
            if line in noise_kept_lines:
                st.write(numbered_line)
            else:
                st.write(f":red[{numbered_line}]")

        # Lines past the length of the boundary-removed text are marked as
        # removed. The length must be the real line count: counting a set
        # would drop duplicate lines and mark too many lines red.
        # NOTE(review): this positional check assumes boundary removal only
        # truncates the tail of the document -- confirm against the producer.
        boundary_kept_count = len(doc['text_boundary_removed'].split("\n"))
        st.header("Boundary removal result")
        for i, line in enumerate(text_lines):
            if i >= boundary_kept_count:
                st.write(f":red[{line}]")
            else:
                st.write(line)

        # diff_html = doc['diff_html']
        # st.markdown(diff_html, unsafe_allow_html=True)
        # components.html(diff_html, width=5000, height=4000, scrolling=True)