"""Streamlit page for inspecting noise/boundary removal on retrieved documents.

Loads pre-computed responses from ``eval/resps_100_w_diff.json``, lets the user
pick a query and one of its hits, and renders the hit's text line by line with
the lines dropped by each cleaning step highlighted in red.
"""

import pandas as pd
import streamlit as st

from src.utils import generate_diff_html, load_json

# Placeholder entry shown first in the query selectbox so nothing is rendered
# until the user actively picks a query.
NO_QUERY = ' '

resps = load_json('eval/resps_100_w_diff.json')
query_to_resp = {resp['query']: resp for resp in resps}
queries = [resp['query'] for resp in resps]

query_option = st.selectbox('Select query', [NO_QUERY] + queries)

# Compare by value: `is not ' '` tests object identity, which is not
# guaranteed for equal strings and triggers a SyntaxWarning on Python 3.8+.
if query_option != NO_QUERY:
    resp = query_to_resp[query_option]
    hits = resp['hits']
    metadata = resp['metadata']
    st.write(metadata)

    # Only hits with a text rank are offered. Each label starts with the
    # hit's 1-based position in `hits`, which is how the selection is mapped
    # back to the hit below.
    title_labels = [
        str(i + 1) + ' ' + str(hit['title'])
        for i, hit in enumerate(hits)
        if hit['text_rank'] > -1
    ]
    title_option = st.selectbox('Select document title', title_labels)

    # The selectbox yields None when there are no options, so check before
    # parsing the label.
    if title_option is not None:
        # The label number is a 1-based index into `hits` (not into the
        # filtered label list), so convert it back and index `hits` directly.
        # This also avoids ambiguity when several hits share a title.
        hit_index = int(title_option.split(' ', 1)[0]) - 1
        doc = hits[hit_index]

        st.write({
            "id": doc["id"],
            "title": doc["title"],
            "text_rank": doc["text_rank"],
            "noise_rank": doc["noise_rank"],
            "boundary_rank": doc["boundary_rank"],
        })

        original_lines = doc['text'].split("\n")
        # Set for O(1) membership tests while walking the original lines.
        noise_kept_lines = set(doc['text_noise_removed'].split("\n"))

        boundary_text = doc['text_boundary_removed']
        # Count the real number of remaining lines. Using the size of a set of
        # lines undercounts whenever lines repeat (e.g. blank lines).
        boundary_kept_count = (
            len(boundary_text.split("\n")) if boundary_text else 0
        )

        st.header("Noise removal result")
        for i, line in enumerate(original_lines):
            if line not in noise_kept_lines:
                st.write(f":red[{i} {line}]")
            else:
                st.write(f"{i} {line}")

        st.header("Boundary removal result")
        # NOTE(review): this assumes boundary removal only truncates the tail
        # of the text, so every line past the kept count is shown as removed
        # -- confirm against the code that produces `text_boundary_removed`.
        for i, line in enumerate(original_lines):
            if i >= boundary_kept_count:
                st.write(f":red[{line}]")
            else:
                st.write(line)

        # Disabled alternative rendering kept from the original script:
        # diff_html = doc['diff_html']
        # st.markdown(diff_html, unsafe_allow_html=True)
        # components.html(diff_html, width=5000, height=4000, scrolling=True)