Spaces:

naveed-stockmark
/

noise_removal_evaluation

Runtime error

App Files Files Community

noise_removal_evaluation / app.py

naveed-stockmark

Update app.py

0b0ac8b over 1 year ago

raw

history blame

No virus

1.71 kB

	import streamlit as st

	from src.utils import generate_diff_html, load_json

	import pandas as pd

	# Streamlit page for inspecting noise-removal / boundary-removal results.
	#
	# Flow: choose a query -> show its metadata -> choose one of its ranked
	# documents -> show the document's ranks and a line-by-line view in which
	# the lines dropped by each removal step are rendered in red.

	# Each loaded entry is read below for its 'query', 'metadata' and 'hits' keys.
	resps = load_json('eval/resps_100_w_diff.json')

	query_to_resp = {resp['query']: resp for resp in resps}
	queries = [resp['query'] for resp in resps]

	# The leading ' ' entry is the "nothing selected yet" placeholder.
	query_option = st.selectbox('Select query', [' '] + queries)

	# Compare by value. `is not ' '` tests object identity, which is not
	# guaranteed for equal strings and triggers a SyntaxWarning on Python 3.8+.
	if query_option != ' ':
	    resp = query_to_resp[query_option]

	    st.write(resp['metadata'])

	    # Map every selectable label straight to its hit. The number in the label
	    # is the 1-based position in the unfiltered resp['hits'], so it must not
	    # be used as an index into the filtered list. Keying by the full label
	    # (which embeds that unique position) also keeps hits that share a title
	    # distinct.
	    label_to_hit = {
	        f"{i + 1} {hit['title']}": hit
	        for i, hit in enumerate(resp['hits'])
	        if hit['text_rank'] > -1
	    }

	    title_option = st.selectbox('Select document title', list(label_to_hit))

	    # st.selectbox returns None when there are no options; check that before
	    # touching the selection.
	    if title_option is not None:
	        doc = label_to_hit[title_option]

	        st.write({
	            "id": doc["id"],
	            "title": doc["title"],
	            "text_rank": doc["text_rank"],
	            "noise_rank": doc["noise_rank"],
	            "boundary_rank": doc["boundary_rank"],
	            # "text": doc["text"]
	        })

	        original_lines = doc['text'].split("\n")

	        # Noise removal is checked by content: an original line that no longer
	        # appears anywhere in the noise-removed text is shown in red.
	        noise_kept_lines = set(doc['text_noise_removed'].split("\n"))

	        st.header("Noise removal result")
	        for i, line in enumerate(original_lines):
	            if line not in noise_kept_lines:
	                st.write(f":red[{i} {line}]")
	            else:
	                st.write(f"{i} {line}")

	        # Boundary removal is checked by position: every original line past the
	        # number of lines in the boundary-removed text is shown in red. The
	        # count is taken from the list of lines, not a set, because duplicate
	        # lines (e.g. blank lines) would otherwise shrink the count.
	        boundary_kept_count = len(doc['text_boundary_removed'].split("\n"))

	        st.header("Boundary removal result")
	        for i, line in enumerate(original_lines):
	            if i + 1 > boundary_kept_count:
	                st.write(f":red[{line}]")
	            else:
	                st.write(line)

	        # Disabled alternative rendering via a pre-computed HTML diff
	        # (`components` is not imported in the visible part of this file):
	        # diff_html = doc['diff_html']
	        # st.markdown(diff_html, unsafe_allow_html=True)
	        # components.html(diff_html, width=5000, height=4000, scrolling=True)