File size: 1,705 Bytes
c8a53c6
 
0b0ac8b
c8a53c6
 
 
0b0ac8b
c8a53c6
 
 
 
 
 
 
 
 
 
 
0b0ac8b
c8a53c6
 
 
0b0ac8b
 
 
 
c8a53c6
 
0b0ac8b
c8a53c6
 
 
 
 
 
 
 
 
 
0b0ac8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa2f582
0b0ac8b
c8a53c6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st

from src.utils import generate_diff_html, load_json

import pandas as pd

# Streamlit page for inspecting noise-removal and boundary-removal results.
#
# Flow: pick a query -> show its metadata -> pick one of its hits -> show the
# hit's ranks and its text line by line, highlighting removed lines in red.

# Evaluation responses; each entry is read for its 'query', 'metadata' and
# 'hits' keys below.
resps = load_json('eval/resps_100_w_diff.json')

query_to_resp = {resp['query']: resp for resp in resps}
queries = [resp['query'] for resp in resps]

# The leading ' ' entry is a placeholder meaning "no query selected yet".
query_option = st.selectbox('Select query', [' '] + queries)

# Compare by value: `is not ' '` tests object identity, which only works by
# accident of string interning and emits a SyntaxWarning on Python 3.8+.
if query_option != ' ':
    resp = query_to_resp[query_option]
    metadata = resp['metadata']

    st.write(metadata)

    # Only hits with text_rank > -1 are offered. Each label starts with the
    # hit's 1-based position in resp['hits'], which makes labels unique even
    # when two hits share a title.
    labeled_hits = [
        (str(i + 1) + ' ' + str(hit['title']), hit)
        for i, hit in enumerate(resp['hits'])
        if hit['text_rank'] > -1
    ]
    # Look the hit up by its full label rather than parsing the number back
    # out of the label: that number is a position in the unfiltered hit list
    # and is not a valid index into the filtered option list.
    label_to_hit = dict(labeled_hits)

    title_option = st.selectbox(
        'Select document title',
        [label for label, _ in labeled_hits],
    )

    # st.selectbox returns None when there are no options to choose from, so
    # check before touching the selection.
    if title_option is not None:
        doc = label_to_hit[title_option]

        # The document text is left out of this summary; it is rendered line
        # by line in the sections below.
        st.write({
            "id": doc["id"],
            "title": doc["title"],
            "text_rank": doc["text_rank"],
            "noise_rank": doc["noise_rank"],
            "boundary_rank": doc["boundary_rank"],
        })

        text_lines = doc['text'].split("\n")
        # Set for O(1) membership tests: a line counts as "kept" if it appears
        # anywhere in the noise-removed text.
        noise_kept_lines = set(doc['text_noise_removed'].split("\n"))
        # Number of lines in the boundary-removed text. Count the lines
        # themselves (not a set of them) so duplicate lines, such as repeated
        # blank lines, are not collapsed into one.
        # NOTE(review): assumes boundary removal keeps a leading portion of
        # the original text -- confirm against the producer of this field.
        boundary_kept_count = len(doc['text_boundary_removed'].split("\n"))

        st.header("Noise removal result")
        for i, line in enumerate(text_lines):
            numbered_line = str(i) + ' ' + line
            if line not in noise_kept_lines:
                st.write(f":red[{numbered_line}]")
            else:
                st.write(numbered_line)

        st.header("Boundary removal result")
        for i, line in enumerate(text_lines):
            # Lines past the retained line count are shown as removed.
            if i >= boundary_kept_count:
                st.write(f":red[{line}]")
            else:
                st.write(line)

        # NOTE: rendering of doc['diff_html'] is currently disabled.