# Streamlit Space: query expansion demo with ConceptNet.
import json
import re
from collections import defaultdict

import pandas as pd
import streamlit as st
# Helper functions
def normalize_alpha(text):
    """Normalize a surface form for dictionary lookup.

    Lowercases, maps '_' and '-' to spaces, folds the long-o macron
    ('ō' -> 'o'), and strips a trailing parenthesized qualifier such as
    " (disambiguation)".
    """
    cleaned = text.lower().replace('_', ' ').replace('-', ' ').replace('ō', 'o')
    # Raw string: '\(' in a plain string is an invalid escape (warns on 3.12+).
    return re.sub(r' \([^()]+?\)', '', cleaned)
def load_json(path):
    """Read and return the JSON document stored at *path*.

    Logs the path to stdout after a successful load.
    """
    # Text mode with an explicit encoding instead of 'rb': json.load accepts
    # both, but this makes the expected UTF-8 input explicit.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print("Loaded json from path: " + str(path))
    return data
def get_concept_title(c):
    """Turn a ConceptNet URI into a readable title: take the last path
    segment and replace underscores with spaces."""
    last_segment = c.rsplit('/', 1)[-1]
    return last_segment.replace('_', ' ')
def print_links(lst):
    """Render each ConceptNet URI in *lst* as a markdown bullet linking to
    conceptnet.io."""
    for uri in lst:
        if '/n/wn/' in uri:
            # WordNet-scoped concept: show the sense label in parentheses.
            parts = uri.split('/n/wn/')
            title = get_concept_title(parts[0]) + f" ({parts[1].replace('_', ' ')})"
        else:
            title = get_concept_title(uri)
        st.markdown(f"- [{title}]({'https://conceptnet.io' + uri})")
def print_list(lst):
    """Render each string in *lst* as a plain markdown bullet."""
    for item in lst:
        st.markdown("- " + item)
# Load data
terms_to_conceptnet = load_json("solution_technology_w_conceptnet_20221222.json")
conceptnet_relations = pd.read_excel("conceptnet_relations_filtered_20230111.xlsx")

# Build lookup: normalized surface form -> indices into terms_to_conceptnet.
# Collect into sets so duplicates (title / en_page / synonym collisions) are
# deduplicated as we insert, instead of a second list(set(...)) pass.
term_to_entity = defaultdict(set)
for i, elem in enumerate(terms_to_conceptnet):
    term_to_entity[normalize_alpha(elem['title'])].add(i)
    if elem['en_page']:
        term_to_entity[normalize_alpha(elem['en_page'])].add(i)
    for syn in elem['synonyms']:
        term_to_entity[normalize_alpha(syn)].add(i)
# Keep defaultdict semantics: downstream lookup of an unknown query must
# yield an empty list, not raise KeyError.
term_to_entity = defaultdict(list, {k: list(v) for k, v in term_to_entity.items()})
all_terms = list(term_to_entity.keys())
# Demo start
st.subheader("Query Expansion with ConceptNet")
input_text = st.text_input(
    "Enter Query",
    "semiconductor",
    key="theme",
)
normalized = normalize_alpha(input_text)
matched_ids = term_to_entity[normalized]
matched_entities = [terms_to_conceptnet[j] for j in matched_ids]
# Streamlit specific: build the radio labels and a title -> entity lookup.
select_names = []
select_to_ent = {}
for ent in matched_entities:
    # Label is "japanese title / english page" when an English page exists.
    if ent['en_page'] == '':
        select_names.append(ent['title'])
    else:
        select_names.append(ent['title'] + ' / ' + ent['en_page'])
    select_to_ent[ent['title']] = ent
entity_term = st.radio(label='Select Wikipedia Entity', options=select_names, index=0)
# Match conceptnet
if entity_term is not None:
    # The radio label may be "title / en_page"; recover the bare title key.
    entity_term = entity_term.split(' / ')[0] if ' / ' in entity_term else entity_term
    entity = select_to_ent[entity_term]
    matched_concepts = entity['conceptnet_items']

    def _in_matched(uri):
        """True if a relation endpoint (with any '/n/...' part-of-speech
        suffix removed) is one of the entity's matched concepts."""
        return (uri.split('/n/')[0] if '/n/' in uri else uri) in matched_concepts

    # Links to the source pages / items.
    # NOTE(review): assumes 'en_page' and 'wikidata' are always strings
    # (possibly empty), never None — confirm against the JSON schema.
    if len(entity['en_page']) > 0:
        st.subheader("English Page")
        st.markdown(f"[{entity['en_page']}]({'https://en.wikipedia.org/wiki/' + entity['en_page'].replace(' ', '_')})")
    st.subheader("Japanese Page")
    st.markdown(f"[{entity['title']}]({'https://ja.wikipedia.org/wiki/' + entity['title']})")
    if len(entity['wikidata']) > 0:
        st.subheader("Wikidata Item")
        st.markdown(f"[{entity['wikidata']}]({'https://www.wikidata.org/wiki/' + entity['wikidata']})")

    # Relations where the head is one of the matched concepts.
    head_match = conceptnet_relations[conceptnet_relations['start'].apply(_in_matched)]
    is_a_match = head_match[head_match['relation'] == '/r/IsA']['end'].unique().tolist()
    used_for_match = head_match[head_match['relation'] == '/r/UsedFor']['end'].unique().tolist()
    # Wikipedia redirects
    wiki_suggestions = list(set([normalize_alpha(x) for x in entity['synonyms']]))
    # Relations where the tail is one of the matched concepts (superclasses).
    tail_match = conceptnet_relations[conceptnet_relations['end'].apply(_in_matched)]
    superclass_match = tail_match[tail_match['relation'] == '/r/IsA']['start'].unique().tolist()

    # Render each result section; the four sections were copy-pasted before,
    # so drive them from one loop.
    for heading, uris in (
        ("Matched Conceptnet items", matched_concepts),
        ("Is a:", is_a_match),
        ("Used for:", used_for_match),
        ("Superclasses:", superclass_match),
    ):
        st.subheader(heading)
        if len(uris) > 0:
            print_links(uris)
        else:
            st.write("No Matches")

    # Wikidata and conceptnet similar terms
    st.subheader("Wikipedia Redirects:")
    print_list(wiki_suggestions)