import os

import altair as alt
import numpy as np
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
from datasets import load_dataset, load_from_disk
from huggingface_hub import login
from streamlit_vega_lite import altair_component

# maps the short names used in combined score labels (e.g. 'clip+rank') to columns
SCORE_NAME_MAPPING = {'clip': 'clip_score', 'rank': 'avg_rank', 'pop': 'model_download_count'}


@st.cache_resource
def altair_histogram(hist_data, sort_by):
    # histogram of the active score column with an x-axis interval brush,
    # used by selection_panel_2 for range filtering
    brushed = alt.selection_interval(encodings=['x'], name="brushed")
    return (
        alt.Chart(hist_data)
        .mark_bar()
        .encode(alt.X(f"{sort_by}:Q", bin=True), y="count()")
        .add_selection(brushed)
        .properties(width=600, height=300)
    )
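
# Sorting relies on pre-normalized score columns such as 'norm_clip' and 'norm_pop',
# which are computed upstream of this app. A minimal sketch of that normalization,
# assuming plain per-column min-max scaling (the helper name is illustrative and the
# app itself does not call it):
def min_max_normalize(scores: pd.Series) -> pd.Series:
    """Scale a score column to [0, 1]; a constant column maps to all zeros."""
    lo, hi = scores.min(), scores.max()
    if hi == lo:
        return scores * 0.0
    return (scores - lo) / (hi - lo)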

class GalleryApp:
    def __init__(self, promptBook, images_ds):
        self.promptBook = promptBook
        self.images_ds = images_ds

    def gallery_masonry(self, items, col_num, info):
        # masonry layout: item idx goes into column idx % col_num
        cols = st.columns(col_num)
        for idx in range(len(items)):
            with cols[idx % col_num]:
                row_idx = items.iloc[idx]['row_idx'].item()
                image = self.images_ds[row_idx]['image']
                st.image(image, use_column_width=True)
                # checkbox flagging this image as selected ('checked')
                self.promptBook.loc[row_idx, 'checked'] = st.checkbox(
                    'Select', value=self.promptBook.loc[row_idx, 'checked'],
                    key=f'select_{idx}')
                for key in info:
                    st.write(f"**{key}**: {items.iloc[idx][key]}")

    def gallery_standard(self, items, col_num, info):
        # row-major grid: one container per row, col_num images per row
        rows = (len(items) + col_num - 1) // col_num  # ceiling division
        containers = [st.container() for _ in range(rows)]
        for idx in range(0, len(items), col_num):
            with containers[idx // col_num]:
                cols = st.columns(col_num)
                for j in range(col_num):
                    if idx + j < len(items):
                        with cols[j]:
                            # show image
                            row_idx = items.iloc[idx + j]['row_idx'].item()
                            image = self.images_ds[row_idx]['image']
                            st.image(image, use_column_width=True)
                            # checkbox flagging this image as selected ('checked')
                            self.promptBook.loc[row_idx, 'checked'] = st.checkbox(
                                'Select', value=self.promptBook.loc[row_idx, 'checked'],
                                key=f'select_{idx + j}')
                            # show selected info
                            for key in info:
                                st.write(f"**{key}**: {items.iloc[idx + j][key]}")

    def selection_panel(self, items):
        selecters = st.columns([4, 1, 1])

        with selecters[0]:
            types = st.columns([1, 3])
            with types[0]:
                sort_type = st.selectbox('Sort by', ['IDs and Names', 'Scores'])
            with types[1]:
                if sort_type == 'IDs and Names':
                    sort_by = st.selectbox(
                        'Sort by',
                        ['model_name', 'model_id', 'modelVersion_name', 'modelVersion_id'],
                        label_visibility='hidden')
                elif sort_type == 'Scores':
                    sort_by = st.multiselect(
                        'Sort by', ['clip_score', 'avg_rank', 'popularity'],
                        label_visibility='hidden',
                        default=['clip_score', 'avg_rank', 'popularity'])
                    # map the selected score set to its (pre-combined) column name
                    if len(sort_by) == 3:
                        sort_by = 'clip+rank+pop'
                    elif len(sort_by) == 2:
                        if 'clip_score' in sort_by and 'avg_rank' in sort_by:
                            sort_by = 'clip+rank'
                        elif 'clip_score' in sort_by and 'popularity' in sort_by:
                            sort_by = 'clip+pop'
                        elif 'avg_rank' in sort_by and 'popularity' in sort_by:
                            sort_by = 'rank+pop'
                    elif len(sort_by) == 1:
                        if 'popularity' in sort_by:
                            sort_by = 'model_download_count'
                        else:
                            sort_by = sort_by[0]
                    else:
                        # guard: an empty selection would crash the sort below
                        sort_by = 'clip+rank+pop'

        with selecters[1]:
            order = st.selectbox('Order', ['Ascending', 'Descending'],
                                 index=1 if sort_type == 'Scores' else 0)
            order = (order == 'Ascending')

        items = items.sort_values(by=[sort_by], ascending=order).reset_index(drop=True)

        with selecters[2]:
            safety_filter = st.selectbox('Filter', ['Safe', 'All', 'Unsafe'])
            if safety_filter == 'Safe':
                # keep only images that have not been flagged
                items = items[items['checked'] == False].reset_index(drop=True)
            elif safety_filter == 'Unsafe':
                # keep only flagged images
                items = items[items['checked'] == True].reset_index(drop=True)

        info = st.multiselect(
            'Show Info',
            ['model_download_count', 'clip_score', 'avg_rank', 'model_name', 'model_id',
             'modelVersion_name', 'modelVersion_id', 'clip+rank', 'clip+pop', 'rank+pop',
             'clip+rank+pop'],
            default=sort_by)

        # explain combined scores whenever one is displayed
        mentioned_scores = []
        for i in info:
            if '+' in i:
                for m in i.split('+'):
                    if SCORE_NAME_MAPPING[m] not in mentioned_scores:
                        mentioned_scores.append(SCORE_NAME_MAPPING[m])
        if len(mentioned_scores) > 0:
            st.info(
                f"**Note:** The scores {mentioned_scores} are normalized to [0, 1] for each "
                f"score type, and then added together. The higher the score, the better the model.")

        col_num = st.slider('Number of columns', min_value=1, max_value=9, value=4, step=1,
                            key='col_num')

        return items, info, col_num
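    # selection_panel_2 below replaces selection_panel's preset score combinations
    # with user-set weights and adds a brushable histogram for range filtering. The
    # event dict returned by altair_component is assumed to map the brushed field to
    # its selected x-extent, e.g. {'weighted_score_sum': [0.31, 0.87]} (values
    # illustrative); with no brush there is no entry and the full range is kept.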
    def selection_panel_2(self, items):
        selecters = st.columns([1, 5])

        with selecters[0]:
            sort_type = st.selectbox('Sort by', ['IDs and Names', 'Scores'])
            if sort_type == 'Scores':
                sort_by = 'weighted_score_sum'

        with selecters[1]:
            if sort_type == 'IDs and Names':
                sub_selecters = st.columns([3, 1, 1])
                with sub_selecters[0]:
                    sort_by = st.selectbox(
                        'Sort by',
                        ['model_name', 'model_id', 'modelVersion_name', 'modelVersion_id'],
                        label_visibility='hidden')
                continue_idx = 1
            else:
                sub_selecters = st.columns([1, 1, 1, 1, 1])
                with sub_selecters[0]:
                    clip_weight = st.number_input('Clip Score Weight', min_value=-100.0,
                                                  max_value=100.0, value=1.0, step=0.1)
                with sub_selecters[1]:
                    rank_weight = st.number_input('Rank Score Weight', min_value=-100.0,
                                                  max_value=100.0, value=1.0, step=0.1)
                with sub_selecters[2]:
                    pop_weight = st.number_input('Popularity Weight', min_value=-100.0,
                                                 max_value=100.0, value=1.0, step=0.1)
                # weighted sum of the per-model scores (note: clip and popularity use
                # their normalized columns, while the rank score is used as is)
                items.loc[:, 'weighted_score_sum'] = round(
                    items['norm_clip'] * clip_weight
                    + items['avg_rank'] * rank_weight
                    + items['norm_pop'] * pop_weight, 4)
                continue_idx = 3

            with sub_selecters[continue_idx]:
                order = st.selectbox('Order', ['Ascending', 'Descending'],
                                     index=1 if sort_type == 'Scores' else 0)
                order = (order == 'Ascending')

            items = items.sort_values(by=[sort_by], ascending=order).reset_index(drop=True)

            with sub_selecters[continue_idx + 1]:
                safety_filter = st.selectbox('Filter', ['Safe', 'All', 'Unsafe'])
                if safety_filter == 'Safe':
                    # keep only images that have not been flagged
                    items = items[items['checked'] == False].reset_index(drop=True)
                elif safety_filter == 'Unsafe':
                    # keep only flagged images
                    items = items[items['checked'] == True].reset_index(drop=True)

        if sort_type == 'Scores':
            st.write('Select the range of scores to show')
            hist_data = pd.DataFrame(items[sort_by])
            event_dict = altair_component(altair_chart=altair_histogram(hist_data, sort_by))
            r = event_dict.get(sort_by)
            if r:
                items = items[(items[sort_by] >= r[0])
                              & (items[sort_by] <= r[1])].reset_index(drop=True)
                st.write(r)

        info = st.multiselect(
            'Show Info',
            ['model_download_count', 'clip_score', 'avg_rank', 'model_name', 'model_id',
             'modelVersion_name', 'modelVersion_id', 'clip+rank', 'clip+pop', 'rank+pop',
             'clip+rank+pop', 'weighted_score_sum'],
            default=sort_by)

        # explain combined scores whenever one is displayed
        mentioned_scores = []
        for i in info:
            if '+' in i:
                for m in i.split('+'):
                    if SCORE_NAME_MAPPING[m] not in mentioned_scores:
                        mentioned_scores.append(SCORE_NAME_MAPPING[m])
        if len(mentioned_scores) > 0:
            st.info(
                f"**Note:** The scores {mentioned_scores} are normalized to [0, 1] for each "
                f"score type, and then added together. The higher the score, the better the model.")

        col_num = st.slider('Number of columns', min_value=1, max_value=9, value=4, step=1,
                            key='col_num')

        return items, info, col_num

    def app(self):
        st.title('Model Coffer Gallery')
        st.write('This is a gallery of images generated by the models in the Model Coffer.')

        with st.sidebar:
            # tags, in reverse alphabetical order
            prompt_tags = np.sort(self.promptBook['tag'].unique())[::-1]
            tag = st.selectbox('Select a tag', prompt_tags)

            items = self.promptBook[self.promptBook['tag'] == tag].reset_index(drop=True)
            original_prompts = np.sort(items['prompt'].unique())[::-1]

            # hide the first four comma-separated tokens of each prompt, which are
            # mostly identical boilerplate, and restore them before the lookup
            if tag != 'abstract':
                prompts = [', '.join(x.split(', ')[4:]) for x in original_prompts]
                prompt = st.selectbox('Select prompt', prompts)
                idx = prompts.index(prompt)
                prompt_full = ', '.join(original_prompts[idx].split(', ')[:4]) + ', ' + prompt
            else:
                prompt_full = st.selectbox('Select prompt', original_prompts)

            prompt_id = items[items['prompt'] == prompt_full]['prompt_id'].unique()[0]
            items = items[items['prompt_id'] == prompt_id].reset_index(drop=True)

            # show image metadata
            image_metadatas = ['prompt_id', 'prompt', 'negativePrompt', 'sampler', 'cfgScale',
                               'size', 'seed']
            for key in image_metadatas:
                label = ' '.join(key.split('_')).capitalize()
                st.write(f"**{label}**")
                if items[key][0] == ' ':
                    st.write('`None`')
                else:
                    st.caption(f"{items[key][0]}")

            # for the civitai tag, show the reference image scraped from Civitai
            if tag == 'civitai':
                try:
                    st.write('**Civitai Reference**')
                    res = requests.get(f'https://civitai.com/images/{prompt_id.item()}')
                    soup = BeautifulSoup(res.text, 'html.parser')
                    image_section = soup.find('div', {'class': 'mantine-12rlksp'})
                    image_url = image_section.find('img')['src']
                    st.image(image_url, use_column_width=True)
                except Exception:
                    # scraping is best-effort; the class name above is brittle
                    pass

        # safety check for prompts known to produce problematic images
        safety_check = True
        unsafe_prompts = {prompt_tag: [] for prompt_tag in prompt_tags}
        # manually flagged unsafe prompt ids per tag
        unsafe_prompts['civitai'] = [375790, 366222, 295008, 256477]
        unsafe_prompts['people'] = [53]
        unsafe_prompts['art'] = [23]
        unsafe_prompts['abstract'] = [10, 12]

        if int(prompt_id.item()) in unsafe_prompts[tag]:
            st.warning('This prompt may contain unsafe content. The images might be '
                       'offensive, depressing, or sexual.')
            safety_check = st.checkbox(
                'I understand that this prompt may contain unsafe content. '
                'Show these images anyway.')
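        # the selection panel, gallery, and form below only render once safety_check
        # is True, i.e. the prompt is unflagged or the viewer explicitly opted in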
        if safety_check:
            items, info, col_num = self.selection_panel_2(items)

            with st.form(key=f'{prompt_id}', clear_on_submit=False):
                buttons = st.columns([1, 1, 1])
                with buttons[0]:
                    st.form_submit_button('Save selections', on_click=self.save_checked,
                                          use_container_width=True, type='primary')
                with buttons[1]:
                    st.form_submit_button('Reset current prompt',
                                          on_click=self.reset_current_prompt,
                                          kwargs={'prompt_id': prompt_id},
                                          use_container_width=True)
                with buttons[2]:
                    st.form_submit_button('Reset all selections', on_click=self.reset_all,
                                          use_container_width=True)
                self.gallery_standard(items, col_num, info)

    def reset_current_prompt(self, prompt_id):
        # clear the flags for the current prompt only, then persist
        self.promptBook.loc[self.promptBook['prompt_id'] == prompt_id, 'checked'] = False
        self.save_checked()

    def reset_all(self):
        # clear the flags for every prompt, then persist
        self.promptBook.loc[:, 'checked'] = False
        self.save_checked()

    def save_checked(self):
        # persist the 'checked' flags back to the metadata dataset on the Hub
        dataset = load_dataset('NYUSHPRP/ModelCofferMetadata', split='train')
        checked_info = self.promptBook['checked']
        if 'checked' in dataset.column_names:
            dataset = dataset.remove_columns('checked')
        dataset = dataset.add_column('checked', checked_info.tolist())
        dataset.push_to_hub('NYUSHPRP/ModelCofferMetadata', split='train')


@st.cache_data
def load_hf_dataset():
    # load the metadata from the Hub and the images from a local copy
    roster = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferRoster', split='train'))
    promptBook = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferMetadata', split='train'))
    images_ds = load_from_disk(os.path.join(os.getcwd(), 'data', 'promptbook'))

    # one row per model version, with download counts
    roster = roster[['model_id', 'model_name', 'modelVersion_id', 'modelVersion_name',
                     'model_download_count']].drop_duplicates().reset_index(drop=True)

    # add the 'checked' column to promptBook if it does not exist yet
    if 'checked' not in promptBook.columns:
        promptBook.loc[:, 'checked'] = False

    # add the 'weighted_score_sum' column to promptBook if it does not exist yet
    if 'weighted_score_sum' not in promptBook.columns:
        promptBook.loc[:, 'weighted_score_sum'] = 0

    # merge roster and promptBook
    promptBook = promptBook.merge(
        roster[['model_id', 'model_name', 'modelVersion_id', 'modelVersion_name',
                'model_download_count']],
        on=['model_id', 'modelVersion_id'], how='left')

    # record each image's position so the galleries can index into images_ds
    promptBook.loc[:, 'row_idx'] = promptBook.index

    return roster, promptBook, images_ds
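
# load_hf_dataset reads the images from a local copy under ./data/promptbook. The
# same images can also be pulled straight from the Hub; a sketch of that
# alternative (slower on first run, but with no local-data requirement):
def load_images_from_hub():
    # downloads the full promptbook image split instead of reading it from disk
    return load_dataset('NYUSHPRP/ModelCofferPromptBook', split='train')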

if __name__ == '__main__':
    login(token=os.environ.get("HF_TOKEN"))
    st.set_page_config(layout="wide")

    roster, promptBook, images_ds = load_hf_dataset()

    app = GalleryApp(promptBook=promptBook, images_ds=images_ds)
    app.app()
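# To run locally: set HF_TOKEN to a token with access to the NYUSHPRP datasets,
# make sure the image dataset is available under ./data/promptbook, and launch
# with `streamlit run <this file>`.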