Spaces:

MAPS-research
/

GEMRec-Gallery

Sleeping

File size: 20,347 Bytes

import streamlit as st
import numpy as np
import random
import pandas as pd
import glob
from PIL import Image
import datasets
from datasets import load_dataset, Dataset, load_from_disk
from huggingface_hub import login
import os
import requests
from bs4 import BeautifulSoup
import re

import altair as alt
from streamlit_vega_lite import vega_lite_component, altair_component, _component_func

# Translates the short tokens that make up combined-score names such as
# 'clip+rank' into the name of the individual score each token stands for.
SCORE_NAME_MAPPING = {
    'clip': 'clip_score',
    'rank': 'avg_rank',
    'pop': 'model_download_count',
}


# hist_data = pd.DataFrame(np.random.normal(42, 10, (200, 1)), columns=["x"])
@st.cache_resource
def altair_histogram(hist_data, sort_by):
    """Build a binned bar chart of ``sort_by`` with an x-axis interval brush.

    Args:
        hist_data: data passed straight to ``alt.Chart``; it must contain a
            field named ``sort_by``.
        sort_by: name of the quantitative field to bin along the x axis.

    Returns:
        A 600x300 Altair chart whose y axis is the per-bin record count and
        which carries an interval selection named "brushed".
    """
    x_brush = alt.selection_interval(encodings=['x'], name="brushed")
    binned_x = alt.X(f"{sort_by}:Q", bin=True)

    chart = alt.Chart(hist_data).mark_bar()
    chart = chart.encode(binned_x, y="count()")
    chart = chart.add_selection(x_brush)
    return chart.properties(width=600, height=300)

class GalleryApp:
    def __init__(self, promptBook, images_ds):
        self.promptBook = promptBook
        self.images_ds = images_ds

    def gallery_masonry(self, items, col_num, info):
        """Render ``items`` as a masonry grid, filling the columns round-robin.

        Each cell shows the image, a 'Select' checkbox bound to the row's
        ``'checked'`` flag in ``self.promptBook``, and the requested info
        fields.

        Args:
            items: DataFrame of rows to show; each row needs a ``'row_idx'``
                value plus every column named in ``info``.
            col_num: number of grid columns.
            info: iterable of column names to print under each image.
        """
        cols = st.columns(col_num)
        for position, (_, item) in enumerate(items.iterrows()):
            # 'row_idx' addresses both self.images_ds and self.promptBook.
            row_idx = int(item['row_idx'])
            with cols[position % col_num]:
                st.image(self.images_ds[row_idx]['image'], use_column_width=True)

                # The widget key is derived from the row, not from the grid
                # position, so a checkbox's state stays attached to its image
                # when the items are re-sorted or filtered.
                self.promptBook.loc[row_idx, 'checked'] = st.checkbox(
                    'Select',
                    value=self.promptBook.loc[row_idx, 'checked'],
                    key=f'select_{row_idx}')

                for key in info:
                    st.write(f"**{key}**: {item[key]}")

    def gallery_standard(self, items, col_num, info):
        """Render ``items`` as a regular grid, ``col_num`` images per row.

        Each cell shows the image, a 'Select' checkbox bound to the row's
        ``'checked'`` flag in ``self.promptBook``, and the requested info
        fields.

        Args:
            items: DataFrame of rows to show; each row needs a ``'row_idx'``
                value plus every column named in ``info``.
            col_num: number of images per grid row.
            info: iterable of column names to print under each image.
        """
        for start in range(0, len(items), col_num):
            # One container per visual row keeps the cells of a row together.
            with st.container():
                cols = st.columns(col_num)
                row_items = items.iloc[start:start + col_num]
                # zip stops at the shorter side, so a partial last row simply
                # leaves its trailing columns empty.
                for col, (_, item) in zip(cols, row_items.iterrows()):
                    # 'row_idx' addresses both self.images_ds and
                    # self.promptBook.
                    row_idx = int(item['row_idx'])
                    with col:
                        st.image(self.images_ds[row_idx]['image'],
                                 use_column_width=True)

                        # Key the widget by row rather than by grid position
                        # so a checkbox's state stays attached to its image
                        # when the items are re-sorted or filtered.
                        self.promptBook.loc[row_idx, 'checked'] = st.checkbox(
                            'Select',
                            value=self.promptBook.loc[row_idx, 'checked'],
                            key=f'select_{row_idx}')

                        for key in info:
                            st.write(f"**{key}**: {item[key]}")

    def selection_panel(self, items):
        """Render the sort / order / filter / info controls and apply them.

        Args:
            items: DataFrame of gallery rows; it must carry a ``'checked'``
                column and whichever column ends up selected for sorting.

        Returns:
            Tuple ``(items, info, col_num)``: the sorted and filtered rows,
            the list of column names to show under each image, and the number
            of gallery columns chosen on the slider.
        """
        selecters = st.columns([4, 1, 1])

        with selecters[0]:
            types = st.columns([1, 3])
            with types[0]:
                sort_type = st.selectbox('Sort by', ['IDs and Names', 'Scores'])
            with types[1]:
                if sort_type == 'IDs and Names':
                    sort_by = st.selectbox('Sort by',
                                           ['model_name', 'model_id', 'modelVersion_name', 'modelVersion_id'],
                                           label_visibility='hidden')
                else:
                    chosen_scores = st.multiselect('Sort by', ['clip_score', 'avg_rank', 'popularity'],
                                                   label_visibility='hidden',
                                                   default=['clip_score', 'avg_rank', 'popularity'])
                    # Short tokens of the chosen scores, in the fixed order
                    # used by the combined column names ('clip+rank+pop', ...).
                    tokens = [token
                              for label, token in (('clip_score', 'clip'),
                                                   ('avg_rank', 'rank'),
                                                   ('popularity', 'pop'))
                              if label in chosen_scores]
                    if len(tokens) >= 2:
                        sort_by = '+'.join(tokens)
                    elif tokens:
                        # A single score sorts on its own column.
                        sort_by = SCORE_NAME_MAPPING[tokens[0]]
                    else:
                        # Nothing selected: there is no column to sort on.
                        sort_by = None

        with selecters[1]:
            order = st.selectbox('Order', ['Ascending', 'Descending'], index=1 if sort_type == 'Scores' else 0)
            ascending = order == 'Ascending'

        if sort_by is None:
            st.warning('Select at least one score to sort by.')
            items = items.reset_index(drop=True)
        else:
            items = items.sort_values(by=[sort_by], ascending=ascending).reset_index(drop=True)

        with selecters[2]:
            safety_filter = st.selectbox('Filter', ['Safe', 'All', 'Unsafe'])
            if safety_filter == 'Safe':
                # keep only the rows that are NOT checked
                items = items[items['checked'].eq(False)].reset_index(drop=True)
            elif safety_filter == 'Unsafe':
                # keep only the rows that ARE checked
                items = items[items['checked'].eq(True)].reset_index(drop=True)

        info = st.multiselect('Show Info',
                              ['model_download_count', 'clip_score', 'avg_rank', 'model_name', 'model_id',
                               'modelVersion_name', 'modelVersion_id', 'clip+rank', 'clip+pop', 'rank+pop',
                               'clip+rank+pop'],
                              default=sort_by)

        # Collect, in first-seen order, the individual scores behind every
        # combined ('a+b') column that is shown, to explain them in one note.
        mentioned_scores = []
        for shown in info:
            if '+' not in shown:
                continue
            for token in shown.split('+'):
                score_name = SCORE_NAME_MAPPING[token]
                if score_name not in mentioned_scores:
                    mentioned_scores.append(score_name)
        if mentioned_scores:
            st.info(
                f"**Note:** The scores {mentioned_scores} are normalized to [0, 1] for each score type, and then added together. The higher the score, the better the model.")

        col_num = st.slider('Number of columns', min_value=1, max_value=9, value=4, step=1, key='col_num')

        return items, info, col_num


    def selection_panel_2(self, items):
        """Render the weighted-score variant of the selection controls.

        In 'Scores' mode the user sets three weights, a ``'weighted_score_sum'``
        column is written into ``items``, and a brushable histogram lets the
        user narrow the rows to a score range. In 'IDs and Names' mode the
        rows are sorted on the chosen identifier column.

        Args:
            items: DataFrame of gallery rows; it must carry a ``'checked'``
                column, the identifier columns offered for sorting and, for
                'Scores' mode, ``'norm_clip'``, ``'avg_rank'`` and
                ``'norm_pop'``. The frame is modified in place when the
                weighted score is computed.

        Returns:
            Tuple ``(items, info, col_num)``: the sorted and filtered rows,
            the list of column names to show under each image, and the number
            of gallery columns chosen on the slider.
        """
        selecters = st.columns([1, 5])

        with selecters[0]:
            sort_type = st.selectbox('Sort by', ['IDs and Names', 'Scores'])
        by_score = sort_type == 'Scores'

        with selecters[1]:
            if by_score:
                sort_by = 'weighted_score_sum'
                sub_selecters = st.columns([1, 1, 1, 1, 1])

                with sub_selecters[0]:
                    clip_weight = st.number_input('Clip Score Weight', min_value=-100.0, max_value=100.0, value=1.0, step=0.1)
                with sub_selecters[1]:
                    rank_weight = st.number_input('Rank Score Weight', min_value=-100.0, max_value=100.0, value=1.0, step=0.1)
                with sub_selecters[2]:
                    pop_weight = st.number_input('Popularity Weight', min_value=-100.0, max_value=100.0, value=1.0, step=0.1)

                # NOTE(review): 'avg_rank' is used as-is while the other two
                # terms use 'norm_*' columns - confirm this is intended.
                weighted = (items['norm_clip'] * clip_weight
                            + items['avg_rank'] * rank_weight
                            + items['norm_pop'] * pop_weight)
                # Plain column assignment replaces the column, so the float
                # scores are not written into an integer placeholder column.
                items['weighted_score_sum'] = weighted.round(4)

                # the order and filter widgets go into the 4th and 5th column
                continue_idx = 3
            else:
                sub_selecters = st.columns([3, 1, 1])
                with sub_selecters[0]:
                    sort_by = st.selectbox('Sort by',
                                           ['model_name', 'model_id', 'modelVersion_name', 'modelVersion_id'],
                                           label_visibility='hidden')

                # the order and filter widgets go into the 2nd and 3rd column
                continue_idx = 1

            with sub_selecters[continue_idx]:
                order = st.selectbox('Order', ['Ascending', 'Descending'], index=1 if by_score else 0)
                ascending = order == 'Ascending'

            items = items.sort_values(by=[sort_by], ascending=ascending).reset_index(drop=True)

            with sub_selecters[continue_idx + 1]:
                safety_filter = st.selectbox('Filter', ['Safe', 'All', 'Unsafe'])
                if safety_filter == 'Safe':
                    # keep only the rows that are NOT checked
                    items = items[items['checked'].eq(False)].reset_index(drop=True)
                elif safety_filter == 'Unsafe':
                    # keep only the rows that ARE checked
                    items = items[items['checked'].eq(True)].reset_index(drop=True)

        if by_score:
            st.write('Select the range of scores to show')
            hist_data = pd.DataFrame(items[sort_by])
            event_dict = altair_component(altair_chart=altair_histogram(hist_data, sort_by))
            # The brushed interval, if any, is looked up under the field name.
            score_range = event_dict.get(sort_by)
            if score_range:
                within = (items[sort_by] >= score_range[0]) & (items[sort_by] <= score_range[1])
                items = items[within].reset_index(drop=True)
                st.write(score_range)

        info = st.multiselect('Show Info',
                              ['model_download_count', 'clip_score', 'avg_rank', 'model_name', 'model_id',
                               'modelVersion_name', 'modelVersion_id', 'clip+rank', 'clip+pop', 'rank+pop',
                               'clip+rank+pop', 'weighted_score_sum'],
                              default=sort_by)

        # Collect, in first-seen order, the individual scores behind every
        # combined ('a+b') column that is shown, to explain them in one note.
        mentioned_scores = []
        for shown in info:
            if '+' not in shown:
                continue
            for token in shown.split('+'):
                score_name = SCORE_NAME_MAPPING[token]
                if score_name not in mentioned_scores:
                    mentioned_scores.append(score_name)
        if mentioned_scores:
            st.info(
                f"**Note:** The scores {mentioned_scores} are normalized to [0, 1] for each score type, and then added together. The higher the score, the better the model.")

        col_num = st.slider('Number of columns', min_value=1, max_value=9, value=4, step=1, key='col_num')

        return items, info, col_num

    def app(self):
        """Render the whole page: sidebar prompt picker, safety gate, gallery.

        The sidebar selects a tag and a prompt and shows that prompt's
        generation metadata (plus, for the 'civitai' tag, a reference image
        scraped from civitai.com on a best-effort basis). The main area shows
        the selection controls and the image grid inside a form with save and
        reset buttons. Prompts listed as unsafe are hidden until the user
        ticks an acknowledgement checkbox.
        """
        st.title('Model Coffer Gallery')
        st.write('This is a gallery of images generated by the models in the Model Coffer')

        with st.sidebar:
            # tags in reverse alphabetical order
            prompt_tags = np.sort(self.promptBook['tag'].unique())[::-1]

            tag = st.selectbox('Select a tag', prompt_tags)

            items = self.promptBook[self.promptBook['tag'] == tag].reset_index(drop=True)

            original_prompts = np.sort(items['prompt'].unique())[::-1]

            if tag != 'abstract':
                # Only the label drops the first four comma-separated items
                # (mostly the same across prompts). The selected value is the
                # untouched prompt, so it always matches a row in `items`.
                prompt_full = st.selectbox(
                    'Select prompt', original_prompts,
                    format_func=lambda text: ', '.join(text.split(', ')[4:]))
            else:
                prompt_full = st.selectbox('Select prompt', original_prompts)

            prompt_id = items[items['prompt'] == prompt_full]['prompt_id'].unique()[0]
            items = items[items['prompt_id'] == prompt_id].reset_index(drop=True)

            # show image metadata, taken from the first row of the prompt
            image_metadatas = ['prompt_id', 'prompt', 'negativePrompt', 'sampler', 'cfgScale', 'size', 'seed']
            for key in image_metadatas:
                label = ' '.join(key.split('_')).capitalize()
                st.write(f"**{label}**")
                value = items[key].iloc[0]
                # a single space is rendered as "None"
                if value == ' ':
                    st.write('`None`')
                else:
                    st.caption(f"{value}")

            # for tag as civitai, add civitai reference
            if tag == 'civitai':
                st.write('**Civitai Reference**')
                try:
                    # The timeout keeps a slow or unreachable site from
                    # stalling the page render.
                    res = requests.get(f'https://civitai.com/images/{int(prompt_id)}', timeout=10)
                    soup = BeautifulSoup(res.text, 'html.parser')
                    image_section = soup.find('div', {'class': 'mantine-12rlksp'})
                    image_url = image_section.find('img')['src']
                    st.image(image_url, use_column_width=True)
                except Exception:
                    # Best effort: the page layout or the network may change
                    # at any time. Unlike a bare `except`, this does not
                    # intercept KeyboardInterrupt/SystemExit or other
                    # BaseException subclasses.
                    st.caption('Reference image unavailable.')

        # add safety check for some prompts
        safety_check = True
        # every tag starts with no unsafe prompts, then the known ones are
        # added manually
        unsafe_prompts = {prompt_tag: [] for prompt_tag in prompt_tags}
        unsafe_prompts['civitai'] = [375790, 366222, 295008, 256477]
        unsafe_prompts['people'] = [53]
        unsafe_prompts['art'] = [23]
        unsafe_prompts['abstract'] = [10, 12]

        if int(prompt_id) in unsafe_prompts[tag]:
            st.warning('This prompt may contain unsafe content. They might be offensive, depressing, or sexual.')
            safety_check = st.checkbox('I understand that this prompt may contain unsafe content. Show these images anyway.')

        if safety_check:
            items, info, col_num = self.selection_panel_2(items)

            with st.form(key=f'{prompt_id}', clear_on_submit=False):
                # NOTE(review): Streamlit runs on_click callbacks before the
                # script rerun that copies this form's checkbox values into
                # promptBook - confirm the callbacks see the latest
                # selections.
                buttons = st.columns([1, 1, 1])
                with buttons[0]:
                    st.form_submit_button('Save selections', on_click=self.save_checked, use_container_width=True, type='primary')
                with buttons[1]:
                    st.form_submit_button('Reset current prompt', on_click=self.reset_current_prompt, kwargs={'prompt_id': prompt_id}, use_container_width=True)
                with buttons[2]:
                    st.form_submit_button('Reset all selections', on_click=self.reset_all, use_container_width=True)

                self.gallery_standard(items, col_num, info)

    def reset_current_prompt(self, prompt_id):
        # reset current prompt
        self.promptBook.loc[self.promptBook['prompt_id'] == prompt_id, 'checked'] = False
        self.save_checked()

    def reset_all(self):
        # reset all
        self.promptBook.loc[:, 'checked'] = False
        self.save_checked()

    def save_checked(self):
        """Push the current ``'checked'`` column to the metadata dataset.

        Loads the 'train' split of the metadata dataset from the Hugging Face
        Hub, replaces (or adds) its ``'checked'`` column with the one held in
        ``self.promptBook``, and pushes the split back to the same repository.
        """
        repo_id = 'NYUSHPRP/ModelCofferMetadata'
        metadata = load_dataset(repo_id, split='train')

        # Drop a previously stored column first, then add the current one.
        if 'checked' in metadata.column_names:
            metadata = metadata.remove_columns('checked')
        metadata = metadata.add_column('checked', self.promptBook['checked'])

        metadata.push_to_hub(repo_id, split='train')


@st.cache_data
def load_hf_dataset():
    """Load the roster, the prompt metadata and the image dataset.

    The roster and metadata come from the Hugging Face Hub ('train' split);
    the images are read from ``./data/promptbook`` under the current working
    directory.

    Returns:
        Tuple ``(roster, promptBook, images_ds)``:
        roster - de-duplicated model/version table with download counts;
        promptBook - metadata joined with the roster columns, guaranteed to
        have ``'checked'``, ``'weighted_score_sum'`` and ``'row_idx'`` columns;
        images_ds - the dataset loaded from disk.
    """
    # load from huggingface
    roster = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferRoster', split='train'))
    promptBook = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferMetadata', split='train'))
    images_ds = load_from_disk(os.path.join(os.getcwd(), 'data', 'promptbook'))

    # process dataset
    roster_columns = ['model_id', 'model_name', 'modelVersion_id', 'modelVersion_name',
                      'model_download_count']
    roster = roster[roster_columns].drop_duplicates().reset_index(drop=True)

    # add 'checked' column to promptBook if not exist
    if 'checked' not in promptBook.columns:
        promptBook['checked'] = False

    # add 'weighted_score_sum' column to promptBook if not exist; it is a
    # float placeholder because the computed scores are floats
    if 'weighted_score_sum' not in promptBook.columns:
        promptBook['weighted_score_sum'] = 0.0

    # Merge roster and promptbook. The roster above is only de-duplicated on
    # all five columns, so one (model_id, modelVersion_id) pair may still
    # appear several times (e.g. with different download counts). Keeping one
    # roster row per key stops the left merge from multiplying promptBook
    # rows, which would shift 'row_idx' below.
    merge_keys = ['model_id', 'modelVersion_id']
    roster_lookup = roster.drop_duplicates(subset=merge_keys)
    promptBook = promptBook.merge(roster_lookup, on=merge_keys, how='left')

    # add column to record current row index
    promptBook['row_idx'] = promptBook.index

    return roster, promptBook, images_ds


if __name__ == '__main__':
    # Configure the page before anything is drawn.
    st.set_page_config(layout="wide")

    # Authenticate only when a token is actually configured, instead of
    # handing `None` to `login`.
    # NOTE(review): with no token, `login` is documented to prompt for one,
    # which a headless server cannot answer - confirm against the installed
    # huggingface_hub version.
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
    else:
        st.warning('HF_TOKEN is not set; skipping Hugging Face login. '
                   'Loading or saving private datasets may fail.')

    # All loading and preprocessing lives in load_hf_dataset().
    roster, promptBook, images_ds = load_hf_dataset()

    app = GalleryApp(promptBook=promptBook, images_ds=images_ds)
    app.app()