Ricercar committed on
Commit
b21aab5
1 Parent(s): 6bdebc7

update threshold

Browse files
app.py CHANGED
@@ -26,7 +26,7 @@ def altair_histogram(hist_data, sort_by, mini, maxi):
26
  chart = (
27
  alt.Chart(hist_data)
28
  .mark_bar(opacity=0.7, cornerRadius=2)
29
- .encode(alt.X(f"{sort_by}:Q", bin=alt.Bin(maxbins=20)), y="count()")
30
  # .add_selection(brushed)
31
  # .properties(width=800, height=300)
32
  )
@@ -84,28 +84,31 @@ class GalleryApp:
84
 
85
  def gallery_standard(self, items, col_num, info):
86
  rows = len(items) // col_num + 1
87
- containers = [st.container() for _ in range(rows*2)]
 
88
  for idx in range(0, len(items), col_num):
89
  # assign one container for each row
90
- row_idx = (idx // col_num) * 2
 
91
  with containers[row_idx]:
92
  cols = st.columns(col_num)
93
  for j in range(col_num):
94
  if idx + j < len(items):
95
  with cols[j]:
96
  # show image
97
- image = self.images_ds[items.iloc[idx+j]['row_idx'].item()]['image']
98
 
99
- st.image(image,
100
- use_column_width=True,
101
- )
102
 
103
- # show checkbox
104
- self.promptBook.loc[items.iloc[idx+j]['row_idx'].item(), 'checked'] = st.checkbox('Select', value=self.promptBook.loc[items.iloc[idx+j]['row_idx'].item(), 'checked'], key=f'select_{idx+j}')
 
 
105
 
 
106
  # show selected info
107
  for key in info:
108
- st.write(f"**{key}**: {items.iloc[idx+j][key]}")
109
 
110
  # st.write(row_idx/2, idx+j, rows)
111
  # extra_info = st.checkbox('Extra Info', key=f'extra_info_{idx+j}')
@@ -192,16 +195,19 @@ class GalleryApp:
192
  return items, info, col_num
193
 
194
  def selection_panel_2(self, items):
195
- selecters = st.columns([1, 5])
196
 
 
197
  with selecters[0]:
198
- sort_type = st.selectbox('Sort by', ['IDs and Names', 'Scores'])
199
  if sort_type == 'Scores':
200
  sort_by = 'weighted_score_sum'
201
 
 
202
  with selecters[1]:
203
  if sort_type == 'IDs and Names':
204
- sub_selecters = st.columns([3, 1, 1])
 
205
  with sub_selecters[0]:
206
  sort_by = st.selectbox('Sort by',
207
  ['model_name', 'model_id', 'modelVersion_name', 'modelVersion_id'],
@@ -210,81 +216,89 @@ class GalleryApp:
210
  continue_idx = 1
211
 
212
  else:
213
- sub_selecters = st.columns([1, 1, 1, 1, 1])
 
 
 
 
214
 
215
  with sub_selecters[0]:
216
- clip_weight = st.number_input('Clip Score Weight', min_value=-100.0, max_value=100.0, value=1.0, step=0.1)
217
  with sub_selecters[1]:
218
- rank_weight = st.number_input('Rank Score Weight', min_value=-100.0, max_value=100.0, value=1.0, step=0.1)
219
  with sub_selecters[2]:
220
- pop_weight = st.number_input('Popularity Weight', min_value=-100.0, max_value=100.0, value=1.0, step=0.1)
 
 
221
 
222
  items.loc[:, 'weighted_score_sum'] = round(items['norm_clip'] * clip_weight + items['avg_rank'] * rank_weight + items[
223
  'norm_pop'] * pop_weight, 4)
224
 
225
  continue_idx = 3
226
 
227
-
228
  with sub_selecters[continue_idx]:
229
- order = st.selectbox('Order', ['Ascending', 'Descending'], index=1 if sort_type == 'Scores' else 0)
230
- if order == 'Ascending':
231
- order = True
232
- else:
233
- order = False
234
-
235
- items = items.sort_values(by=[sort_by], ascending=order).reset_index(drop=True)
236
-
237
- with sub_selecters[continue_idx+1]:
238
- filter = st.selectbox('Filter', ['Safe', 'All', 'Unsafe'])
239
- print('filter', filter)
240
- # initialize unsafe_modelVersion_ids
241
- if filter == 'Safe':
242
- # return checked items
243
- items = items[items['checked'] == False].reset_index(drop=True)
244
-
245
- elif filter == 'Unsafe':
246
- # return unchecked items
247
- items = items[items['checked'] == True].reset_index(drop=True)
248
- print(items)
249
 
250
  # draw a distribution histogram
251
  if sort_type == 'Scores':
252
- with st.expander('Show score distribution histogram and select score range'):
253
- st.write('**Score distribution histogram**')
254
- chart_space = st.container()
255
- # st.write('Select the range of scores to show')
256
- hist_data = pd.DataFrame(items[sort_by])
257
- mini = hist_data[sort_by].min().item()
258
- maxi = hist_data[sort_by].max().item()
259
- st.write('**Select the range of scores to show**')
260
- r = st.slider('Select the range of scores to show', min_value=mini, max_value=maxi, value=(mini, maxi), label_visibility='collapsed')
261
- with chart_space:
262
- st.altair_chart(altair_histogram(hist_data, sort_by, r[0], r[1]), use_container_width=True)
263
- # event_dict = altair_component(altair_chart=altair_histogram(hist_data, sort_by))
264
- # r = event_dict.get(sort_by)
265
- if r:
266
- items = items[(items[sort_by] >= r[0]) & (items[sort_by] <= r[1])].reset_index(drop=True)
267
- # st.write(r)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
 
269
 
270
- info = st.multiselect('Show Info',
271
- ['model_download_count', 'clip_score', 'avg_rank', 'model_name', 'model_id',
272
- 'modelVersion_name', 'modelVersion_id', 'clip+rank', 'clip+pop', 'rank+pop',
273
- 'clip+rank+pop', 'weighted_score_sum'],
274
- default=sort_by)
 
275
 
276
- # add one annotation
277
- mentioned_scores = []
278
- for i in info:
279
- if '+' in i:
280
- mentioned = i.split('+')
281
- for m in mentioned:
282
- if SCORE_NAME_MAPPING[m] not in mentioned_scores:
283
- mentioned_scores.append(SCORE_NAME_MAPPING[m])
284
- if len(mentioned_scores) > 0:
285
- st.info(
286
- f"**Note:** The scores {mentioned_scores} are normalized to [0, 1] for each score type, and then added together. The higher the score, the better the model.")
287
 
 
288
  col_num = st.slider('Number of columns', min_value=1, max_value=9, value=4, step=1, key='col_num')
289
 
290
  return items, info, col_num
@@ -351,6 +365,7 @@ class GalleryApp:
351
  unsafe_prompts['people'] = [53]
352
  unsafe_prompts['art'] = [23]
353
  unsafe_prompts['abstract'] = [10, 12]
 
354
 
355
  if int(prompt_id.item()) in unsafe_prompts[tag]:
356
  st.warning('This prompt may contain unsafe content. They might be offensive, depressing, or sexual.')
@@ -358,19 +373,18 @@ class GalleryApp:
358
 
359
  if safety_check:
360
  items, info, col_num = self.selection_panel_2(items)
361
-
362
- # self.gallery_standard(items, col_num, info)
363
-
364
- with st.form(key=f'{prompt_id}', clear_on_submit=False):
365
- buttons = st.columns([1, 1, 1])
366
- with buttons[0]:
367
- submit = st.form_submit_button('Save selections', on_click=self.save_checked, use_container_width=True, type='primary')
368
- with buttons[1]:
369
- submit = st.form_submit_button('Reset current prompt', on_click=self.reset_current_prompt, kwargs={'prompt_id': prompt_id} , use_container_width=True)
370
- with buttons[2]:
371
- submit = st.form_submit_button('Reset all selections', on_click=self.reset_all, use_container_width=True)
372
-
373
- self.gallery_standard(items, col_num, info)
374
 
375
  def reset_current_prompt(self, prompt_id):
376
  # reset current prompt
@@ -393,11 +407,15 @@ class GalleryApp:
393
  dataset = dataset.add_column('checked', checked_info)
394
 
395
  # print('metadata dataset: ', dataset)
 
396
  dataset.push_to_hub('NYUSHPRP/ModelCofferMetadata', split='train')
397
 
398
 
399
  @st.cache_data
400
  def load_hf_dataset():
 
 
 
401
  # load from huggingface
402
  roster = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferRoster', split='train'))
403
  promptBook = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferMetadata', split='train'))
@@ -426,7 +444,6 @@ def load_hf_dataset():
426
 
427
 
428
  if __name__ == '__main__':
429
- login(token=os.environ.get("HF_TOKEN"))
430
  st.set_page_config(layout="wide")
431
 
432
  roster, promptBook, images_ds = load_hf_dataset()
 
26
  chart = (
27
  alt.Chart(hist_data)
28
  .mark_bar(opacity=0.7, cornerRadius=2)
29
+ .encode(alt.X(f"{sort_by}:Q", bin=alt.Bin(maxbins=25)), y="count()")
30
  # .add_selection(brushed)
31
  # .properties(width=800, height=300)
32
  )
 
84
 
85
  def gallery_standard(self, items, col_num, info):
86
  rows = len(items) // col_num + 1
87
+ # containers = [st.container() for _ in range(rows * 2)]
88
+ containers = [st.container() for _ in range(rows)]
89
  for idx in range(0, len(items), col_num):
90
  # assign one container for each row
91
+ # row_idx = (idx // col_num) * 2
92
+ row_idx = idx // col_num
93
  with containers[row_idx]:
94
  cols = st.columns(col_num)
95
  for j in range(col_num):
96
  if idx + j < len(items):
97
  with cols[j]:
98
  # show image
99
+ image = self.images_ds[items.iloc[idx + j]['row_idx'].item()]['image']
100
 
101
+ st.image(image, use_column_width=True)
 
 
102
 
103
+ # # show checkbox
104
+ # self.promptBook.loc[items.iloc[idx + j]['row_idx'].item(), 'checked'] = st.checkbox(
105
+ # 'Select', value=self.promptBook.loc[items.iloc[idx + j]['row_idx'].item(), 'checked'],
106
+ # key=f'select_{idx + j}')
107
 
108
+ st.write(idx+j)
109
  # show selected info
110
  for key in info:
111
+ st.write(f"**{key}**: {items.iloc[idx + j][key]}")
112
 
113
  # st.write(row_idx/2, idx+j, rows)
114
  # extra_info = st.checkbox('Extra Info', key=f'extra_info_{idx+j}')
 
195
  return items, info, col_num
196
 
197
  def selection_panel_2(self, items):
198
+ selecters = st.columns([1, 4])
199
 
200
+ # select sort type
201
  with selecters[0]:
202
+ sort_type = st.selectbox('Sort by', ['Scores', 'IDs and Names'])
203
  if sort_type == 'Scores':
204
  sort_by = 'weighted_score_sum'
205
 
206
+ # select other options
207
  with selecters[1]:
208
  if sort_type == 'IDs and Names':
209
+ sub_selecters = st.columns([3, 1])
210
+ # select sort by
211
  with sub_selecters[0]:
212
  sort_by = st.selectbox('Sort by',
213
  ['model_name', 'model_id', 'modelVersion_name', 'modelVersion_id'],
 
216
  continue_idx = 1
217
 
218
  else:
219
+ # add custom weights
220
+ sub_selecters = st.columns([1, 1, 1, 1])
221
+
222
+ if 'default_weights' not in st.session_state:
223
+ st.session_state.default_weights = [1.0, 1.0, 1.0]
224
 
225
  with sub_selecters[0]:
226
+ clip_weight = st.number_input('Clip Score Weight', min_value=-100.0, max_value=100.0, value=st.session_state.default_weights[0], step=0.1, help='the weight for normalized clip score')
227
  with sub_selecters[1]:
228
+ rank_weight = st.number_input('Distinctiveness Weight', min_value=-100.0, max_value=100.0, value=st.session_state.default_weights[1], step=0.1, help='the weight for average rank')
229
  with sub_selecters[2]:
230
+ pop_weight = st.number_input('Popularity Weight', min_value=-100.0, max_value=100.0, value=st.session_state.default_weights[2], step=0.1, help='the weight for normalized popularity score')
231
+
232
+ st.session_state.default_weights = [clip_weight, rank_weight, pop_weight]
233
 
234
  items.loc[:, 'weighted_score_sum'] = round(items['norm_clip'] * clip_weight + items['avg_rank'] * rank_weight + items[
235
  'norm_pop'] * pop_weight, 4)
236
 
237
  continue_idx = 3
238
 
239
+ # select threshold
240
  with sub_selecters[continue_idx]:
241
+ dist_threshold = st.number_input('Distinctiveness Threshold', min_value=0.0, max_value=1.0, value=0.84, step=0.01, help='Only show models with distinctiveness score lower than this threshold, set 1.0 to show all images')
242
+ items = items[items['avg_rank'] < dist_threshold].reset_index(drop=True)
243
+
244
+ # filter = st.selectbox('Filter', ['Safe', 'All', 'Unsafe'])
245
+ # print('filter', filter)
246
+ # # initialize unsafe_modelVersion_ids
247
+ # if filter == 'Safe':
248
+ # # return unchecked items
249
+ # items = items[items['checked'] == False].reset_index(drop=True)
250
+ #
251
+ # elif filter == 'Unsafe':
252
+ # # return checked items
253
+ # items = items[items['checked'] == True].reset_index(drop=True)
 
 
 
 
 
 
 
254
 
255
  # draw a distribution histogram
256
  if sort_type == 'Scores':
257
+ try:
258
+ with st.expander('Show score distribution histogram and select score range'):
259
+ st.write('**Score distribution histogram**')
260
+ chart_space = st.container()
261
+ # st.write('Select the range of scores to show')
262
+ hist_data = pd.DataFrame(items[sort_by])
263
+ mini = hist_data[sort_by].min().item()
264
+ mini = mini//0.1 * 0.1
265
+ maxi = hist_data[sort_by].max().item()
266
+ maxi = maxi//0.1 * 0.1 + 0.1
267
+ st.write('**Select the range of scores to show**')
268
+ r = st.slider('Select the range of scores to show', min_value=mini, max_value=maxi, value=(mini, maxi), step=0.05, label_visibility='collapsed')
269
+ with chart_space:
270
+ st.altair_chart(altair_histogram(hist_data, sort_by, r[0], r[1]), use_container_width=True)
271
+ # event_dict = altair_component(altair_chart=altair_histogram(hist_data, sort_by))
272
+ # r = event_dict.get(sort_by)
273
+ if r:
274
+ items = items[(items[sort_by] >= r[0]) & (items[sort_by] <= r[1])].reset_index(drop=True)
275
+ # st.write(r)
276
+ except:
277
+ pass
278
+
279
+ display_options = st.columns([1, 4])
280
+
281
+ with display_options[0]:
282
+ # select order
283
+ order = st.selectbox('Order', ['Ascending', 'Descending'], index=1 if sort_type == 'Scores' else 0)
284
+ if order == 'Ascending':
285
+ order = True
286
+ else:
287
+ order = False
288
 
289
+ with display_options[1]:
290
 
291
+ # select info to show
292
+ info = st.multiselect('Show Info',
293
+ ['model_download_count', 'clip_score', 'avg_rank', 'model_name', 'model_id',
294
+ 'modelVersion_name', 'modelVersion_id', 'clip+rank', 'clip+pop', 'rank+pop',
295
+ 'clip+rank+pop', 'weighted_score_sum'],
296
+ default=sort_by)
297
 
298
+ # apply sorting to dataframe
299
+ items = items.sort_values(by=[sort_by], ascending=order).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
300
 
301
+ # select number of columns
302
  col_num = st.slider('Number of columns', min_value=1, max_value=9, value=4, step=1, key='col_num')
303
 
304
  return items, info, col_num
 
365
  unsafe_prompts['people'] = [53]
366
  unsafe_prompts['art'] = [23]
367
  unsafe_prompts['abstract'] = [10, 12]
368
+ unsafe_prompts['food'] = [34]
369
 
370
  if int(prompt_id.item()) in unsafe_prompts[tag]:
371
  st.warning('This prompt may contain unsafe content. They might be offensive, depressing, or sexual.')
 
373
 
374
  if safety_check:
375
  items, info, col_num = self.selection_panel_2(items)
376
+ self.gallery_standard(items, col_num, info)
377
+
378
+ # with st.form(key=f'{prompt_id}', clear_on_submit=True):
379
+ # buttons = st.columns([1, 1, 1])
380
+ # with buttons[0]:
381
+ # submit = st.form_submit_button('Save selections', on_click=self.save_checked, use_container_width=True, type='primary')
382
+ # with buttons[1]:
383
+ # submit = st.form_submit_button('Reset current prompt', on_click=self.reset_current_prompt, kwargs={'prompt_id': prompt_id} , use_container_width=True)
384
+ # with buttons[2]:
385
+ # submit = st.form_submit_button('Reset all selections', on_click=self.reset_all, use_container_width=True)
386
+ #
387
+ # self.gallery_standard(items, col_num, info)
 
388
 
389
  def reset_current_prompt(self, prompt_id):
390
  # reset current prompt
 
407
  dataset = dataset.add_column('checked', checked_info)
408
 
409
  # print('metadata dataset: ', dataset)
410
+ st.cache_data.clear()
411
  dataset.push_to_hub('NYUSHPRP/ModelCofferMetadata', split='train')
412
 
413
 
414
  @st.cache_data
415
  def load_hf_dataset():
416
+ # login to huggingface
417
+ login(token=os.environ.get("HF_TOKEN"))
418
+
419
  # load from huggingface
420
  roster = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferRoster', split='train'))
421
  promptBook = pd.DataFrame(load_dataset('NYUSHPRP/ModelCofferMetadata', split='train'))
 
444
 
445
 
446
  if __name__ == '__main__':
 
447
  st.set_page_config(layout="wide")
448
 
449
  roster, promptBook, images_ds = load_hf_dataset()
data/download_script.py CHANGED
@@ -5,9 +5,9 @@ def main():
5
  promptbook = load_dataset('NYUSHPRP/ModelCofferPromptBook', split='train')
6
  print(promptbook)
7
  promptbook.save_to_disk('./promptbook')
8
-
9
- roster = load_dataset('NYUSHPRP/ModelCofferRoster', split='train')
10
- roster.save_to_disk('./roster')
11
 
12
 
13
  def load():
 
5
  promptbook = load_dataset('NYUSHPRP/ModelCofferPromptBook', split='train')
6
  print(promptbook)
7
  promptbook.save_to_disk('./promptbook')
8
+ #
9
+ # roster = load_dataset('NYUSHPRP/ModelCofferRoster', split='train')
10
+ # roster.save_to_disk('./roster')
11
 
12
 
13
  def load():
data/roster/data-00000-of-00001.arrow DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d92d1f86f02823ca64b7d88ffbb4c03a1ca8fe9990a54e37b9a1d9171782fca
3
- size 147952
 
 
 
 
data/roster/dataset_info.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "citation": "",
3
- "dataset_size": 145934,
4
- "description": "",
5
- "download_checksums": {
6
- "https://huggingface.co/datasets/NYUSHPRP/ModelCofferRoster/resolve/ca9efb0b73c3383dfb5bc9fff380b068d468bfde/data/train-00000-of-00001-0fd3ef44b360ac99.parquet": {
7
- "num_bytes": 27979,
8
- "checksum": null
9
- }
10
- },
11
- "download_size": 27979,
12
- "features": {
13
- "tag": {
14
- "dtype": "string",
15
- "_type": "Value"
16
- },
17
- "model_name": {
18
- "dtype": "string",
19
- "_type": "Value"
20
- },
21
- "model_id": {
22
- "dtype": "int64",
23
- "_type": "Value"
24
- },
25
- "modelVersion_name": {
26
- "dtype": "string",
27
- "_type": "Value"
28
- },
29
- "modelVersion_id": {
30
- "dtype": "int64",
31
- "_type": "Value"
32
- },
33
- "modelVersion_url": {
34
- "dtype": "string",
35
- "_type": "Value"
36
- },
37
- "modelVersion_trainedWords": {
38
- "dtype": "string",
39
- "_type": "Value"
40
- },
41
- "model_download_count": {
42
- "dtype": "int64",
43
- "_type": "Value"
44
- }
45
- },
46
- "homepage": "",
47
- "license": "",
48
- "size_in_bytes": 173913,
49
- "splits": {
50
- "train": {
51
- "name": "train",
52
- "num_bytes": 145934,
53
- "num_examples": 1059,
54
- "dataset_name": "parquet"
55
- }
56
- }
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/roster/state.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "_data_files": [
3
- {
4
- "filename": "data-00000-of-00001.arrow"
5
- }
6
- ],
7
- "_fingerprint": "9508df8b007debc4",
8
- "_format_columns": null,
9
- "_format_kwargs": {},
10
- "_format_type": null,
11
- "_output_all_columns": false,
12
- "_split": "train"
13
- }