DongfuJiang committed on
Commit
84a5d01
1 Parent(s): ec1da13

update

update

Files changed (2) hide show
  1. .gitignore +2 -0
  2. app.py +20 -6
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ /__pycache__
2
+ /gradio_cached_examples
app.py CHANGED
@@ -6,8 +6,7 @@ from typing import List
6
  import utils
7
 
8
 
9
- DESCRIPTIONS = """# 🐯TIGERScore
10
-
11
  We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.
12
 
13
  ### [**Website**](https://tiger-ai-lab.github.io/TIGERScore/) [**Paper**](https://arxiv.org/abs/2310.00752) [**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) [**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B-V1.0) [**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B-V1.0)
@@ -37,15 +36,22 @@ def tigerscore(task, input_context, generation_instruction, hypo_output, max_new
37
  def get_examples(task, inst_textbox, input_textbox, hypo_output_textbox):
38
  return gr.Dropdown.update(value=task), inst_textbox, input_textbox, hypo_output_textbox
39
 
 
 
 
40
  ## initialize the model
41
  print("Loading TIGERScore model...")
42
  utils.load_tigerscore("7b")
43
 
44
  with gr.Blocks(theme='gradio/soft') as demo:
45
- gr.Markdown(DESCRIPTIONS)
46
- gr.Markdown("## TIGERScore Inputs")
 
 
 
47
 
48
- tasks_dropdown = gr.Dropdown(label="Task", choices=utils.tasks + ["other"], value="translation", show_label=True)
 
49
  inst_textbox = gr.Textbox(lines=1, label="Instruction", placeholder="Enter instruction here", show_label=True)
50
  input_textbox = gr.Textbox(lines=4, label="Input Context", placeholder="Enter input context here", show_label=True)
51
  hypo_output_textbox = gr.Textbox(lines=4, label="Hypothesis Output", placeholder="Enter hypothesis output to be evaluated here", show_label=True)
@@ -86,6 +92,12 @@ with gr.Blocks(theme='gradio/soft') as demo:
86
  inputs=[tasks_dropdown, input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
87
  outputs=evaluation_output_textbox,
88
  )
 
 
 
 
 
 
89
 
90
  batch_examples = gr.Examples(
91
  examples=EXAMPLES,
@@ -97,11 +109,13 @@ with gr.Blocks(theme='gradio/soft') as demo:
97
  )
98
 
99
  citations = gr.Markdown("""## Citation
 
100
  @article{jiang2023TIGERScore,
101
  title={TIGERScore: Towards Building Explainable Metric for All Text Generation Tasks},
102
  author={Dongfu Jiang, Yishan Li, Ge Zhang, Wenhao Huang, Bill Yuchen Lin, Wenhu Chen},
103
  journal={arXiv preprint arXiv:2310.00752},
104
  year={2023}
105
- }""")
 
106
 
107
  demo.queue(max_size=20).launch()
 
6
  import utils
7
 
8
 
9
+ DESCRIPTIONS = """
 
10
  We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.
11
 
12
  ### [**Website**](https://tiger-ai-lab.github.io/TIGERScore/) [**Paper**](https://arxiv.org/abs/2310.00752) [**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) [**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B-V1.0) [**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B-V1.0)
 
36
  def get_examples(task, inst_textbox, input_textbox, hypo_output_textbox):
37
  return gr.Dropdown.update(value=task), inst_textbox, input_textbox, hypo_output_textbox
38
 
39
+ def clear_all(task, inst_textbox, input_textbox, hypo_output_textbox):
40
+ return gr.Dropdown.update(value=task), "", "", ""
41
+
42
  ## initialize the model
43
  print("Loading TIGERScore model...")
44
  utils.load_tigerscore("7b")
45
 
46
  with gr.Blocks(theme='gradio/soft') as demo:
47
+
48
+ gr.Markdown("## 🐯 TIGERScore Demo")
49
+ with gr.Row():
50
+ gr.Markdown(DESCRIPTIONS)
51
+ gr.Image("https://jdf-prog.github.io/assets/img/publication_preview/tigerscore_preview.png")
52
 
53
+ gr.Markdown("## TIGERScore Inputs")
54
+ tasks_dropdown = gr.Dropdown(label="Task", choices=utils.tasks, value="translation", show_label=True, allow_custom_value=True)
55
  inst_textbox = gr.Textbox(lines=1, label="Instruction", placeholder="Enter instruction here", show_label=True)
56
  input_textbox = gr.Textbox(lines=4, label="Input Context", placeholder="Enter input context here", show_label=True)
57
  hypo_output_textbox = gr.Textbox(lines=4, label="Hypothesis Output", placeholder="Enter hypothesis output to be evaluated here", show_label=True)
 
92
  inputs=[tasks_dropdown, input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
93
  outputs=evaluation_output_textbox,
94
  )
95
+
96
+ clear_button.click(
97
+ fn=clear_all,
98
+ inputs=[tasks_dropdown, inst_textbox, input_textbox, hypo_output_textbox],
99
+ outputs=[tasks_dropdown, inst_textbox, input_textbox, hypo_output_textbox],
100
+ )
101
 
102
  batch_examples = gr.Examples(
103
  examples=EXAMPLES,
 
109
  )
110
 
111
  citations = gr.Markdown("""## Citation
112
+ ```txt
113
  @article{jiang2023TIGERScore,
114
  title={TIGERScore: Towards Building Explainable Metric for All Text Generation Tasks},
115
  author={Dongfu Jiang, Yishan Li, Ge Zhang, Wenhao Huang, Bill Yuchen Lin, Wenhu Chen},
116
  journal={arXiv preprint arXiv:2310.00752},
117
  year={2023}
118
+ }
119
+ ```""")
120
 
121
  demo.queue(max_size=20).launch()