IlyasMoutawwakil committed
Commit 7bb9bae
Parent: 04d9934
Files changed (2)
  1. app.py +33 -20
  2. run.py +7 -5
app.py CHANGED
@@ -1,18 +1,24 @@
 import os
+import subprocess
+import importlib.util
 
-nvidia_available = os.system("nvidia-smi") == 0
-if nvidia_available:
-    os.system(
-        "pip install optimum-benchmark[onnxruntime-gpu,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
-    )
-    os.system("pip uninstall onnxruntime onnxruntime-gpu -y")
-    os.system("pip install onnxruntime-gpu")
+NVIDIA_AVAILABLE = subprocess.check_output("nvidia-smi").decode("utf-8").strip() != ""
+
+if NVIDIA_AVAILABLE:
     DEVICES = ["cpu", "cuda"]
+    if importlib.util.find_spec("optimum_benchmark") is None:
+        os.system(
+            "pip install optimum-benchmark[onnxruntime-gpu,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
+        )
+        os.system("pip uninstall onnxruntime onnxruntime-gpu -y")
+        os.system("pip install onnxruntime-gpu")
 else:
-    os.system(
-        "pip install optimum-benchmark[onnxruntime,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
-    )
     DEVICES = ["cpu"]
+    if importlib.util.find_spec("optimum_benchmark") is None:
+        os.system(
+            "pip install optimum-benchmark[onnxruntime,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
+        )
+
 
 BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
 BENCHMARKS = ["inference", "training"]
@@ -48,7 +54,8 @@ with gr.Blocks() as demo:
         "Zero code Gradio interface of <a href='https://github.com/huggingface/optimum-benchmark.git'>Optimum-Benchmark</a><br>"
         "</h3>"
         "<p style='text-align: center'>"
-        "Note: Duplicate the space and change the hardware settings to an Nvidia machine to target CUDA devices."
+        "Note: <a href='https://huggingface.co/spaces/optimum/optimum-benchmark-ui?duplicate=true'>Duplicate this space</a> and change its hardware to enable CUDA device<br>"
+        "or <a href='https://huggingface.co/spaces/optimum/optimum-benchmark-ui?docker=true'>Run with Docker</a> locally to target your own hardware."
         "</p>"
     )
 
@@ -71,7 +78,7 @@ with gr.Blocks() as demo:
     )
     experiment = gr.Textbox(
         label="experiment_name",
-        value=f"awesome-experiment-{random.randint(0, 1000)}",
+        value=f"awesome-experiment-{random.randint(0, 100000)}",
         info="Name of the experiment. Will be used to create a folder where results are stored.",
     )
     model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)
@@ -87,13 +94,13 @@ with gr.Blocks() as demo:
     )
 
     with gr.Row() as backend_configs:
-        with gr.Accordion(label="Pytorch Config", open=False, visible=True):
+        with gr.Accordion(label="backend options", open=False, visible=True):
             pytorch_config = get_pytorch_config()
-        with gr.Accordion(label="OnnxRunTime Config", open=False, visible=False):
+        with gr.Accordion(label="backend config", open=False, visible=False):
             onnxruntime_config = get_onnxruntime_config()
-        with gr.Accordion(label="OpenVINO Config", open=False, visible=False):
+        with gr.Accordion(label="backend config", open=False, visible=False):
             openvino_config = get_openvino_config()
-        with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
+        with gr.Accordion(label="backend config", open=False, visible=False):
             neural_compressor_config = get_neural_compressor_config()
 
     # hide backend configs based on backend
@@ -113,9 +120,9 @@ with gr.Blocks() as demo:
     )
 
     with gr.Row() as benchmark_configs:
-        with gr.Accordion(label="Inference Config", open=False, visible=True):
+        with gr.Accordion(label="benchmark Config", open=False, visible=True):
             inference_config = get_inference_config()
-        with gr.Accordion(label="Training Config", open=False, visible=False):
+        with gr.Accordion(label="benchmark Config", open=False, visible=False):
             training_config = get_training_config()
 
     # hide benchmark configs based on benchmark
@@ -127,8 +134,8 @@ with gr.Blocks() as demo:
 
     baseline = gr.Checkbox(
         value=False,
-        label="Compare to Baseline",
-        info="If checked, will run two experiments: one with the given configuration, and another with a a baseline pytorch configuration.",
+        label="compare_to_baseline",
+        info="Check this box to compare your chosen configuration to the baseline configuration.",
    )
 
     button = gr.Button(value="Run Benchmark", variant="primary")
@@ -156,5 +163,11 @@ with gr.Blocks() as demo:
         outputs=[html_output, button, table_output],
         queue=True,
     )
+    button.click(
+        fn=lambda: f"awesome-experiment-{random.randint(0, 100000)}",
+        inputs=[],
+        outputs=experiment,
+        queue=True,
+    )
 
 demo.queue().launch()
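Side note, not part of the commit: unlike `os.system`, `subprocess.check_output("nvidia-smi")` raises `FileNotFoundError` when the binary is missing and `CalledProcessError` when it exits non-zero, so the new `NVIDIA_AVAILABLE` probe assumes `nvidia-smi` is present. A minimal sketch of a probe that falls back to CPU instead of raising (the `has_nvidia_gpu` helper is hypothetical, not from the Space):

import shutil
import subprocess


def has_nvidia_gpu() -> bool:
    """Return True only if nvidia-smi is installed and lists at least one GPU."""
    if shutil.which("nvidia-smi") is None:
        # driver CLI not on PATH: no CUDA device to target
        return False
    try:
        output = subprocess.check_output(["nvidia-smi", "-L"], text=True)
    except (subprocess.CalledProcessError, OSError):
        # nvidia-smi exists but could not query the driver
        return False
    return output.strip() != ""


DEVICES = ["cpu", "cuda"] if has_nvidia_gpu() else ["cpu"]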
run.py CHANGED
@@ -8,13 +8,12 @@ ansi2html_converter = Ansi2HTMLConverter(inline=True)
 
 def run_benchmark(kwargs):
     for key, value in kwargs.copy().items():
-        if key.label == "Compare to Baseline":
+        if key.label == "compare_to_baseline":
             baseline = value
             kwargs.pop(key)
         elif key.label == "experiment_name":
             experiment_name = value
             kwargs.pop(key)
-
         elif key.label == "model":
             model = value
             kwargs.pop(key)
@@ -45,7 +44,7 @@ def run_benchmark(kwargs):
             f"model={model}",
             f"device={device}",
             f"benchmark={benchmark}",
-            f"experiment_name=baseline",
+            f"experiment_name=baseline_{experiment_name}",
         ]
         for component, value in kwargs.items():
             if f"{benchmark}." in component.label:
@@ -93,7 +92,7 @@ def run_benchmark(kwargs):
         return
 
     if baseline:
-        baseline_table = pd.read_csv(f"runs/baseline/{benchmark}_results.csv", index_col=0)
+        baseline_table = pd.read_csv(f"runs/baseline_{experiment_name}/{benchmark}_results.csv", index_col=0)
         table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
         # concat tables
         table = pd.concat([baseline_table, table], axis=0)
@@ -121,6 +120,9 @@ def run_experiment(args, html_text=""):
 
     curr_ansi_text = ""
     for ansi_line in iter(process.stdout.readline, ""):
+        if process.returncode is not None and process.returncode != 0:
+            break
+
         # stream process output to stdout
         print(ansi_line, end="")
         # skip torch.distributed.nn.jit.instantiator messages
@@ -164,7 +166,7 @@ def postprocess_table(table, experiment_name):
         table["forward.peak_memory.reduction(%)"] = (
             table["forward.peak_memory(MB)"] / table["forward.peak_memory(MB)"].iloc[0] - 1
         ) * 100
-        table["forward.peak_memory.reduction(%)"] = table["forward.peak_memory.savings(%)"].round(2)
+        table["forward.peak_memory.reduction(%)"] = table["forward.peak_memory.reduction(%)"].round(2)
 
     if "generate.latency(s)" in table.columns:
         table["generate.latency.reduction(%)"] = (