import os

import gradio as gr

from pq3d.inference import inference

MESH_DIR = 'assets/mesh'
MESH_NAMES = sorted([os.path.splitext(fname)[0] for fname in os.listdir(MESH_DIR)])


def change_scene(dropdown_scene: str):
    # Reset the displayed 3D scene and the scan id for the newly selected scene.
    return os.path.join(MESH_DIR, f'{dropdown_scene}.glb'), dropdown_scene


with gr.Blocks(title='PQ3D Demo') as demo:
    gr.HTML(value="<h2 align='center'>Unifying 3D Vision-Language Understanding via Promptable Queries</h2>")
") #gr.HTML(value="
") # gr.HTML(value="Teaser") #gr.HTML(value="

arXiv | Project Page | Code

") #gr.HTML(value="

LEO: an embodied generalist agent capable of perceiving, grounding, reasoning, planning, and acting in 3D world.

") with gr.Row(): with gr.Column(scale=5): dropdown_scene = gr.Dropdown( choices=MESH_NAMES, value='scene0050_00', interactive=True, label='Select a 3D scene', ) model_3d = gr.Model3D( value=os.path.join(MESH_DIR, f'scene0050_00.glb'), clear_color=[0.0, 0.0, 0.0, 0.0], label='3D Scene', camera_position=(80, 100, 6), height=659, ) gr.HTML( """
👆 SCROLL and DRAG on the 3D Scene to zoom in/out and rotate. Press CTRL and DRAG to pan.
""" ) scan_id = gr.Text("scene0050_00", label='scan_id') dropdown_scene.change( fn=change_scene, inputs=[dropdown_scene], outputs=[model_3d, scan_id], queue=False ) def inference_wrapper(text, scan_id): inst_id, response = inference(scan_id, text) return f"assets/mask/{scan_id}/{scan_id}_obj_{inst_id}.glb", response gr.Interface( fn=inference_wrapper, inputs=["text", scan_id], outputs=[gr.Model3D( clear_color=[0.0, 0.0, 0.0, 0.0], camera_position=(80, 100, 6), label="3D Model"), "text"], examples=[ ["armchair", "scene0050_00"], ["Sofa", "scene0050_00"], ["left computer on the desk", "scene0050_00"] ], title="Input text, Output 3D Mask, Red denotes predicted object" ) demo.queue().launch(share=True, allowed_paths=['assets'])