{
  "_class_name": "UNetMotionModel",
  "_diffusers_version": "0.29.0",
  "act_fn": "silu",
  "addition_embed_type": null,
  "addition_time_embed_dim": null,
  "block_out_channels": [
    64,
    128,
    256
  ],
  "cross_attention_dim": 100,
  "down_block_types": [
    "DownBlockMotion",
    "DownBlockMotion",
    "CrossAttnDownBlockMotion"
  ],
  "downsample_padding": 1,
  "encoder_hid_dim": null,
  "encoder_hid_dim_type": null,
  "in_channels": 1,
  "layers_per_block": 1,
  "mid_block_scale_factor": 1,
  "motion_max_seq_length": 20,
  "motion_num_attention_heads": 8,
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "num_attention_heads": 8,
  "out_channels": 1,
  "projection_class_embeddings_input_dim": null,
  "reverse_transformer_layers_per_block": null,
  "sample_size": 32,
  "time_cond_proj_dim": null,
  "transformer_layers_per_block": 1,
  "up_block_types": [
    "UpBlockMotion",
    "UpBlockMotion",
    "CrossAttnUpBlockMotion"
  ],
  "use_linear_projection": false,
  "use_motion_mid_block": true
}