adams-story
/

vq-ViT-L-14-k64-d32-ema

Transformers

PyTorch

Inference Endpoints

Model card Files Files and versions Community

adams-story committed on Jul 12, 2023

Commit

65bd4c2

•

1 Parent(s): a1de053

Upload 2 files

Browse files

Files changed (2) hide show

config.json +41 -331
pytorch_model.bin +2 -2

config.json CHANGED Viewed

@@ -1,336 +1,46 @@
 {
   "architectures": [
-    "VQCLIPModel"
   ],
-  "clip_config_dict": {
-    "_commit_hash": "8d052a0f05efbaefbc9e8786ba291cfdf93e5bff",
-    "_name_or_path": "clip-vit-large-patch14/",
-    "add_cross_attention": false,
-    "architectures": [
-      "CLIPModel"
-    ],
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "initializer_factor": 1.0,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "length_penalty": 1.0,
-    "logit_scale_init_value": 2.6592,
-    "max_length": 20,
-    "min_length": 0,
-    "model_type": "clip",
-    "no_repeat_ngram_size": 0,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "prefix": null,
-    "problem_type": null,
-    "projection_dim": 768,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "text_config": {
-      "_name_or_path": "",
-      "add_cross_attention": false,
-      "architectures": null,
-      "attention_dropout": 0.0,
-      "bad_words_ids": null,
-      "begin_suppress_tokens": null,
-      "bos_token_id": 0,
-      "chunk_size_feed_forward": 0,
-      "cross_attention_hidden_size": null,
-      "decoder_start_token_id": null,
-      "diversity_penalty": 0.0,
-      "do_sample": false,
-      "dropout": 0.0,
-      "early_stopping": false,
-      "encoder_no_repeat_ngram_size": 0,
-      "eos_token_id": 2,
-      "exponential_decay_length_penalty": null,
-      "finetuning_task": null,
-      "forced_bos_token_id": null,
-      "forced_eos_token_id": null,
-      "hidden_act": "quick_gelu",
-      "hidden_size": 768,
-      "id2label": {
-        "0": "LABEL_0",
-        "1": "LABEL_1"
-      },
-      "initializer_factor": 1.0,
-      "initializer_range": 0.02,
-      "intermediate_size": 3072,
-      "is_decoder": false,
-      "is_encoder_decoder": false,
-      "label2id": {
-        "LABEL_0": 0,
-        "LABEL_1": 1
-      },
-      "layer_norm_eps": 1e-05,
-      "length_penalty": 1.0,
-      "max_length": 20,
-      "max_position_embeddings": 77,
-      "min_length": 0,
-      "model_type": "clip_text_model",
-      "no_repeat_ngram_size": 0,
-      "num_attention_heads": 12,
-      "num_beam_groups": 1,
-      "num_beams": 1,
-      "num_hidden_layers": 12,
-      "num_return_sequences": 1,
-      "output_attentions": false,
-      "output_hidden_states": false,
-      "output_scores": false,
-      "pad_token_id": 1,
-      "prefix": null,
-      "problem_type": null,
-      "projection_dim": 768,
-      "pruned_heads": {},
-      "remove_invalid_values": false,
-      "repetition_penalty": 1.0,
-      "return_dict": true,
-      "return_dict_in_generate": false,
-      "sep_token_id": null,
-      "suppress_tokens": null,
-      "task_specific_params": null,
-      "temperature": 1.0,
-      "tf_legacy_loss": false,
-      "tie_encoder_decoder": false,
-      "tie_word_embeddings": true,
-      "tokenizer_class": null,
-      "top_k": 50,
-      "top_p": 1.0,
-      "torch_dtype": null,
-      "torchscript": false,
-      "transformers_version": "4.30.1",
-      "typical_p": 1.0,
-      "use_bfloat16": false,
-      "vocab_size": 49408
-    },
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": "float32",
-    "torchscript": false,
-    "transformers_version": null,
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "vision_config": {
-      "_name_or_path": "",
-      "add_cross_attention": false,
-      "architectures": null,
-      "attention_dropout": 0.0,
-      "bad_words_ids": null,
-      "begin_suppress_tokens": null,
-      "bos_token_id": null,
-      "chunk_size_feed_forward": 0,
-      "cross_attention_hidden_size": null,
-      "decoder_start_token_id": null,
-      "diversity_penalty": 0.0,
-      "do_sample": false,
-      "dropout": 0.0,
-      "early_stopping": false,
-      "encoder_no_repeat_ngram_size": 0,
-      "eos_token_id": null,
-      "exponential_decay_length_penalty": null,
-      "finetuning_task": null,
-      "forced_bos_token_id": null,
-      "forced_eos_token_id": null,
-      "hidden_act": "quick_gelu",
-      "hidden_size": 1024,
-      "id2label": {
-        "0": "LABEL_0",
-        "1": "LABEL_1"
-      },
-      "image_size": 224,
-      "initializer_factor": 1.0,
-      "initializer_range": 0.02,
-      "intermediate_size": 4096,
-      "is_decoder": false,
-      "is_encoder_decoder": false,
-      "label2id": {
-        "LABEL_0": 0,
-        "LABEL_1": 1
-      },
-      "layer_norm_eps": 1e-05,
-      "length_penalty": 1.0,
-      "max_length": 20,
-      "min_length": 0,
-      "model_type": "clip_vision_model",
-      "no_repeat_ngram_size": 0,
-      "num_attention_heads": 16,
-      "num_beam_groups": 1,
-      "num_beams": 1,
-      "num_channels": 3,
-      "num_hidden_layers": 24,
-      "num_return_sequences": 1,
-      "output_attentions": false,
-      "output_hidden_states": false,
-      "output_scores": false,
-      "pad_token_id": null,
-      "patch_size": 14,
-      "prefix": null,
-      "problem_type": null,
-      "projection_dim": 768,
-      "pruned_heads": {},
-      "remove_invalid_values": false,
-      "repetition_penalty": 1.0,
-      "return_dict": true,
-      "return_dict_in_generate": false,
-      "sep_token_id": null,
-      "suppress_tokens": null,
-      "task_specific_params": null,
-      "temperature": 1.0,
-      "tf_legacy_loss": false,
-      "tie_encoder_decoder": false,
-      "tie_word_embeddings": true,
-      "tokenizer_class": null,
-      "top_k": 50,
-      "top_p": 1.0,
-      "torch_dtype": null,
-      "torchscript": false,
-      "transformers_version": "4.30.1",
-      "typical_p": 1.0,
-      "use_bfloat16": false
-    }
-  },
-  "model_type": "VQCLIP",
-  "text_vq_adapter_config_dict": null,
   "torch_dtype": "float32",
-  "transformers_version": "4.30.2",
-  "vision_vq_adapter_config_dict": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "clip_dim": 768,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "is_rq": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "min_length": 0,
-    "mlp_dim": 1028,
-    "mlp_hidden_dim": 512,
-    "mlp_layers": 1,
-    "model_type": "",
-    "no_repeat_ngram_size": 0,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "rq_quantize_dropout": true,
-    "rq_quantize_dropout_cutoff_index": 1,
-    "rq_quantize_dropout_multiple_of": 4,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "transformers_version": "4.30.1",
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "vq_accept_image_fmap": false,
-    "vq_affine_param": false,
-    "vq_affine_param_batch_decay": 0.99,
-    "vq_affine_param_codebook_decay": 0.9,
-    "vq_channel_last": true,
-    "vq_codebook_dim": 32,
-    "vq_codebook_size": 64,
-    "vq_commitment_use_cross_entropy_loss": false,
-    "vq_commitment_weight": 0.05,
-    "vq_decay": 0.85,
-    "vq_ema_update": true,
-    "vq_eps": 1e-05,
-    "vq_heads": 32,
-    "vq_kmeans_init": false,
-    "vq_kmeans_iters": 20,
-    "vq_learnable_codebook": false,
-    "vq_orthogonal_reg_active_codes_only": false,
-    "vq_orthogonal_reg_max_codes": null,
-    "vq_orthogonal_reg_weight": 0.0,
-    "vq_reinmax": false,
-    "vq_sample_codebook_temp": 1.0,
-    "vq_separate_codebook_per_head": true,
-    "vq_stochastic_sample_codes": true,
-    "vq_straight_through": false,
-    "vq_sync_affine_param": false,
-    "vq_sync_codebook": false,
-    "vq_sync_kmeans": true,
-    "vq_sync_update_v": 0.0,
-    "vq_threshold_ema_dead_code": 2,
-    "vq_use_cosine_sim": false
-  }
 }

 {
   "architectures": [
+    "VQAdapterModel"
   ],
+  "clip_dim": 768,
+  "codebook_lr": 10.0,
+  "is_rq": false,
+  "mlp_dim": 1028,
+  "mlp_hidden_dim": 512,
+  "mlp_layers": 1,
+  "rq_quantize_dropout": true,
+  "rq_quantize_dropout_cutoff_index": 1,
+  "rq_quantize_dropout_multiple_of": 4,
   "torch_dtype": "float32",
+  "transformers_version": "4.31.0.dev0",
+  "vq_accept_image_fmap": false,
+  "vq_affine_param": false,
+  "vq_affine_param_batch_decay": 0.99,
+  "vq_affine_param_codebook_decay": 0.9,
+  "vq_channel_last": true,
+  "vq_codebook_dim": 32,
+  "vq_codebook_size": 64,
+  "vq_commitment_use_cross_entropy_loss": false,
+  "vq_commitment_weight": 0.05,
+  "vq_decay": 0.85,
+  "vq_ema_update": true,
+  "vq_eps": 1e-05,
+  "vq_heads": 32,
+  "vq_kmeans_init": false,
+  "vq_kmeans_iters": 20,
+  "vq_learnable_codebook": false,
+  "vq_orthogonal_reg_active_codes_only": false,
+  "vq_orthogonal_reg_max_codes": null,
+  "vq_orthogonal_reg_weight": 0.0,
+  "vq_reinmax": false,
+  "vq_sample_codebook_temp": 1.0,
+  "vq_separate_codebook_per_head": true,
+  "vq_stochastic_sample_codes": true,
+  "vq_straight_through": false,
+  "vq_sync_affine_param": false,
+  "vq_sync_codebook": false,
+  "vq_sync_kmeans": true,
+  "vq_sync_update_v": 0.0,
+  "vq_threshold_ema_dead_code": 2,
+  "vq_use_cosine_sim": false
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bffff18537e4ef89636f957dd24657dee9769c3f5d0cf4a4bdb3df1e44a57a9e
-size 19485348

 version https://git-lfs.github.com/spec/v1
+oid sha256:086b610aae0ca0169970562e70b3d940a33b28f4bbf153f42dcdd1203cd5e1d7
+size 19485281