liminghong committed on
Commit
b498c31
1 Parent(s): e228d76
Files changed (2)
  1. bert_layers.py +5 -3
  2. config.json +1 -1
bert_layers.py CHANGED
@@ -826,12 +826,13 @@ class BertForSequenceClassification(BertPreTrainedModel):
         self.num_labels = config.num_labels
         self.config = config
 
-        self.bert = BertModel(config)
+        self.bert = BertModel(config, add_pooling_layer=False)
+        self.pooler = nn.Linear(config.hidden_size, 1)
         classifier_dropout = (config.classifier_dropout
                               if config.classifier_dropout is not None else
                               config.hidden_dropout_prob)
         self.dropout = nn.Dropout(classifier_dropout)
-        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+        self.classifier = nn.Linear(10, config.num_labels)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -871,7 +872,8 @@ class BertForSequenceClassification(BertPreTrainedModel):
             return_dict=return_dict,
         )
 
-        pooled_output = outputs[1]
+        encoder_output = outputs[0]
+        pooled_output = self.pooler(encoder_output).squeeze(-1)[:, :10]
 
         pooled_output = self.dropout(pooled_output)
         logits = self.classifier(pooled_output)
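
For readers skimming the diff: the commit drops BertModel's built-in pooler and instead scores every token position with a learned nn.Linear(hidden_size, 1), keeping only the scores of the first 10 tokens as the classifier input. A minimal sketch of the resulting shape flow, using dummy tensors in place of the real encoder output (hidden_size=768 matches config.json; batch size 4 and sequence length 512 are illustrative assumptions):

import torch
import torch.nn as nn

hidden_size, num_labels = 768, 2  # num_labels is an assumed example value
pooler = nn.Linear(hidden_size, 1)       # per-token score
classifier = nn.Linear(10, num_labels)   # reads scores of the first 10 tokens

encoder_output = torch.randn(4, 512, hidden_size)            # (batch, seq_len, hidden)
pooled_output = pooler(encoder_output).squeeze(-1)[:, :10]   # (batch, 10)
logits = classifier(pooled_output)                           # (batch, num_labels)
print(logits.shape)  # torch.Size([4, 2])

Note that the hard-coded 10 ties the head to inputs of at least 10 tokens; shorter sequences would make the slice narrower than the classifier's expected input width.
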
config.json CHANGED
@@ -1 +1 @@
-{"_name_or_path": "zhihan1996/DNABERT-2-117M", "alibi_starting_size": 512, "architectures": ["BertForMaskedLM"], "attention_probs_dropout_prob": 0.0, "auto_map": {"AutoConfig": "configuration_bert.BertConfig", "AutoModel": "bert_layers.BertModel", "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM", "AutoModelForSequenceClassification": "bert_layers.BertForSequenceClassification"}, "classifier_dropout": null, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "layer_norm_eps": 1e-12, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_hidden_layers": 12, "position_embedding_type": "absolute", "torch_dtype": "float32", "transformers_version": "4.28.0", "type_vocab_size": 2, "use_cache": true, "vocab_size": 4096}
+{"_name_or_path": "zhihan1996/DNABERT-2-117M", "alibi_starting_size": 512, "architectures": ["BertForMaskedLM"], "attention_probs_dropout_prob": 0.0, "auto_map": {"AutoConfig": "configuration_bert.BertConfig", "AutoModel": "bert_layers.BertModel", "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM", "AutoModelForSequenceClassification": "bert_layers.BertForSequenceClassification"}, "classifier_dropout": 0.01, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "layer_norm_eps": 1e-12, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_hidden_layers": 12, "position_embedding_type": "absolute", "torch_dtype": "float32", "transformers_version": "4.28.0", "type_vocab_size": 2, "use_cache": true, "vocab_size": 4096}