aksell committed
Commit 3de389d
1 parent: 97dc04c

Plot avg attention, not sum

Files changed (2):
  1. hexviz/attention.py +9 -10
  2. tests/test_attention.py +4 -4
hexviz/attention.py CHANGED
@@ -85,21 +85,20 @@ def get_attention(
 
     return attentions
 
-def unidirectional_sum_filtered(attention, layer, head, threshold):
+def unidirectional_avg_filtered(attention, layer, head, threshold):
     num_layers, num_heads, seq_len, _ = attention.shape
     attention_head = attention[layer, head]
-    unidirectional_sum_for_head = []
+    unidirectional_avg_for_head = []
     for i in range(seq_len):
         for j in range(i, seq_len):
             # Attention matrices for BERT models are asymmetric.
-            # Bidirectional attention is reduced to one value by adding the
-            # attention values
-            # TODO think... does this operation make sense?
+            # Bidirectional attention is represented by the average of the two values
             sum = attention_head[i, j].item() + attention_head[j, i].item()
-            if sum >= threshold:
-                unidirectional_sum_for_head.append((sum, i, j))
-    return unidirectional_sum_for_head
-
+            avg = sum / 2
+            if avg >= threshold:
+                unidirectional_avg_for_head.append((avg, i, j))
+    return unidirectional_avg_for_head
+
 @st.cache
 def get_attention_pairs(pdb_code: str, layer: int, head: int, threshold: int = 0.2, model_type: ModelType = ModelType.TAPE_BERT):
     # fetch structure
@@ -110,7 +109,7 @@ def get_attention_pairs(pdb_code: str, layer: int, head: int, threshold: int = 0
     attention_pairs = []
     for i, sequence in enumerate(sequences):
         attention = get_attention(sequence=sequence, model_type=model_type)
-        attention_unidirectional = unidirectional_sum_filtered(attention, layer, head, threshold)
+        attention_unidirectional = unidirectional_avg_filtered(attention, layer, head, threshold)
         chain = list(structure.get_chains())[i]
         for attn_value, res_1, res_2 in attention_unidirectional:
             try:
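The change keeps the same pairwise filter but compares the threshold against the mean of the two attention directions rather than their sum, so the threshold stays on the scale of individual attention weights. A minimal vectorized sketch of the same computation (the function name and the use of torch.triu_indices are illustrative, not part of the commit):

import torch

def unidirectional_avg_filtered_vectorized(attention, layer, head, threshold):
    # Hypothetical tensor-op equivalent of unidirectional_avg_filtered.
    attention_head = attention[layer, head]
    # Symmetrize: average the (i -> j) and (j -> i) attention values.
    avg = (attention_head + attention_head.T) / 2
    # Visit each unordered pair once, diagonal included (j >= i),
    # matching the loop order `for i ... for j in range(i, seq_len)`.
    i, j = torch.triu_indices(*attention_head.shape)
    values = avg[i, j]
    mask = values >= threshold
    return list(zip(values[mask].tolist(), i[mask].tolist(), j[mask].tolist()))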
tests/test_attention.py CHANGED
@@ -2,7 +2,7 @@ import torch
 from Bio.PDB.Structure import Structure
 
 from hexviz.attention import (ModelType, get_attention, get_sequences,
-                              get_structure, unidirectional_sum_filtered)
+                              get_structure, unidirectional_avg_filtered)
 
 
 def test_get_structure():
@@ -58,14 +58,14 @@ def test_get_attention_prot_bert():
     assert result is not None
     assert result.shape == torch.Size([30, 16, 3, 3])
 
-def test_get_unidirection_sum_filtered():
+def test_get_unidirection_avg_filtered():
     # 1 head, 1 layer, 4-residue attention tensor
     attention = torch.tensor([[[[1, 2, 3, 4],
                                 [2, 5, 6, 7],
                                 [3, 6, 8, 9],
                                 [4, 7, 9, 11]]]], dtype=torch.float32)
 
-    result = unidirectional_sum_filtered(attention, 0, 0, 0)
+    result = unidirectional_avg_filtered(attention, 0, 0, 0)
 
     assert result is not None
     assert len(result) == 10
@@ -74,6 +74,6 @@ def test_get_unidirection_sum_filtered():
                                 [2, 5, 6],
                                 [4, 7, 91]]]], dtype=torch.float32)
 
-    result = unidirectional_sum_filtered(attention, 0, 0, 0)
+    result = unidirectional_avg_filtered(attention, 0, 0, 0)
 
     assert len(result) == 6
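The expected lengths follow from the iteration bounds: with a threshold of 0 every pair with j >= i passes, so a sequence of length n yields n * (n + 1) / 2 tuples, which is 10 for the 4-residue tensor and 6 for the 3-residue one. A quick check against the first test case, reusing its tensor (the expected first tuple follows from averaging attention_head[0, 0] with itself):

import torch
from hexviz.attention import unidirectional_avg_filtered

attention = torch.tensor([[[[1, 2, 3, 4],
                            [2, 5, 6, 7],
                            [3, 6, 8, 9],
                            [4, 7, 9, 11]]]], dtype=torch.float32)

result = unidirectional_avg_filtered(attention, 0, 0, 0)
assert len(result) == 4 * 5 // 2  # 10 unordered pairs, diagonal included
# The tensor is symmetric, so each average equals the raw value:
# the first pair (i=0, j=0) averages 1.0 with itself.
assert result[0] == (1.0, 0, 0)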