Nanobit committed on
Commit
e6b57de
1 Parent(s): fe1f4c4

Lint tokenization

Browse files
Files changed (1) hide show
  1. src/axolotl/utils/tokenization.py +5 -2
src/axolotl/utils/tokenization.py CHANGED
@@ -1,5 +1,8 @@
1
- from termcolor import colored
 
 
2
  import logging
 
3
 
4
 
5
  def check_dataset_labels(dataset, tokenizer):
@@ -17,7 +20,7 @@ def check_example_labels(example, tokenizer):
17
  # You can compare the input_ids and labels element-wise
18
  # Remember to ignore positions with IGNORE_TOKEN_ID (if you use it) or attention_mask equal to 0
19
  colored_tokens = []
20
- for i, (input_id, label_id, mask) in enumerate(
21
  zip(input_ids, labels, attention_mask)
22
  ):
23
  decoded_input_token = tokenizer.decode(input_id)
 
1
+ """Module for tokenization utilities"""
2
+
3
+
4
  import logging
5
+ from termcolor import colored
6
 
7
 
8
  def check_dataset_labels(dataset, tokenizer):
 
20
  # You can compare the input_ids and labels element-wise
21
  # Remember to ignore positions with IGNORE_TOKEN_ID (if you use it) or attention_mask equal to 0
22
  colored_tokens = []
23
+ for _, (input_id, label_id, mask) in enumerate(
24
  zip(input_ids, labels, attention_mask)
25
  ):
26
  decoded_input_token = tokenizer.decode(input_id)