Nanobit committed
Commit a1f9850
1 Parent(s): 83d2920

Fix security issue or ignore false positives

scripts/finetune.py CHANGED
@@ -136,7 +136,7 @@ def train(
 
     # load the config from the yaml file
     with open(config, encoding="utf-8") as file:
-        cfg: DictDefault = DictDefault(yaml.load(file, Loader=yaml.Loader))
+        cfg: DictDefault = DictDefault(yaml.safe_load(file))
     # if there are any options passed in the cli, if it is something that seems valid from the yaml,
     # then overwrite the value
     cfg_keys = cfg.keys()
@@ -185,7 +185,7 @@ def train(
     logging.info("check_dataset_labels...")
     check_dataset_labels(
         train_dataset.select(
-            [random.randrange(0, len(train_dataset) - 1) for i in range(5)]
+            [random.randrange(0, len(train_dataset) - 1) for _ in range(5)]  # nosec
         ),
         tokenizer,
     )
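
Two bandit findings are resolved here. yaml.load with yaml.Loader can instantiate arbitrary Python objects from tags embedded in the config file (bandit B506); yaml.safe_load only builds plain scalars, lists, and maps, which is all a training config needs. The random.randrange call merely samples a few rows to print for inspection, so the pseudo-random-generator warning (B311) is a false positive and # nosec suppresses it (the i -> _ rename just quiets an unused-variable lint). A minimal sketch of the loader difference, assuming nothing beyond a stock PyYAML install:

import yaml

# A benign config parses identically under either loader.
cfg = yaml.safe_load("sequence_len: 2048\nbase_model: llama")
assert cfg["sequence_len"] == 2048

# A malicious document abuses python/object tags; yaml.load(..., Loader=yaml.Loader)
# would execute os.system here, while safe_load refuses to construct the object.
evil = '!!python/object/apply:os.system ["echo pwned"]'
try:
    yaml.safe_load(evil)
except yaml.constructor.ConstructorError:
    pass  # safe_load rejects the tag instead of running the payload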
src/axolotl/prompt_tokenizers.py CHANGED
@@ -11,10 +11,10 @@ from transformers import PreTrainedTokenizer
 from axolotl.prompters import IGNORE_TOKEN_ID
 
 IGNORE_INDEX = -100
-LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"
-LLAMA_DEFAULT_EOS_TOKEN = "</s>"
-LLAMA_DEFAULT_BOS_TOKEN = "<s>"
-LLAMA_DEFAULT_UNK_TOKEN = "<unk>"
+LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"  # nosec
+LLAMA_DEFAULT_EOS_TOKEN = "</s>"  # nosec
+LLAMA_DEFAULT_BOS_TOKEN = "<s>"  # nosec
+LLAMA_DEFAULT_UNK_TOKEN = "<unk>"  # nosec
 
 
 class InvalidDataException(Exception):
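
Nothing changes functionally in this file: bandit's hardcoded-password check (B105) keys on names containing words like "token" and flags these tokenizer special-token constants, which are vocabulary strings rather than credentials, so each line gets a # nosec marker. For context, a hedged sketch of how such constants are typically consumed; the ensure_special_tokens helper is hypothetical, not code from this repo:

import transformers

LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"  # nosec - a vocab string, not a secret
LLAMA_DEFAULT_EOS_TOKEN = "</s>"  # nosec

def ensure_special_tokens(tokenizer: transformers.PreTrainedTokenizer):
    # Llama tokenizers ship without a pad token; add the defaults if missing.
    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({"pad_token": LLAMA_DEFAULT_PAD_TOKEN})
    if tokenizer.eos_token is None:
        tokenizer.add_special_tokens({"eos_token": LLAMA_DEFAULT_EOS_TOKEN})
    return tokenizer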
src/axolotl/utils/data.py CHANGED
@@ -40,7 +40,7 @@ def load_tokenized_prepared_datasets(
 ) -> DatasetDict:
     tokenizer_name = tokenizer.__class__.__name__
     ds_hash = str(
-        md5(
+        md5(  # nosec
             (
                 str(cfg.sequence_len)
                 + "@"
@@ -66,7 +66,7 @@ def load_tokenized_prepared_datasets(
             use_auth_token=use_auth_token,
         )
         dataset = dataset["train"]
-    except Exception:  # pylint: disable=broad-except
+    except Exception:  # pylint: disable=broad-except  # nosec
         pass
 
     if dataset:
@@ -272,7 +272,7 @@ def load_prepare_datasets(
     # see if we can go ahead and load the stacked dataset
     seed = f"@{str(cfg.seed)}" if cfg.seed else ""
     ds_hash = str(
-        md5(
+        md5(  # nosec
             (
                 str(cfg.sequence_len)
                 + "@"
@@ -304,7 +304,7 @@ def load_prepare_datasets(
             use_auth_token=use_auth_token,
         )
         dataset = dataset["train"]
-    except Exception:  # pylint: disable=broad-except
+    except Exception:  # pylint: disable=broad-except  # nosec
         pass
 
     if dataset:
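
Both annotations in this file mark false positives rather than fixes. md5 trips bandit's weak-hash check (B324), but the digest only derives a cache key for prepared datasets, so collision resistance is irrelevant; on Python 3.9+ one could alternatively pass usedforsecurity=False. The except Exception: pass blocks trip the try-except-pass check (B110), but silently falling through when no prepared dataset exists on the hub is the intended control flow. A minimal sketch of the cache-key pattern, with made-up field names standing in for the commit's actual hash input:

from hashlib import md5

def dataset_cache_key(sequence_len: int, dataset_names: list[str], seed: str = "") -> str:
    raw = str(sequence_len) + "@" + "|".join(sorted(dataset_names)) + seed
    # The digest only names a cache path; it protects nothing, so md5 is fine.
    return md5(raw.encode("utf-8")).hexdigest()  # nosec

print(dataset_cache_key(2048, ["alpaca", "sharegpt"]))  # hex string used as a cache dir name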