# Marian NMT training configuration: s2s (RNN) model, gn -> es.
# One key per line; key order and values are kept exactly as in the
# original dump.

# General settings: workspace, logging, seed, signal handling.
authors: false
cite: false
build-info: ""
workspace: 6500
log: /docker/home/logs/marian/log_s2s_adjusted_gn_es_s2s.log
log-level: info
log-time-zone: ""
quiet: false
quiet-translation: false
seed: 1234
check-nan: false
interpolate-env-vars: false
relative-paths: false
dump-config: ""
sigterm: save-and-exit

# Model file and architecture.
model: /docker/home/reproduce_best_models/model_s2s_adjusted_gn_es_s2s/s2s_adjusted_gn_es_s2s.npz
pretrained-model: ""
ignore-model-config: false
type: s2s
dim-vocabs:
  - 6000
  - 6000
dim-emb: 512
factors-dim-emb: 0
factors-combine: sum
lemma-dependency: ""
lemma-dim-emb: 0
dim-rnn: 1024
enc-type: bidirectional
enc-cell: gru
enc-cell-depth: 1
enc-depth: 6
dec-cell: gru
dec-cell-base-depth: 2
dec-cell-high-depth: 1
dec-depth: 6
skip: true
layer-normalization: true
right-left: false
input-types: []
tied-embeddings: false
tied-embeddings-src: false
# NOTE(review): all embeddings are tied while `vocabs` below lists two
# separate SentencePiece files — confirm this combination is intended.
tied-embeddings-all: true
output-omit-bias: false

# transformer-* and bert-* keys; `type` above is s2s.
transformer-heads: 8
transformer-no-projection: false
transformer-pool: false
transformer-dim-ffn: 2048
transformer-decoder-dim-ffn: 0
transformer-ffn-depth: 2
transformer-decoder-ffn-depth: 0
transformer-ffn-activation: swish
transformer-dim-aan: 2048
transformer-aan-depth: 2
transformer-aan-activation: swish
transformer-aan-nogate: false
transformer-decoder-autoreg: self-attention
transformer-tied-layers: []
transformer-guided-alignment-layer: last
transformer-preprocess: ""
transformer-postprocess-emb: d
transformer-postprocess: dan
transformer-postprocess-top: ""
transformer-train-position-embeddings: false
transformer-depth-scaling: false
bert-mask-symbol: "[MASK]"
bert-sep-symbol: "[SEP]"
bert-class-symbol: "[CLS]"
bert-masking-fraction: 0.15
bert-train-type-embeddings: true
bert-type-vocab-size: 2

# Dropout.
dropout-rnn: 0.2
dropout-src: 0.1
dropout-trg: 0.1
transformer-dropout: 0
transformer-dropout-attention: 0
transformer-dropout-ffn: 0

# Training objective, data and vocabularies.
cost-type: ce-sum
multi-loss-type: sum
unlikelihood-loss: false
overwrite: true
no-reload: false
train-sets:
  - /docker/home/artifacts/data/train/train.gn
  - /docker/home/artifacts/data/train/train.es
vocabs:
  - /docker/home/reproduce_best_models/pretrain_test_vocab.gnV6000_6000.spm
  - /docker/home/reproduce_best_models/pretrain_test_vocab.esV6000_6000.spm
sentencepiece-alphas: []
sentencepiece-options: ""
sentencepiece-max-lines: 2000000

# Stopping criteria, display and checkpoint frequency.
after-epochs: 20
after-batches: 0
after: 0e
disp-freq: 1000u
disp-first: 0
disp-label-counts: true
save-freq: 10000u
logical-epoch:
  - 1e
  - 0

# Corpus handling.
max-length: 187
max-length-crop: true
tsv: false
tsv-fields: 0
shuffle: data
no-restore-corpus: true
tempdir: /docker/home/libs
sqlite: ""
sqlite-drop: false

# Devices and batching.
devices:
  - 0
num-devices: 0
no-nccl: false
sharding: global
sync-freq: 200u
cpu-threads: 0
mini-batch: 64
mini-batch-words: 0
mini-batch-fit: true
mini-batch-fit-step: 10
gradient-checkpointing: false
maxi-batch: 1000
maxi-batch-sort: trg
shuffle-in-ram: false
data-threads: 8
all-caps-every: 0
english-title-case-every: 0
mini-batch-words-ref: 0
mini-batch-warmup: 0
mini-batch-track-lr: false
mini-batch-round-up: true

# Optimizer and learning-rate schedule.
optimizer: adam
optimizer-params: []
optimizer-delay: 1
sync-sgd: false
learn-rate: 0.001648045
lr-report: false
lr-decay: 0
lr-decay-strategy: epoch+stalled
lr-decay-start:
  - 10
  - 1
lr-decay-freq: 50000
lr-decay-reset-optimizer: false
lr-decay-repeat-warmup: false
lr-decay-inv-sqrt:
  - 0
lr-warmup: 0
lr-warmup-start-rate: 0
lr-warmup-cycle: false
lr-warmup-at-reload: false

# Regularisation, alignment, weighting and embeddings.
label-smoothing: 0.1
factor-weight: 1
clip-norm: 1
exponential-smoothing: 0.0001
guided-alignment: none
guided-alignment-cost: mse
guided-alignment-weight: 0.1
data-weighting: ""
data-weighting-type: sentence
embedding-vectors: []
embedding-normalization: false
embedding-fix-src: false
embedding-fix-trg: false

# Numeric precision, gradient scaling and quantisation.
precision:
  - float16
  - float32
cost-scaling:
  - 256.f
  - 1000
  - 2.f
  - 256.f
gradient-norm-average-window: 100
dynamic-gradient-scaling: []
check-gradient-nan: false
normalize-gradient: false
train-embedder-rank: []
quantize-bits: 0
quantize-optimization-steps: 0
quantize-log-based: false
quantize-biases: false

# ULR keys (ulr is false).
ulr: false
ulr-query-vectors: ""
ulr-keys-vectors: ""
ulr-trainable-transformation: false
ulr-dim-emb: 0
ulr-dropout: 0
ulr-softmax-temperature: 1

# Validation and decoding during validation.
valid-sets:
  - /docker/home/artifacts/data/validation/valid.gn
  - /docker/home/artifacts/data/validation/valid.es
# NOTE(review): very large value next to after-epochs: 20 — confirm
# that validation is meant to run this rarely (possibly never).
valid-freq: 50000000
# NOTE(review): `translation` is listed while valid-script-path below
# is empty — confirm no scoring script is expected.
valid-metrics:
  - cross-entropy
  - translation
valid-reset-stalled: false
early-stopping: 10000
early-stopping-on: first
beam-size: 12
normalize: 0
max-length-factor: 3
word-penalty: 0
allow-unk: false
n-best: false
word-scores: false
valid-mini-batch: 32
valid-max-length: 1000
valid-script-path: ""
valid-script-args: []
valid-translation-output: /docker/home/reproduce_best_models/decoded_adjusted_gn_es_s2s.txt
keep-best: false
valid-log: ""