model_class: STPatch  # NDT2 is a sub-class of STPatch

encoder:
  stitching: false
  from_pt: null
  embed_region: false

  masker:
    force_active: true
    mode: random_token
    ratio: 0.3                # ratio of the data to predict
    zero_ratio: 1.0           # of the data to predict, ratio that is zeroed out
    random_ratio: 1.0         # of the non-zeroed data, ratio that is randomly replaced
    expand_prob: 0.0          # probability of expanding the mask in "temporal" mode
    max_timespan: 1           # max span of the mask if expanded
    channels: null            # neurons to mask in "co-smoothing" mode
    timesteps: null           # time steps to mask in "forward-pred" mode
    mask_regions: ['all']     # brain regions to mask in "inter-region" mode
    target_regions: ['all']   # brain regions to predict in "intra-region" mode
    n_mask_regions: 1         # number of regions to choose from mask_regions or target_regions

  patcher:
    active: true
    time_stride: 0

  # context available for each timestep
  context:
    forward: -1
    backward: -1

  embedder:
    n_neurons: 1280
    n_timesteps: 100
    max_time_F: 1
    max_space_F: 128
    max_spikes: 0             # max number of spikes in a single time bin
    mode: linear              # linear / embed / identity
    mult: 2                   # embedding multiplier; hidden_size = n_channels * mult
    act: softsign             # activation for the embedding layers
    scale: 1                  # scale the embedding by multiplying by this number
    bias: true                # use bias in the embedding layer
    dropout: 0.2              # dropout in the embedding layer
    use_prompt: false
    use_session: true

  transformer:
    n_layers: 5               # number of transformer layers
    hidden_size: 128          # hidden size of the transformer
    n_heads: 8                # number of attention heads
    attention_bias: true      # learn bias in the attention layers
    act: gelu                 # activation function in the MLP layers
    inter_size: 512           # intermediate dimension of the MLP layers
    mlp_bias: true            # learn bias in the MLP layers
    dropout: 0.4              # dropout in the transformer layers
    fixup_init: true          # modify the weight initialization

decoder:
  from_pt: null
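
# Worked example (illustrative numbers, not part of the config): with the
# embedder in linear mode and mult: 2, the comment above implies
# hidden_size = n_channels * mult, so e.g. 128 input channels would give a
# 256-dimensional patch embedding. For the masker, ratio: 0.3 marks 30% of
# tokens as prediction targets; zero_ratio: 1.0 then zeroes out all of those
# targets, and random_ratio applies only to the portion left non-zeroed
# (none here, since zero_ratio is 1.0).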