6,410 1 year ago

NSFW - Quite a lewd model based on either the Llama 3.1 instruct model or the Mistral (Nemo) instruct model. Created by Neversleep. The 8B, 12B, 70B and 123B versions are available.

tools 8b 12b 70b 123b
c09a994788df · 130GB
    Metadata
  • general.architecture
    llama
  • general.file_type
    Q8_0
  • llama.attention.head_count
    96
  • llama.attention.head_count_kv
    8
  • llama.attention.key_length
    128
  • llama.attention.layer_norm_rms_epsilon
    1e-05
  • llama.attention.value_length
    128
  • llama.block_count
    88
  • llama.context_length
    131072
  • llama.embedding_length
    12288
  • llama.feed_forward_length
    28672
  • llama.rope.dimension_count
    128
  • llama.rope.freq_base
    1e+06
  • llama.vocab_size
    32769
  • tokenizer.ggml.add_bos_token
    true
  • tokenizer.ggml.add_eos_token
    false
  • tokenizer.ggml.add_space_prefix
    false
  • tokenizer.ggml.bos_token_id
    1
  • tokenizer.ggml.eos_token_id
    2
  • tokenizer.ggml.model
    llama
  • tokenizer.ggml.padding_token_id
    32768
  • tokenizer.ggml.pre
    default
  • tokenizer.ggml.scores
    [-1000, -1000, -1000, -1000, -1000, ...]
  • tokenizer.ggml.token_type
    [3, 3, 3, 4, 4, ...]
  • tokenizer.ggml.tokens
    [<unk>, <s>, </s>, [INST], [/INST], ...]
  • tokenizer.ggml.unknown_token_id
    0
  • mradermacher.convert_type
    hf
  • mradermacher.quantize_version
    2
  • mradermacher.quantized_at
    2024-07-27T16:44:55+02:00
  • mradermacher.quantized_by
    mradermacher
  • mradermacher.quantized_on
    db1
  • Tensor
  • token_embd.weight
    Q8_0
    [12288, 32769]
  • blk.0
  • blk.0.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.0.attn_norm.weight
    F32
    [12288]
  • blk.0.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.0.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.0.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.0.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.0.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.0.ffn_norm.weight
    F32
    [12288]
  • blk.0.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.1
  • blk.1.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.1.attn_norm.weight
    F32
    [12288]
  • blk.1.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.1.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.1.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.1.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.1.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.1.ffn_norm.weight
    F32
    [12288]
  • blk.1.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.2
  • blk.2.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.2.attn_norm.weight
    F32
    [12288]
  • blk.2.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.2.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.2.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.2.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.2.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.2.ffn_norm.weight
    F32
    [12288]
  • blk.2.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.3
  • blk.3.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.3.attn_norm.weight
    F32
    [12288]
  • blk.3.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.3.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.3.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.3.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.3.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.3.ffn_norm.weight
    F32
    [12288]
  • blk.3.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.4
  • blk.4.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.4.attn_norm.weight
    F32
    [12288]
  • blk.4.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.4.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.4.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.4.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.4.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.4.ffn_norm.weight
    F32
    [12288]
  • blk.4.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.5
  • blk.5.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.5.attn_norm.weight
    F32
    [12288]
  • blk.5.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.5.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.5.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.5.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.5.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.5.ffn_norm.weight
    F32
    [12288]
  • blk.5.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.6
  • blk.6.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.6.attn_norm.weight
    F32
    [12288]
  • blk.6.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.6.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.6.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.6.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.6.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.6.ffn_norm.weight
    F32
    [12288]
  • blk.6.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.7
  • blk.7.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.7.attn_norm.weight
    F32
    [12288]
  • blk.7.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.7.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.7.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.7.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.7.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.7.ffn_norm.weight
    F32
    [12288]
  • blk.7.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.8
  • blk.8.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.8.attn_norm.weight
    F32
    [12288]
  • blk.8.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.8.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.8.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.8.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.8.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.8.ffn_norm.weight
    F32
    [12288]
  • blk.8.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.9
  • blk.9.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.9.attn_norm.weight
    F32
    [12288]
  • blk.9.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.9.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.9.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.9.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.9.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.9.ffn_norm.weight
    F32
    [12288]
  • blk.9.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.10
  • blk.10.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.10.attn_norm.weight
    F32
    [12288]
  • blk.10.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.10.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.10.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.10.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.10.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.10.ffn_norm.weight
    F32
    [12288]
  • blk.10.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.11
  • blk.11.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.11.attn_norm.weight
    F32
    [12288]
  • blk.11.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.11.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.11.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.11.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.11.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.11.ffn_norm.weight
    F32
    [12288]
  • blk.11.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.12
  • blk.12.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.12.attn_norm.weight
    F32
    [12288]
  • blk.12.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.12.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.12.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.12.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.12.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.12.ffn_norm.weight
    F32
    [12288]
  • blk.12.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.13
  • blk.13.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.13.attn_norm.weight
    F32
    [12288]
  • blk.13.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.13.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.13.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.13.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.13.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.13.ffn_norm.weight
    F32
    [12288]
  • blk.13.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.14
  • blk.14.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.14.attn_norm.weight
    F32
    [12288]
  • blk.14.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.14.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.14.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.14.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.14.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.14.ffn_norm.weight
    F32
    [12288]
  • blk.14.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.15
  • blk.15.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.15.attn_norm.weight
    F32
    [12288]
  • blk.15.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.15.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.15.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.15.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.15.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.15.ffn_norm.weight
    F32
    [12288]
  • blk.15.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.16
  • blk.16.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.16.attn_norm.weight
    F32
    [12288]
  • blk.16.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.16.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.16.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.16.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.16.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.16.ffn_norm.weight
    F32
    [12288]
  • blk.16.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.17
  • blk.17.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.17.attn_norm.weight
    F32
    [12288]
  • blk.17.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.17.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.17.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.17.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.17.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.17.ffn_norm.weight
    F32
    [12288]
  • blk.17.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.18
  • blk.18.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.18.attn_norm.weight
    F32
    [12288]
  • blk.18.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.18.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.18.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.18.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.18.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.18.ffn_norm.weight
    F32
    [12288]
  • blk.18.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.19
  • blk.19.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.19.attn_norm.weight
    F32
    [12288]
  • blk.19.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.19.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.19.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.19.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.19.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.19.ffn_norm.weight
    F32
    [12288]
  • blk.19.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.20
  • blk.20.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.20.attn_norm.weight
    F32
    [12288]
  • blk.20.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.20.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.20.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.20.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.20.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.20.ffn_norm.weight
    F32
    [12288]
  • blk.20.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.21
  • blk.21.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.21.attn_norm.weight
    F32
    [12288]
  • blk.21.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.21.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.21.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.21.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.21.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.21.ffn_norm.weight
    F32
    [12288]
  • blk.21.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.22
  • blk.22.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.22.attn_norm.weight
    F32
    [12288]
  • blk.22.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.22.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.22.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.22.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.22.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.22.ffn_norm.weight
    F32
    [12288]
  • blk.22.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.23
  • blk.23.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.23.attn_norm.weight
    F32
    [12288]
  • blk.23.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.23.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.23.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.23.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.23.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.23.ffn_norm.weight
    F32
    [12288]
  • blk.23.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.24
  • blk.24.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.24.attn_norm.weight
    F32
    [12288]
  • blk.24.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.24.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.24.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.24.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.24.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.24.ffn_norm.weight
    F32
    [12288]
  • blk.24.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.25
  • blk.25.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.25.attn_norm.weight
    F32
    [12288]
  • blk.25.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.25.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.25.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.25.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.25.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.25.ffn_norm.weight
    F32
    [12288]
  • blk.25.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.26
  • blk.26.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.26.attn_norm.weight
    F32
    [12288]
  • blk.26.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.26.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.26.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.26.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.26.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.26.ffn_norm.weight
    F32
    [12288]
  • blk.26.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.27
  • blk.27.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.27.attn_norm.weight
    F32
    [12288]
  • blk.27.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.27.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.27.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.27.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.27.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.27.ffn_norm.weight
    F32
    [12288]
  • blk.27.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.28
  • blk.28.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.28.attn_norm.weight
    F32
    [12288]
  • blk.28.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.28.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.28.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.28.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.28.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.28.ffn_norm.weight
    F32
    [12288]
  • blk.28.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.29
  • blk.29.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.29.attn_norm.weight
    F32
    [12288]
  • blk.29.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.29.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.29.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.29.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.29.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.29.ffn_norm.weight
    F32
    [12288]
  • blk.29.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.30
  • blk.30.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.30.attn_norm.weight
    F32
    [12288]
  • blk.30.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.30.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.30.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.30.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.30.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.30.ffn_norm.weight
    F32
    [12288]
  • blk.30.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.31
  • blk.31.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.31.attn_norm.weight
    F32
    [12288]
  • blk.31.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.31.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.31.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.31.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.31.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.31.ffn_norm.weight
    F32
    [12288]
  • blk.31.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.32
  • blk.32.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.32.attn_norm.weight
    F32
    [12288]
  • blk.32.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.32.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.32.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.32.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.32.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.32.ffn_norm.weight
    F32
    [12288]
  • blk.32.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.33
  • blk.33.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.33.attn_norm.weight
    F32
    [12288]
  • blk.33.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.33.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.33.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.33.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.33.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.33.ffn_norm.weight
    F32
    [12288]
  • blk.33.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.34
  • blk.34.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.34.attn_norm.weight
    F32
    [12288]
  • blk.34.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.34.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.34.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.34.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.34.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.34.ffn_norm.weight
    F32
    [12288]
  • blk.34.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.35
  • blk.35.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.35.attn_norm.weight
    F32
    [12288]
  • blk.35.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.35.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.35.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.35.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.35.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.35.ffn_norm.weight
    F32
    [12288]
  • blk.35.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.36
  • blk.36.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.36.attn_norm.weight
    F32
    [12288]
  • blk.36.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.36.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.36.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.36.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.36.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.36.ffn_norm.weight
    F32
    [12288]
  • blk.36.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.37
  • blk.37.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.37.attn_norm.weight
    F32
    [12288]
  • blk.37.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.37.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.37.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.37.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.37.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.37.ffn_norm.weight
    F32
    [12288]
  • blk.37.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.38
  • blk.38.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.38.attn_norm.weight
    F32
    [12288]
  • blk.38.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.38.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.38.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.38.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.38.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.38.ffn_norm.weight
    F32
    [12288]
  • blk.38.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.39
  • blk.39.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.39.attn_norm.weight
    F32
    [12288]
  • blk.39.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.39.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.39.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.39.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.39.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.39.ffn_norm.weight
    F32
    [12288]
  • blk.39.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.40
  • blk.40.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.40.attn_norm.weight
    F32
    [12288]
  • blk.40.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.40.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.40.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.40.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.40.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.40.ffn_norm.weight
    F32
    [12288]
  • blk.40.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.41
  • blk.41.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.41.attn_norm.weight
    F32
    [12288]
  • blk.41.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.41.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.41.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.41.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.41.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.41.ffn_norm.weight
    F32
    [12288]
  • blk.41.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.42
  • blk.42.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.42.attn_norm.weight
    F32
    [12288]
  • blk.42.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.42.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.42.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.42.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.42.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.42.ffn_norm.weight
    F32
    [12288]
  • blk.42.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.43
  • blk.43.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.43.attn_norm.weight
    F32
    [12288]
  • blk.43.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.43.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.43.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.43.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.43.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.43.ffn_norm.weight
    F32
    [12288]
  • blk.43.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.44
  • blk.44.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.44.attn_norm.weight
    F32
    [12288]
  • blk.44.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.44.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.44.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.44.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.44.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.44.ffn_norm.weight
    F32
    [12288]
  • blk.44.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.45
  • blk.45.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.45.attn_norm.weight
    F32
    [12288]
  • blk.45.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.45.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.45.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.45.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.45.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.45.ffn_norm.weight
    F32
    [12288]
  • blk.45.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.46
  • blk.46.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.46.attn_norm.weight
    F32
    [12288]
  • blk.46.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.46.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.46.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.46.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.46.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.46.ffn_norm.weight
    F32
    [12288]
  • blk.46.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.47
  • blk.47.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.47.attn_norm.weight
    F32
    [12288]
  • blk.47.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.47.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.47.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.47.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.47.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.47.ffn_norm.weight
    F32
    [12288]
  • blk.47.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.48
  • blk.48.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.48.attn_norm.weight
    F32
    [12288]
  • blk.48.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.48.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.48.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.48.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.48.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.48.ffn_norm.weight
    F32
    [12288]
  • blk.48.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.49
  • blk.49.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.49.attn_norm.weight
    F32
    [12288]
  • blk.49.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.49.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.49.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.49.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.49.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.49.ffn_norm.weight
    F32
    [12288]
  • blk.49.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.50
  • blk.50.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.50.attn_norm.weight
    F32
    [12288]
  • blk.50.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.50.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.50.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.50.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.50.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.50.ffn_norm.weight
    F32
    [12288]
  • blk.50.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.51
  • blk.51.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.51.attn_norm.weight
    F32
    [12288]
  • blk.51.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.51.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.51.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.51.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.51.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.51.ffn_norm.weight
    F32
    [12288]
  • blk.51.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.52
  • blk.52.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.52.attn_norm.weight
    F32
    [12288]
  • blk.52.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.52.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.52.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.52.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.52.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.52.ffn_norm.weight
    F32
    [12288]
  • blk.52.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.53
  • blk.53.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.53.attn_norm.weight
    F32
    [12288]
  • blk.53.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.53.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.53.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.53.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.53.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.53.ffn_norm.weight
    F32
    [12288]
  • blk.53.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.54
  • blk.54.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.54.attn_norm.weight
    F32
    [12288]
  • blk.54.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.54.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.54.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.54.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.54.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.54.ffn_norm.weight
    F32
    [12288]
  • blk.54.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.55
  • blk.55.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.55.attn_norm.weight
    F32
    [12288]
  • blk.55.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.55.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.55.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.55.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.55.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.55.ffn_norm.weight
    F32
    [12288]
  • blk.55.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.56
  • blk.56.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.56.attn_norm.weight
    F32
    [12288]
  • blk.56.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.56.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.56.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.56.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.56.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.56.ffn_norm.weight
    F32
    [12288]
  • blk.56.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.57
  • blk.57.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.57.attn_norm.weight
    F32
    [12288]
  • blk.57.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.57.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.57.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.57.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.57.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.57.ffn_norm.weight
    F32
    [12288]
  • blk.57.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.58
  • blk.58.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.58.attn_norm.weight
    F32
    [12288]
  • blk.58.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.58.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.58.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.58.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.58.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.58.ffn_norm.weight
    F32
    [12288]
  • blk.58.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.59
  • blk.59.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.59.attn_norm.weight
    F32
    [12288]
  • blk.59.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.59.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.59.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.59.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.59.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.59.ffn_norm.weight
    F32
    [12288]
  • blk.59.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.60
  • blk.60.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.60.attn_norm.weight
    F32
    [12288]
  • blk.60.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.60.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.60.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.60.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.60.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.60.ffn_norm.weight
    F32
    [12288]
  • blk.60.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.61
  • blk.61.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.61.attn_norm.weight
    F32
    [12288]
  • blk.61.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.61.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.61.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.61.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.61.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.61.ffn_norm.weight
    F32
    [12288]
  • blk.61.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.62
  • blk.62.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.62.attn_norm.weight
    F32
    [12288]
  • blk.62.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.62.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.62.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.62.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.62.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.62.ffn_norm.weight
    F32
    [12288]
  • blk.62.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.63
  • blk.63.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.63.attn_norm.weight
    F32
    [12288]
  • blk.63.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.63.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.63.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.63.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.63.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.63.ffn_norm.weight
    F32
    [12288]
  • blk.63.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.64
  • blk.64.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.64.attn_norm.weight
    F32
    [12288]
  • blk.64.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.64.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.64.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.64.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.64.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.64.ffn_norm.weight
    F32
    [12288]
  • blk.64.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.65
  • blk.65.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.65.attn_norm.weight
    F32
    [12288]
  • blk.65.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.65.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.65.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.65.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.65.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.65.ffn_norm.weight
    F32
    [12288]
  • blk.65.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.66
  • blk.66.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.66.attn_norm.weight
    F32
    [12288]
  • blk.66.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.66.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.66.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.66.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.66.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.66.ffn_norm.weight
    F32
    [12288]
  • blk.66.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.67
  • blk.67.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.67.attn_norm.weight
    F32
    [12288]
  • blk.67.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.67.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.67.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.67.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.67.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.67.ffn_norm.weight
    F32
    [12288]
  • blk.67.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.68
  • blk.68.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.68.attn_norm.weight
    F32
    [12288]
  • blk.68.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.68.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.68.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.68.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.68.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.68.ffn_norm.weight
    F32
    [12288]
  • blk.68.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.69
  • blk.69.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.69.attn_norm.weight
    F32
    [12288]
  • blk.69.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.69.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.69.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.69.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.69.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.69.ffn_norm.weight
    F32
    [12288]
  • blk.69.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.70
  • blk.70.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.70.attn_norm.weight
    F32
    [12288]
  • blk.70.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.70.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.70.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.70.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.70.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.70.ffn_norm.weight
    F32
    [12288]
  • blk.70.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.71
  • blk.71.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.71.attn_norm.weight
    F32
    [12288]
  • blk.71.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.71.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.71.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.71.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.71.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.71.ffn_norm.weight
    F32
    [12288]
  • blk.71.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.72
  • blk.72.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.72.attn_norm.weight
    F32
    [12288]
  • blk.72.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.72.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.72.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.72.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.72.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.72.ffn_norm.weight
    F32
    [12288]
  • blk.72.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.73
  • blk.73.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.73.attn_norm.weight
    F32
    [12288]
  • blk.73.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.73.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.73.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.73.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.73.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.73.ffn_norm.weight
    F32
    [12288]
  • blk.73.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.74
  • blk.74.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.74.attn_norm.weight
    F32
    [12288]
  • blk.74.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.74.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.74.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.74.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.74.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.74.ffn_norm.weight
    F32
    [12288]
  • blk.74.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.75
  • blk.75.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.75.attn_norm.weight
    F32
    [12288]
  • blk.75.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.75.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.75.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.75.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.75.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.75.ffn_norm.weight
    F32
    [12288]
  • blk.75.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.76
  • blk.76.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.76.attn_norm.weight
    F32
    [12288]
  • blk.76.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.76.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.76.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.76.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.76.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.76.ffn_norm.weight
    F32
    [12288]
  • blk.76.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.77
  • blk.77.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.77.attn_norm.weight
    F32
    [12288]
  • blk.77.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.77.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.77.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.77.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.77.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.77.ffn_norm.weight
    F32
    [12288]
  • blk.77.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.78
  • blk.78.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.78.attn_norm.weight
    F32
    [12288]
  • blk.78.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.78.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.78.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.78.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.78.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.78.ffn_norm.weight
    F32
    [12288]
  • blk.78.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.79
  • blk.79.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.79.attn_norm.weight
    F32
    [12288]
  • blk.79.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.79.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.79.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.79.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.79.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.79.ffn_norm.weight
    F32
    [12288]
  • blk.79.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.80
  • blk.80.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.80.attn_norm.weight
    F32
    [12288]
  • blk.80.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.80.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.80.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.80.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.80.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.80.ffn_norm.weight
    F32
    [12288]
  • blk.80.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.81
  • blk.81.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.81.attn_norm.weight
    F32
    [12288]
  • blk.81.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.81.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.81.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.81.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.81.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.81.ffn_norm.weight
    F32
    [12288]
  • blk.81.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.82
  • blk.82.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.82.attn_norm.weight
    F32
    [12288]
  • blk.82.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.82.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.82.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.82.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.82.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.82.ffn_norm.weight
    F32
    [12288]
  • blk.82.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.83
  • blk.83.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.83.attn_norm.weight
    F32
    [12288]
  • blk.83.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.83.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.83.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.83.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.83.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.83.ffn_norm.weight
    F32
    [12288]
  • blk.83.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.84
  • blk.84.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.84.attn_norm.weight
    F32
    [12288]
  • blk.84.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.84.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.84.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.84.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.84.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.84.ffn_norm.weight
    F32
    [12288]
  • blk.84.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.85
  • blk.85.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.85.attn_norm.weight
    F32
    [12288]
  • blk.85.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.85.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.85.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.85.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.85.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.85.ffn_norm.weight
    F32
    [12288]
  • blk.85.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.86
  • blk.86.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.86.attn_norm.weight
    F32
    [12288]
  • blk.86.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.86.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.86.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.86.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.86.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.86.ffn_norm.weight
    F32
    [12288]
  • blk.86.ffn_up.weight
    Q8_0
    [12288, 28672]
  • blk.87
  • blk.87.attn_k.weight
    Q8_0
    [12288, 1024]
  • blk.87.attn_norm.weight
    F32
    [12288]
  • blk.87.attn_output.weight
    Q8_0
    [12288, 12288]
  • blk.87.attn_q.weight
    Q8_0
    [12288, 12288]
  • blk.87.attn_v.weight
    Q8_0
    [12288, 1024]
  • blk.87.ffn_down.weight
    Q8_0
    [28672, 12288]
  • blk.87.ffn_gate.weight
    Q8_0
    [12288, 28672]
  • blk.87.ffn_norm.weight
    F32
    [12288]
  • blk.87.ffn_up.weight
    Q8_0
    [12288, 28672]
  • output.weight
    Q8_0
    [12288, 32769]
  • output_norm.weight
    F32
    [12288]