Models
GitHub
Discord
Turbo
Sign in
Download
Models
Download
GitHub
Discord
Sign in
olmo2
:13b-1124-instruct-q8_0
2.6M
Downloads
Updated
8 months ago
OLMo 2 is a new family of 7B and 13B models trained on up to 5T tokens. These models are on par with or better than equivalently sized fully open models, and competitive with open-weight models such as Llama 3.1 on English academic benchmarks.
OLMo 2 is a new family of 7B and 13B models trained on up to 5T tokens. These models are on par with or better than equivalently sized fully open models, and competitive with open-weight models such as Llama 3.1 on English academic benchmarks.
Cancel
7b
13b
olmo2:13b-1124-instruct-q8_0
...
/
model
26ab3345f3ac · 15GB
Metadata
general.architecture
olmo2
olmo2
general.file_type
Q8_0
Q8_0
olmo2.attention.head_count
40
40
olmo2.attention.head_count_kv
40
40
olmo2.attention.layer_norm_rms_epsilon
1e-06
1e-06
olmo2.block_count
40
40
olmo2.context_length
4096
4096
olmo2.embedding_length
5120
5120
olmo2.feed_forward_length
13824
13824
olmo2.rope.freq_base
500000
500000
tokenizer.ggml.bos_token_id
100257
100257
tokenizer.ggml.eos_token_id
100257
100257
tokenizer.ggml.merges
[Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
[Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
tokenizer.ggml.model
gpt2
gpt2
tokenizer.ggml.padding_token_id
100277
100277
tokenizer.ggml.pre
dbrx
dbrx
tokenizer.ggml.token_type
[1, 1, 1, 1, 1, ...]
[1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens
[!, ", #, $, %, ...]
[!, ", #, $, %, ...]
tokenizer.ggml.unknown_token_id
100257
100257
Tensor
Name
Type
Shape
token_embd.weight
Q8_0
Q8_0
[5120, 100352]
blk.0
blk.0.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.0.attn_k_norm.weight
F32
F32
[5120]
blk.0.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.0.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.0.attn_q_norm.weight
F32
F32
[5120]
blk.0.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.0.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.0.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.0.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.0.post_attention_norm.weight
F32
F32
[5120]
blk.0.post_ffw_norm.weight
F32
F32
[5120]
blk.1
blk.1.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.1.attn_k_norm.weight
F32
F32
[5120]
blk.1.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.1.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.1.attn_q_norm.weight
F32
F32
[5120]
blk.1.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.1.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.1.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.1.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.1.post_attention_norm.weight
F32
F32
[5120]
blk.1.post_ffw_norm.weight
F32
F32
[5120]
blk.2
blk.2.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.2.attn_k_norm.weight
F32
F32
[5120]
blk.2.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.2.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.2.attn_q_norm.weight
F32
F32
[5120]
blk.2.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.2.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.2.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.2.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.2.post_attention_norm.weight
F32
F32
[5120]
blk.2.post_ffw_norm.weight
F32
F32
[5120]
blk.3
blk.3.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.3.attn_k_norm.weight
F32
F32
[5120]
blk.3.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.3.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.3.attn_q_norm.weight
F32
F32
[5120]
blk.3.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.3.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.3.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.3.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.3.post_attention_norm.weight
F32
F32
[5120]
blk.3.post_ffw_norm.weight
F32
F32
[5120]
blk.4
blk.4.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.4.attn_k_norm.weight
F32
F32
[5120]
blk.4.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.4.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.4.attn_q_norm.weight
F32
F32
[5120]
blk.4.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.4.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.4.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.4.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.4.post_attention_norm.weight
F32
F32
[5120]
blk.4.post_ffw_norm.weight
F32
F32
[5120]
blk.5
blk.5.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.5.attn_k_norm.weight
F32
F32
[5120]
blk.5.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.5.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.5.attn_q_norm.weight
F32
F32
[5120]
blk.5.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.5.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.5.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.5.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.5.post_attention_norm.weight
F32
F32
[5120]
blk.5.post_ffw_norm.weight
F32
F32
[5120]
blk.6
blk.6.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.6.attn_k_norm.weight
F32
F32
[5120]
blk.6.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.6.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.6.attn_q_norm.weight
F32
F32
[5120]
blk.6.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.6.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.6.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.6.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.6.post_attention_norm.weight
F32
F32
[5120]
blk.6.post_ffw_norm.weight
F32
F32
[5120]
blk.7
blk.7.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.7.attn_k_norm.weight
F32
F32
[5120]
blk.7.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.7.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.7.attn_q_norm.weight
F32
F32
[5120]
blk.7.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.7.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.7.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.7.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.7.post_attention_norm.weight
F32
F32
[5120]
blk.7.post_ffw_norm.weight
F32
F32
[5120]
blk.8
blk.8.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.8.attn_k_norm.weight
F32
F32
[5120]
blk.8.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.8.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.8.attn_q_norm.weight
F32
F32
[5120]
blk.8.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.8.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.8.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.8.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.8.post_attention_norm.weight
F32
F32
[5120]
blk.8.post_ffw_norm.weight
F32
F32
[5120]
blk.9
blk.9.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.9.attn_k_norm.weight
F32
F32
[5120]
blk.9.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.9.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.9.attn_q_norm.weight
F32
F32
[5120]
blk.9.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.9.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.9.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.9.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.9.post_attention_norm.weight
F32
F32
[5120]
blk.9.post_ffw_norm.weight
F32
F32
[5120]
blk.10
blk.10.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.10.attn_k_norm.weight
F32
F32
[5120]
blk.10.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.10.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.10.attn_q_norm.weight
F32
F32
[5120]
blk.10.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.10.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.10.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.10.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.10.post_attention_norm.weight
F32
F32
[5120]
blk.10.post_ffw_norm.weight
F32
F32
[5120]
blk.11
blk.11.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.11.attn_k_norm.weight
F32
F32
[5120]
blk.11.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.11.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.11.attn_q_norm.weight
F32
F32
[5120]
blk.11.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.11.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.11.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.11.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.11.post_attention_norm.weight
F32
F32
[5120]
blk.11.post_ffw_norm.weight
F32
F32
[5120]
blk.12
blk.12.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.12.attn_k_norm.weight
F32
F32
[5120]
blk.12.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.12.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.12.attn_q_norm.weight
F32
F32
[5120]
blk.12.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.12.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.12.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.12.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.12.post_attention_norm.weight
F32
F32
[5120]
blk.12.post_ffw_norm.weight
F32
F32
[5120]
blk.13
blk.13.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.13.attn_k_norm.weight
F32
F32
[5120]
blk.13.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.13.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.13.attn_q_norm.weight
F32
F32
[5120]
blk.13.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.13.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.13.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.13.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.13.post_attention_norm.weight
F32
F32
[5120]
blk.13.post_ffw_norm.weight
F32
F32
[5120]
blk.14
blk.14.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.14.attn_k_norm.weight
F32
F32
[5120]
blk.14.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.14.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.14.attn_q_norm.weight
F32
F32
[5120]
blk.14.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.14.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.14.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.14.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.14.post_attention_norm.weight
F32
F32
[5120]
blk.14.post_ffw_norm.weight
F32
F32
[5120]
blk.15
blk.15.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.15.attn_k_norm.weight
F32
F32
[5120]
blk.15.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.15.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.15.attn_q_norm.weight
F32
F32
[5120]
blk.15.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.15.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.15.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.15.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.15.post_attention_norm.weight
F32
F32
[5120]
blk.15.post_ffw_norm.weight
F32
F32
[5120]
blk.16
blk.16.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.16.attn_k_norm.weight
F32
F32
[5120]
blk.16.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.16.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.16.attn_q_norm.weight
F32
F32
[5120]
blk.16.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.16.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.16.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.16.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.16.post_attention_norm.weight
F32
F32
[5120]
blk.16.post_ffw_norm.weight
F32
F32
[5120]
blk.17
blk.17.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.17.attn_k_norm.weight
F32
F32
[5120]
blk.17.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.17.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.17.attn_q_norm.weight
F32
F32
[5120]
blk.17.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.17.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.17.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.17.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.17.post_attention_norm.weight
F32
F32
[5120]
blk.17.post_ffw_norm.weight
F32
F32
[5120]
blk.18
blk.18.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.18.attn_k_norm.weight
F32
F32
[5120]
blk.18.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.18.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.18.attn_q_norm.weight
F32
F32
[5120]
blk.18.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.18.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.18.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.18.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.18.post_attention_norm.weight
F32
F32
[5120]
blk.18.post_ffw_norm.weight
F32
F32
[5120]
blk.19
blk.19.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.19.attn_k_norm.weight
F32
F32
[5120]
blk.19.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.19.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.19.attn_q_norm.weight
F32
F32
[5120]
blk.19.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.19.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.19.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.19.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.19.post_attention_norm.weight
F32
F32
[5120]
blk.19.post_ffw_norm.weight
F32
F32
[5120]
blk.20
blk.20.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.20.attn_k_norm.weight
F32
F32
[5120]
blk.20.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.20.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.20.attn_q_norm.weight
F32
F32
[5120]
blk.20.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.20.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.20.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.20.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.20.post_attention_norm.weight
F32
F32
[5120]
blk.20.post_ffw_norm.weight
F32
F32
[5120]
blk.21
blk.21.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.21.attn_k_norm.weight
F32
F32
[5120]
blk.21.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.21.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.21.attn_q_norm.weight
F32
F32
[5120]
blk.21.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.21.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.21.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.21.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.21.post_attention_norm.weight
F32
F32
[5120]
blk.21.post_ffw_norm.weight
F32
F32
[5120]
blk.22
blk.22.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.22.attn_k_norm.weight
F32
F32
[5120]
blk.22.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.22.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.22.attn_q_norm.weight
F32
F32
[5120]
blk.22.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.22.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.22.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.22.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.22.post_attention_norm.weight
F32
F32
[5120]
blk.22.post_ffw_norm.weight
F32
F32
[5120]
blk.23
blk.23.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.23.attn_k_norm.weight
F32
F32
[5120]
blk.23.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.23.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.23.attn_q_norm.weight
F32
F32
[5120]
blk.23.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.23.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.23.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.23.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.23.post_attention_norm.weight
F32
F32
[5120]
blk.23.post_ffw_norm.weight
F32
F32
[5120]
blk.24
blk.24.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.24.attn_k_norm.weight
F32
F32
[5120]
blk.24.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.24.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.24.attn_q_norm.weight
F32
F32
[5120]
blk.24.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.24.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.24.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.24.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.24.post_attention_norm.weight
F32
F32
[5120]
blk.24.post_ffw_norm.weight
F32
F32
[5120]
blk.25
blk.25.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.25.attn_k_norm.weight
F32
F32
[5120]
blk.25.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.25.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.25.attn_q_norm.weight
F32
F32
[5120]
blk.25.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.25.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.25.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.25.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.25.post_attention_norm.weight
F32
F32
[5120]
blk.25.post_ffw_norm.weight
F32
F32
[5120]
blk.26
blk.26.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.26.attn_k_norm.weight
F32
F32
[5120]
blk.26.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.26.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.26.attn_q_norm.weight
F32
F32
[5120]
blk.26.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.26.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.26.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.26.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.26.post_attention_norm.weight
F32
F32
[5120]
blk.26.post_ffw_norm.weight
F32
F32
[5120]
blk.27
blk.27.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.27.attn_k_norm.weight
F32
F32
[5120]
blk.27.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.27.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.27.attn_q_norm.weight
F32
F32
[5120]
blk.27.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.27.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.27.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.27.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.27.post_attention_norm.weight
F32
F32
[5120]
blk.27.post_ffw_norm.weight
F32
F32
[5120]
blk.28
blk.28.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.28.attn_k_norm.weight
F32
F32
[5120]
blk.28.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.28.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.28.attn_q_norm.weight
F32
F32
[5120]
blk.28.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.28.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.28.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.28.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.28.post_attention_norm.weight
F32
F32
[5120]
blk.28.post_ffw_norm.weight
F32
F32
[5120]
blk.29
blk.29.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.29.attn_k_norm.weight
F32
F32
[5120]
blk.29.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.29.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.29.attn_q_norm.weight
F32
F32
[5120]
blk.29.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.29.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.29.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.29.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.29.post_attention_norm.weight
F32
F32
[5120]
blk.29.post_ffw_norm.weight
F32
F32
[5120]
blk.30
blk.30.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.30.attn_k_norm.weight
F32
F32
[5120]
blk.30.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.30.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.30.attn_q_norm.weight
F32
F32
[5120]
blk.30.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.30.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.30.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.30.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.30.post_attention_norm.weight
F32
F32
[5120]
blk.30.post_ffw_norm.weight
F32
F32
[5120]
blk.31
blk.31.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.31.attn_k_norm.weight
F32
F32
[5120]
blk.31.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.31.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.31.attn_q_norm.weight
F32
F32
[5120]
blk.31.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.31.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.31.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.31.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.31.post_attention_norm.weight
F32
F32
[5120]
blk.31.post_ffw_norm.weight
F32
F32
[5120]
blk.32
blk.32.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.32.attn_k_norm.weight
F32
F32
[5120]
blk.32.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.32.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.32.attn_q_norm.weight
F32
F32
[5120]
blk.32.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.32.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.32.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.32.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.32.post_attention_norm.weight
F32
F32
[5120]
blk.32.post_ffw_norm.weight
F32
F32
[5120]
blk.33
blk.33.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.33.attn_k_norm.weight
F32
F32
[5120]
blk.33.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.33.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.33.attn_q_norm.weight
F32
F32
[5120]
blk.33.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.33.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.33.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.33.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.33.post_attention_norm.weight
F32
F32
[5120]
blk.33.post_ffw_norm.weight
F32
F32
[5120]
blk.34
blk.34.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.34.attn_k_norm.weight
F32
F32
[5120]
blk.34.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.34.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.34.attn_q_norm.weight
F32
F32
[5120]
blk.34.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.34.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.34.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.34.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.34.post_attention_norm.weight
F32
F32
[5120]
blk.34.post_ffw_norm.weight
F32
F32
[5120]
blk.35
blk.35.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.35.attn_k_norm.weight
F32
F32
[5120]
blk.35.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.35.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.35.attn_q_norm.weight
F32
F32
[5120]
blk.35.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.35.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.35.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.35.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.35.post_attention_norm.weight
F32
F32
[5120]
blk.35.post_ffw_norm.weight
F32
F32
[5120]
blk.36
blk.36.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.36.attn_k_norm.weight
F32
F32
[5120]
blk.36.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.36.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.36.attn_q_norm.weight
F32
F32
[5120]
blk.36.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.36.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.36.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.36.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.36.post_attention_norm.weight
F32
F32
[5120]
blk.36.post_ffw_norm.weight
F32
F32
[5120]
blk.37
blk.37.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.37.attn_k_norm.weight
F32
F32
[5120]
blk.37.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.37.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.37.attn_q_norm.weight
F32
F32
[5120]
blk.37.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.37.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.37.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.37.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.37.post_attention_norm.weight
F32
F32
[5120]
blk.37.post_ffw_norm.weight
F32
F32
[5120]
blk.38
blk.38.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.38.attn_k_norm.weight
F32
F32
[5120]
blk.38.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.38.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.38.attn_q_norm.weight
F32
F32
[5120]
blk.38.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.38.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.38.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.38.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.38.post_attention_norm.weight
F32
F32
[5120]
blk.38.post_ffw_norm.weight
F32
F32
[5120]
blk.39
blk.39.attn_k.weight
Q8_0
Q8_0
[5120, 5120]
blk.39.attn_k_norm.weight
F32
F32
[5120]
blk.39.attn_output.weight
Q8_0
Q8_0
[5120, 5120]
blk.39.attn_q.weight
Q8_0
Q8_0
[5120, 5120]
blk.39.attn_q_norm.weight
F32
F32
[5120]
blk.39.attn_v.weight
Q8_0
Q8_0
[5120, 5120]
blk.39.ffn_down.weight
Q8_0
Q8_0
[13824, 5120]
blk.39.ffn_gate.weight
Q8_0
Q8_0
[5120, 13824]
blk.39.ffn_up.weight
Q8_0
Q8_0
[5120, 13824]
blk.39.post_attention_norm.weight
F32
F32
[5120]
blk.39.post_ffw_norm.weight
F32
F32
[5120]
output.weight
Q8_0
Q8_0
[5120, 100352]
output_norm.weight
F32
F32
[5120]