olmo2:7b-1124-instruct-fp16

2.6M Downloads · Updated 8 months ago
OLMo 2 is a new family of 7B and 13B models trained on up to 5T tokens. These models are on par with or better than equivalently sized fully open models, and competitive with open-weight models such as Llama 3.1 on English academic benchmarks.
Sizes: 7b · 13b
olmo2:7b-1124-instruct-fp16
model    c40a8a2160d2 · 15GB
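This tag can be pulled and served locally with Ollama (ollama pull olmo2:7b-1124-instruct-fp16, then ollama run). Below is a minimal sketch of calling it from Python over Ollama's REST API, assuming a default server on localhost:11434, the requests package installed, and the tag already pulled; the prompt is illustrative only.

import requests

# Ollama's default local endpoint; adjust if the server runs elsewhere.
OLLAMA_URL = "http://localhost:11434/api/generate"

payload = {
    "model": "olmo2:7b-1124-instruct-fp16",
    "prompt": "In two sentences, what is a fully open language model?",
    "stream": False,  # return a single JSON object instead of a token stream
}

resp = requests.post(OLLAMA_URL, json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["response"])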
Metadata
general.architecture                    olmo2
general.file_type                       F16
olmo2.attention.head_count              32
olmo2.attention.head_count_kv           32
olmo2.attention.layer_norm_rms_epsilon  1e-06
olmo2.block_count                       32
olmo2.context_length                    4096
olmo2.embedding_length                  4096
olmo2.feed_forward_length               11008
olmo2.rope.freq_base                    500000
tokenizer.ggml.bos_token_id             100257
tokenizer.ggml.eos_token_id             100257
tokenizer.ggml.merges                   [Ġ Ġ, ĠĠ ĠĠ, i n, Ġ t, ĠĠĠĠ ĠĠĠĠ, ...]
tokenizer.ggml.model                    gpt2
tokenizer.ggml.padding_token_id         100277
tokenizer.ggml.pre                      dbrx
tokenizer.ggml.token_type               [1, 1, 1, 1, 1, ...]
tokenizer.ggml.tokens                   [!, ", #, $, %, ...]
tokenizer.ggml.unknown_token_id         100257
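The attention metadata above is enough to estimate the KV-cache footprint at inference time. A small arithmetic sketch follows, under two assumptions of mine that the page does not state: the cache is held in F16 (2 bytes per element) and head_dim follows the usual embedding_length / head_count convention.

# Values copied from the metadata table above.
block_count      = 32    # olmo2.block_count
head_count       = 32    # olmo2.attention.head_count
head_count_kv    = 32    # olmo2.attention.head_count_kv (== head_count: plain MHA, no grouped-query sharing)
embedding_length = 4096  # olmo2.embedding_length
context_length   = 4096  # olmo2.context_length
bytes_per_elem   = 2     # assumption: F16 KV cache

head_dim = embedding_length // head_count  # 128, assumed convention

# K and V are each cached per layer, per KV head, per token.
kv_bytes_per_token = 2 * block_count * head_count_kv * head_dim * bytes_per_elem
print(kv_bytes_per_token)                  # 524288 bytes = 512 KiB per token

print(kv_bytes_per_token * context_length / 2**30)  # 2.0 GiB at the full 4096-token window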
Tensor

Name                                Type  Shape
token_embd.weight                   F16   [4096, 100352]
blk.0.attn_k.weight                 F16   [4096, 4096]
blk.0.attn_k_norm.weight            F32   [4096]
blk.0.attn_output.weight            F16   [4096, 4096]
blk.0.attn_q.weight                 F16   [4096, 4096]
blk.0.attn_q_norm.weight            F32   [4096]
blk.0.attn_v.weight                 F16   [4096, 4096]
blk.0.ffn_down.weight               F16   [11008, 4096]
blk.0.ffn_gate.weight               F16   [4096, 11008]
blk.0.ffn_up.weight                 F16   [4096, 11008]
blk.0.post_attention_norm.weight    F32   [4096]
blk.0.post_ffw_norm.weight          F32   [4096]
blk.1.attn_k.weight                 F16   [4096, 4096]
blk.1.attn_k_norm.weight            F32   [4096]
blk.1.attn_output.weight            F16   [4096, 4096]
blk.1.attn_q.weight                 F16   [4096, 4096]
blk.1.attn_q_norm.weight            F32   [4096]
blk.1.attn_v.weight                 F16   [4096, 4096]
blk.1.ffn_down.weight               F16   [11008, 4096]
blk.1.ffn_gate.weight               F16   [4096, 11008]
blk.1.ffn_up.weight                 F16   [4096, 11008]
blk.1.post_attention_norm.weight    F32   [4096]
blk.1.post_ffw_norm.weight          F32   [4096]
blk.2.attn_k.weight                 F16   [4096, 4096]
blk.2.attn_k_norm.weight            F32   [4096]
blk.2.attn_output.weight            F16   [4096, 4096]
blk.2.attn_q.weight                 F16   [4096, 4096]
blk.2.attn_q_norm.weight            F32   [4096]
blk.2.attn_v.weight                 F16   [4096, 4096]
blk.2.ffn_down.weight               F16   [11008, 4096]
blk.2.ffn_gate.weight               F16   [4096, 11008]
blk.2.ffn_up.weight                 F16   [4096, 11008]
blk.2.post_attention_norm.weight    F32   [4096]
blk.2.post_ffw_norm.weight          F32   [4096]
blk.3.attn_k.weight                 F16   [4096, 4096]
blk.3.attn_k_norm.weight            F32   [4096]
blk.3.attn_output.weight            F16   [4096, 4096]
blk.3.attn_q.weight                 F16   [4096, 4096]
blk.3.attn_q_norm.weight            F32   [4096]
blk.3.attn_v.weight                 F16   [4096, 4096]
blk.3.ffn_down.weight               F16   [11008, 4096]
blk.3.ffn_gate.weight               F16   [4096, 11008]
blk.3.ffn_up.weight                 F16   [4096, 11008]
blk.3.post_attention_norm.weight    F32   [4096]
blk.3.post_ffw_norm.weight          F32   [4096]
blk.4.attn_k.weight                 F16   [4096, 4096]
blk.4.attn_k_norm.weight            F32   [4096]
blk.4.attn_output.weight            F16   [4096, 4096]
blk.4.attn_q.weight                 F16   [4096, 4096]
blk.4.attn_q_norm.weight            F32   [4096]
blk.4.attn_v.weight                 F16   [4096, 4096]
blk.4.ffn_down.weight               F16   [11008, 4096]
blk.4.ffn_gate.weight               F16   [4096, 11008]
blk.4.ffn_up.weight                 F16   [4096, 11008]
blk.4.post_attention_norm.weight    F32   [4096]
blk.4.post_ffw_norm.weight          F32   [4096]
blk.5.attn_k.weight                 F16   [4096, 4096]
blk.5.attn_k_norm.weight            F32   [4096]
blk.5.attn_output.weight            F16   [4096, 4096]
blk.5.attn_q.weight                 F16   [4096, 4096]
blk.5.attn_q_norm.weight            F32   [4096]
blk.5.attn_v.weight                 F16   [4096, 4096]
blk.5.ffn_down.weight               F16   [11008, 4096]
blk.5.ffn_gate.weight               F16   [4096, 11008]
blk.5.ffn_up.weight                 F16   [4096, 11008]
blk.5.post_attention_norm.weight    F32   [4096]
blk.5.post_ffw_norm.weight          F32   [4096]
blk.6.attn_k.weight                 F16   [4096, 4096]
blk.6.attn_k_norm.weight            F32   [4096]
blk.6.attn_output.weight            F16   [4096, 4096]
blk.6.attn_q.weight                 F16   [4096, 4096]
blk.6.attn_q_norm.weight            F32   [4096]
blk.6.attn_v.weight                 F16   [4096, 4096]
blk.6.ffn_down.weight               F16   [11008, 4096]
blk.6.ffn_gate.weight               F16   [4096, 11008]
blk.6.ffn_up.weight                 F16   [4096, 11008]
blk.6.post_attention_norm.weight    F32   [4096]
blk.6.post_ffw_norm.weight          F32   [4096]
blk.7.attn_k.weight                 F16   [4096, 4096]
blk.7.attn_k_norm.weight            F32   [4096]
blk.7.attn_output.weight            F16   [4096, 4096]
blk.7.attn_q.weight                 F16   [4096, 4096]
blk.7.attn_q_norm.weight            F32   [4096]
blk.7.attn_v.weight                 F16   [4096, 4096]
blk.7.ffn_down.weight               F16   [11008, 4096]
blk.7.ffn_gate.weight               F16   [4096, 11008]
blk.7.ffn_up.weight                 F16   [4096, 11008]
blk.7.post_attention_norm.weight    F32   [4096]
blk.7.post_ffw_norm.weight          F32   [4096]
blk.8.attn_k.weight                 F16   [4096, 4096]
blk.8.attn_k_norm.weight            F32   [4096]
blk.8.attn_output.weight            F16   [4096, 4096]
blk.8.attn_q.weight                 F16   [4096, 4096]
blk.8.attn_q_norm.weight            F32   [4096]
blk.8.attn_v.weight                 F16   [4096, 4096]
blk.8.ffn_down.weight               F16   [11008, 4096]
blk.8.ffn_gate.weight               F16   [4096, 11008]
blk.8.ffn_up.weight                 F16   [4096, 11008]
blk.8.post_attention_norm.weight    F32   [4096]
blk.8.post_ffw_norm.weight          F32   [4096]
blk.9.attn_k.weight                 F16   [4096, 4096]
blk.9.attn_k_norm.weight            F32   [4096]
blk.9.attn_output.weight            F16   [4096, 4096]
blk.9.attn_q.weight                 F16   [4096, 4096]
blk.9.attn_q_norm.weight            F32   [4096]
blk.9.attn_v.weight                 F16   [4096, 4096]
blk.9.ffn_down.weight               F16   [11008, 4096]
blk.9.ffn_gate.weight               F16   [4096, 11008]
blk.9.ffn_up.weight                 F16   [4096, 11008]
blk.9.post_attention_norm.weight    F32   [4096]
blk.9.post_ffw_norm.weight          F32   [4096]
blk.10.attn_k.weight                F16   [4096, 4096]
blk.10.attn_k_norm.weight           F32   [4096]
blk.10.attn_output.weight           F16   [4096, 4096]
blk.10.attn_q.weight                F16   [4096, 4096]
blk.10.attn_q_norm.weight           F32   [4096]
blk.10.attn_v.weight                F16   [4096, 4096]
blk.10.ffn_down.weight              F16   [11008, 4096]
blk.10.ffn_gate.weight              F16   [4096, 11008]
blk.10.ffn_up.weight                F16   [4096, 11008]
blk.10.post_attention_norm.weight   F32   [4096]
blk.10.post_ffw_norm.weight         F32   [4096]
blk.11.attn_k.weight                F16   [4096, 4096]
blk.11.attn_k_norm.weight           F32   [4096]
blk.11.attn_output.weight           F16   [4096, 4096]
blk.11.attn_q.weight                F16   [4096, 4096]
blk.11.attn_q_norm.weight           F32   [4096]
blk.11.attn_v.weight                F16   [4096, 4096]
blk.11.ffn_down.weight              F16   [11008, 4096]
blk.11.ffn_gate.weight              F16   [4096, 11008]
blk.11.ffn_up.weight                F16   [4096, 11008]
blk.11.post_attention_norm.weight   F32   [4096]
blk.11.post_ffw_norm.weight         F32   [4096]
blk.12.attn_k.weight                F16   [4096, 4096]
blk.12.attn_k_norm.weight           F32   [4096]
blk.12.attn_output.weight           F16   [4096, 4096]
blk.12.attn_q.weight                F16   [4096, 4096]
blk.12.attn_q_norm.weight           F32   [4096]
blk.12.attn_v.weight                F16   [4096, 4096]
blk.12.ffn_down.weight              F16   [11008, 4096]
blk.12.ffn_gate.weight              F16   [4096, 11008]
blk.12.ffn_up.weight                F16   [4096, 11008]
blk.12.post_attention_norm.weight   F32   [4096]
blk.12.post_ffw_norm.weight         F32   [4096]
blk.13.attn_k.weight                F16   [4096, 4096]
blk.13.attn_k_norm.weight           F32   [4096]
blk.13.attn_output.weight           F16   [4096, 4096]
blk.13.attn_q.weight                F16   [4096, 4096]
blk.13.attn_q_norm.weight           F32   [4096]
blk.13.attn_v.weight                F16   [4096, 4096]
blk.13.ffn_down.weight              F16   [11008, 4096]
blk.13.ffn_gate.weight              F16   [4096, 11008]
blk.13.ffn_up.weight                F16   [4096, 11008]
blk.13.post_attention_norm.weight   F32   [4096]
blk.13.post_ffw_norm.weight         F32   [4096]
blk.14.attn_k.weight                F16   [4096, 4096]
blk.14.attn_k_norm.weight           F32   [4096]
blk.14.attn_output.weight           F16   [4096, 4096]
blk.14.attn_q.weight                F16   [4096, 4096]
blk.14.attn_q_norm.weight           F32   [4096]
blk.14.attn_v.weight                F16   [4096, 4096]
blk.14.ffn_down.weight              F16   [11008, 4096]
blk.14.ffn_gate.weight              F16   [4096, 11008]
blk.14.ffn_up.weight                F16   [4096, 11008]
blk.14.post_attention_norm.weight   F32   [4096]
blk.14.post_ffw_norm.weight         F32   [4096]
blk.15.attn_k.weight                F16   [4096, 4096]
blk.15.attn_k_norm.weight           F32   [4096]
blk.15.attn_output.weight           F16   [4096, 4096]
blk.15.attn_q.weight                F16   [4096, 4096]
blk.15.attn_q_norm.weight           F32   [4096]
blk.15.attn_v.weight                F16   [4096, 4096]
blk.15.ffn_down.weight              F16   [11008, 4096]
blk.15.ffn_gate.weight              F16   [4096, 11008]
blk.15.ffn_up.weight                F16   [4096, 11008]
blk.15.post_attention_norm.weight   F32   [4096]
blk.15.post_ffw_norm.weight         F32   [4096]
blk.16.attn_k.weight                F16   [4096, 4096]
blk.16.attn_k_norm.weight           F32   [4096]
blk.16.attn_output.weight           F16   [4096, 4096]
blk.16.attn_q.weight                F16   [4096, 4096]
blk.16.attn_q_norm.weight           F32   [4096]
blk.16.attn_v.weight                F16   [4096, 4096]
blk.16.ffn_down.weight              F16   [11008, 4096]
blk.16.ffn_gate.weight              F16   [4096, 11008]
blk.16.ffn_up.weight                F16   [4096, 11008]
blk.16.post_attention_norm.weight   F32   [4096]
blk.16.post_ffw_norm.weight         F32   [4096]
blk.17.attn_k.weight                F16   [4096, 4096]
blk.17.attn_k_norm.weight           F32   [4096]
blk.17.attn_output.weight           F16   [4096, 4096]
blk.17.attn_q.weight                F16   [4096, 4096]
blk.17.attn_q_norm.weight           F32   [4096]
blk.17.attn_v.weight                F16   [4096, 4096]
blk.17.ffn_down.weight              F16   [11008, 4096]
blk.17.ffn_gate.weight              F16   [4096, 11008]
blk.17.ffn_up.weight                F16   [4096, 11008]
blk.17.post_attention_norm.weight   F32   [4096]
blk.17.post_ffw_norm.weight         F32   [4096]
blk.18.attn_k.weight                F16   [4096, 4096]
blk.18.attn_k_norm.weight           F32   [4096]
blk.18.attn_output.weight           F16   [4096, 4096]
blk.18.attn_q.weight                F16   [4096, 4096]
blk.18.attn_q_norm.weight           F32   [4096]
blk.18.attn_v.weight                F16   [4096, 4096]
blk.18.ffn_down.weight              F16   [11008, 4096]
blk.18.ffn_gate.weight              F16   [4096, 11008]
blk.18.ffn_up.weight                F16   [4096, 11008]
blk.18.post_attention_norm.weight   F32   [4096]
blk.18.post_ffw_norm.weight         F32   [4096]
blk.19.attn_k.weight                F16   [4096, 4096]
blk.19.attn_k_norm.weight           F32   [4096]
blk.19.attn_output.weight           F16   [4096, 4096]
blk.19.attn_q.weight                F16   [4096, 4096]
blk.19.attn_q_norm.weight           F32   [4096]
blk.19.attn_v.weight                F16   [4096, 4096]
blk.19.ffn_down.weight              F16   [11008, 4096]
blk.19.ffn_gate.weight              F16   [4096, 11008]
blk.19.ffn_up.weight                F16   [4096, 11008]
blk.19.post_attention_norm.weight   F32   [4096]
blk.19.post_ffw_norm.weight         F32   [4096]
blk.20.attn_k.weight                F16   [4096, 4096]
blk.20.attn_k_norm.weight           F32   [4096]
blk.20.attn_output.weight           F16   [4096, 4096]
blk.20.attn_q.weight                F16   [4096, 4096]
blk.20.attn_q_norm.weight           F32   [4096]
blk.20.attn_v.weight                F16   [4096, 4096]
blk.20.ffn_down.weight              F16   [11008, 4096]
blk.20.ffn_gate.weight              F16   [4096, 11008]
blk.20.ffn_up.weight                F16   [4096, 11008]
blk.20.post_attention_norm.weight   F32   [4096]
blk.20.post_ffw_norm.weight         F32   [4096]
blk.21.attn_k.weight                F16   [4096, 4096]
blk.21.attn_k_norm.weight           F32   [4096]
blk.21.attn_output.weight           F16   [4096, 4096]
blk.21.attn_q.weight                F16   [4096, 4096]
blk.21.attn_q_norm.weight           F32   [4096]
blk.21.attn_v.weight                F16   [4096, 4096]
blk.21.ffn_down.weight              F16   [11008, 4096]
blk.21.ffn_gate.weight              F16   [4096, 11008]
blk.21.ffn_up.weight                F16   [4096, 11008]
blk.21.post_attention_norm.weight   F32   [4096]
blk.21.post_ffw_norm.weight         F32   [4096]
blk.22.attn_k.weight                F16   [4096, 4096]
blk.22.attn_k_norm.weight           F32   [4096]
blk.22.attn_output.weight           F16   [4096, 4096]
blk.22.attn_q.weight                F16   [4096, 4096]
blk.22.attn_q_norm.weight           F32   [4096]
blk.22.attn_v.weight                F16   [4096, 4096]
blk.22.ffn_down.weight              F16   [11008, 4096]
blk.22.ffn_gate.weight              F16   [4096, 11008]
blk.22.ffn_up.weight                F16   [4096, 11008]
blk.22.post_attention_norm.weight   F32   [4096]
blk.22.post_ffw_norm.weight         F32   [4096]
blk.23.attn_k.weight                F16   [4096, 4096]
blk.23.attn_k_norm.weight           F32   [4096]
blk.23.attn_output.weight           F16   [4096, 4096]
blk.23.attn_q.weight                F16   [4096, 4096]
blk.23.attn_q_norm.weight           F32   [4096]
blk.23.attn_v.weight                F16   [4096, 4096]
blk.23.ffn_down.weight              F16   [11008, 4096]
blk.23.ffn_gate.weight              F16   [4096, 11008]
blk.23.ffn_up.weight                F16   [4096, 11008]
blk.23.post_attention_norm.weight   F32   [4096]
blk.23.post_ffw_norm.weight         F32   [4096]
blk.24.attn_k.weight                F16   [4096, 4096]
blk.24.attn_k_norm.weight           F32   [4096]
blk.24.attn_output.weight           F16   [4096, 4096]
blk.24.attn_q.weight                F16   [4096, 4096]
blk.24.attn_q_norm.weight           F32   [4096]
blk.24.attn_v.weight                F16   [4096, 4096]
blk.24.ffn_down.weight              F16   [11008, 4096]
blk.24.ffn_gate.weight              F16   [4096, 11008]
blk.24.ffn_up.weight                F16   [4096, 11008]
blk.24.post_attention_norm.weight   F32   [4096]
blk.24.post_ffw_norm.weight         F32   [4096]
blk.25.attn_k.weight                F16   [4096, 4096]
blk.25.attn_k_norm.weight           F32   [4096]
blk.25.attn_output.weight           F16   [4096, 4096]
blk.25.attn_q.weight                F16   [4096, 4096]
blk.25.attn_q_norm.weight           F32   [4096]
blk.25.attn_v.weight                F16   [4096, 4096]
blk.25.ffn_down.weight              F16   [11008, 4096]
blk.25.ffn_gate.weight              F16   [4096, 11008]
blk.25.ffn_up.weight                F16   [4096, 11008]
blk.25.post_attention_norm.weight   F32   [4096]
blk.25.post_ffw_norm.weight         F32   [4096]
blk.26.attn_k.weight                F16   [4096, 4096]
blk.26.attn_k_norm.weight           F32   [4096]
blk.26.attn_output.weight           F16   [4096, 4096]
blk.26.attn_q.weight                F16   [4096, 4096]
blk.26.attn_q_norm.weight           F32   [4096]
blk.26.attn_v.weight                F16   [4096, 4096]
blk.26.ffn_down.weight              F16   [11008, 4096]
blk.26.ffn_gate.weight              F16   [4096, 11008]
blk.26.ffn_up.weight                F16   [4096, 11008]
blk.26.post_attention_norm.weight   F32   [4096]
blk.26.post_ffw_norm.weight         F32   [4096]
blk.27.attn_k.weight                F16   [4096, 4096]
blk.27.attn_k_norm.weight           F32   [4096]
blk.27.attn_output.weight           F16   [4096, 4096]
blk.27.attn_q.weight                F16   [4096, 4096]
blk.27.attn_q_norm.weight           F32   [4096]
blk.27.attn_v.weight                F16   [4096, 4096]
blk.27.ffn_down.weight              F16   [11008, 4096]
blk.27.ffn_gate.weight              F16   [4096, 11008]
blk.27.ffn_up.weight                F16   [4096, 11008]
blk.27.post_attention_norm.weight   F32   [4096]
blk.27.post_ffw_norm.weight         F32   [4096]
blk.28.attn_k.weight                F16   [4096, 4096]
blk.28.attn_k_norm.weight           F32   [4096]
blk.28.attn_output.weight           F16   [4096, 4096]
blk.28.attn_q.weight                F16   [4096, 4096]
blk.28.attn_q_norm.weight           F32   [4096]
blk.28.attn_v.weight                F16   [4096, 4096]
blk.28.ffn_down.weight              F16   [11008, 4096]
blk.28.ffn_gate.weight              F16   [4096, 11008]
blk.28.ffn_up.weight                F16   [4096, 11008]
blk.28.post_attention_norm.weight   F32   [4096]
blk.28.post_ffw_norm.weight         F32   [4096]
blk.29.attn_k.weight                F16   [4096, 4096]
blk.29.attn_k_norm.weight           F32   [4096]
blk.29.attn_output.weight           F16   [4096, 4096]
blk.29.attn_q.weight                F16   [4096, 4096]
blk.29.attn_q_norm.weight           F32   [4096]
blk.29.attn_v.weight                F16   [4096, 4096]
blk.29.ffn_down.weight              F16   [11008, 4096]
blk.29.ffn_gate.weight              F16   [4096, 11008]
blk.29.ffn_up.weight                F16   [4096, 11008]
blk.29.post_attention_norm.weight   F32   [4096]
blk.29.post_ffw_norm.weight         F32   [4096]
blk.30.attn_k.weight                F16   [4096, 4096]
blk.30.attn_k_norm.weight           F32   [4096]
blk.30.attn_output.weight           F16   [4096, 4096]
blk.30.attn_q.weight                F16   [4096, 4096]
blk.30.attn_q_norm.weight           F32   [4096]
blk.30.attn_v.weight                F16   [4096, 4096]
blk.30.ffn_down.weight              F16   [11008, 4096]
blk.30.ffn_gate.weight              F16   [4096, 11008]
blk.30.ffn_up.weight                F16   [4096, 11008]
blk.30.post_attention_norm.weight   F32   [4096]
blk.30.post_ffw_norm.weight         F32   [4096]
blk.31.attn_k.weight                F16   [4096, 4096]
blk.31.attn_k_norm.weight           F32   [4096]
blk.31.attn_output.weight           F16   [4096, 4096]
blk.31.attn_q.weight                F16   [4096, 4096]
blk.31.attn_q_norm.weight           F32   [4096]
blk.31.attn_v.weight                F16   [4096, 4096]
blk.31.ffn_down.weight              F16   [11008, 4096]
blk.31.ffn_gate.weight              F16   [4096, 11008]
blk.31.ffn_up.weight                F16   [4096, 11008]
blk.31.post_attention_norm.weight   F32   [4096]
blk.31.post_ffw_norm.weight         F32   [4096]
output.weight                       F16   [4096, 100352]
output_norm.weight                  F32   [4096]
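The shapes above are enough to recover the parameter count and the listed 15GB blob size. A quick sanity-check sketch of the arithmetic (the tensor set is identical for blk.0 through blk.31):

# Dimensions read off the tensor table above.
d_model, d_ffn, n_vocab, n_blocks = 4096, 11008, 100352, 32

attn  = 4 * d_model * d_model   # attn_q, attn_k, attn_v, attn_output
ffn   = 3 * d_model * d_ffn     # ffn_gate, ffn_up, ffn_down
norms = 4 * d_model             # attn_q_norm, attn_k_norm, post_attention_norm, post_ffw_norm
per_block = attn + ffn + norms  # 202,391,552 parameters per transformer block

# 32 blocks, plus token_embd and output (untied), plus output_norm.
total = n_blocks * per_block + 2 * d_model * n_vocab + d_model
print(f"{total:,}")             # 7,298,617,344 -> the "7B" in the model name

# Almost every tensor is F16 (2 bytes); the F32 norm vectors are negligible.
print(f"{total * 2 / 1e9:.1f} GB")  # ~14.6 GB, consistent with the listed 15GB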