Testing MTP speed with gemma-4-12B-it-qat-UD-Q4_K_XL.gguf on RX6700XT 12GB Vulkan.
Percentages are relative to the MTP 0 row of the same --draft-model section.
--draft-model gemma-4-12B-it-Q4_0-MTP.gguf
MTP 0 - [ Prompt: 272.3 t/s | Generation: 35.7 t/s ]
MTP 1 - [ Prompt: 252.8 t/s (-7.2%) | Generation: 41.9 t/s ] (+17.4%)
MTP 2 - [ Prompt: 253.5 t/s (-6.9%) | Generation: 39.4 t/s ] (+10.4%)
MTP 3 - [ Prompt: 251.2 t/s (-7.7%) | Generation: 32.8 t/s ] (-8.1%)
MTP 4 - [ Prompt: 251.8 t/s (-7.5%) | Generation: 28.9 t/s ] (-19.0%)
--draft-model gemma-4-12B-it-Q8_0-MTP.gguf
MTP 0 - [ Prompt: 273.3 t/s | Generation: 35.7 t/s ]
MTP 1 - [ Prompt: 242.7 t/s (-11.2%) | Generation: 52.1 t/s ] (+45.9%)
MTP 2 - [ Prompt: 244.6 t/s (-10.5%) | Generation: 54.7 t/s ] (+53.2%)
MTP 3 - [ Prompt: 245.9 t/s (-10.0%) | Generation: 50.7 t/s ] (+42.0%)
MTP 4 - [ Prompt: 248.5 t/s (-9.1%) | Generation: 46.5 t/s ] (+30.3%)
--draft-model gemma-4-12B-it-F16-MTP.gguf
MTP 0 - [ Prompt: 274.4 t/s | Generation: 36.1 t/s ]
MTP 1 - [ Prompt: 230.8 t/s (-15.9%) | Generation: 51.5 t/s ] (+42.7%)
MTP 2 - [ Prompt: 247.6 t/s (-9.8%) | Generation: 52.3 t/s ] (+44.9%)
MTP 3 - [ Prompt: 250.2 t/s (-8.8%) | Generation: 48.8 t/s ] (+35.2%)
MTP 4 - [ Prompt: 247.5 t/s (-9.8%) | Generation: 43.0 t/s ] (+19.1%)