# Text-conditioning components: the tokenizer and the T5 encoder are read from
# different subfolders of the same `t5_models_dir` checkpoint directory.
tokenizer = T5Tokenizer.from_pretrained(t5_models_dir, subfolder="tokenizer")

# The encoder is requested in float16 and then moved to `offload_device`.
t5_model = T5EncoderModel.from_pretrained(
    t5_models_dir, subfolder="text_encoder", torch_dtype=torch.float16
)
t5_model = t5_model.to(offload_device)

# The VAE is likewise requested in float16 and moved to `offload_device`.
# NOTE(review): the checkpoint location is a hard-coded, machine-specific path.
vae = AutoencoderKL.from_pretrained(
    "F:/ai/models/vae-kl-f8-d16",
    torch_dtype=torch.float16,
)
vae = vae.to(offload_device)
# Key/value compression settings handed to the model constructor.
kv_compress = True
kv_compress_config = dict(
    # Sampling mode; the options listed by the author are 'conv', 'uniform', 'ave'.
    sampling='conv',
    scale_factor=2,
    # Layer indices 22 through 44 inclusive (23 entries).
    # NOTE(review): the model is built with depth=44; if layer indices are
    # 0-based, index 44 is past the last block -- confirm the intended range.
    kv_compress_layer=[*range(22, 45)],
)
# Build the full Diffussy network around the loaded VAE, with KV compression
# configured by `kv_compress` / `kv_compress_config`, then move it to `device`.
# NOTE(review): the original note says this is cuda:0; `device` is defined
# outside this section -- confirm.
model = Diffussy(
    vae=vae,
    depth=44,
    hidden_size=1440,
    num_heads=20,
    kv_compress=kv_compress,
    kv_compress_config=kv_compress_config,
    learn_sigma=True,
    pred_sigma=True,
)
model = model.to(device)
# 2B is all you need... right?