parser.add_argument(
    "--unet_trainable_param_pattern",
    # NOTE(review): the help text says "Regex", but every pattern below is
    # glob-style (fnmatch, e.g. "*attn*weight") — confirm which matcher the
    # training code actually uses.
    #
    # Alternative patterns tried, annotated as: trainable params (tensor count)
    # -- everything --
    # default="*",  # 2,547,492,804 (1,387)
    # default=r"*attentions*",  # 2,208,941,440 (1,245)
    # -- feed-forward (block output) --
    # default=r"*ff.net.0*.weight",  # 819,200,000 (70)
    # default=r"*ff*weight",  # 1,228,800,000 (140)
    # default=[r"*ff.net.0*.weight", r"*to_out*"],  # 1,433,766,400 (420)
    # -- attention --
    # default=r"*attn1*weight",  # 409,600,000 (280)  self-attention
    # default=r"*attn2*weight",  # 545,587,200 (280)  cross-attention
    # default=[r"*attn1*weight", r"*to_out*"],  # 512,166,400 (490)
    # default=[r"*attn2*weight", r"*to_out*"],  # 648,153,600 (490)
    # default=r"*to_out*",  # 204,966,400 (280)  attention output only
    # default=r"(_block|_layer|to_|time_embed|label_emb|skip_connection|proj_in|proj_out)",
    default=r"*attn*weight",  # 955,187,200 (560) = to_q + to_k + to_v + to_out
    help="Regex pattern to match the name of trainable parameters of the UNet.",
)
conv_in.weight torch.Size([320, 4, 3, 3])
conv_in.bias torch.Size([320])
time_embedding.linear_1.weight torch.Size([1280, 320])
time_embedding.linear_1.bias torch.Size([1280])
time_embedding.linear_2.weight torch.Size([1280, 1280])
time_embedding.linear_2.bias torch.Size([1280])
add_embedding.linear_1.weight torch.Size([1280, 2816])
add_embedding.linear_1.bias torch.Size([1280])
add_embedding.linear_2.weight torch.Size([1280, 1280])
add_embedding.linear_2.bias torch.Size([1280])
down_blocks.0.resnets.0.norm1.weight torch.Size([320])
down_blocks.0.resnets.0.norm1.bias torch.Size([320])
down_blocks.0.resnets.0.conv1.weight torch.Size([320, 320, 3, 3])
down_blocks.0.resnets.0.conv1.bias torch.Size([320])
down_blocks.0.resnets.0.time_emb_proj.weight torch.Size([320, 1280])
down_blocks.0.resnets.0.time_emb_proj.bias torch.Size([320])
down_blocks.0.resnets.0.norm2.weight torch.Size([320])
down_blocks.0.resnets.0.norm2.bias torch.Size([320])
down_blocks.0.resnets.0.conv2.weight torch.Size([320, 320, 3, 3])
down_blocks.0.resnets.0.conv2.bias torch.Size([320])
down_blocks.0.resnets.1.norm1.weight torch.Size([320])
down_blocks.0.resnets.1.norm1.bias torch.Size([320])
down_blocks.0.resnets.1.conv1.weight torch.Size([320, 320, 3, 3])
down_blocks.0.resnets.1.conv1.bias torch.Size([320])
down_blocks.0.resnets.1.time_emb_proj.weight torch.Size([320, 1280])
down_blocks.0.resnets.1.time_emb_proj.bias torch.Size([320])
down_blocks.0.resnets.1.norm2.weight torch.Size([320])
down_blocks.0.resnets.1.norm2.bias torch.Size([320])
down_blocks.0.resnets.1.conv2.weight torch.Size([320, 320, 3, 3])
down_blocks.0.resnets.1.conv2.bias torch.Size([320])
down_blocks.0.downsamplers.0.conv.weight torch.Size([320, 320, 3, 3])
down_blocks.0.downsamplers.0.conv.bias torch.Size([320])
down_blocks.1.attentions.0.norm.weight torch.Size([640])
down_blocks.1.attentions.0.norm.bias torch.Size([640])
down_blocks.1.attentions.0.proj_in.weight torch.Size([640, 640])
down_blocks.1.attentions.0.proj_in.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight torch.Size([640, 2048])
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight torch.Size([640, 2048])
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight torch.Size([5120, 640])
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias torch.Size([5120])
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight torch.Size([640, 2560])
down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.norm1.weight torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.norm1.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.norm2.weight torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.norm2.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight torch.Size([640, 2048])
down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight torch.Size([640, 2048])
down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.norm3.weight torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.norm3.bias torch.Size([640])
down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight torch.Size([5120, 640])
down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias torch.Size([5120])
down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight torch.Size([640, 2560])
down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias torch.Size([640])
down_blocks.1.attentions.0.proj_out.weight torch.Size([640, 640])
down_blocks.1.attentions.0.proj_out.bias torch.Size([640])
down_blocks.1.attentions.1.norm.weight torch.Size([640])
down_blocks.1.attentions.1.norm.bias torch.Size([640])
down_blocks.1.attentions.1.proj_in.weight torch.Size([640, 640])
down_blocks.1.attentions.1.proj_in.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight torch.Size([640, 2048])
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight torch.Size([640, 2048])
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight torch.Size([5120, 640])
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias torch.Size([5120])
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight torch.Size([640, 2560])
down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.norm1.weight torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.norm1.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.norm2.weight torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.norm2.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight torch.Size([640, 2048])
down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight torch.Size([640, 2048])
down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight torch.Size([640, 640])
down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.norm3.weight torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.norm3.bias torch.Size([640])
down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight torch.Size([5120, 640])
down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.bias torch.Size([5120])
down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight torch.Size([640, 2560])
down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.bias torch.Size([640])
down_blocks.1.attentions.1.proj_out.weight torch.Size([640, 640])
down_blocks.1.attentions.1.proj_out.bias torch.Size([640])
down_blocks.1.resnets.0.norm1.weight torch.Size([320])
down_blocks.1.resnets.0.norm1.bias torch.Size([320])
down_blocks.1.resnets.0.conv1.weight torch.Size([640, 320, 3, 3])
down_blocks.1.resnets.0.conv1.bias torch.Size([640])
down_blocks.1.resnets.0.time_emb_proj.weight torch.Size([640, 1280])
down_blocks.1.resnets.0.time_emb_proj.bias torch.Size([640])
down_blocks.1.resnets.0.norm2.weight torch.Size([640])
down_blocks.1.resnets.0.norm2.bias torch.Size([640])
down_blocks.1.resnets.0.conv2.weight torch.Size([640, 640, 3, 3])
down_blocks.1.resnets.0.conv2.bias torch.Size([640])
down_blocks.1.resnets.0.conv_shortcut.weight torch.Size([640, 320, 1, 1])
down_blocks.1.resnets.0.conv_shortcut.bias torch.Size([640])
down_blocks.1.resnets.1.norm1.weight torch.Size([640])
down_blocks.1.resnets.1.norm1.bias torch.Size([640])
down_blocks.1.resnets.1.conv1.weight torch.Size([640, 640, 3, 3])
down_blocks.1.resnets.1.conv1.bias torch.Size([640])
down_blocks.1.resnets.1.time_emb_proj.weight torch.Size([640, 1280])
down_blocks.1.resnets.1.time_emb_proj.bias torch.Size([640])
down_blocks.1.resnets.1.norm2.weight torch.Size([640])
down_blocks.1.resnets.1.norm2.bias torch.Size([640])
down_blocks.1.resnets.1.conv2.weight torch.Size([640, 640, 3, 3])
down_blocks.1.resnets.1.conv2.bias torch.Size([640])
down_blocks.1.downsamplers.0.conv.weight torch.Size([640, 640, 3, 3])
down_blocks.1.downsamplers.0.conv.bias torch.Size([640])
down_blocks.2.attentions.0.norm.weight torch.Size([1280])
down_blocks.2.attentions.0.norm.bias torch.Size([1280])
down_blocks.2.attentions.0.proj_in.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.proj_in.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.norm1.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.norm1.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.norm2.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.norm2.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.norm3.weight torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.norm3.bias torch.Size([1280])
down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.0.proj_out.weight torch.Size([1280, 1280])
down_blocks.2.attentions.0.proj_out.bias torch.Size([1280])
down_blocks.2.attentions.1.norm.weight torch.Size([1280])
down_blocks.2.attentions.1.norm.bias torch.Size([1280])
down_blocks.2.attentions.1.proj_in.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.proj_in.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.norm1.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.norm1.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.norm2.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.norm2.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.weight torch.Size([1280, 2048])
down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.norm3.weight torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.norm3.bias torch.Size([1280])
down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.weight torch.Size([10240, 1280])
down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.bias torch.Size([10240])
down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.weight torch.Size([1280, 5120])
down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.bias torch.Size([1280])
down_blocks.2.attentions.1.proj_out.weight torch.Size([1280, 1280])
down_blocks.2.attentions.1.proj_out.bias torch.Size([1280])
down_blocks.2.resnets.0.norm1.weight torch.Size([640])
down_blocks.2.resnets.0.norm1.bias torch.Size([640])
down_blocks.2.resnets.0.conv1.weight torch.Size([1280, 640, 3, 3])
down_blocks.2.resnets.0.conv1.bias torch.Size([1280])
down_blocks.2.resnets.0.time_emb_proj.weight torch.Size([1280, 1280])
down_blocks.2.resnets.0.time_emb_proj.bias torch.Size([1280])
down_blocks.2.resnets.0.norm2.weight torch.Size([1280])
down_blocks.2.resnets.0.norm2.bias torch.Size([1280])
down_blocks.2.resnets.0.conv2.weight torch.Size([1280, 1280, 3, 3])
down_blocks.2.resnets.0.conv2.bias torch.Size([1280])
down_blocks.2.resnets.0.conv_shortcut.weight torch.Size([1280, 640, 1, 1])
down_blocks.2.resnets.0.conv_shortcut.bias torch.Size([1280])
down_blocks.2.resnets.1.norm1.weight torch.Size([1280])
down_blocks.2.resnets.1.norm1.bias torch.Size([1280])
down_blocks.2.resnets.1.conv1.weight torch.Size([1280, 1280, 3, 3])
down_blocks.2.resnets.1.conv1.bias torch.Size([1280])
down_blocks.2.resnets.1.time_emb_proj.weight torch.Size([1280, 1280])
down_blocks.2.resnets.1.time_emb_proj.bias torch.Size([1280])
down_blocks.2.resnets.1.norm2.weight torch.Size([1280])
down_blocks.2.resnets.1.norm2.bias torch.Size([1280])
down_blocks.2.resnets.1.conv2.weight torch.Size([1280, 1280, 3, 3])
down_blocks.2.resnets.1.conv2.bias torch.Size([1280])
up_blocks.0.attentions.0.norm.weight torch.Size([1280])
up_blocks.0.attentions.0.norm.bias torch.Size([1280])
up_blocks.0.attentions.0.proj_in.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.proj_in.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.norm1.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.norm1.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.norm2.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.norm2.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.norm3.weight torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.norm3.bias torch.Size([1280])
up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.0.proj_out.weight torch.Size([1280, 1280])
up_blocks.0.attentions.0.proj_out.bias torch.Size([1280])
up_blocks.0.attentions.1.norm.weight torch.Size([1280])
up_blocks.0.attentions.1.norm.bias torch.Size([1280])
up_blocks.0.attentions.1.proj_in.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.proj_in.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.norm1.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.norm1.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.norm2.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.norm2.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.norm3.weight torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.norm3.bias torch.Size([1280])
up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.1.proj_out.weight torch.Size([1280, 1280])
up_blocks.0.attentions.1.proj_out.bias torch.Size([1280])
up_blocks.0.attentions.2.norm.weight torch.Size([1280])
up_blocks.0.attentions.2.norm.bias torch.Size([1280])
up_blocks.0.attentions.2.proj_in.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.proj_in.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.norm1.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.norm1.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.norm2.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.norm2.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.weight torch.Size([1280, 2048])
up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.norm3.weight torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.norm3.bias torch.Size([1280])
up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.weight torch.Size([10240, 1280])
up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.bias torch.Size([10240])
up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.weight torch.Size([1280, 5120])
up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.bias torch.Size([1280])
up_blocks.0.attentions.2.proj_out.weight torch.Size([1280, 1280])
up_blocks.0.attentions.2.proj_out.bias torch.Size([1280])
up_blocks.0.resnets.0.norm1.weight torch.Size([2560])
up_blocks.0.resnets.0.norm1.bias torch.Size([2560])
up_blocks.0.resnets.0.conv1.weight torch.Size([1280, 2560, 3, 3])
up_blocks.0.resnets.0.conv1.bias torch.Size([1280])
up_blocks.0.resnets.0.time_emb_proj.weight torch.Size([1280, 1280])
up_blocks.0.resnets.0.time_emb_proj.bias torch.Size([1280])
up_blocks.0.resnets.0.norm2.weight torch.Size([1280])
up_blocks.0.resnets.0.norm2.bias torch.Size([1280])
up_blocks.0.resnets.0.conv2.weight torch.Size([1280, 1280, 3, 3])
up_blocks.0.resnets.0.conv2.bias torch.Size([1280])
up_blocks.0.resnets.0.conv_shortcut.weight torch.Size([1280, 2560, 1, 1])
up_blocks.0.resnets.0.conv_shortcut.bias torch.Size([1280])
up_blocks.0.resnets.1.norm1.weight torch.Size([2560])
up_blocks.0.resnets.1.norm1.bias torch.Size([2560])
up_blocks.0.resnets.1.conv1.weight torch.Size([1280, 2560, 3, 3])
up_blocks.0.resnets.1.conv1.bias torch.Size([1280])
up_blocks.0.resnets.1.time_emb_proj.weight torch.Size([1280, 1280])
up_blocks.0.resnets.1.time_emb_proj.bias torch.Size([1280])
up_blocks.0.resnets.1.norm2.weight torch.Size([1280])
up_blocks.0.resnets.1.norm2.bias torch.Size([1280])
up_blocks.0.resnets.1.conv2.weight torch.Size([1280, 1280, 3, 3])
up_blocks.0.resnets.1.conv2.bias torch.Size([1280])
up_blocks.0.resnets.1.conv_shortcut.weight torch.Size([1280, 2560, 1, 1])
up_blocks.0.resnets.1.conv_shortcut.bias torch.Size([1280])
up_blocks.0.resnets.2.norm1.weight torch.Size([1920])
up_blocks.0.resnets.2.norm1.bias torch.Size([1920])
up_blocks.0.resnets.2.conv1.weight torch.Size([1280, 1920, 3, 3])
up_blocks.0.resnets.2.conv1.bias torch.Size([1280])
up_blocks.0.resnets.2.time_emb_proj.weight torch.Size([1280, 1280])
up_blocks.0.resnets.2.time_emb_proj.bias torch.Size([1280])
up_blocks.0.resnets.2.norm2.weight torch.Size([1280])
up_blocks.0.resnets.2.norm2.bias torch.Size([1280])
up_blocks.0.resnets.2.conv2.weight torch.Size([1280, 1280, 3, 3])
up_blocks.0.resnets.2.conv2.bias torch.Size([1280])
up_blocks.0.resnets.2.conv_shortcut.weight torch.Size([1280, 1920, 1, 1])
up_blocks.0.resnets.2.conv_shortcut.bias torch.Size([1280])
up_blocks.0.upsamplers.0.conv.weight torch.Size([1280, 1280, 3, 3])
up_blocks.0.upsamplers.0.conv.bias torch.Size([1280])
up_blocks.1.attentions.0.norm.weight torch.Size([640])
up_blocks.1.attentions.0.norm.bias torch.Size([640])
up_blocks.1.attentions.0.proj_in.weight torch.Size([640, 640])
up_blocks.1.attentions.0.proj_in.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight torch.Size([640, 2048])
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight torch.Size([640, 2048])
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight torch.Size([5120, 640])
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias torch.Size([5120])
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight torch.Size([640, 2560])
up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.norm1.weight torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.norm1.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.norm2.weight torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.norm2.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight torch.Size([640, 2048])
up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight torch.Size([640, 2048])
up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.norm3.weight torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.norm3.bias torch.Size([640])
up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight torch.Size([5120, 640])
up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias torch.Size([5120])
up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight torch.Size([640, 2560])
up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias torch.Size([640])
up_blocks.1.attentions.0.proj_out.weight torch.Size([640, 640])
up_blocks.1.attentions.0.proj_out.bias torch.Size([640])
up_blocks.1.attentions.1.norm.weight torch.Size([640])
up_blocks.1.attentions.1.norm.bias torch.Size([640])
up_blocks.1.attentions.1.proj_in.weight torch.Size([640, 640])
up_blocks.1.attentions.1.proj_in.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight torch.Size([640, 2048])
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight torch.Size([640, 2048])
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight torch.Size([5120, 640])
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias torch.Size([5120])
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight torch.Size([640, 2560])
up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.norm1.weight torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.norm1.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.norm2.weight torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.norm2.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight torch.Size([640, 2048])
up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight torch.Size([640, 2048])
up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.norm3.weight torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.norm3.bias torch.Size([640])
up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight torch.Size([5120, 640])
up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.bias torch.Size([5120])
up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight torch.Size([640, 2560])
up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.bias torch.Size([640])
up_blocks.1.attentions.1.proj_out.weight torch.Size([640, 640])
up_blocks.1.attentions.1.proj_out.bias torch.Size([640])
up_blocks.1.attentions.2.norm.weight torch.Size([640])
up_blocks.1.attentions.2.norm.bias torch.Size([640])
up_blocks.1.attentions.2.proj_in.weight torch.Size([640, 640])
up_blocks.1.attentions.2.proj_in.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight torch.Size([640, 2048])
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight torch.Size([640, 2048])
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight torch.Size([5120, 640])
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias torch.Size([5120])
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight torch.Size([640, 2560])
up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.norm1.weight torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.norm1.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.norm2.weight torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.norm2.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.weight torch.Size([640, 2048])
up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.weight torch.Size([640, 2048])
up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.weight torch.Size([640, 640])
up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.norm3.weight torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.norm3.bias torch.Size([640])
up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.weight torch.Size([5120, 640])
up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.bias torch.Size([5120])
up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.weight torch.Size([640, 2560])
up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.bias torch.Size([640])
up_blocks.1.attentions.2.proj_out.weight torch.Size([640, 640])
up_blocks.1.attentions.2.proj_out.bias torch.Size([640])
up_blocks.1.resnets.0.norm1.weight torch.Size([1920])
up_blocks.1.resnets.0.norm1.bias torch.Size([1920])
up_blocks.1.resnets.0.conv1.weight torch.Size([640, 1920, 3, 3])
up_blocks.1.resnets.0.conv1.bias torch.Size([640])
up_blocks.1.resnets.0.time_emb_proj.weight torch.Size([640, 1280])
up_blocks.1.resnets.0.time_emb_proj.bias torch.Size([640])
up_blocks.1.resnets.0.norm2.weight torch.Size([640])
up_blocks.1.resnets.0.norm2.bias torch.Size([640])
up_blocks.1.resnets.0.conv2.weight torch.Size([640, 640, 3, 3])
up_blocks.1.resnets.0.conv2.bias torch.Size([640])
up_blocks.1.resnets.0.conv_shortcut.weight torch.Size([640, 1920, 1, 1])
up_blocks.1.resnets.0.conv_shortcut.bias torch.Size([640])
up_blocks.1.resnets.1.norm1.weight torch.Size([1280])
up_blocks.1.resnets.1.norm1.bias torch.Size([1280])
up_blocks.1.resnets.1.conv1.weight torch.Size([640, 1280, 3, 3])
up_blocks.1.resnets.1.conv1.bias torch.Size([640])
up_blocks.1.resnets.1.time_emb_proj.weight torch.Size([640, 1280])
up_blocks.1.resnets.1.time_emb_proj.bias torch.Size([640])
up_blocks.1.resnets.1.norm2.weight torch.Size([640])
up_blocks.1.resnets.1.norm2.bias torch.Size([640])
up_blocks.1.resnets.1.conv2.weight torch.Size([640, 640, 3, 3])
up_blocks.1.resnets.1.conv2.bias torch.Size([640])
up_blocks.1.resnets.1.conv_shortcut.weight torch.Size([640, 1280, 1, 1])
up_blocks.1.resnets.1.conv_shortcut.bias torch.Size([640])
up_blocks.1.resnets.2.norm1.weight torch.Size([960])
up_blocks.1.resnets.2.norm1.bias torch.Size([960])
up_blocks.1.resnets.2.conv1.weight torch.Size([640, 960, 3, 3])
up_blocks.1.resnets.2.conv1.bias torch.Size([640])
up_blocks.1.resnets.2.time_emb_proj.weight torch.Size([640, 1280])
up_blocks.1.resnets.2.time_emb_proj.bias torch.Size([640])
up_blocks.1.resnets.2.norm2.weight torch.Size([640])
up_blocks.1.resnets.2.norm2.bias torch.Size([640])
up_blocks.1.resnets.2.conv2.weight torch.Size([640, 640, 3, 3])
up_blocks.1.resnets.2.conv2.bias torch.Size([640])
up_blocks.1.resnets.2.conv_shortcut.weight torch.Size([640, 960, 1, 1])
up_blocks.1.resnets.2.conv_shortcut.bias torch.Size([640])
up_blocks.1.upsamplers.0.conv.weight torch.Size([640, 640, 3, 3])
up_blocks.1.upsamplers.0.conv.bias torch.Size([640])
up_blocks.2.resnets.0.norm1.weight torch.Size([960])
up_blocks.2.resnets.0.norm1.bias torch.Size([960])
up_blocks.2.resnets.0.conv1.weight torch.Size([320, 960, 3, 3])
up_blocks.2.resnets.0.conv1.bias torch.Size([320])
up_blocks.2.resnets.0.time_emb_proj.weight torch.Size([320, 1280])
up_blocks.2.resnets.0.time_emb_proj.bias torch.Size([320])
up_blocks.2.resnets.0.norm2.weight torch.Size([320])
up_blocks.2.resnets.0.norm2.bias torch.Size([320])
up_blocks.2.resnets.0.conv2.weight torch.Size([320, 320, 3, 3])
up_blocks.2.resnets.0.conv2.bias torch.Size([320])
up_blocks.2.resnets.0.conv_shortcut.weight torch.Size([320, 960, 1, 1])
up_blocks.2.resnets.0.conv_shortcut.bias torch.Size([320])
up_blocks.2.resnets.1.norm1.weight torch.Size([640])
up_blocks.2.resnets.1.norm1.bias torch.Size([640])
up_blocks.2.resnets.1.conv1.weight torch.Size([320, 640, 3, 3])
up_blocks.2.resnets.1.conv1.bias torch.Size([320])
up_blocks.2.resnets.1.time_emb_proj.weight torch.Size([320, 1280])
up_blocks.2.resnets.1.time_emb_proj.bias torch.Size([320])
up_blocks.2.resnets.1.norm2.weight torch.Size([320])
up_blocks.2.resnets.1.norm2.bias torch.Size([320])
up_blocks.2.resnets.1.conv2.weight torch.Size([320, 320, 3, 3])
up_blocks.2.resnets.1.conv2.bias torch.Size([320])
up_blocks.2.resnets.1.conv_shortcut.weight torch.Size([320, 640, 1, 1])
up_blocks.2.resnets.1.conv_shortcut.bias torch.Size([320])
up_blocks.2.resnets.2.norm1.weight torch.Size([640])
up_blocks.2.resnets.2.norm1.bias torch.Size([640])
up_blocks.2.resnets.2.conv1.weight torch.Size([320, 640, 3, 3])
up_blocks.2.resnets.2.conv1.bias torch.Size([320])
up_blocks.2.resnets.2.time_emb_proj.weight torch.Size([320, 1280])
up_blocks.2.resnets.2.time_emb_proj.bias torch.Size([320])
up_blocks.2.resnets.2.norm2.weight torch.Size([320])
up_blocks.2.resnets.2.norm2.bias torch.Size([320])
up_blocks.2.resnets.2.conv2.weight torch.Size([320, 320, 3, 3])
up_blocks.2.resnets.2.conv2.bias torch.Size([320])
up_blocks.2.resnets.2.conv_shortcut.weight torch.Size([320, 640, 1, 1])
up_blocks.2.resnets.2.conv_shortcut.bias torch.Size([320])
mid_block.attentions.0.norm.weight torch.Size([1280])
mid_block.attentions.0.norm.bias torch.Size([1280])
mid_block.attentions.0.proj_in.weight torch.Size([1280, 1280])
mid_block.attentions.0.proj_in.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.0.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.1.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.1.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.1.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.1.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.1.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.1.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.2.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.2.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.2.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.2.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.2.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.2.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.3.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.3.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.3.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.3.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.3.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.3.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.4.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.4.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.4.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.4.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.4.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.4.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.5.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.5.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.5.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.5.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.5.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.5.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.6.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.6.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.6.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.6.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.6.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.6.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.7.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.7.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.7.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.7.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.7.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.7.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.8.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.8.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.8.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.8.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.8.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.8.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.norm1.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.norm1.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.attn1.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.9.attn1.to_k.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.9.attn1.to_v.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.norm2.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.norm2.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.attn2.to_q.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.9.attn2.to_k.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.9.attn2.to_v.weight torch.Size([1280, 2048])
mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.weight torch.Size([1280, 1280])
mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.norm3.weight torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.norm3.bias torch.Size([1280])
mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.weight torch.Size([10240, 1280])
mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.bias torch.Size([10240])
mid_block.attentions.0.transformer_blocks.9.ff.net.2.weight torch.Size([1280, 5120])
mid_block.attentions.0.transformer_blocks.9.ff.net.2.bias torch.Size([1280])
mid_block.attentions.0.proj_out.weight torch.Size([1280, 1280])
mid_block.attentions.0.proj_out.bias torch.Size([1280])
mid_block.resnets.0.norm1.weight torch.Size([1280])
mid_block.resnets.0.norm1.bias torch.Size([1280])
mid_block.resnets.0.conv1.weight torch.Size([1280, 1280, 3, 3])
mid_block.resnets.0.conv1.bias torch.Size([1280])
mid_block.resnets.0.time_emb_proj.weight torch.Size([1280, 1280])
mid_block.resnets.0.time_emb_proj.bias torch.Size([1280])
mid_block.resnets.0.norm2.weight torch.Size([1280])
mid_block.resnets.0.norm2.bias torch.Size([1280])
mid_block.resnets.0.conv2.weight torch.Size([1280, 1280, 3, 3])
mid_block.resnets.0.conv2.bias torch.Size([1280])
mid_block.resnets.1.norm1.weight torch.Size([1280])
mid_block.resnets.1.norm1.bias torch.Size([1280])
mid_block.resnets.1.conv1.weight torch.Size([1280, 1280, 3, 3])
mid_block.resnets.1.conv1.bias torch.Size([1280])
mid_block.resnets.1.time_emb_proj.weight torch.Size([1280, 1280])
mid_block.resnets.1.time_emb_proj.bias torch.Size([1280])
mid_block.resnets.1.norm2.weight torch.Size([1280])
mid_block.resnets.1.norm2.bias torch.Size([1280])
mid_block.resnets.1.conv2.weight torch.Size([1280, 1280, 3, 3])
mid_block.resnets.1.conv2.bias torch.Size([1280])
conv_norm_out.weight torch.Size([320])
conv_norm_out.bias torch.Size([320])
conv_out.weight torch.Size([4, 320, 3, 3])
conv_out.bias torch.Size([4])
ipdb> unet
UNet2DConditionModel(
(conv_in): Conv2d(4, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_proj): Timesteps()
(time_embedding): TimestepEmbedding(
(linear_1): Linear(in_features=320, out_features=1280, bias=True)
(act): SiLU()
(linear_2): Linear(in_features=1280, out_features=1280, bias=True)
)
(add_time_proj): Timesteps()
(add_embedding): TimestepEmbedding(
(linear_1): Linear(in_features=2816, out_features=1280, bias=True)
(act): SiLU()
(linear_2): Linear(in_features=1280, out_features=1280, bias=True)
)
(down_blocks): ModuleList(
(0): DownBlock2D(
(resnets): ModuleList(
(0-1): 2 x ResnetBlock2D(
(norm1): GroupNorm(32, 320, eps=1e-05, affine=True)
(conv1): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=320, bias=True)
(norm2): GroupNorm(32, 320, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
)
)
(downsamplers): ModuleList(
(0): Downsample2D(
(conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
)
)
(1): CrossAttnDownBlock2D(
(attentions): ModuleList(
(0-1): 2 x Transformer2DModel(
(norm): GroupNorm(32, 640, eps=1e-06, affine=True)
(proj_in): Linear(in_features=640, out_features=640, bias=True)
(transformer_blocks): ModuleList(
(0-1): 2 x BasicTransformerBlock(
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
(attn1): Attention(
(to_q): Linear(in_features=640, out_features=640, bias=False)
(to_k): Linear(in_features=640, out_features=640, bias=False)
(to_v): Linear(in_features=640, out_features=640, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=640, out_features=640, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
(attn2): Attention(
(to_q): Linear(in_features=640, out_features=640, bias=False)
(to_k): Linear(in_features=2048, out_features=640, bias=False)
(to_v): Linear(in_features=2048, out_features=640, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=640, out_features=640, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
(ff): FeedForward(
(net): ModuleList(
(0): GEGLU(
(proj): Linear(in_features=640, out_features=5120, bias=True)
)
(1): Dropout(p=0.0, inplace=False)
(2): Linear(in_features=2560, out_features=640, bias=True)
)
)
)
)
(proj_out): Linear(in_features=640, out_features=640, bias=True)
)
)
(resnets): ModuleList(
(0): ResnetBlock2D(
(norm1): GroupNorm(32, 320, eps=1e-05, affine=True)
(conv1): Conv2d(320, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True)
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(320, 640, kernel_size=(1, 1), stride=(1, 1))
)
(1): ResnetBlock2D(
(norm1): GroupNorm(32, 640, eps=1e-05, affine=True)
(conv1): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True)
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
)
)
(downsamplers): ModuleList(
(0): Downsample2D(
(conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
)
)
(2): CrossAttnDownBlock2D(
(attentions): ModuleList(
(0-1): 2 x Transformer2DModel(
(norm): GroupNorm(32, 1280, eps=1e-06, affine=True)
(proj_in): Linear(in_features=1280, out_features=1280, bias=True)
(transformer_blocks): ModuleList(
(0-9): 10 x BasicTransformerBlock(
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(attn1): Attention(
(to_q): Linear(in_features=1280, out_features=1280, bias=False)
(to_k): Linear(in_features=1280, out_features=1280, bias=False)
(to_v): Linear(in_features=1280, out_features=1280, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=1280, out_features=1280, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(attn2): Attention(
(to_q): Linear(in_features=1280, out_features=1280, bias=False)
(to_k): Linear(in_features=2048, out_features=1280, bias=False)
(to_v): Linear(in_features=2048, out_features=1280, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=1280, out_features=1280, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(ff): FeedForward(
(net): ModuleList(
(0): GEGLU(
(proj): Linear(in_features=1280, out_features=10240, bias=True)
)
(1): Dropout(p=0.0, inplace=False)
(2): Linear(in_features=5120, out_features=1280, bias=True)
)
)
)
)
(proj_out): Linear(in_features=1280, out_features=1280, bias=True)
)
)
(resnets): ModuleList(
(0): ResnetBlock2D(
(norm1): GroupNorm(32, 640, eps=1e-05, affine=True)
(conv1): Conv2d(640, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True)
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(640, 1280, kernel_size=(1, 1), stride=(1, 1))
)
(1): ResnetBlock2D(
(norm1): GroupNorm(32, 1280, eps=1e-05, affine=True)
(conv1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True)
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
)
)
)
)
(up_blocks): ModuleList(
(0): CrossAttnUpBlock2D(
(attentions): ModuleList(
(0-2): 3 x Transformer2DModel(
(norm): GroupNorm(32, 1280, eps=1e-06, affine=True)
(proj_in): Linear(in_features=1280, out_features=1280, bias=True)
(transformer_blocks): ModuleList(
(0-9): 10 x BasicTransformerBlock(
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(attn1): Attention(
(to_q): Linear(in_features=1280, out_features=1280, bias=False)
(to_k): Linear(in_features=1280, out_features=1280, bias=False)
(to_v): Linear(in_features=1280, out_features=1280, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=1280, out_features=1280, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(attn2): Attention(
(to_q): Linear(in_features=1280, out_features=1280, bias=False)
(to_k): Linear(in_features=2048, out_features=1280, bias=False)
(to_v): Linear(in_features=2048, out_features=1280, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=1280, out_features=1280, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(ff): FeedForward(
(net): ModuleList(
(0): GEGLU(
(proj): Linear(in_features=1280, out_features=10240, bias=True)
)
(1): Dropout(p=0.0, inplace=False)
(2): Linear(in_features=5120, out_features=1280, bias=True)
)
)
)
)
(proj_out): Linear(in_features=1280, out_features=1280, bias=True)
)
)
(resnets): ModuleList(
(0-1): 2 x ResnetBlock2D(
(norm1): GroupNorm(32, 2560, eps=1e-05, affine=True)
(conv1): Conv2d(2560, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True)
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(2560, 1280, kernel_size=(1, 1), stride=(1, 1))
)
(2): ResnetBlock2D(
(norm1): GroupNorm(32, 1920, eps=1e-05, affine=True)
(conv1): Conv2d(1920, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True)
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(1920, 1280, kernel_size=(1, 1), stride=(1, 1))
)
)
(upsamplers): ModuleList(
(0): Upsample2D(
(conv): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
(1): CrossAttnUpBlock2D(
(attentions): ModuleList(
(0-2): 3 x Transformer2DModel(
(norm): GroupNorm(32, 640, eps=1e-06, affine=True)
(proj_in): Linear(in_features=640, out_features=640, bias=True)
(transformer_blocks): ModuleList(
(0-1): 2 x BasicTransformerBlock(
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
(attn1): Attention(
(to_q): Linear(in_features=640, out_features=640, bias=False)
(to_k): Linear(in_features=640, out_features=640, bias=False)
(to_v): Linear(in_features=640, out_features=640, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=640, out_features=640, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
(attn2): Attention(
(to_q): Linear(in_features=640, out_features=640, bias=False)
(to_k): Linear(in_features=2048, out_features=640, bias=False)
(to_v): Linear(in_features=2048, out_features=640, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=640, out_features=640, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
(ff): FeedForward(
(net): ModuleList(
(0): GEGLU(
(proj): Linear(in_features=640, out_features=5120, bias=True)
)
(1): Dropout(p=0.0, inplace=False)
(2): Linear(in_features=2560, out_features=640, bias=True)
)
)
)
)
(proj_out): Linear(in_features=640, out_features=640, bias=True)
)
)
(resnets): ModuleList(
(0): ResnetBlock2D(
(norm1): GroupNorm(32, 1920, eps=1e-05, affine=True)
(conv1): Conv2d(1920, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True)
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1))
)
(1): ResnetBlock2D(
(norm1): GroupNorm(32, 1280, eps=1e-05, affine=True)
(conv1): Conv2d(1280, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True)
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(1280, 640, kernel_size=(1, 1), stride=(1, 1))
)
(2): ResnetBlock2D(
(norm1): GroupNorm(32, 960, eps=1e-05, affine=True)
(conv1): Conv2d(960, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True)
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(960, 640, kernel_size=(1, 1), stride=(1, 1))
)
)
(upsamplers): ModuleList(
(0): Upsample2D(
(conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
(2): UpBlock2D(
(resnets): ModuleList(
(0): ResnetBlock2D(
(norm1): GroupNorm(32, 960, eps=1e-05, affine=True)
(conv1): Conv2d(960, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=320, bias=True)
(norm2): GroupNorm(32, 320, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1))
)
(1-2): 2 x ResnetBlock2D(
(norm1): GroupNorm(32, 640, eps=1e-05, affine=True)
(conv1): Conv2d(640, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=320, bias=True)
(norm2): GroupNorm(32, 320, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
(conv_shortcut): Conv2d(640, 320, kernel_size=(1, 1), stride=(1, 1))
)
)
)
)
(mid_block): UNetMidBlock2DCrossAttn(
(attentions): ModuleList(
(0): Transformer2DModel(
(norm): GroupNorm(32, 1280, eps=1e-06, affine=True)
(proj_in): Linear(in_features=1280, out_features=1280, bias=True)
(transformer_blocks): ModuleList(
(0-9): 10 x BasicTransformerBlock(
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(attn1): Attention(
(to_q): Linear(in_features=1280, out_features=1280, bias=False)
(to_k): Linear(in_features=1280, out_features=1280, bias=False)
(to_v): Linear(in_features=1280, out_features=1280, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=1280, out_features=1280, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(attn2): Attention(
(to_q): Linear(in_features=1280, out_features=1280, bias=False)
(to_k): Linear(in_features=2048, out_features=1280, bias=False)
(to_v): Linear(in_features=2048, out_features=1280, bias=False)
(to_out): ModuleList(
(0): Linear(in_features=1280, out_features=1280, bias=True)
(1): Dropout(p=0.0, inplace=False)
)
)
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(ff): FeedForward(
(net): ModuleList(
(0): GEGLU(
(proj): Linear(in_features=1280, out_features=10240, bias=True)
)
(1): Dropout(p=0.0, inplace=False)
(2): Linear(in_features=5120, out_features=1280, bias=True)
)
)
)
)
(proj_out): Linear(in_features=1280, out_features=1280, bias=True)
)
)
(resnets): ModuleList(
(0-1): 2 x ResnetBlock2D(
(norm1): GroupNorm(32, 1280, eps=1e-05, affine=True)
(conv1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True)
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True)
(dropout): Dropout(p=0.0, inplace=False)
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(nonlinearity): SiLU()
)
)
)
(conv_norm_out): GroupNorm(32, 320, eps=1e-05, affine=True)
(conv_act): SiLU()
(conv_out): Conv2d(320, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)