Skip to content

Commit 34879b4

Browse files
authored
Merge branch 'master' into master
2 parents b7872e2 + 8f37471 commit 34879b4

File tree

11 files changed

+936
-43
lines changed

11 files changed

+936
-43
lines changed

QUANTIZATION.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,9 @@ Example:
139139
"_quantization_metadata": {
140140
"format_version": "1.0",
141141
"layers": {
142-
"model.layers.0.mlp.up_proj": "float8_e4m3fn",
143-
"model.layers.0.mlp.down_proj": "float8_e4m3fn",
144-
"model.layers.1.mlp.up_proj": "float8_e4m3fn"
142+
"model.layers.0.mlp.up_proj": {"format": "float8_e4m3fn"},
143+
"model.layers.0.mlp.down_proj": {"format": "float8_e4m3fn"},
144+
"model.layers.1.mlp.up_proj": {"format": "float8_e4m3fn"}
145145
}
146146
}
147147
}
@@ -165,4 +165,4 @@ Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_s
165165
3. **Compute scales**: Derive `input_scale` from collected statistics
166166
4. **Store in checkpoint**: Save `input_scale` parameters alongside weights
167167

168-
The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters.
168+
The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters.

comfy/ldm/ernie/model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
1515

1616
scale = torch.arange(0, dim, 2, dtype=torch.float64, device=device) / dim
1717
omega = 1.0 / (theta**scale)
18-
out = torch.einsum("...n,d->...nd", pos, omega)
18+
out = torch.einsum("...n,d->...nd", pos.to(device), omega)
1919
out = torch.stack([torch.cos(out), torch.sin(out)], dim=0)
2020
return out.to(dtype=torch.float32, device=pos.device)
2121

@@ -279,7 +279,7 @@ def forward(self, x, timesteps, context, **kwargs):
279279
rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)).to(x.dtype)
280280
del image_ids, text_ids
281281

282-
sample = self.time_proj(timesteps.to(dtype)).to(self.time_embedding.linear_1.weight.dtype)
282+
sample = self.time_proj(timesteps).to(dtype)
283283
c = self.time_embedding(sample)
284284

285285
shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = [

comfy/text_encoders/ernie.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import comfy.text_encoders.llama
44

55
class Ministral3_3BTokenizer(Mistral3Tokenizer):
6-
def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_data={}):
6+
def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='ministral3_3b', tokenizer_data={}):
77
return super().__init__(embedding_directory=embedding_directory, embedding_size=embedding_size, embedding_key=embedding_key, tokenizer_data=tokenizer_data)
88

99
class ErnieTokenizer(sd1_clip.SD1Tokenizer):

comfy/text_encoders/llama.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class Ministral3_3BConfig:
8282
rope_scale = None
8383
final_norm: bool = True
8484
lm_head: bool = False
85+
stop_tokens = [2]
8586

8687
@dataclass
8788
class Qwen25_3BConfig:
@@ -969,7 +970,7 @@ def __init__(self, config_dict, dtype, device, operations):
969970
self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
970971
self.dtype = dtype
971972

972-
class Ministral3_3B(BaseLlama, torch.nn.Module):
973+
class Ministral3_3B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
973974
def __init__(self, config_dict, dtype, device, operations):
974975
super().__init__()
975976
config = Ministral3_3BConfig(**config_dict)

comfy_api_nodes/apis/bytedance.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,26 @@ class TaskImageContent(BaseModel):
5252
role: Literal["first_frame", "last_frame", "reference_image"] | None = Field(None)
5353

5454

55+
class TaskVideoContentUrl(BaseModel):
56+
url: str = Field(...)
57+
58+
59+
class TaskVideoContent(BaseModel):
60+
type: str = Field("video_url")
61+
video_url: TaskVideoContentUrl = Field(...)
62+
role: str = Field("reference_video")
63+
64+
65+
class TaskAudioContentUrl(BaseModel):
66+
url: str = Field(...)
67+
68+
69+
class TaskAudioContent(BaseModel):
70+
type: str = Field("audio_url")
71+
audio_url: TaskAudioContentUrl = Field(...)
72+
role: str = Field("reference_audio")
73+
74+
5575
class Text2VideoTaskCreationRequest(BaseModel):
5676
model: str = Field(...)
5777
content: list[TaskTextContent] = Field(..., min_length=1)
@@ -64,6 +84,17 @@ class Image2VideoTaskCreationRequest(BaseModel):
6484
generate_audio: bool | None = Field(...)
6585

6686

87+
class Seedance2TaskCreationRequest(BaseModel):
88+
model: str = Field(...)
89+
content: list[TaskTextContent | TaskImageContent | TaskVideoContent | TaskAudioContent] = Field(..., min_length=1)
90+
generate_audio: bool | None = Field(None)
91+
resolution: str | None = Field(None)
92+
ratio: str | None = Field(None)
93+
duration: int | None = Field(None, ge=4, le=15)
94+
seed: int | None = Field(None, ge=0, le=2147483647)
95+
watermark: bool | None = Field(None)
96+
97+
6798
class TaskCreationResponse(BaseModel):
6899
id: str = Field(...)
69100

@@ -77,12 +108,27 @@ class TaskStatusResult(BaseModel):
77108
video_url: str = Field(...)
78109

79110

111+
class TaskStatusUsage(BaseModel):
112+
completion_tokens: int = Field(0)
113+
total_tokens: int = Field(0)
114+
115+
80116
class TaskStatusResponse(BaseModel):
81117
id: str = Field(...)
82118
model: str = Field(...)
83119
status: Literal["queued", "running", "cancelled", "succeeded", "failed"] = Field(...)
84120
error: TaskStatusError | None = Field(None)
85121
content: TaskStatusResult | None = Field(None)
122+
usage: TaskStatusUsage | None = Field(None)
123+
124+
125+
# Dollars per 1K tokens, keyed by (model_id, has_video_input).
126+
SEEDANCE2_PRICE_PER_1K_TOKENS = {
127+
("dreamina-seedance-2-0-260128", False): 0.007,
128+
("dreamina-seedance-2-0-260128", True): 0.0043,
129+
("dreamina-seedance-2-0-fast-260128", False): 0.0056,
130+
("dreamina-seedance-2-0-fast-260128", True): 0.0033,
131+
}
86132

87133

88134
RECOMMENDED_PRESETS = [
@@ -112,6 +158,12 @@ class TaskStatusResponse(BaseModel):
112158
("Custom", None, None),
113159
]
114160

161+
# Seedance 2.0 reference video pixel count limits per model.
162+
SEEDANCE2_REF_VIDEO_PIXEL_LIMITS = {
163+
"dreamina-seedance-2-0-260128": {"min": 409_600, "max": 927_408},
164+
"dreamina-seedance-2-0-fast-260128": {"min": 409_600, "max": 927_408},
165+
}
166+
115167
# The times in this dictionary are given for a 10-second duration.
116168
VIDEO_TASKS_EXECUTION_TIME = {
117169
"seedance-1-0-lite-t2v-250428": {

0 commit comments

Comments
 (0)