Skip to content

Commit 34879b4

Browse files
authored
Merge branch 'master' into master
2 parents b7872e2 + 8f37471 commit 34879b4

File tree

11 files changed

+936
-43
lines changed

11 files changed

+936
-43
lines changed

QUANTIZATION.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,9 @@ Example:
139139
"_quantization_metadata": {
140140
"format_version": "1.0",
141141
"layers": {
142-
"model.layers.0.mlp.up_proj": "float8_e4m3fn",
143-
"model.layers.0.mlp.down_proj": "float8_e4m3fn",
144-
"model.layers.1.mlp.up_proj": "float8_e4m3fn"
142+
"model.layers.0.mlp.up_proj": {"format": "float8_e4m3fn"},
143+
"model.layers.0.mlp.down_proj": {"format": "float8_e4m3fn"},
144+
"model.layers.1.mlp.up_proj": {"format": "float8_e4m3fn"}
145145
}
146146
}
147147
}
@@ -165,4 +165,4 @@ Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_s
165165
3. **Compute scales**: Derive `input_scale` from collected statistics
166166
4. **Store in checkpoint**: Save `input_scale` parameters alongside weights
167167

168-
The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters.
168+
The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters.

comfy/ldm/ernie/model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
1515

1616
scale = torch.arange(0, dim, 2, dtype=torch.float64, device=device) / dim
1717
omega = 1.0 / (theta**scale)
18-
out = torch.einsum("...n,d->...nd", pos, omega)
18+
out = torch.einsum("...n,d->...nd", pos.to(device), omega)
1919
out = torch.stack([torch.cos(out), torch.sin(out)], dim=0)
2020
return out.to(dtype=torch.float32, device=pos.device)
2121

@@ -279,7 +279,7 @@ def forward(self, x, timesteps, context, **kwargs):
279279
rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)).to(x.dtype)
280280
del image_ids, text_ids
281281

282-
sample = self.time_proj(timesteps.to(dtype)).to(self.time_embedding.linear_1.weight.dtype)
282+
sample = self.time_proj(timesteps).to(dtype)
283283
c = self.time_embedding(sample)
284284

285285
shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = [

comfy/text_encoders/ernie.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import comfy.text_encoders.llama
44

55
class Ministral3_3BTokenizer(Mistral3Tokenizer):
6-
def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_data={}):
6+
def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='ministral3_3b', tokenizer_data={}):
77
return super().__init__(embedding_directory=embedding_directory, embedding_size=embedding_size, embedding_key=embedding_key, tokenizer_data=tokenizer_data)
88

99
class ErnieTokenizer(sd1_clip.SD1Tokenizer):

comfy/text_encoders/llama.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class Ministral3_3BConfig:
8282
rope_scale = None
8383
final_norm: bool = True
8484
lm_head: bool = False
85+
stop_tokens = [2]
8586

8687
@dataclass
8788
class Qwen25_3BConfig:
@@ -969,7 +970,7 @@ def __init__(self, config_dict, dtype, device, operations):
969970
self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
970971
self.dtype = dtype
971972

972-
class Ministral3_3B(BaseLlama, torch.nn.Module):
973+
class Ministral3_3B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
973974
def __init__(self, config_dict, dtype, device, operations):
974975
super().__init__()
975976
config = Ministral3_3BConfig(**config_dict)

comfy_api_nodes/apis/bytedance.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,26 @@ class TaskImageContent(BaseModel):
5252
role: Literal["first_frame", "last_frame", "reference_image"] | None = Field(None)
5353

5454

55+
class TaskVideoContentUrl(BaseModel):
56+
url: str = Field(...)
57+
58+
59+
class TaskVideoContent(BaseModel):
60+
type: str = Field("video_url")
61+
video_url: TaskVideoContentUrl = Field(...)
62+
role: str = Field("reference_video")
63+
64+
65+
class TaskAudioContentUrl(BaseModel):
66+
url: str = Field(...)
67+
68+
69+
class TaskAudioContent(BaseModel):
70+
type: str = Field("audio_url")
71+
audio_url: TaskAudioContentUrl = Field(...)
72+
role: str = Field("reference_audio")
73+
74+
5575
class Text2VideoTaskCreationRequest(BaseModel):
5676
model: str = Field(...)
5777
content: list[TaskTextContent] = Field(..., min_length=1)
@@ -64,6 +84,17 @@ class Image2VideoTaskCreationRequest(BaseModel):
6484
generate_audio: bool | None = Field(...)
6585

6686

87+
class Seedance2TaskCreationRequest(BaseModel):
88+
model: str = Field(...)
89+
content: list[TaskTextContent | TaskImageContent | TaskVideoContent | TaskAudioContent] = Field(..., min_length=1)
90+
generate_audio: bool | None = Field(None)
91+
resolution: str | None = Field(None)
92+
ratio: str | None = Field(None)
93+
duration: int | None = Field(None, ge=4, le=15)
94+
seed: int | None = Field(None, ge=0, le=2147483647)
95+
watermark: bool | None = Field(None)
96+
97+
6798
class TaskCreationResponse(BaseModel):
6899
id: str = Field(...)
69100

@@ -77,12 +108,27 @@ class TaskStatusResult(BaseModel):
77108
video_url: str = Field(...)
78109

79110

111+
class TaskStatusUsage(BaseModel):
112+
completion_tokens: int = Field(0)
113+
total_tokens: int = Field(0)
114+
115+
80116
class TaskStatusResponse(BaseModel):
81117
id: str = Field(...)
82118
model: str = Field(...)
83119
status: Literal["queued", "running", "cancelled", "succeeded", "failed"] = Field(...)
84120
error: TaskStatusError | None = Field(None)
85121
content: TaskStatusResult | None = Field(None)
122+
usage: TaskStatusUsage | None = Field(None)
123+
124+
125+
# Dollars per 1K tokens, keyed by (model_id, has_video_input).
126+
SEEDANCE2_PRICE_PER_1K_TOKENS = {
127+
("dreamina-seedance-2-0-260128", False): 0.007,
128+
("dreamina-seedance-2-0-260128", True): 0.0043,
129+
("dreamina-seedance-2-0-fast-260128", False): 0.0056,
130+
("dreamina-seedance-2-0-fast-260128", True): 0.0033,
131+
}
86132

87133

88134
RECOMMENDED_PRESETS = [
@@ -112,6 +158,12 @@ class TaskStatusResponse(BaseModel):
112158
("Custom", None, None),
113159
]
114160

161+
# Seedance 2.0 reference video pixel count limits per model.
162+
SEEDANCE2_REF_VIDEO_PIXEL_LIMITS = {
163+
"dreamina-seedance-2-0-260128": {"min": 409_600, "max": 927_408},
164+
"dreamina-seedance-2-0-fast-260128": {"min": 409_600, "max": 927_408},
165+
}
166+
115167
# The times in this dictionary are given for a 10-second duration.
116168
VIDEO_TASKS_EXECUTION_TIME = {
117169
"seedance-1-0-lite-t2v-250428": {

0 commit comments

Comments
 (0)