发生异常: TypeError (note: full exception trace is shown but execution is paused at: _run_module_as_main)
patch_gpt_oss.<locals>.Mxfp4GptOssExperts.forward() got an unexpected keyword argument 'router_indices'
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/utils/generic.py", line 1079, in wrapper
outputs = func(self, *args, **kwargs_without_recordable)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 507, in forward
hidden_states,
attention_mask=causal_mask_mapping[decoder_layer.attention_type],
position_ids=position_ids,
past_key_values=past_key_values,
use_cache=use_cache,
cache_position=cache_position,
position_embeddings=position_embeddings,
**kwargs,
)
hidden_states = self.norm(hidden_states)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/modeling_layers.py", line 93, in __call__
return self._gradient_checkpointing_func(partial(super().__call__, **kwargs), *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/_compile.py", line 53, in inner
return disable_fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/utils/checkpoint.py", line 496, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/autograd/function.py", line 581, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/unsloth_zoo/gradient_checkpointing.py", line 492, in forward
outputs = run_function(*args)
^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 386, in forward
hidden_states, _ = self.mlp(hidden_states) # diff with llama: router scores
^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/unsloth_zoo/tiled_mlp.py", line 256, in tiled_forward_arctic_size
return TiledMLP.apply(inner_forward, mlp_module, x, preserve_rng_state, n_shards, chunk_size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/autograd/function.py", line 581, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/amp/autocast_mode.py", line 527, in decorate_fwd
return fwd(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/unsloth_zoo/tiled_mlp.py", line 143, in forward
out = TiledMLP.handle_output(mlp_forward(x_split), extra_outputs)
^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 170, in forward
routed_out = self.experts(hidden_states, router_indices=router_indices, routing_weights=router_scores)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: patch_gpt_oss.<locals>.Mxfp4GptOssExperts.forward() got an unexpected keyword argument 'router_indices'
During handling of the above exception, another exception occurred:
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 170, in forward
routed_out = self.experts(hidden_states, router_indices=router_indices, routing_weights=router_scores)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/unsloth_zoo/tiled_mlp.py", line 143, in forward
out = TiledMLP.handle_output(mlp_forward(x_split), extra_outputs)
^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/amp/autocast_mode.py", line 527, in decorate_fwd
return fwd(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/autograd/function.py", line 581, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/unsloth_zoo/tiled_mlp.py", line 256, in tiled_forward_arctic_size
return TiledMLP.apply(inner_forward, mlp_module, x, preserve_rng_state, n_shards, chunk_size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 386, in forward
hidden_states, _ = self.mlp(hidden_states) # diff with llama: router scores
^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/unsloth_zoo/gradient_checkpointing.py", line 492, in forward
outputs = run_function(*args)
^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/autograd/function.py", line 581, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/utils/checkpoint.py", line 496, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/_compile.py", line 53, in inner
return disable_fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/modeling_layers.py", line 93, in __call__
return self._gradient_checkpointing_func(partial(super().__call__, **kwargs), *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 507, in forward
hidden_states,
attention_mask=causal_mask_mapping[decoder_layer.attention_type],
position_ids=position_ids,
past_key_values=past_key_values,
use_cache=use_cache,
cache_position=cache_position,
position_embeddings=position_embeddings,
**kwargs,
)
hidden_states = self.norm(hidden_states)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/utils/generic.py", line 1081, in wrapper
raise original_exception
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/utils/generic.py", line 1081, in wrapper
raise original_exception
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 668, in forward
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_router_logits=output_router_logits,
cache_position=cache_position,
**kwargs,
)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/utils/generic.py", line 918, in wrapper
output = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/peft/tuners/tuners_utils.py", line 308, in forward
return self.model.forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/peft/peft_model.py", line 1923, in forward
input_ids=input_ids,
attention_mask=attention_mask,
inputs_embeds=inputs_embeds,
labels=labels,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
**kwargs,
)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/accelerate/utils/operations.py", line 807, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/accelerate/utils/operations.py", line 819, in forward
return model_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<string>", line 36, in compute_loss
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/unsloth/models/_utils.py", line 1651, in _unsloth_pre_compute_loss
outputs = self._old_compute_loss(model, inputs, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/shelterw/unsloth_compiled_cache/UnslothSFTTrainer.py", line 1071, in compute_loss
model,
inputs,
return_outputs = return_outputs,
num_items_in_batch = num_items_in_batch,
)
return outputs
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<string>", line 40, in _unsloth_training_step
File "/mnt/data1/xmw/shelterw/unsloth_compiled_cache/UnslothSFTTrainer.py", line 1082, in training_step
return super().training_step(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/trainer.py", line 2674, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/site-packages/transformers/trainer.py", line 2325, in train
args=args,
resume_from_checkpoint=resume_from_checkpoint,
trial=trial,
ignore_keys_for_eval=ignore_keys_for_eval,
)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/shelterw/unsloth_compiled_cache/UnslothSFTTrainer.py", line 55, in wrapper
output = f(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xmw/data/shelterw/train_oss.py", line 81, in <module>
trainer_stats = trainer.train()
^^^^^^^^^^^^^^^
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/runpy.py", line 88, in _run_code
exec(code, run_globals)
File "/mnt/data1/xmw/miniconda3/envs/unsloth/lib/python3.11/runpy.py", line 198, in _run_module_as_main (Current frame)
"__main__", mod_spec)
TypeError: patch_gpt_oss.<locals>.Mxfp4GptOssExperts.forward() got an unexpected keyword argument 'router_indices'