@@ -584,7 +584,7 @@ def test_run_experiment_with_versioned_dataset():
584584
585585 # Create first item
586586 langfuse .create_dataset_item (
587- dataset_name = name , input = {"question" : "What is 2+2?" }, expected_output = "4"
587+ dataset_name = name , input = {"question" : "What is 2+2?" }, expected_output = 4
588588 )
589589 langfuse .flush ()
590590 time .sleep (3 )
@@ -604,14 +604,14 @@ def test_run_experiment_with_versioned_dataset():
604604 id = item1_id ,
605605 dataset_name = name ,
606606 input = {"question" : "What is 4+4?" },
607- expected_output = "8" ,
607+ expected_output = 8 ,
608608 )
609609 langfuse .flush ()
610610 time .sleep (3 )
611611
612612 # Create second item (after version timestamp)
613613 langfuse .create_dataset_item (
614- dataset_name = name , input = {"question" : "What is 3+3?" }, expected_output = "6"
614+ dataset_name = name , input = {"question" : "What is 3+3?" }, expected_output = 6
615615 )
616616 langfuse .flush ()
617617 time .sleep (3 )
@@ -622,13 +622,13 @@ def test_run_experiment_with_versioned_dataset():
622622 assert versioned_dataset .version == version_timestamp
623623 # Verify it returns the ORIGINAL version of item1 (before the update)
624624 assert versioned_dataset .items [0 ].input == {"question" : "What is 2+2?" }
625- assert versioned_dataset .items [0 ].expected_output == "4"
625+ assert versioned_dataset .items [0 ].expected_output == 4
626626 assert versioned_dataset .items [0 ].id == item1_id
627627
628628 # Run a simple experiment on the versioned dataset
629629 def simple_task (* , item , ** kwargs ):
630630 # Trivial task: just return the item's known answer
631- return "4"
631+ return item . expected_output
632632
633633 result = versioned_dataset .run_experiment (
634634 name = "Versioned Dataset Test" ,
@@ -639,4 +639,4 @@ def simple_task(*, item, **kwargs):
639639 # Verify experiment ran successfully
640640 assert result .name == "Versioned Dataset Test"
641641 assert len (result .item_results ) == 1 # Only one item in versioned dataset
642- assert result .item_results [0 ].output == "4"
642+ assert result .item_results [0 ].output == 4
0 commit comments