|
3 | 3 | from learning_orchestra_client.transform.data_type import TransformDataType |
4 | 4 | from learning_orchestra_client.builder import BuilderSparkMl |
5 | 5 |
|
6 | | -CLUSTER_IP = "http://34.66.75.31" |
| 6 | +CLUSTER_IP = "http://35.193.116.104" |
7 | 7 |
|
8 | 8 | dataset_csv = DatasetCsv(CLUSTER_IP) |
9 | 9 |
|
10 | 10 | dataset_csv.insert_dataset_async( |
11 | | - url="https://filebin.net/r4b6z6sganz2opsh/train.csv?t=9d3lp7jm", |
| 11 | + url="https://filebin.net/boniydu54k710l54/train.csv?t=s350xryf", |
12 | 12 | dataset_name="titanic_training", |
13 | 13 | ) |
14 | 14 | dataset_csv.insert_dataset_async( |
15 | | - url="https://filebin.net/r0c41p538us5fcrz/test.csv?t=td68r02h", |
| 15 | + url="https://filebin.net/udtf7eogfgasqnx5/test.csv?t=h79pcy0l", |
16 | 16 | dataset_name="titanic_testing" |
17 | 17 | ) |
18 | 18 |
|
|
41 | 41 |
|
42 | 42 | transform_projection.remove_dataset_attributes_async( |
43 | 43 | dataset_name="titanic_training", |
44 | | - projection_name="titanic_training_projection4", |
| 44 | + projection_name="titanic_training_projection", |
45 | 45 | fields=required_columns) |
46 | 46 |
|
47 | 47 | required_columns.remove("Survived") |
|
174 | 174 | training_df = datasets_list[TRAINING_DF_INDEX] |
175 | 175 | testing_df = datasets_list[TESTING_DF_INDEX] |
176 | 176 |
|
| 177 | +columns_without_label = training_df.columns.copy() |
| 178 | +columns_without_label.remove("label") |
| 179 | +
|
177 | 180 | assembler = VectorAssembler( |
178 | | - inputCols=training_df.columns[:], |
| 181 | + inputCols=columns_without_label, |
179 | 182 | outputCol="features") |
180 | 183 | assembler.setHandleInvalid('skip') |
181 | 184 |
|
|
192 | 195 | modeling_code=modeling_code, |
193 | 196 | model_classifiers=["LR", "DT", "GB", "RF", "NB"]) |
194 | 197 |
|
195 | | -for prediction in result["result"]: |
196 | | - builder.wait(dataset_name=prediction) |
| 198 | +PREDICTION_NAME_INDEX_IN_URL = 6 |
| 199 | +INDEX_TO_REMOVE_URI_PARAMETERS = 0 |
| 200 | +for prediction_url in result["result"]: |
| 201 | + prediction_name = prediction_url. \ |
| 202 | + split("/")[PREDICTION_NAME_INDEX_IN_URL]. \ |
| 203 | + split("?")[INDEX_TO_REMOVE_URI_PARAMETERS] |
| 204 | + builder.wait(dataset_name=prediction_name) |
197 | 205 | print(builder.search_builder_register_predictions( |
198 | | - builder_name=prediction, limit=1, pretty_response=True)) |
| 206 | + builder_name=prediction_name, limit=1, pretty_response=True)) |
0 commit comments