Skip to content

Commit a68dff1

Browse files
committed
Regex check for EGA ids
1 parent 758e2e4 commit a68dff1

2 files changed

Lines changed: 25 additions & 12 deletions

File tree

payload-gen-seq-experiment/main.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,22 @@
5656
]
5757
TSV_FIELDS['read_group']["conditional"]=[]
5858

59+
EGA_FIELDS={
60+
"ega_file_id":"EGAF",
61+
"ega_dataset_id":"EGAD",
62+
"ega_experiment_id":"EGAX",
63+
"ega_sample_id":"EGAN",
64+
"ega_study_id":"EGAS",
65+
"ega_run_id":"EGAR",
66+
"ega_policy_id":"EGAP",
67+
"ega_analysis_id":"EGAZ",
68+
"ega_submission_id":"EGAB",
69+
"ega_dac_id":"EGAC"
70+
}
71+
5972
TSV_FIELDS['file']={}
6073
TSV_FIELDS['file']["core"]=['type', 'name', 'size', 'md5sum', 'path', 'format']
61-
TSV_FIELDS['file']["conditional"]=[
62-
"ega_file_id","ega_dataset_id","ega_experiment_id","ega_sample_id","ega_study_id",
63-
"ega_run_id","ega_policy_id","ega_analysis_id","ega_submission_id","ega_dac_id"]
64-
74+
TSV_FIELDS['file']["conditional"]=list(EGA_FIELDS.keys())
6575

6676

6777
def empty_str_to_null(metadata):
@@ -222,10 +232,9 @@ def main(metadata, extra_info=dict()):
222232

223233
# optional experiment arguments
224234
# Strings
225-
optional_experimental_fields=[
226-
"library_isolation_protocol","library_preparation_kit",
227-
"library_strandedness","dv200","spike_ins_included",
228-
"spike_ins_fasta","spike_ins_concentration","sequencing_center"]
235+
# Build a filtered copy instead of aliasing TSV_FIELDS['experiment']["conditional"]:
# calling .remove("rin") on the alias mutated the module-level list, so a second
# call to main() raised ValueError and other readers of the list lost "rin".
optional_experimental_fields=[field for field in TSV_FIELDS['experiment']["conditional"] if field!="rin"]
237+
229238
for optional_experimental_field in optional_experimental_fields:
230239
if metadata.get(optional_experimental_field):
231240
payload['experiment'][optional_experimental_field]=metadata.get(optional_experimental_field)
@@ -260,7 +269,7 @@ def main(metadata, extra_info=dict()):
260269

261270
# get file of the payload
262271

263-
optional_file_fields=["EGAS","EGAC","EGAP","EGAN","EGAR","EGAX","EGAZ","EGAD","EGAB","EGAF"]
272+
264273
for input_file in metadata.get("files"):
265274
payload['files'].append(
266275
{
@@ -275,9 +284,12 @@ def main(metadata, extra_info=dict()):
275284
}
276285
}
277286
)
278-
for optional_file_field in optional_file_fields:
287+
for optional_file_field in TSV_FIELDS['file']["conditional"]:
279288
if input_file.get(optional_file_field):
280-
payload['files'][-1][optional_file_field]=input_file.get(optional_file_field)
289+
if re.findall("^"+EGA_FIELDS[optional_file_field]+'[0-9]{1,32}$',input_file.get(optional_file_field)):
290+
payload['files'][-1]['info'][optional_file_field]=input_file.get(optional_file_field)
291+
else:
292+
# Plain (non-f) string on purpose: with the f prefix, "{1,32}" was evaluated as the
# tuple (1, 32) and the reported pattern read "[0-9](1, 32)$" instead of "[0-9]{1,32}$".
sys.exit("Field '%s' in file '%s' with value '%s' does not match expected regex pattern '^%s[0-9]{1,32}$'" % (optional_file_field,input_file.get('name'),input_file.get(optional_file_field),EGA_FIELDS[optional_file_field]))
281293

282294
for rg in metadata.get("read_groups"):
283295
rg.pop('type') # remove 'type' field

payload-gen-seq-experiment/tests/input/208c5ea6-c17a-4a63-981e-4bb91d3119f2.sequencing_experiment.payload.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@
8080
"fileAccess": "controlled",
8181
"dataType": "Submitted Reads",
8282
"info": {
83-
"data_category": "Sequencing Reads"
83+
"data_category": "Sequencing Reads",
84+
"ega_file_id": "EGAF000001"
8485
}
8586
}
8687
]

0 commit comments

Comments
 (0)