Skip to content

Commit baf2958

Browse files
committed
update error identification
1 parent 490bea1 commit baf2958

1 file changed

Lines changed: 63 additions & 44 deletions

File tree

payload-gen-seq-experiment/main.py

Lines changed: 63 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -176,86 +176,100 @@ def validate_args(args):
176176
"""
177177
))
178178

# Maps Python type names to the generic type names used in the schema's
# "type" keyword. Types missing from this map are reported by their Python name.
_GENERIC_TYPE_NAMES = {
    "NoneType": "null",
    "bool": "boolean",
    "str": "string",
    "int": "integer",
    "float": "number",
    "list": "array",
    "dict": "object",
}


def _coerce_number(val):
    """Return `val` as an int/float without truncating it, or None if it is not numeric."""
    if isinstance(val, (int, float)):
        return val
    if isinstance(val, str):
        for convert in (int, float):
            try:
                return convert(val)
            except ValueError:
                continue
    return None


def _check_type(val, target):
    """Check `val` against a schema "type" that is either one name or a list of names."""
    allowed = [target] if isinstance(target, str) else list(target)
    python_name = type(val).__name__
    received = _GENERIC_TYPE_NAMES.get(python_name, python_name)
    if received in allowed:
        return (True, None)
    if "integer" in allowed or "number" in allowed:
        # Deliberately lenient: anything convertible to a number is accepted
        # (e.g. the string "7"), presumably because payload values may arrive
        # as text -- TODO confirm against the callers.
        convert = float if "number" in allowed else int
        try:
            convert(val)
        except (TypeError, ValueError, OverflowError):
            pass
        else:
            return (True, None)
    expected = ",".join(str(name) for name in allowed)
    return (False, "DataType received " + received + " expected : " + expected)


def _check_bounds(val, target_dict):
    """Check `val` against whichever of "minimum"/"maximum" `target_dict` defines."""
    number = _coerce_number(val)
    if number is None:
        return (False, "Value " + str(val) + " is not numeric")
    lower = target_dict.get("minimum")
    upper = target_dict.get("maximum")
    if lower is not None and upper is not None:
        if lower <= number <= upper:
            return (True, None)
        return (False, "Value %s is outside the allowed range %s - %s" % (val, lower, upper))
    if lower is not None:
        if number >= lower:
            return (True, None)
        return (False, "Value %s fails to meet minimum requirement of %s" % (val, lower))
    if upper is not None and number > upper:
        return (False, "Value %s fails to meet maximum requirement of %s" % (val, upper))
    return (True, None)


def _check_scenarios(val, scenarios):
    """Pass when `val` satisfies every keyword of at least one scenario dict."""
    failures = []
    for scenario in scenarios:
        reasons = []
        for sub_key in scenario:
            passed, reason = validationCheck(val, sub_key, scenario)
            if not passed:
                reasons.append(reason)
        if not reasons:
            # One fully satisfied scenario is enough; later ones cannot change the result.
            return (True, None)
        # dict.fromkeys drops duplicate reasons while keeping first-seen order.
        failures.append(" and ".join(dict.fromkeys(reasons)))
    return (
        False,
        "Failure to adhere any of the following scenarios - "
        + ";".join(dict.fromkeys(failures)),
    )


def validationCheck(val, key, target_dict):
    """Validate `val` against one keyword of a schema property definition.

    Args:
        val: The payload value being checked.
        key: The schema keyword to apply. Handled keywords are "type",
            "pattern", "enum", "minimum", "maximum", "minLength", "anyOf",
            "oneOf", "format" and "example".
        target_dict: The schema dict that contains `key`. The whole dict is
            needed (not just `target_dict[key]`) so that "minimum" and
            "maximum" can be evaluated together.

    Returns:
        A `(passed, reason)` tuple: `(True, None)` on success, otherwise
        `(False, message)` where `message` describes the violation. Any
        keyword not listed above yields a failure.

    Raises:
        KeyError: If `key` is not present in `target_dict`.
    """
    target = target_dict[key]

    if key == 'type':
        return _check_type(val, target)
    elif key == 'pattern':
        if not isinstance(val, str):
            return (False, "Value %s is not a string and cannot be matched against regex: %s" % (val, target))
        if re.search(target, val):
            return (True, None)
        return (False, "String received " + val + ", does not adhere to regex: " + target)
    elif key == 'enum':
        if val in target:
            return (True, None)
        approved = ",".join(str(item) for item in target)
        return (False, "Value %s is not part of approved list : %s" % (val, approved))
    elif key in ('minimum', 'maximum'):
        return _check_bounds(val, target_dict)
    elif key == 'minLength':
        try:
            length = len(val)
        except TypeError:
            return (False, "Value %s has no length, minimum length is %s" % (val, target))
        if length >= target:
            return (True, None)
        return (False, "length of %s fails to meet minimum requirement of %s" % (val, target))
    elif key in ('anyOf', 'oneOf'):
        # NOTE: "oneOf" is evaluated exactly like "anyOf" (at least one scenario
        # must pass); "exactly one" is not enforced.
        return _check_scenarios(val, target)
    elif key == 'format':
        # Any value the date parser accepts passes, whatever the declared format is.
        try:
            dparser.parse(val)
        except (ValueError, OverflowError, TypeError):
            return (False, "Fails to meet appropriate 'date-time' or 'date' criteria")
        return (True, None)
    elif key == 'example':
        # Examples are documentation only; there is nothing to validate.
        return (True, None)
    else:
        return (False, "Unsupported schema keyword '%s'" % key)
235247

236248
def validatePayload(payload):
237249
url="https://raw.githubusercontent.com/icgc-argo/argo-metadata-schemas/master/schemas/sequencing_experiment.json"
238250
resp=requests.get(url)
239251

240252
if not resp.status_code==200:
241253
sys.exit("Unable to retrieve schema\n")
242-
243-
244254
###Experiment
245255
for required in resp.json()['schema']['properties']['experiment']['required']:
246256
if not required in payload.get('experiment').keys():
247257
sys.exit("Payload missing required field %s\n" % required)
248258

249259
for cat in resp.json()['schema']['properties']['experiment']['propertyNames']['enum']:
250-
if metadata.get(cat):
251-
val=metadata.get(cat)
260+
if payload.get(cat):
261+
val=payload.get(cat)
252262
schema_dict=resp.json()['schema']['properties']['experiment']['allof'][0]['properties'][cat]
253263

254264
for validation_key in schema_dict.keys():
255-
if not validationCheck(val,validation_key,schema_dict[validation_key]):
256-
print(val,validation_key,schema_dict[validation_key])
257-
sys.exit("Payload violates Schema : Please check %s in %s\n" % (validation_key,cat))
265+
print(cat)
266+
print(type(val))
267+
check_pass,fail_reason=validationCheck(val,validation_key,schema_dict)
268+
#print(validation_key,check_pass)
269+
if not check_pass:
270+
sys.exit("Payload violates Experiment Schema in field '%s', %s" % ( cat,fail_reason))
258271
###Read groups
272+
259273
for required in resp.json()['schema']['properties']['read_groups']['items']['required']:
260274
for rg in payload['read_groups']:
261275
if not required in rg.keys():
@@ -265,11 +279,11 @@ def validatePayload(payload):
265279
if rg.get(cat):
266280
val=rg.get(cat)
267281
schema_dict=resp.json()['schema']['properties']['read_groups']['items']['allOf'][0]['properties'][cat]
268-
282+
print(cat)
269283
for validation_key in schema_dict.keys():
270-
if not validationCheck(val,validation_key,schema_dict[validation_key]):
271-
print(val,validation_key,schema_dict[validation_key])
272-
sys.exit("Payload violates Schema : Please check %s in %s\n" % (validation_key,cat))
284+
check_pass,fail_reason=validationCheck(val,validation_key,schema_dict)
285+
if not check_pass:
286+
sys.exit("Payload violates Read group Schema in field '%s', %s" % ( cat,fail_reason))
273287

274288

275289

@@ -296,14 +310,19 @@ def main(metadata, extra_info=dict()):
296310
}
297311

298312
# optional experiment arguments
313+
# Strings
299314
optional_experimental_fields=[
300315
"library_isolation_protocol","library_preparation_kit",
301-
"library_strandedness","rin","dv200","spike_ins_included",
316+
"library_strandedness","dv200","spike_ins_included",
302317
"spike_ins_fasta","spike_ins_concentration","sequencing_center"]
303318
for optional_experimental_field in optional_experimental_fields:
304319
if metadata.get(optional_experimental_field):
305320
payload['experiment'][optional_experimental_field]=metadata.get(optional_experimental_field)
306-
321+
# Int
322+
optional_experimental_fields=["rin"]
323+
for optional_experimental_field in optional_experimental_fields:
324+
if metadata.get(optional_experimental_field):
325+
payload['experiment'][optional_experimental_field]=int(metadata.get(optional_experimental_field))
307326

308327
# RNA-seq library_Strandedness requirement check
309328
if metadata.get('experimental_strategy')=='RNA-Seq' and not metadata.get("library_strandedness"):

0 commit comments

Comments
 (0)