@@ -176,86 +176,100 @@ def validate_args(args):
176176 """
177177 ))
178178
# Maps Python type names to the type names used in the JSON schema.
_PYTHON_TO_SCHEMA_TYPE = {
    "NoneType": "null",
    "bool": "boolean",
    "str": "string",
    "int": "integer",
    "float": "number",
}


def _schema_type_name(val):
    """Return the schema type name for ``val`` (the Python type name if unmapped)."""
    python_name = type(val).__name__
    return _PYTHON_TO_SCHEMA_TYPE.get(python_name, python_name)


def _to_number(val):
    """Coerce ``val`` to an int or float; raise ValueError/TypeError if it is not numeric."""
    if isinstance(val, (int, float)):
        return val
    try:
        return int(val)
    except ValueError:
        # e.g. "5.5": not an integer literal, but still a valid number.
        return float(val)


def _check_type(val, target):
    """Check ``val`` against the schema ``type`` keyword (a name or a list of names)."""
    # Normalise to a list so membership tests never become substring tests.
    allowed = [target] if isinstance(target, str) else list(target)
    received = _schema_type_name(val)
    if received in allowed:
        return (True, None)
    if "integer" in allowed or "number" in allowed:
        # Values may arrive as strings, so numeric types are accepted when the
        # value can be coerced rather than only when it is already numeric.
        try:
            if "number" in allowed:
                _to_number(val)
            else:
                int(val)
        except (ValueError, TypeError, OverflowError):
            pass
        else:
            return (True, None)
    return (False, "DataType received " + received + " expected : " + ",".join(allowed))


def _check_bounds(val, key, target_dict):
    """Check ``val`` against ``minimum``/``maximum`` (both at once when both are present)."""
    try:
        number = _to_number(val)
    except (ValueError, TypeError):
        return (False, "Value " + str(val) + " is not numeric")

    if "minimum" in target_dict and "maximum" in target_dict:
        if target_dict["minimum"] <= number <= target_dict["maximum"]:
            return (True, None)
        return (False, "Value " + str(val) + " fails to meet both maximum and minimum requirements")

    bound = target_dict[key]
    if key == "minimum":
        if number >= bound:
            return (True, None)
        return (False, str(val) + " fails to meet minimum requirement of " + str(bound))
    if number <= bound:
        return (True, None)
    return (False, str(val) + " fails to meet maximum requirement of " + str(bound))


def _check_alternatives(val, alternatives):
    """Pass when ``val`` satisfies every keyword of at least one sub-schema."""
    failures = []
    for sub_schema in alternatives:
        reasons = []
        for sub_key in sub_schema:
            passed, reason = validationCheck(val, sub_key, sub_schema)
            if not passed:
                reasons.append(reason)
        if reasons:
            # dict.fromkeys de-duplicates while keeping a stable message order.
            failures.append(" and ".join(dict.fromkeys(reasons)))
    if len(failures) < len(alternatives):
        return (True, None)
    return (
        False,
        "Failure to adhere any of the following scenarios - "
        + ";".join(dict.fromkeys(failures)),
    )


def validationCheck(val, key, target_dict):
    """Validate ``val`` against a single keyword of a schema fragment.

    Args:
        val: The payload value to check.
        key: The schema keyword to apply; must be a key of ``target_dict``.
            Handled keywords: ``type``, ``pattern``, ``enum``, ``minimum``,
            ``maximum``, ``minLength``, ``anyOf``, ``oneOf``, ``format``
            and ``example``.
        target_dict: The schema fragment (dict) that contains ``key``.

    Returns:
        A ``(passed, reason)`` tuple. ``reason`` is ``None`` on success and a
        human-readable failure message otherwise. Unsupported keywords fail.
    """
    target = target_dict[key]

    if key == 'type':
        return _check_type(val, target)

    if key == 'pattern':
        # ``re`` comes from the file-level imports.
        if isinstance(val, str) and re.search(target, val):
            return (True, None)
        return (False, "String received " + str(val) + ", does not adhere to regex: " + str(target))

    if key == 'enum':
        if val in target:
            return (True, None)
        # Enum members are not necessarily strings (e.g. null), so stringify them.
        return (False, "Value %s is not part of approved list : %s" % (val, ",".join(map(str, target))))

    if key in ('minimum', 'maximum'):
        return _check_bounds(val, key, target_dict)

    if key == 'minLength':
        try:
            length = len(val)
        except TypeError:
            return (False, "Value " + str(val) + " has no length")
        if length >= target:
            return (True, None)
        return (False, "length of " + str(val) + " fails to meet minimum length requirement of " + str(target))

    if key in ('anyOf', 'oneOf'):
        # NOTE(review): 'oneOf' is treated like 'anyOf' (at least one match), as
        # before. Enforcing exactly-one would reject values that the lenient
        # numeric coercion lets match several alternatives - confirm before tightening.
        return _check_alternatives(val, target)

    if key == 'format':
        # ``dparser`` comes from the file-level imports. The parse must happen
        # inside the try so an unparseable value is reported, not raised.
        try:
            dparser.parse(val)
        except (ValueError, OverflowError, TypeError):
            return (False, "Fails to meet appropriate 'date-time' or 'date' criteria")
        return (True, None)

    if key == 'example':
        # Examples are informational only; nothing to validate.
        return (True, None)

    return (False, "Unsupported schema keyword: " + str(key))
235247
236248def validatePayload (payload ):
237249 url = "https://raw.githubusercontent.com/icgc-argo/argo-metadata-schemas/master/schemas/sequencing_experiment.json"
238250 resp = requests .get (url )
239251
240252 if not resp .status_code == 200 :
241253 sys .exit ("Unable to retrieve schema\n " )
242-
243-
244254 ###Experiment
245255 for required in resp .json ()['schema' ]['properties' ]['experiment' ]['required' ]:
246256 if not required in payload .get ('experiment' ).keys ():
247257 sys .exit ("Payload missing required field %s\n " % required )
248258
249259 for cat in resp .json ()['schema' ]['properties' ]['experiment' ]['propertyNames' ]['enum' ]:
250- if metadata .get (cat ):
251- val = metadata .get (cat )
260+ if payload .get (cat ):
261+ val = payload .get (cat )
252262 schema_dict = resp .json ()['schema' ]['properties' ]['experiment' ]['allof' ][0 ]['properties' ][cat ]
253263
254264 for validation_key in schema_dict .keys ():
255- if not validationCheck (val ,validation_key ,schema_dict [validation_key ]):
256- print (val ,validation_key ,schema_dict [validation_key ])
257- sys .exit ("Payload violates Schema : Please check %s in %s\n " % (validation_key ,cat ))
265+ print (cat )
266+ print (type (val ))
267+ check_pass ,fail_reason = validationCheck (val ,validation_key ,schema_dict )
268+ #print(validation_key,check_pass)
269+ if not check_pass :
270+ sys .exit ("Payload violates Experiment Schema in field '%s', %s" % ( cat ,fail_reason ))
258271 ###Read groups
272+
259273 for required in resp .json ()['schema' ]['properties' ]['read_groups' ]['items' ]['required' ]:
260274 for rg in payload ['read_groups' ]:
261275 if not required in rg .keys ():
@@ -265,11 +279,11 @@ def validatePayload(payload):
265279 if rg .get (cat ):
266280 val = rg .get (cat )
267281 schema_dict = resp .json ()['schema' ]['properties' ]['read_groups' ]['items' ]['allOf' ][0 ]['properties' ][cat ]
268-
282+ print ( cat )
269283 for validation_key in schema_dict .keys ():
270- if not validationCheck (val ,validation_key ,schema_dict [ validation_key ]):
271- print ( val , validation_key , schema_dict [ validation_key ])
272- sys .exit ("Payload violates Schema : Please check %s in %s \n " % (validation_key , cat ))
284+ check_pass , fail_reason = validationCheck (val ,validation_key ,schema_dict )
285+ if not check_pass :
286+ sys .exit ("Payload violates Read group Schema in field '%s', %s " % ( cat , fail_reason ))
273287
274288
275289
@@ -296,14 +310,19 @@ def main(metadata, extra_info=dict()):
296310 }
297311
298312 # optional experiment arguments
313+ # Strings
299314 optional_experimental_fields = [
300315 "library_isolation_protocol" ,"library_preparation_kit" ,
301- "library_strandedness" ,"rin" , " dv200" ,"spike_ins_included" ,
316+ "library_strandedness" ,"dv200" ,"spike_ins_included" ,
302317 "spike_ins_fasta" ,"spike_ins_concentration" ,"sequencing_center" ]
303318 for optional_experimental_field in optional_experimental_fields :
304319 if metadata .get (optional_experimental_field ):
305320 payload ['experiment' ][optional_experimental_field ]= metadata .get (optional_experimental_field )
306-
321+ # Int
322+ optional_experimental_fields = ["rin" ]
323+ for optional_experimental_field in optional_experimental_fields :
324+ if metadata .get (optional_experimental_field ):
325+ payload ['experiment' ][optional_experimental_field ]= int (metadata .get (optional_experimental_field ))
307326
308327 # RNA-seq library_Strandedness requirement check
309328 if metadata .get ('experimental_strategy' )== 'RNA-Seq' and not metadata .get ("library_strandedness" ):
0 commit comments