3232import argparse
3333import requests
3434import re
35- from dateutil import parser as dparser
35+ import jsonschema
36+ import traceback
3637
3738
3839TSV_FIELDS = {}
@@ -176,114 +177,24 @@ def validate_args(args):
176177 """
177178 ))
178179
# Schema used when the caller does not supply --schema-url.
_DEFAULT_SCHEMA_URL = (
    "https://raw.githubusercontent.com/icgc-argo/argo-metadata-schemas/"
    "master/schemas/sequencing_experiment.json"
)


def validatePayload(payload, args=None):
    """Validate *payload* against a JSON schema fetched over HTTP.

    The schema document is downloaded from ``args.schema_url`` when that
    attribute is set, otherwise from the default sequencing-experiment
    schema URL.  The downloaded JSON is expected to carry the actual schema
    under its top-level ``"schema"`` key.

    Args:
        payload: The payload object to validate.
        args: Parsed command-line arguments.  Only ``schema_url`` is read.
            May be omitted (or lack the attribute), in which case the
            default schema URL is used.

    Returns:
        True when the payload validates.

    Raises:
        SystemExit: If the schema cannot be retrieved or parsed, if the
            retrieved schema is itself invalid, or if the payload fails
            validation.
    """
    url = getattr(args, "schema_url", None) or _DEFAULT_SCHEMA_URL

    # A timeout keeps the tool from hanging forever on an unresponsive host,
    # and connection-level failures are reported like any other fetch error.
    try:
        resp = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as err:
        print(err)
        sys.exit("Unable to retrieve schema. Please check URL\n ")

    if resp.status_code != 200:
        sys.exit("Unable to retrieve schema. Please check URL\n ")

    # The response may not be JSON at all, or may lack the "schema" wrapper
    # key; either way the URL does not point at a usable schema document.
    try:
        schema = resp.json()["schema"]
    except (ValueError, KeyError, TypeError) as err:
        print(err)
        sys.exit("Unable to retrieve schema. Please check URL\n ")

    try:
        jsonschema.validate(instance=payload, schema=schema)
    except jsonschema.exceptions.ValidationError as err:
        print(err)
        sys.exit("Payload failed to validate against schema\n ")
    except jsonschema.exceptions.SchemaError as err:
        # The schema document itself is malformed; the payload was never
        # actually checked, so say so instead of blaming the payload.
        print(err)
        sys.exit("Retrieved schema is not a valid JSON schema. Please check URL\n ")

    return True
287198
288199
289200
@@ -407,7 +318,7 @@ def main(metadata, extra_info=dict()):
407318 else :
408319 existing_ele .update (extra_info [item ][ele_to_update ])
409320
410- validatePayload (payload )
321+ validatePayload (payload , args )
411322 with open ("%s.sequencing_experiment.payload.json" % str (uuid .uuid4 ()), 'w' ) as f :
412323 f .write (json .dumps (payload , indent = 2 ))
413324
@@ -424,6 +335,8 @@ def main(metadata, extra_info=dict()):
424335 help = "tsv file containing file information submitted from user" )
425336 parser .add_argument ("-e" , "--extra-info-tsv" ,
426337 help = "tsv file containing file information submitted from user" )
338+ parser .add_argument ("-s" , "--schema-url" ,
339+ help = "URL to validate schema against" )
427340 args = parser .parse_args ()
428341
429342 validate_args (args )
0 commit comments