Skip to content

Commit b92149c

Browse files
committed
Updated schema validator and arguments command
- New argument "-s" to specify the schema URL; if it is omitted, a default URL is used. Currently using `https://raw.githubusercontent.com/icgc-argo/argo-metadata-schemas/seq_experiment_typo/schemas/sequencing_experiment.json` due to typos.
1 parent baf2958 commit b92149c

2 files changed

Lines changed: 21 additions & 108 deletions

File tree

payload-gen-seq-experiment/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ RUN apt-get update && apt-get install -y procps
55
LABEL org.opencontainers.image.source https://github.com/icgc-argo-workflows/data-processing-utility-tools
66

77
RUN pip install requests && \
8-
pip install python-dateutil
8+
pip install jsonschema
99

1010
RUN groupadd -g 1000 ubuntu &&\
1111
useradd -l -u 1000 -g ubuntu ubuntu &&\

payload-gen-seq-experiment/main.py

Lines changed: 20 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
import argparse
3333
import requests
3434
import re
35-
from dateutil import parser as dparser
35+
import jsonschema
36+
import traceback
3637

3738

3839
TSV_FIELDS = {}
@@ -176,114 +177,24 @@ def validate_args(args):
176177
"""
177178
))
178179

179-
def validationCheck(val,key,target_dict):
180-
target=target_dict[key]
181-
check_pass=True
182-
python_type_to_generic_dict={
183-
"NoneType":"null",
184-
"bool":"boolean",
185-
"str":"string",
186-
"int":"integer",
187-
}
188-
189-
if key=='type' and "integer" not in target and "number" not in target:
190-
output_str=target if type(target).__name__=='str' else ",".join(target)
191-
return (True,None) if python_type_to_generic_dict[type(val).__name__] in target else (False,"DataType received "+python_type_to_generic_dict[type(val).__name__]+" expected : "+output_str)
192-
if key=='type' and ("integer" in target or "number" in target):
193-
output_str=target if type(target).__name__=='str' else ",".join(target)
194-
return (True,None) if python_type_to_generic_dict[type(int(val)).__name__] in target else (False,"DataType received "+python_type_to_generic_dict[type(val).__name__]+" expected : "+output_str)
195-
elif key=='pattern':
196-
return (True,None)if re.findall(target,val) else (False,"String received "+val+", does not adhere to regex: "+target_dict[key])
197-
elif key=='enum':
198-
return (True,None) if val in target else (False,"Value %s is not part of approved list : %s" % (val,",".join(target)))
199-
elif key=='minimum':
200-
if "minimum" in target_dict and "maximum" in target_dict:
201-
return (True,None) if (int(val) >= target_dict['minimum'] and int(val) <= target_dict['maximum']) else (False,"Value "+str(val)+" fails to meet both maximum and minimum requirements")
202-
else:
203-
return (True,None) if int(val) >= target else (False,str(val)+" fails to meet both minimum requirements")
204-
elif key=='maximum':
205-
if "minimum" in target_dict and "maximum" in target_dict:
206-
return (True,None) if (int(val) >= target_dict['minimum'] and int(val) <= target_dict['maximum']) else (False,"Value "+str(val)+" fails to meet both maximum and minimum requirements")
207-
else:
208-
return (True,None) if int(val) <= target else (False,str(val)+" fails to meet both maximum requirements")
209-
elif key=='minLength':
210-
return (True,None) if len(val) >= target else (False,"length of "+val+" fails to meet both minimum requirements")
211-
elif key=='anyOf':
212-
list_of_errors=[]
213-
for sub_dict in target:
214-
sublist_of_errors=[]
215-
for key_sub_dict in sub_dict.keys():
216-
check_pass,fail_reason=validationCheck(val,key_sub_dict,sub_dict)
217-
if not check_pass:
218-
sublist_of_errors.append(fail_reason)
219-
print(fail_reason)
220-
if len(sublist_of_errors)>0:
221-
list_of_errors.append(" and ".join(list(set(sublist_of_errors))))
222-
return (True,None) if len(list_of_errors)<len(target) else (False,"Failure to adhere any of the following scenarios - "+";".join(list(set(list_of_errors))))
223-
elif key=='oneOf':
224-
list_of_errors=[]
225-
for sub_dict in target:
226-
sublist_of_errors=[]
227-
for key_sub_dict in sub_dict.keys():
228-
check_pass,fail_reason=validationCheck(val,key_sub_dict,sub_dict)
229-
if not check_pass:
230-
sublist_of_errors.append(fail_reason)
231-
print(fail_reason)
232-
if len(sublist_of_errors)>0:
233-
list_of_errors.append(" and ".join(list(set(sublist_of_errors))))
234-
return (True,None) if len(list_of_errors)<len(target) else (False,"Failure to adhere any of the following scenarios - "+";".join(list(set(list_of_errors))))
235-
elif key=='format':
236-
dparser.parse(val)
237-
try:
238-
dparser.parse(val)
239-
except:
240-
return (False,"Fails to meet appropriate 'date-time' or 'date' criteria")
241-
else:
242-
return (True,None)
243-
elif key=='example':
244-
return (True,None)
180+
def validatePayload(payload,args):
181+
if args.schema_url:
182+
url=args.schema_url
245183
else:
246-
return (False,"Bad")
247-
248-
def validatePayload(payload):
249-
url="https://raw.githubusercontent.com/icgc-argo/argo-metadata-schemas/master/schemas/sequencing_experiment.json"
184+
url="https://raw.githubusercontent.com/icgc-argo/argo-metadata-schemas/master/schemas/sequencing_experiment.json"
185+
250186
resp=requests.get(url)
251-
252187
if not resp.status_code==200:
253-
sys.exit("Unable to retrieve schema\n")
254-
###Experiment
255-
for required in resp.json()['schema']['properties']['experiment']['required']:
256-
if not required in payload.get('experiment').keys():
257-
sys.exit("Payload missing required field %s\n" % required)
258-
259-
for cat in resp.json()['schema']['properties']['experiment']['propertyNames']['enum']:
260-
if payload.get(cat):
261-
val=payload.get(cat)
262-
schema_dict=resp.json()['schema']['properties']['experiment']['allof'][0]['properties'][cat]
263-
264-
for validation_key in schema_dict.keys():
265-
print(cat)
266-
print(type(val))
267-
check_pass,fail_reason=validationCheck(val,validation_key,schema_dict)
268-
#print(validation_key,check_pass)
269-
if not check_pass:
270-
sys.exit("Payload violates Experiment Schema in field '%s', %s" % ( cat,fail_reason))
271-
###Read groups
272-
273-
for required in resp.json()['schema']['properties']['read_groups']['items']['required']:
274-
for rg in payload['read_groups']:
275-
if not required in rg.keys():
276-
sys.exit("Read Group %s in payload is missing required field %s\n" % (rg['submitter_read_group_id'],required))
277-
for cat in resp.json()['schema']['properties']['read_groups']['items']['propertyNames']['enum']:
278-
for rg in payload['read_groups']:
279-
if rg.get(cat):
280-
val=rg.get(cat)
281-
schema_dict=resp.json()['schema']['properties']['read_groups']['items']['allOf'][0]['properties'][cat]
282-
print(cat)
283-
for validation_key in schema_dict.keys():
284-
check_pass,fail_reason=validationCheck(val,validation_key,schema_dict)
285-
if not check_pass:
286-
sys.exit("Payload violates Read group Schema in field '%s', %s" % ( cat,fail_reason))
188+
sys.exit("Unable to retrieve schema. Please check URL\n")
189+
#print(payload)
190+
#print(resp.json()['schema'])
191+
try:
192+
jsonschema.validate(instance=payload,schema=resp.json()['schema'])
193+
except jsonschema.exceptions.ValidationError as err:
194+
print(err)
195+
sys.exit("Payload failed to validate against schema\n")
196+
else:
197+
return True
287198

288199

289200

@@ -407,7 +318,7 @@ def main(metadata, extra_info=dict()):
407318
else:
408319
existing_ele.update(extra_info[item][ele_to_update])
409320

410-
validatePayload(payload)
321+
validatePayload(payload,args)
411322
with open("%s.sequencing_experiment.payload.json" % str(uuid.uuid4()), 'w') as f:
412323
f.write(json.dumps(payload, indent=2))
413324

@@ -424,6 +335,8 @@ def main(metadata, extra_info=dict()):
424335
help="tsv file containing file information submitted from user")
425336
parser.add_argument("-e", "--extra-info-tsv",
426337
help="tsv file containing file information submitted from user")
338+
parser.add_argument("-s", "--schema-url",
339+
help="URL to validate schema against")
427340
args = parser.parse_args()
428341

429342
validate_args(args)

0 commit comments

Comments
 (0)