Skip to content

Commit 8b889a4

Browse files
committed
update to support JSON submission
1 parent eb42361 commit 8b889a4

6 files changed

Lines changed: 186 additions & 10 deletions

File tree

payload-gen-seq-experiment/main.nf

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ params.read_group_info_tsv = "NO_FILE2"
5151
params.file_info_tsv = "NO_FILE3"
5252
params.extra_info_tsv = "NO_FILE4"
5353
params.schema_url="NO_FILE5"
54+
params.metadata_payload_json="NO_FILE6"
5455

5556
process payloadGenSeqExperiment {
5657
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
@@ -64,6 +65,7 @@ process payloadGenSeqExperiment {
6465
path read_group_info_tsv
6566
path file_info_tsv
6667
path extra_info_tsv
68+
path metadata_payload_json
6769
val schema_url
6870

6971
output:
@@ -74,13 +76,15 @@ process payloadGenSeqExperiment {
7476
args_read_group_info_tsv = !read_group_info_tsv.name.startsWith("NO_FILE") ? "-r ${read_group_info_tsv}" : ""
7577
args_file_info_tsv = !file_info_tsv.name.startsWith("NO_FILE") ? "-f ${file_info_tsv}" : ""
7678
args_extra_info_tsv = !extra_info_tsv.name.startsWith("NO_FILE") ? "-e ${extra_info_tsv}" : ""
79+
args_metadata_payload_json= !metadata_payload_json.name.startsWith("NO_FILE") ? "-m ${metadata_payload_json}" : ""
7780
args_schema_url = !schema_url.startsWith("NO_FILE") ? "-s ${schema_url}" : ""
7881
"""
7982
main.py \
8083
${args_experiment_info_tsv} \
8184
${args_read_group_info_tsv} \
8285
${args_file_info_tsv} \
8386
${args_extra_info_tsv} \
87+
${args_metadata_payload_json} \
8488
${args_schema_url}
8589
"""
8690
}
@@ -94,6 +98,7 @@ workflow {
9498
file(params.read_group_info_tsv),
9599
file(params.file_info_tsv),
96100
file(params.extra_info_tsv),
101+
file(params.metadata_payload_json),
97102
params.schema_url
98103
)
99-
}
104+
}

payload-gen-seq-experiment/main.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
Edmund Su <edmund.su@oicr.on.ca>
2424
"""
2525

26-
2726
import sys
2827
import uuid
2928
import json
@@ -173,7 +172,7 @@ def load_all_tsvs(exp_tsv, rg_tsv, file_tsv):
173172

174173
def validate_args(args):
175174
if args.metadata_json and \
176-
not (args.experiment_info_tsv or args.read_group_info_tsv or args.file_info_tsv):
175+
not (args.experiment_info_tsv and args.read_group_info_tsv and args.file_info_tsv):
177176
return True
178177
elif not args.metadata_json and \
179178
(args.experiment_info_tsv and args.read_group_info_tsv and args.file_info_tsv):
@@ -182,8 +181,8 @@ def validate_args(args):
182181
sys.exit(textwrap.dedent(
183182
"""
184183
Usage:
185-
When '-m' is provided, no other arguments can be used
186-
When '-m' is not provided, please provide all of these arguments: -x, -r and -f
184+
When '-m' is provided, the '-x', '-r' and '-f' arguments are ignored
185+
When '-m' is not provided, please provide all of these arguments: '-x', '-r' and '-f'
187186
Optionally '-s' a schema URL can be provided, which the payload will be validated against
188187
"""
189188
))
@@ -290,8 +289,11 @@ def main(metadata, extra_info=dict()):
290289
sys.exit(f"Field '%s' in file '%s' with value '%s' does not match expected regex pattern '^%s[0-9]{1,32}$'" % (optional_file_field,input_file.get('name'),input_file.get(optional_file_field),EGA_FIELDS[optional_file_field]))
291290

292291
for rg in metadata.get("read_groups"):
293-
rg.pop('type') # remove 'type' field
294-
rg.pop('submitter_sequencing_experiment_id') # remove 'submitter_sequencing_experiment_id' field
292+
if "type" in rg:
293+
print(rg)
294+
rg.pop('type') # remove 'type' field
295+
if "submitter_sequencing_experiment_id" in rg:
296+
rg.pop('submitter_sequencing_experiment_id') # remove 'submitter_sequencing_experiment_id' field
295297
payload['read_groups'].append(rg)
296298

297299

@@ -327,7 +329,7 @@ def main(metadata, extra_info=dict()):
327329
existing_ele['info'].update(extra_info[item][ele_to_update])
328330
else:
329331
existing_ele.update(extra_info[item][ele_to_update])
330-
332+
331333
validatePayload(payload,args)
332334
with open("%s.sequencing_experiment.payload.json" % str(uuid.uuid4()), 'w') as f:
333335
f.write(json.dumps(payload, indent=2))
@@ -391,4 +393,4 @@ def main(metadata, extra_info=dict()):
391393
extra_info[row_type][row_id][row_field]=row_val
392394

393395

394-
main(metadata, extra_info)
396+
main(metadata, extra_info)

payload-gen-seq-experiment/tests/checker.nf

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
along with this program. If not, see <http://www.gnu.org/licenses/>.
1818
1919
Authors:
20+
Linda Xiang
2021
Junjun Zhang
22+
Edmund Su
2123
*/
2224

2325
/*
@@ -48,6 +50,8 @@ params.read_group_info_tsv = "NO_FILE2"
4850
params.file_info_tsv = "NO_FILE3"
4951
params.extra_info_tsv = "NO_FILE4"
5052
params.schema_url = "NO_FILE5"
53+
params.metadata_payload_json = "NO_FILE6"
54+
5155
params.expected_output = ""
5256

5357
include { payloadGenSeqExperiment } from '../main'
@@ -68,7 +72,6 @@ process file_smart_diff {
6872
# Note: this is only for demo purpose, please write your own 'diff' according to your own needs.
6973
# remove date field before comparison eg, <div id="header_filename">Tue 19 Jan 2021<br/>test_rg_3.bam</div>
7074
# sed -e 's#"header_filename">.*<br/>test_rg_3.bam#"header_filename"><br/>test_rg_3.bam</div>#'
71-
7275
diff <( cat ${output_file} | sed -e 's#"header_filename">.*<br/>#"header_filename"><br/>#' ) \
7376
<( ([[ '${expected_file}' == *.gz ]] && gunzip -c ${expected_file} || cat ${expected_file}) | sed -e 's#"header_filename">.*<br/>#"header_filename"><br/>#' ) \
7477
&& ( echo "Test PASSED" && exit 0 ) || ( echo "Test FAILED, output file mismatch." && exit 1 )
@@ -83,6 +86,7 @@ workflow checker {
8386
file_info_tsv
8487
extra_info_tsv
8588
expected_output
89+
metadata_payload_json
8690
schema_url
8791

8892
main:
@@ -91,6 +95,7 @@ workflow checker {
9195
read_group_info_tsv,
9296
file_info_tsv,
9397
extra_info_tsv,
98+
metadata_payload_json,
9499
schema_url
95100
)
96101

@@ -108,6 +113,7 @@ workflow {
108113
file(params.file_info_tsv),
109114
file(params.extra_info_tsv),
110115
file(params.expected_output),
116+
file(params.metadata_payload_json),
111117
params.schema_url
112118
)
113119
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
{
2+
"analysisType": {
3+
"name": "sequencing_experiment"
4+
},
5+
"studyId": "TEST-PRO",
6+
"experiment": {
7+
"submitter_sequencing_experiment_id": "TEST_EXP",
8+
"sequencing_center": "EXT",
9+
"platform": "ILLUMINA",
10+
"platform_model": "HiSeq 2000",
11+
"experimental_strategy": "WGS",
12+
"sequencing_date": "2014-12-12"
13+
},
14+
"read_group_count": 3,
15+
"read_groups": [
16+
{
17+
"submitter_read_group_id": "C0HVY.2",
18+
"read_group_id_in_bam": null,
19+
"platform_unit": "74_8a",
20+
"is_paired_end": true,
21+
"file_r1": "test_rg_3.bam",
22+
"file_r2": "test_rg_3.bam",
23+
"read_length_r1": 150,
24+
"read_length_r2": 150,
25+
"insert_size": 298,
26+
"sample_barcode": null,
27+
"library_name": "Pond-147580"
28+
},
29+
{
30+
"submitter_read_group_id": "D0RE2.1",
31+
"read_group_id_in_bam": null,
32+
"platform_unit": "74_8b",
33+
"is_paired_end": true,
34+
"file_r1": "test_rg_3.bam",
35+
"file_r2": "test_rg_3.bam",
36+
"read_length_r1": 150,
37+
"read_length_r2": 150,
38+
"insert_size": 298,
39+
"sample_barcode": null,
40+
"library_name": "Pond-147580"
41+
},
42+
{
43+
"submitter_read_group_id": "D0RH0.2",
44+
"read_group_id_in_bam": null,
45+
"platform_unit": "74_8c",
46+
"is_paired_end": true,
47+
"file_r1": "test_rg_3.bam",
48+
"file_r2": "test_rg_3.bam",
49+
"read_length_r1": 150,
50+
"read_length_r2": 150,
51+
"insert_size": 298,
52+
"sample_barcode": null,
53+
"library_name": "Pond-147580"
54+
}
55+
],
56+
"samples": [
57+
{
58+
"submitterSampleId": "HCC1143_BAM_INPUT",
59+
"matchedNormalSubmitterSampleId": null,
60+
"sampleType": "Total DNA",
61+
"specimen": {
62+
"submitterSpecimenId": "HCC1143_BAM_INPUT",
63+
"tumourNormalDesignation": "Normal",
64+
"specimenTissueSource": "Blood derived",
65+
"specimenType": "Cell line - derived from normal"
66+
},
67+
"donor": {
68+
"submitterDonorId": "HCC1143",
69+
"gender": "Female"
70+
}
71+
}
72+
],
73+
"files": [
74+
{
75+
"fileName": "test_rg_3.bam",
76+
"fileSize": 14911,
77+
"fileMd5sum": "178f97f7b1ca8bfc28fd5586bdd56799",
78+
"fileType": "BAM",
79+
"fileAccess": "controlled",
80+
"dataType": "Submitted Reads",
81+
"info": {
82+
"data_category": "Sequencing Reads",
83+
"ega_file_id": "EGAF000001"
84+
}
85+
}
86+
]
87+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"program_id": "TEST-PRO",
3+
"submitter_sequencing_experiment_id": "TEST_EXP",
4+
"sequencing_center": "EXT",
5+
"platform": "ILLUMINA",
6+
"platform_model": "HiSeq 2000",
7+
"experimental_strategy": "WGS",
8+
"sequencing_date": "2014-12-12",
9+
"submitter_sample_id": "HCC1143_BAM_INPUT",
10+
"matched_normal_submitter_sample_id": null,
11+
"sample_type": "Total DNA",
12+
"submitter_specimen_id": "HCC1143_BAM_INPUT",
13+
"tumour_normal_designation": "Normal",
14+
"specimen_tissue_source": "Blood derived",
15+
"specimen_type": "Cell line - derived from normal",
16+
"submitter_donor_id": "HCC1143",
17+
"gender": "Female",
18+
"read_group_count": 3,
19+
"files": [
20+
{
21+
"name": "test_rg_3.bam",
22+
"size": 14911,
23+
"md5sum": "178f97f7b1ca8bfc28fd5586bdd56799",
24+
"format": "BAM",
25+
"data_type": "Submitted Reads",
26+
"ega_file_id": "EGAF000001"
27+
}
28+
],
29+
"read_groups": [
30+
{
31+
"submitter_read_group_id": "C0HVY.2",
32+
"read_group_id_in_bam": null,
33+
"platform_unit": "74_8a",
34+
"is_paired_end": true,
35+
"file_r1": "test_rg_3.bam",
36+
"file_r2": "test_rg_3.bam",
37+
"read_length_r1": 150,
38+
"read_length_r2": 150,
39+
"insert_size": 298,
40+
"sample_barcode": null,
41+
"library_name": "Pond-147580"
42+
},
43+
{
44+
"submitter_read_group_id": "D0RE2.1",
45+
"read_group_id_in_bam": null,
46+
"platform_unit": "74_8b",
47+
"is_paired_end": true,
48+
"file_r1": "test_rg_3.bam",
49+
"file_r2": "test_rg_3.bam",
50+
"read_length_r1": 150,
51+
"read_length_r2": 150,
52+
"insert_size": 298,
53+
"sample_barcode": null,
54+
"library_name": "Pond-147580"
55+
},
56+
{
57+
"submitter_read_group_id": "D0RH0.2",
58+
"read_group_id_in_bam": null,
59+
"platform_unit": "74_8c",
60+
"is_paired_end": true,
61+
"file_r1": "test_rg_3.bam",
62+
"file_r2": "test_rg_3.bam",
63+
"read_length_r1": 150,
64+
"read_length_r2": 150,
65+
"insert_size": 298,
66+
"sample_barcode": null,
67+
"library_name": "Pond-147580"
68+
}
69+
]
70+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"metadata_payload_json": "input/submission_input.json",
3+
"file_info_tsv": "input/file.v2.tsv",
4+
"expected_output": "input/12c64309-4f21-4a86-8175-ca2340babadd.sequencing_experiment.payload.json",
5+
"publish_dir": "outdir"
6+
}

0 commit comments

Comments
 (0)