Skip to content

Commit 10f99df

Browse files
authored
Merge pull request #106 from icgc-argo/payload-gen-seq-experiment@0.4.0
[release]
2 parents 8479670 + 3b6cc92 commit 10f99df

10 files changed

Lines changed: 77 additions & 35 deletions

payload-gen-seq-experiment/main.nf

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
/* this block is auto-generated based on info from pkg.json where */
2626
/* changes can be made if needed, do NOT modify this block manually */
2727
nextflow.enable.dsl = 2
28-
version = '0.3.0' // package version
28+
version = '0.4.0'
2929

3030
container = [
3131
'ghcr.io': 'ghcr.io/icgc-argo/data-processing-utility-tools.payload-gen-seq-experiment'
@@ -45,10 +45,10 @@ params.publish_dir = "" // set to empty string will disable publishDir
4545

4646

4747
// tool specific params go here, add / change as needed
48-
params.metadata_json = "NO_FILE1"
49-
params.experiment_info_tsv = "NO_FILE2"
50-
params.read_group_info_tsv = "NO_FILE3"
51-
params.file_info_tsv = "NO_FILE4"
48+
params.experiment_info_tsv = "NO_FILE1"
49+
params.read_group_info_tsv = "NO_FILE2"
50+
params.file_info_tsv = "NO_FILE3"
51+
params.extra_info_tsv = "NO_FILE4"
5252

5353

5454
process payloadGenSeqExperiment {
@@ -59,26 +59,26 @@ process payloadGenSeqExperiment {
5959
memory "${params.mem} GB"
6060

6161
input:
62-
path metadata_json
6362
path experiment_info_tsv
6463
path read_group_info_tsv
6564
path file_info_tsv
65+
path extra_info_tsv
6666

6767
output:
6868
path "*.sequencing_experiment.payload.json", emit: payload
6969

7070
script:
71-
args_metadata_json = !metadata_json.name.startsWith("NO_FILE") ? "-m ${metadata_json}" : ""
7271
args_experiment_info_tsv = !experiment_info_tsv.name.startsWith("NO_FILE") ? "-x ${experiment_info_tsv}" : ""
7372
args_read_group_info_tsv = !read_group_info_tsv.name.startsWith("NO_FILE") ? "-r ${read_group_info_tsv}" : ""
7473
args_file_info_tsv = !file_info_tsv.name.startsWith("NO_FILE") ? "-f ${file_info_tsv}" : ""
74+
args_extra_info_tsv = !extra_info_tsv.name.startsWith("NO_FILE") ? "-e ${extra_info_tsv}" : ""
7575

7676
"""
7777
main.py \
78-
${args_metadata_json} \
7978
${args_experiment_info_tsv} \
8079
${args_read_group_info_tsv} \
81-
${args_file_info_tsv}
80+
${args_file_info_tsv} \
81+
${args_extra_info_tsv}
8282
"""
8383
}
8484

@@ -87,9 +87,9 @@ process payloadGenSeqExperiment {
8787
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
8888
workflow {
8989
payloadGenSeqExperiment(
90-
file(params.metadata_json),
9190
file(params.experiment_info_tsv),
9291
file(params.read_group_info_tsv),
93-
file(params.file_info_tsv)
92+
file(params.file_info_tsv),
93+
file(params.extra_info_tsv)
9494
)
9595
}

payload-gen-seq-experiment/main.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def validate_args(args):
159159
))
160160

161161

162-
def main(metadata):
162+
def main(metadata, extra_info=dict()):
163163
empty_str_to_null(metadata)
164164

165165
payload = {
@@ -198,6 +198,24 @@ def main(metadata):
198198
}
199199
}
200200

201+
if extra_info:
202+
if extra_info['sample'].get(sample['submitterSampleId']):
203+
sample['sampleId'] = extra_info['sample'][sample['submitterSampleId']]
204+
else:
205+
sys.exit(f"Provided extra_info_tsv misses mapping for submitter sample ID: {sample['submitterSampleId']}")
206+
207+
if extra_info['specimen'].get(sample['specimen']['submitterSpecimenId']):
208+
sample['specimenId'] = extra_info['specimen'][sample['specimen']['submitterSpecimenId']]
209+
sample['specimen']['specimenId'] = sample["specimenId"]
210+
else:
211+
sys.exit(f"Provided extra_info_tsv misses mapping for submitter specimen ID: {sample['specimen']['submitterSpecimenId']}")
212+
213+
if extra_info['donor'].get(sample['donor']['submitterDonorId']):
214+
sample['donor']['donorId'] = extra_info['donor'][sample['donor']['submitterDonorId']]
215+
sample['specimen']['donorId'] = sample['donor']['donorId']
216+
else:
217+
sys.exit(f"Provided extra_info_tsv misses mapping for submitter donor ID: {sample['donor']['submitterDonorId']}")
218+
201219
payload['samples'].append(sample)
202220

203221
# get file of the payload
@@ -235,6 +253,8 @@ def main(metadata):
235253
help="tsv file containing read_group information submitted from user")
236254
parser.add_argument("-f", "--file-info-tsv",
237255
help="tsv file containing file information submitted from user")
256+
parser.add_argument("-e", "--extra-info-tsv",
257+
help="tsv file containing file information submitted from user")
238258
args = parser.parse_args()
239259

240260
validate_args(args)
@@ -258,4 +278,24 @@ def main(metadata):
258278
# all TSV are well-formed, let's load them
259279
metadata = load_all_tsvs(args.experiment_info_tsv, args.read_group_info_tsv, args.file_info_tsv)
260280

261-
main(metadata)
281+
extra_info = dict()
282+
if args.extra_info_tsv:
283+
with open(args.extra_info_tsv, 'r') as f:
284+
for row in csv.DictReader(f, delimiter='\t'):
285+
type = row['type']
286+
submitter_id = row['submitter_id']
287+
uniform_id = row['uniform_id']
288+
if type in extra_info:
289+
sys.exit(f"Values in 'type' field duplicated. Offending value: {type}, in file: {args.extra_info_tsv}")
290+
else:
291+
extra_info[type] = dict()
292+
293+
if submitter_id in extra_info[type]:
294+
sys.exit(f"Values in 'submitter_id' field duplicated. Offending value: {submitter_id}, for type: {type}, in file: {args.extra_info_tsv}" )
295+
else:
296+
extra_info[type][submitter_id] = uniform_id
297+
298+
if 'donor' not in extra_info or 'specimen' not in extra_info or 'sample' not in extra_info:
299+
sys.exit(f"Provided extra_info_tsv file '{args.extra_info_tsv}' is required to have ID mappings for 'donor', 'specimen' and 'sample'")
300+
301+
main(metadata, extra_info)

payload-gen-seq-experiment/pkg.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "payload-gen-seq-experiment",
3-
"version": "0.3.0",
3+
"version": "0.4.0",
44
"description": "SONG payload generation for sequencing experiment",
55
"main": "main.nf",
66
"deprecated": false,

payload-gen-seq-experiment/tests/checker.nf

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
/* this block is auto-generated based on info from pkg.json where */
3030
/* changes can be made if needed, do NOT modify this block manually */
3131
nextflow.enable.dsl = 2
32-
version = '0.3.0' // package version
32+
version = '0.4.0'
3333

3434
container = [
3535
'ghcr.io': 'ghcr.io/icgc-argo/data-processing-utility-tools.payload-gen-seq-experiment'
@@ -43,10 +43,10 @@ params.container_version = ""
4343
params.container = ""
4444

4545
// tool specific params go here, add / change as needed
46-
params.metadata_json = "NO_FILE1"
47-
params.experiment_info_tsv = "NO_FILE2"
48-
params.read_group_info_tsv = "NO_FILE3"
49-
params.file_info_tsv = "NO_FILE4"
46+
params.experiment_info_tsv = "NO_FILE1"
47+
params.read_group_info_tsv = "NO_FILE2"
48+
params.file_info_tsv = "NO_FILE3"
49+
params.extra_info_tsv = "NO_FILE4"
5050
params.expected_output = ""
5151

5252
include { payloadGenSeqExperiment } from '../main'
@@ -77,18 +77,18 @@ process file_smart_diff {
7777

7878
workflow checker {
7979
take:
80-
metadata_json
8180
experiment_info_tsv
8281
read_group_info_tsv
8382
file_info_tsv
83+
extra_info_tsv
8484
expected_output
8585

8686
main:
8787
payloadGenSeqExperiment(
88-
metadata_json,
8988
experiment_info_tsv,
9089
read_group_info_tsv,
91-
file_info_tsv
90+
file_info_tsv,
91+
extra_info_tsv
9292
)
9393

9494
file_smart_diff(
@@ -100,10 +100,10 @@ workflow checker {
100100

101101
workflow {
102102
checker(
103-
file(params.metadata_json),
104103
file(params.experiment_info_tsv),
105104
file(params.read_group_info_tsv),
106105
file(params.file_info_tsv),
106+
file(params.extra_info_tsv),
107107
file(params.expected_output)
108108
)
109109
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../tests/data/extra_info.v2.tsv

payload-gen-seq-experiment/tests/test-job-bam.nf.json

Lines changed: 0 additions & 5 deletions
This file was deleted.

payload-gen-seq-experiment/tests/test-job-bam.tsv.nf.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"experiment_info_tsv": "input/experiment.v2.tsv",
33
"read_group_info_tsv": "input/read_group.v2.tsv",
44
"file_info_tsv": "input/file.v2.tsv",
5+
"extra_info_tsv": "input/extra_info.v2.tsv",
56
"expected_output": "input/98690064-627e-4a2b-a0c2-60fc9ca8433c.sequencing_experiment.payload.json",
67
"publish_dir": "outdir"
78
}

payload-gen-seq-experiment/tests/test-job-fq.nf.json

Lines changed: 0 additions & 4 deletions
This file was deleted.

tests/data/98690064-627e-4a2b-a0c2-60fc9ca8433c.sequencing_experiment.payload.json

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,17 @@
6262
"submitterSpecimenId": "HCC1143_BAM_INPUT",
6363
"tumourNormalDesignation": "Normal",
6464
"specimenTissueSource": "Blood derived",
65-
"specimenType": "Cell line - derived from normal"
65+
"specimenType": "Cell line - derived from normal",
66+
"specimenId": "SP2222222222",
67+
"donorId": "DO1111111111"
6668
},
6769
"donor": {
6870
"submitterDonorId": "HCC1143",
69-
"gender": "Female"
70-
}
71+
"gender": "Female",
72+
"donorId": "DO1111111111"
73+
},
74+
"sampleId": "SA3333333333",
75+
"specimenId": "SP2222222222"
7176
}
7277
],
7378
"files": [

tests/data/extra_info.v2.tsv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
type submitter_id uniform_id
2+
donor HCC1143 DO1111111111
3+
specimen HCC1143_BAM_INPUT SP2222222222
4+
sample HCC1143_BAM_INPUT SA3333333333

0 commit comments

Comments
 (0)