Skip to content

Commit 28a0e5e

Browse files
authored
Merge pull request #110 from icgc-argo/metadata-parser.0.2.0.0
add code to get analysis_tools
2 parents e28cc32 + 3fca4c9 commit 28a0e5e

6 files changed

Lines changed: 221 additions & 4 deletions
Lines changed: 101 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,101 @@
1+
{
2+
"analysisId": "3414534a-47a4-492d-9453-4a47a4192d6d",
3+
"studyId": "TEST-PR",
4+
"analysisState": "PUBLISHED",
5+
"createdAt": "2020-12-02T17:18:32.353334",
6+
"updatedAt": "2020-12-02T17:18:32.353334",
7+
"firstPublishedAt": "2020-12-02T17:18:32.353334",
8+
"publishedAt": "2020-12-02T17:18:32.353334",
9+
"analysisStateHistory": [
10+
{
11+
"initialState": "UNPUBLISHED",
12+
"updatedState": "PUBLISHED",
13+
"updatedAt": "2020-12-02T17:18:32.353334"
14+
}
15+
],
16+
"samples": [
17+
{
18+
"sampleId": "SA610229",
19+
"specimenId": "SP210202",
20+
"submitterSampleId": "COLO-829",
21+
"matchedNormalSubmitterSampleId": "COLO-829-BL",
22+
"sampleType": "Total DNA",
23+
"specimen": {
24+
"specimenId": "SP210202",
25+
"donorId": "DO250183",
26+
"submitterSpecimenId": "COLO-829",
27+
"tumourNormalDesignation": "Tumour",
28+
"specimenTissueSource": "Blood derived",
29+
"specimenType": "Cell line - derived from tumour"
30+
},
31+
"donor": {
32+
"donorId": "DO250183",
33+
"studyId": "TEST-PR",
34+
"gender": "Female",
35+
"submitterDonorId": "COLO-829"
36+
}
37+
}
38+
],
39+
"files": [
40+
{
41+
"info": {
42+
"analysis_tools": [
43+
"CaVEMan"
44+
],
45+
"data_category": "Simple Nucleotide Variation"
46+
},
47+
"objectId": "68a48641-8df4-58e9-9a3f-fbfa5e21e08d",
48+
"studyId": "TEST-PR",
49+
"analysisId": "3414534a-47a4-492d-9453-4a47a4192d6d",
50+
"fileName": "TEST-PR.DO250183.SA610229.wgs.20200513.sanger-wgs.somatic.snv.vcf.gz",
51+
"fileSize": 24067,
52+
"fileType": "VCF",
53+
"fileMd5sum": "855b69e7541ebe8da2fba6971f8a2da5",
54+
"fileAccess": "controlled",
55+
"dataType": "Raw SNV Calls"
56+
},
57+
{
58+
"info": {
59+
"analysis_tools": [
60+
"CaVEMan"
61+
],
62+
"data_category": "Simple Nucleotide Variation"
63+
},
64+
"objectId": "09831e63-4e8c-51d1-b454-2f01580bd311",
65+
"studyId": "TEST-PR",
66+
"analysisId": "3414534a-47a4-492d-9453-4a47a4192d6d",
67+
"fileName": "TEST-PR.DO250183.SA610229.wgs.20200513.sanger-wgs.somatic.snv.vcf.gz.tbi",
68+
"fileSize": 246,
69+
"fileType": "TBI",
70+
"fileMd5sum": "6b64780740789c6719399ae1d759edab",
71+
"fileAccess": "controlled",
72+
"dataType": "VCF Index"
73+
}
74+
],
75+
"analysisType": {
76+
"name": "variant_calling",
77+
"version": 7
78+
},
79+
"workflow": {
80+
"inputs": [
81+
{
82+
"analysis_type": "sequencing_alignment",
83+
"tumour_analysis_id": "ad7e2df1-03ea-4dae-be2d-f103ea7dae3a"
84+
},
85+
{
86+
"analysis_type": "sequencing_alignment",
87+
"normal_analysis_id": "ce2a49b2-2bda-4ded-aa49-b22bdaadedb3"
88+
}
89+
],
90+
"run_id": "voluminous_brahmagupta",
91+
"genome_build": "GRCh38_hla_decoy_ebv",
92+
"workflow_name": "Sanger WGS Variant Calling",
93+
"workflow_version": "2.1.0-8-1.dev",
94+
"workflow_short_name": "sanger-wgs"
95+
},
96+
"experiment": {
97+
"platform": "ILLUMINA",
98+
"experimental_strategy": "WGS"
99+
},
100+
"variant_class": "Somatic"
101+
}

tests/data/89176bbc-ffca-4d73-984a-4a92ed931a98.sequencing_experiment.4.analysis.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@
8080
"file_r2": "test_rg_3.bam",
8181
"insert_size": 298,
8282
"library_name": "Pond-147580",
83-
"is_paired_end": true,
83+
"is_paired_end": false,
8484
"platform_unit": "74_8c",
8585
"read_length_r1": 150,
8686
"read_length_r2": 150,
Lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,102 @@
1+
{
2+
"analysisId": "ca105e4d-d16b-461c-905e-4dd16b861cc8",
3+
"studyId": "TEST-PR",
4+
"analysisState": "PUBLISHED",
5+
"createdAt": "2020-12-02T17:18:32.353334",
6+
"updatedAt": "2020-12-02T17:18:32.353334",
7+
"firstPublishedAt": "2020-12-02T17:18:32.353334",
8+
"publishedAt": "2020-12-02T17:18:32.353334",
9+
"analysisStateHistory": [
10+
{
11+
"initialState": "UNPUBLISHED",
12+
"updatedState": "PUBLISHED",
13+
"updatedAt": "2020-12-02T17:18:32.353334"
14+
}
15+
],
16+
"samples": [
17+
{
18+
"sampleId": "SA610229",
19+
"specimenId": "SP210202",
20+
"submitterSampleId": "COLO-829",
21+
"matchedNormalSubmitterSampleId": "COLO-829-BL",
22+
"sampleType": "Total DNA",
23+
"specimen": {
24+
"specimenId": "SP210202",
25+
"donorId": "DO250183",
26+
"submitterSpecimenId": "COLO-829",
27+
"tumourNormalDesignation": "Tumour",
28+
"specimenTissueSource": "Blood derived",
29+
"specimenType": "Cell line - derived from tumour"
30+
},
31+
"donor": {
32+
"donorId": "DO250183",
33+
"studyId": "TEST-PR",
34+
"gender": "Female",
35+
"submitterDonorId": "COLO-829"
36+
}
37+
}
38+
],
39+
"files": [
40+
{
41+
"info": {
42+
"analysis_tools": [
43+
"GATK-Mutect2"
44+
],
45+
"data_category": "Simple Nucleotide Variation"
46+
},
47+
"objectId": "a4071a43-77c9-54c7-b67d-9073fc741882",
48+
"studyId": "TEST-PR",
49+
"analysisId": "ca105e4d-d16b-461c-905e-4dd16b861cc8",
50+
"fileName": "TEST-PR.DO250183.SA610229.wgs.20200922.gatk-mutect2.somatic.snv.vcf.gz",
51+
"fileSize": 2896208,
52+
"fileType": "VCF",
53+
"fileMd5sum": "45e1c12bfdfbcf93297d739321f6ef31",
54+
"fileAccess": "controlled",
55+
"dataType": "Raw SNV Calls"
56+
},
57+
{
58+
"info": {
59+
"analysis_tools": [
60+
"GATK-Mutect2"
61+
],
62+
"data_category": "Simple Nucleotide Variation"
63+
},
64+
"objectId": "20a67bb5-b81a-5351-97f7-26b6f1f7faff",
65+
"studyId": "TEST-PR",
66+
"analysisId": "ca105e4d-d16b-461c-905e-4dd16b861cc8",
67+
"fileName": "TEST-PR.DO250183.SA610229.wgs.20200922.gatk-mutect2.somatic.snv.vcf.gz.tbi",
68+
"fileSize": 23243,
69+
"fileType": "TBI",
70+
"fileMd5sum": "45e54b4c1b0ab6c9f907c23d66f58a64",
71+
"fileAccess": "controlled",
72+
"dataType": "VCF Index"
73+
}
74+
],
75+
"analysisType": {
76+
"name": "variant_calling",
77+
"version": 9
78+
},
79+
"workflow": {
80+
"inputs": [
81+
{
82+
"analysis_type": "sequencing_alignment",
83+
"tumour_analysis_id": "94c862ca-8055-4794-8862-ca8055479490"
84+
},
85+
{
86+
"analysis_type": "sequencing_alignment",
87+
"normal_analysis_id": "916b95a5-42d7-46a8-ab95-a542d7a6a81e"
88+
}
89+
],
90+
"run_id": "cheesy_fourier",
91+
"session_id": "66956ede-ec51-4758-8f0b-316fa9812fb6",
92+
"genome_build": "GRCh38_hla_decoy_ebv",
93+
"workflow_name": "GATK Mutect2 Variant Calling",
94+
"workflow_version": "4.1.8.0-1.0",
95+
"workflow_short_name": "gatk-mutect2"
96+
},
97+
"experiment": {
98+
"platform": "ILLUMINA",
99+
"experimental_strategy": "WGS"
100+
},
101+
"variant_class": "Somatic"
102+
}

tools/metadata-parser/metadata-parser.nf

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
*/
2525

2626
nextflow.enable.dsl=2
27-
version = '0.1.0.0'
27+
version = '0.2.0.0'
2828

2929
params.metadata_analysis = ""
3030
params.container_version = ""
@@ -45,13 +45,17 @@ process metadataParser {
4545
env DONOR_ID, emit: donor_id
4646
env EXP, emit: experimental_strategy
4747
env PAIRED, emit: paired
48+
env ANALYSIS_TOOLS, emit: analysis_tools
4849

4950
script:
5051
"""
5152
set -euxo pipefail
5253
STUDY_ID=`cat ${metadata_analysis} | jq -er '.studyId' | tr -d '\\n'`
5354
DONOR_ID=`cat ${metadata_analysis} | jq -er '.samples[0].donor.donorId' | tr -d '\\n'`
54-
EXP=`cat ${metadata_analysis} | jq -er '.experiment | if (.experimental_strategy | length)>0 then .experimental_strategy else .library_strategy end' | tr -d '\\n'`
55-
PAIRED=`cat ${metadata_analysis} | jq -er '[.read_groups[] | .is_paired_end] | all | tostring' | tr -d '\\n'`
55+
EXP=`cat ${metadata_analysis} | jq -er '.experiment | .experimental_strategy? // .library_strategy' | tr -d '\\n'`
56+
VARIABLE1=`cat ${metadata_analysis} | jq -r 'if ([.read_groups[]?] | length) >0 then [.read_groups[] | .is_paired_end] | all | tostring else null end' | tr -d '\\n'`
57+
PAIRED=\${VARIABLE1:-'null'}
58+
VARIABLE2=`cat ${metadata_analysis} | jq -r '[.files[] | .info? | .analysis_tools[]?] | unique | join(",")' | tr -d '\\n'`
59+
ANALYSIS_TOOLS=\${VARIABLE2:-'null'}
5660
"""
5761
}
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
{
2+
"metadata_analysis": "data/3414534a-47a4-492d-9453-4a47a4192d6d.sanger-wgs.snv.json",
3+
"cpus": 1,
4+
"mem": 0.5
5+
}
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
{
2+
"metadata_analysis": "data/ca105e4d-d16b-461c-905e-4dd16b861cc8.gatk-mutect2.snv.json",
3+
"cpus": 1,
4+
"mem": 0.5
5+
}

0 commit comments

Comments
 (0)