3333
3434
# Lookup table keyed by variant/file type. Each value is a positional list:
#   [0] data category
#   [1] [data type, data subtype]   (subtype is None when there is none)
#   [2] analysis tools read for the 'sanger-wgs' / 'sanger-wxs' workflows
#   [3] (optional) a second tool list; presumably the one used for the
#       'gatk-mutect2' workflow -- TODO confirm against get_files_info
# Entries with only three elements have no second tool list.
#
# Labels are stored without leading/trailing padding so that the same
# category or type always compares equal across rows.
# NOTE(review): the spacing after the colon in tool labels ('GATK: Mutect2'
# vs 'Sanger:bam_stats') is kept exactly as captured -- confirm the canonical
# form before normalizing.
variant_type_to_data_type_etc = {
    'snv': ['Simple Nucleotide Variation', ['Raw SNV Calls', None], ['CaVEMan'], ['GATK: Mutect2']],
    'indel': ['Simple Nucleotide Variation', ['Raw InDel Calls', None], ['Pindel'], ['GATK: Mutect2']],
    'cnv': ['Copy Number Variation', ['Raw CNV Calls', None], ['ASCAT']],
    'sv': ['Structural Variation', ['Raw SV Calls', None], ['BRASS']],
    'caveman-supplement': ['Simple Nucleotide Variation', ['Variant Calling Supplement', 'SNV Supplement'], ['CaVEMan']],
    'pindel-supplement': ['Simple Nucleotide Variation', ['Variant Calling Supplement', 'InDel Supplement'], ['Pindel']],
    'ascat-supplement': ['Copy Number Variation', ['Variant Calling Supplement', 'CNV Supplement'], ['ASCAT']],
    'brass-supplement': ['Structural Variation', ['Variant Calling Supplement', 'SV Supplement'], ['BRASS']],
    # Timings are not tied to a single caller, so both tool slots are None.
    'timings-supplement': ['Quality Control Metrics', ['Analysis QC', 'Runtime Stats'], None, None],
    'bas_metrics': ['Quality Control Metrics', ['Aligned Reads QC', 'Alignment Metrics'], ['Sanger:bam_stats']],
    'contamination_metrics': ['Quality Control Metrics', ['Analysis QC', 'Cross Sample Contamination'], ['Sanger: verifyBamHomChk'], ['GATK: CalculateContamination']],
    'ascat_metrics': ['Quality Control Metrics', ['Analysis QC', 'Ploidy and Purity Estimation'], ['ASCAT']],
    'genotyped_gender_metrics': ['Quality Control Metrics', ['Analysis QC', 'Genotyping Stats'], ['Sanger: compareBamGenotypes']],
    'mutect_filtering_metrics': ['Quality Control Metrics', ['Analysis QC', 'Variant Filtering Stats'], [], ['GATK: FilterMutectCalls']],
    'mutect_callable_metrics': ['Quality Control Metrics', ['Analysis QC', 'Variant Callable Stats'], [], ['GATK: Mutect2']],
}
5252
5353workflow_full_name = {
5656 'gatk-mutect2-variant-calling' : 'GATK Mutect2 Variant Calling'
5757}
5858
59+
def calculate_size(file_path):
    """Return the size, in bytes, of the file at *file_path*.

    The value is read from ``os.stat``; any ``OSError`` it raises (for
    example when the path does not exist) propagates to the caller.
    """
    return os.stat(file_path).st_size
6162
@@ -107,12 +108,6 @@ def get_files_info(file_to_upload, wf_short_name, wf_version, somatic_or_germli
107108 else :
108109 sys .exit ('Error: unknown file type "%s"' % file_to_upload )
109110
110- if wf_short_name == 'sanger-wxs' :
111- if 'ASCAT' in variant_type_to_data_type_etc ['timings-supplement' ][2 ]:
112- variant_type_to_data_type_etc ['timings-supplement' ][2 ].remove ('ASCAT' )
113- if 'BRASS' in variant_type_to_data_type_etc ['timings-supplement' ][2 ]:
114- variant_type_to_data_type_etc ['timings-supplement' ][2 ].remove ('BRASS' )
115-
116111 elif wf_short_name in (['gatk-mutect2' ]):
117112 fname_sample_part = metadata ['samples' ][0 ]['sampleId' ]
118113 if file_to_upload .endswith ('mutect2-snv.vcf.gz' ) or file_to_upload .endswith ('mutect2-snv.vcf.gz.tbi' ):
@@ -153,16 +148,25 @@ def get_files_info(file_to_upload, wf_short_name, wf_version, somatic_or_germli
153148 ] + (['tbi' ] if file_to_upload .endswith ('.tbi' ) else []))
154149
155150 file_info ['fileName' ] = new_fname
151+
152+ file_info ['info' ] = {
153+ 'data_category' : variant_type_to_data_type_etc [variant_type ][0 ],
154+ 'data_subtype' : None
155+ }
156+
156157 extra_info = {}
157158 if new_fname .endswith ('.vcf.gz' ):
158- file_info ['dataType' ] = variant_type_to_data_type_etc [variant_type ][1 ]
159+ file_info ['dataType' ] = variant_type_to_data_type_etc [variant_type ][1 ][0 ]
160+ file_info ['info' ]['data_subtype' ] = variant_type_to_data_type_etc [variant_type ][1 ][1 ]
159161 elif new_fname .endswith ('.vcf.gz.tbi' ):
160162 file_info ['dataType' ] = 'VCF Index'
161163 elif new_fname .endswith ('.tgz' ):
162164 if new_fname .endswith ('-supplement.tgz' ):
163- file_info ['dataType' ] = variant_type_to_data_type_etc [variant_type ][1 ]
165+ file_info ['dataType' ] = variant_type_to_data_type_etc [variant_type ][1 ][0 ]
166+ file_info ['info' ]['data_subtype' ] = variant_type_to_data_type_etc [variant_type ][1 ][1 ]
164167 elif new_fname .endswith ('_metrics.tgz' ):
165- file_info ['dataType' ] = variant_type_to_data_type_etc [variant_type ][1 ]
168+ file_info ['dataType' ] = variant_type_to_data_type_etc [variant_type ][1 ][0 ]
169+ file_info ['info' ]['data_subtype' ] = variant_type_to_data_type_etc [variant_type ][1 ][1 ]
166170 else :
167171 sys .exit ('Error: unknown file type "%s"' % file_to_upload )
168172
@@ -175,10 +179,6 @@ def get_files_info(file_to_upload, wf_short_name, wf_version, somatic_or_germli
175179 else :
176180 sys .exit ('Error: unknown file type "%s"' % file_to_upload )
177181
178- file_info ['info' ] = {
179- 'data_category' : variant_type_to_data_type_etc [variant_type ][0 ]
180- }
181-
182182 if wf_short_name in (['sanger-wgs' , 'sanger-wxs' ]):
183183 file_info ['info' ]['analysis_tools' ] = variant_type_to_data_type_etc [variant_type ][2 ]
184184 elif wf_short_name in (['gatk-mutect2' ]):
0 commit comments