Skip to content

Commit e81f004

Browse files
committed
implemented two functions and added tests
1 parent dc6af95 commit e81f004

8 files changed

Lines changed: 175 additions & 145 deletions

File tree

helper-functions/main.nf

Lines changed: 25 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -25,60 +25,35 @@
2525
/* changes can be made if needed, do NOT modify this block manually */
2626
nextflow.enable.dsl = 2
2727
version = '1.0.0' // package version
28-
29-
container = [
30-
'ghcr.io': 'ghcr.io/icgc-argo/data-processing-utility-tools.helper-functions'
31-
]
32-
default_container_registry = 'ghcr.io'
3328
/********************************************************************/
3429

3530

36-
// universal params go here
37-
params.container_registry = ""
38-
params.container_version = ""
39-
params.container = ""
40-
41-
params.cpus = 1
42-
params.mem = 1 // GB
43-
params.publish_dir = "" // set to empty string will disable publishDir
44-
45-
46-
// tool specific parmas go here, add / change as needed
47-
params.input_file = ""
48-
params.output_pattern = "*" // output file name pattern
49-
50-
51-
process helperFunctions {
52-
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
53-
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir
54-
55-
cpus params.cpus
56-
memory "${params.mem} GB"
57-
58-
input: // input, make update as needed
59-
path input_file
60-
61-
output: // output, make update as needed
62-
path "output_dir/${params.output_pattern}", emit: output_file
63-
64-
script:
65-
// add and initialize variables here as needed
66-
67-
"""
68-
mkdir -p output_dir
69-
70-
main.py \
71-
-i ${input_file} \
72-
-o output_dir
73-
74-
"""
31+
// this is kind of like CWL's secondary files
32+
// Build the names of the secondary files that accompany `main_file`
// (loosely modelled on CWL's secondaryFiles patterns).
//
// Params:
//   main_file - name/path of the main file (String or GString)
//   exts      - list of extensions WITHOUT the leading dot; each leading '^'
//               strips one extension from main_file before the rest of the
//               extension is appended (e.g. 'ref.fa' + '^dict' -> 'ref.dict')
// Returns:
//   list of secondary file names, in the same order as `exts`
// Errors:
//   terminates the run (exit 1) when main_file is not a string or exts is not a list
def getSecondaryFiles(main_file, exts){
  // CharSequence accepts both String and interpolated GString values
  if (!(main_file instanceof CharSequence)) {
    exit 1, "[getSecondaryFiles] param: main_file must be a string"
  }

  if (!(exts instanceof List)) {
    exit 1, "[getSecondaryFiles] param: exts must be a list of strings"
  }

  def mainPath = main_file.toString()
  def secondaryFiles = []
  for (ext in exts) {
    // all working variables are declared with `def` so nothing leaks into the script binding
    def base = mainPath
    def suffix = ext.toString()

    // one extension is removed per leading caret; a name without any
    // extension left is kept unchanged instead of being dropped
    while (suffix.startsWith("^")) {
      suffix = suffix.substring(1)
      def dot = base.lastIndexOf('.')
      // only a dot inside the file name counts, never one in a parent directory
      if (dot > base.lastIndexOf('/')) {
        base = base.substring(0, dot)
      }
    }

    secondaryFiles.add(base + '.' + suffix)
  }
  return secondaryFiles
}
7654

7755

78-
// this provides an entry point for this main script, so it can be run directly without clone the repo
79-
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
80-
workflow {
81-
helperFunctions(
82-
file(params.input_file)
83-
)
56+
// get the names of the secondary files expected for BWA alignment (this only builds the names; it does not check that the files exist)
57+
def getBwaSecondaryFiles(main_file){
  // fixed set of secondary-file extensions used for BWA alignment;
  // the names are derived by getSecondaryFiles (plain '<main_file>.<ext>' form)
  def bwaExts = ['fai', 'sa', 'bwt', 'ann', 'amb', 'pac', 'alt']
  return getSecondaryFiles(main_file, bwaExts)
}

helper-functions/main.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

helper-functions/tests/checker.nf

Lines changed: 41 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env nextflow
22

33
/*
4-
Copyright (C) 2021, Ontario Institute for Cancer Research
4+
Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
55
66
This program is free software: you can redistribute it and/or modify
77
it under the terms of the GNU Affero General Public License as published by
@@ -42,56 +42,57 @@ params.container_registry = ""
4242
params.container_version = ""
4343
params.container = ""
4444

45-
// tool specific parmas go here, add / change as needed
46-
params.input_file = ""
47-
params.expected_output = ""
45+
bwaSecondaryExts = ['fai', 'sa', 'bwt', 'ann', 'amb', 'pac', 'alt']
4846

49-
include { helperFunctions } from '../main'
47+
params.file_name = null
48+
params.file_size = null
5049

50+
include {
51+
getSecondaryFiles;
52+
getBwaSecondaryFiles
53+
} from '../main.nf'
5154

52-
process file_smart_diff {
53-
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
55+
include {
56+
generateDummyFile as gFile1;
57+
generateDummyFile as gFile2;
58+
} from './generate-dummy-file.nf'
5459

55-
input:
56-
path output_file
57-
path expected_file
60+
include {
61+
filesExist as fExist1;
62+
filesExist as fExist2;
63+
} from './files-exist.nf'
5864

59-
output:
60-
stdout()
65+
Channel.from(params.file_name).set{ file_name_ch }
66+
Channel.from(bwaSecondaryExts).set{ bwa_ext_ch }
6167

62-
script:
63-
"""
64-
# Note: this is only for demo purpose, please write your own 'diff' according to your own needs.
65-
# remove date field before comparison eg, <div id="header_filename">Tue 19 Jan 2021<br/>test_rg_3.bam</div>
66-
# sed -e 's#"header_filename">.*<br/>test_rg_3.bam#"header_filename"><br/>test_rg_3.bam</div>#'
6768

68-
diff <( cat ${output_file} | sed -e 's#"header_filename">.*<br/>#"header_filename"><br/>#' ) \
69-
<( ([[ '${expected_file}' == *.gz ]] && gunzip -c ${expected_file} || cat ${expected_file}) | sed -e 's#"header_filename">.*<br/>#"header_filename"><br/>#' ) \
70-
&& ( echo "Test PASSED" && exit 0 ) || ( echo "Test FAILED, output file mismatch." && exit 1 )
71-
"""
72-
}
73-
74-
75-
workflow checker {
76-
take:
77-
input_file
78-
expected_output
69+
workflow {
70+
// generate the main file
71+
gFile1(
72+
file_name_ch.flatten(),
73+
params.file_size
74+
)
7975

80-
main:
81-
helperFunctions(
82-
input_file
76+
// generate the BWA secondary files
77+
gFile2(
78+
file_name_ch.combine(bwa_ext_ch),
79+
params.file_size
8380
)
8481

85-
file_smart_diff(
86-
helperFunctions.out.output_file,
87-
expected_output
82+
// test 'getSecondaryFiles' for expected 'fai' file exists
83+
fExist1(
84+
getSecondaryFiles(params.file_name, ['fai']),
85+
'exist',
86+
gFile2.out.file.collect(),
87+
true // no need to wait
8888
)
89-
}
9089

90+
// test 'getBwaSecondaryFiles' for all expected bwa secondary files exist
91+
fExist2(
92+
getBwaSecondaryFiles(params.file_name).collect(),
93+
'exist',
94+
gFile2.out.file.collect(),
95+
true // no need to wait
96+
)
9197

92-
workflow {
93-
checker(
94-
file(params.input_file),
95-
file(params.expected_output)
96-
)
9798
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
* Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
5+
*
6+
* This program is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU Affero General Public License as published
8+
* by the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU Affero General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Affero General Public License
17+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
/*
21+
* Contributors:
22+
* Junjun Zhang <junjun.zhang@oicr.on.ca>
23+
*/
24+
25+
nextflow.enable.dsl = 2
26+
27+
28+
// Test helper: checks that the named files do (or do not) exist among the
// staged `files` in the task work directory. The task fails with exit
// status 1 and a message on stderr when the expectation is not met.
process filesExist {
  input:
    val file_names  // a single file name or a list of names; names must not contain spaces
    val expect  // 'exist' when the files are expected to exist; 'not_exist' when they are expected to be absent
    path files  // files staged into the work directory to check against
    val dependency_flag  // any output from process(es) you'd like to make this process depend on

  script:
    // `def` keeps the variable local to each task; an undeclared variable is
    // global and shared between concurrent (aliased) invocations of this process
    def file_name_arg = file_names instanceof List ? file_names.join(" ") : file_names
    """
    # bash 'exit' only accepts a numeric status, so the message is printed to
    # stderr first and the task then exits with status 1
    if [[ "${expect}" = "exist" ]]; then
      for f in \$(echo "${file_name_arg}"); do
        if [[ ! -f \$f ]]; then
          echo "Expected \$f not exists." >&2
          exit 1
        fi
      done
    elif [[ "${expect}" = "not_exist" ]]; then
      for f in \$(echo "${file_name_arg}"); do
        if [[ -f \$f ]]; then
          echo "Unexpected \$f exists." >&2
          exit 1
        fi
      done
    else
      echo "Second argument must be either 'exist' or 'not_exist'. '${expect}' is supplied." >&2
      exit 1
    fi
    """
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
* Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
5+
*
6+
* This program is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU Affero General Public License as published
8+
* by the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU Affero General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Affero General Public License
17+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
/*
21+
* Contributors:
22+
* Junjun Zhang <junjun.zhang@oicr.on.ca>
23+
*/
24+
25+
nextflow.enable.dsl = 2
26+
27+
28+
// Test helper: creates one file of `file_size` random bytes in the task
// work directory and emits it on the `file` output.
process generateDummyFile {
  input:
    val file_name  // a file name, or a list of name parts that are joined with '.'
    val file_size  // number of bytes to write

  output:
    path "*", emit: file

  script:
    // `def` keeps the variable local to each task; without it the variable is
    // global and parallel tasks of this process could overwrite each other's value
    def file_name_arg = file_name instanceof List ? file_name.join(".") : file_name
    """
    dd if=/dev/urandom of="${file_name_arg}" bs=1 count=${file_size}
    """
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"file_name": "xyz.fa",
3+
"file_size": 1024
4+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"file_name": "abc.fa",
3+
"file_size": 512,
4+
"container_registry": "ghcr.io"
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"file_name": "123.fa",
3+
"file_size": 256,
4+
"container_registry": "ghcr.io"
5+
}

0 commit comments

Comments
 (0)