Skip to content

Commit 5c2054e

Browse files
authored
Merge pull request #107 from icgc-argo/helper-functions@1.0.0
[release]
2 parents b89995e + ff36835 commit 5c2054e

15 files changed

Lines changed: 426 additions & 0 deletions

helper-functions/.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.gitignore
2+
.nextflow*
3+
tests
4+
work
5+
outdir

helper-functions/.gitignore

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
*.py[cod]
2+
3+
# C extensions
4+
*.so
5+
6+
# Packages
7+
*.egg
8+
*.egg-info
9+
dist
10+
build
11+
eggs
12+
.eggs
13+
parts
14+
bin
15+
var
16+
sdist
17+
develop-eggs
18+
.installed.cfg
19+
lib
20+
lib64
21+
venv*/
22+
pyvenv*/
23+
24+
# Installer logs
25+
pip-log.txt
26+
27+
# Unit test / coverage reports
28+
.coverage
29+
.tox
30+
.coverage.*
31+
nosetests.xml
32+
coverage.xml
33+
htmlcov
34+
35+
# Translations
36+
*.mo
37+
38+
# Mr Developer
39+
.mr.developer.cfg
40+
.project
41+
.pydevproject
42+
.idea
43+
*.iml
44+
*.komodoproject
45+
46+
# Complexity
47+
output/*.html
48+
output/*/index.html
49+
50+
# Sphinx
51+
docs/_build
52+
53+
.DS_Store
54+
*~
55+
.*.sw[po]
56+
.build
57+
.ve
58+
.env
59+
.cache
60+
.pytest
61+
.bootstrap
62+
.appveyor.token
63+
*.bak
64+
*.log
65+
.vscode
66+
.python-version
67+
.nextflow*
68+
work
69+
outdir

helper-functions/Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
FROM ubuntu:20.04

# OCI image-source annotation pointing at the repository this image is built from.
# Written in key="value" form; the whitespace-separated `LABEL key value` form is legacy syntax.
LABEL org.opencontainers.image.source="https://github.com/icgc-argo/data-processing-utility-tools"

# Create an unprivileged 'ubuntu' user and group (uid/gid 1000) together with
# a home directory owned by that user.
RUN groupadd -g 1000 ubuntu && \
    useradd -l -u 1000 -g ubuntu ubuntu && \
    install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu

# Default to an interactive shell when no command is supplied.
CMD ["/bin/bash"]

helper-functions/main.nf

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (C) 2021, Ontario Institute for Cancer Research
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU Affero General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU Affero General Public License for more details.
15+
16+
You should have received a copy of the GNU Affero General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
Authors:
20+
Junjun Zhang
21+
*/
22+
23+
/********************************************************************/
24+
/* this block is auto-generated based on info from pkg.json where */
25+
/* changes can be made if needed, do NOT modify this block manually */
26+
nextflow.enable.dsl = 2
27+
version = '1.0.0' // package version
28+
/********************************************************************/
29+
30+
31+
/*
 * Derive the names of secondary files that accompany a main file, loosely
 * modelled on CWL's secondaryFiles patterns.
 *
 * Params:
 *   main_file  String, name or path of the main file.
 *   exts       List of extension strings, given WITHOUT a leading dot.
 *              - a plain extension is appended:      'ref.fa' + 'fai'   -> 'ref.fa.fai'
 *              - an extension starting with '^' replaces the last extension
 *                of the main file:                   'ref.fa' + '^dict' -> 'ref.dict'
 *                When the file name has no extension there is nothing to
 *                replace, so it is appended:         'ref'    + '^dict' -> 'ref.dict'
 *
 * Returns: a list of secondary file names, in the same order as 'exts'.
 *
 * Terminates the run with exit status 1 when an argument has the wrong type.
 */
def getSecondaryFiles(main_file, exts){
  if (!(main_file instanceof String)) {
    exit 1, "[getSecondaryFiles] param: main_file must be a string"
  }

  if (!(exts instanceof List)) {
    exit 1, "[getSecondaryFiles] param: exts must be a list of strings"
  }

  // Only a dot located in the file-name component (after the last '/') marks
  // an extension; a dot inside a directory name must not be treated as one.
  def lastSlash = main_file.lastIndexOf('/')
  def lastDot = main_file.lastIndexOf('.')
  def stem = lastDot > lastSlash ? main_file.substring(0, lastDot) : main_file

  def secondaryFiles = []
  for (ext in exts) {
    if (ext.startsWith("^")) {
      // 'def' keeps the variable local; an undeclared name would be stored
      // in the script binding and shared globally
      def replacement = ext.replace("^", "")
      secondaryFiles.add(stem + '.' + replacement)
    } else {
      secondaryFiles.add(main_file + '.' + ext)
    }
  }
  return secondaryFiles
}
54+
55+
56+
// Build the names of the secondary (index) files expected next to a main file
// for BWA alignment. Names are only derived here via getSecondaryFiles; this
// function does not itself check that the files exist.
def getBwaSecondaryFiles(main_file){
  def bwaIndexExts = ['fai', 'sa', 'bwt', 'ann', 'amb', 'pac', 'alt']
  getSecondaryFiles(main_file, bwaIndexExts)
}

helper-functions/nextflow.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Run processes in Docker, passing the invoking user's uid:gid to the container
// through the '-u' run option.
docker.enabled = true
docker.runOptions = '-u \$(id -u):\$(id -g)'

helper-functions/pkg.json

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"name": "helper-functions",
3+
"version": "1.0.0",
4+
"description": "A collection of helper functions",
5+
"main": "main.nf",
6+
"deprecated": false,
7+
"keywords": [
8+
"bioinformatics",
9+
"utils",
10+
"function",
11+
"secondary file"
12+
],
13+
"repository": {
14+
"type": "git",
15+
"url": "https://github.com/icgc-argo/data-processing-utility-tools.git"
16+
},
17+
"container": {
18+
"registries": [
19+
{
20+
"registry": "ghcr.io",
21+
"type": "docker",
22+
"org": "icgc-argo",
23+
"default": true
24+
}
25+
]
26+
},
27+
"dependencies": [],
28+
"devDependencies": [],
29+
"contributors": [
30+
{
31+
"name": "Junjun Zhang",
32+
"email": "junjun.ca@gmail.com"
33+
}
34+
],
35+
"license": "GNU Affero General Public License v3",
36+
"bugReport": "https://github.com/icgc-argo/data-processing-utility-tools/issues",
37+
"homepage": "https://github.com/icgc-argo/data-processing-utility-tools#readme"
38+
}

helper-functions/tests/checker.nf

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU Affero General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU Affero General Public License for more details.
15+
16+
You should have received a copy of the GNU Affero General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
Authors:
20+
Junjun Zhang
21+
*/
22+
23+
nextflow.enable.dsl = 2
24+
25+
// universal params
26+
params.container_registry = ""
27+
params.container_version = ""
28+
params.container = ""
29+
30+
bwaSecondaryExts = ['fai', 'sa', 'bwt', 'ann', 'amb', 'pac', 'alt']
31+
32+
params.file_name = null
33+
params.file_size = null
34+
35+
include {
36+
getSecondaryFiles;
37+
getBwaSecondaryFiles
38+
} from '../main.nf'
39+
40+
include {
41+
generateDummyFile as gFile1;
42+
generateDummyFile as gFile2;
43+
} from './generate-dummy-file.nf'
44+
45+
include {
46+
filesExist as fExist1;
47+
filesExist as fExist2;
48+
} from './files-exist.nf'
49+
50+
// Channel.from is deprecated; use the explicit factories instead.
// 'of' emits the single main file name (getSecondaryFiles requires it to be a String),
// 'fromList' emits one item per BWA secondary extension.
Channel.of(params.file_name).set{ file_name_ch }
Channel.fromList(bwaSecondaryExts).set{ bwa_ext_ch }
52+
53+
54+
workflow {
  // generate the main file
  gFile1(file_name_ch.flatten(), params.file_size)

  // generate the BWA secondary files: one (file name, extension) pair per extension
  gFile2(file_name_ch.combine(bwa_ext_ch), params.file_size)

  // test 'getSecondaryFiles': the derived 'fai' name must be among the generated files
  def expected_fai = getSecondaryFiles(params.file_name, ['fai'])
  fExist1(
    expected_fai,
    'exist',
    gFile2.out.file.collect(),
    true  // no need to wait
  )

  // test 'getBwaSecondaryFiles': every derived BWA secondary name must be among the generated files
  def expected_bwa = getBwaSecondaryFiles(params.file_name).collect()
  fExist2(
    expected_bwa,
    'exist',
    gFile2.out.file.collect(),
    true  // no need to wait
  )
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
* Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
5+
*
6+
* This program is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU Affero General Public License as published
8+
* by the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU Affero General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Affero General Public License
17+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
/*
21+
* Contributors:
22+
* Junjun Zhang <junjun.zhang@oicr.on.ca>
23+
*/
24+
25+
/********************************************************************/
26+
/* this block is auto-generated based on info from pkg.json where */
27+
/* changes can be made if needed, do NOT modify this block manually */
28+
nextflow.enable.dsl = 2
29+
version = '1.0.0' // package version
30+
31+
container = [
32+
'ghcr.io': 'ghcr.io/icgc-argo/data-processing-utility-tools.helper-functions'
33+
]
34+
default_container_registry = 'ghcr.io'
35+
/********************************************************************/
36+
37+
// universal params
38+
params.container_registry = ""
39+
params.container_version = ""
40+
params.container = ""
41+
42+
params.cpus = 1
43+
params.mem = 1 // GB
44+
45+
// Check that the named files are present (or absent) among the staged input files.
// The task fails with exit status 1 on the first name that violates the expectation.
process filesExist {
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"

  cpus params.cpus
  memory "${params.mem} GB"

  input:
    val file_names       // a single file name or a list of names; names must not contain spaces
    val expect           // 'exist' if the files are expected to exist; 'not_exist' if they are expected not to exist
    path files           // files staged into the task directory, against which the names are checked
    val dependency_flag  // any output from process(es) this process should depend on; not used by the script

  script:
    // the shell loops below iterate over a space-separated string of names
    if (file_names instanceof List) {
      file_name_arg = file_names.join(" ")
    } else {
      file_name_arg = file_names
    }
    """
    if [[ "${expect}" = "exist" ]]; then
      for f in \$(echo "${file_name_arg}"); do
        if [[ ! -f \$f ]]; then
          echo "Expected \$f not exists."
          exit 1
        fi
      done
    elif [[ "${expect}" = "not_exist" ]]; then
      for f in \$(echo "${file_name_arg}"); do
        if [[ -f \$f ]]; then
          echo "Unexpected \$f exists."
          exit 1
        fi
      done
    else
      echo "Second argument must be either 'exist' or 'not_exist'. '${expect}' is supplied."
      exit 1
    fi
    """
}

0 commit comments

Comments
 (0)