Skip to content

Commit b89995e

Browse files
authored
Merge pull request #108 from icgc-argo/cleanup-workdir@1.0.0
[release]
2 parents 10f99df + 3872391 commit b89995e

15 files changed

Lines changed: 472 additions & 0 deletions

cleanup-workdir/.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.gitignore
2+
.nextflow*
3+
tests
4+
work
5+
outdir

cleanup-workdir/.gitignore

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
*.py[cod]
2+
3+
# C extensions
4+
*.so
5+
6+
# Packages
7+
*.egg
8+
*.egg-info
9+
dist
10+
build
11+
eggs
12+
.eggs
13+
parts
14+
bin
15+
var
16+
sdist
17+
develop-eggs
18+
.installed.cfg
19+
lib
20+
lib64
21+
venv*/
22+
pyvenv*/
23+
24+
# Installer logs
25+
pip-log.txt
26+
27+
# Unit test / coverage reports
28+
.coverage
29+
.tox
30+
.coverage.*
31+
nosetests.xml
32+
coverage.xml
33+
htmlcov
34+
35+
# Translations
36+
*.mo
37+
38+
# Mr Developer
39+
.mr.developer.cfg
40+
.project
41+
.pydevproject
42+
.idea
43+
*.iml
44+
*.komodoproject
45+
46+
# Complexity
47+
output/*.html
48+
output/*/index.html
49+
50+
# Sphinx
51+
docs/_build
52+
53+
.DS_Store
54+
*~
55+
.*.sw[po]
56+
.build
57+
.ve
58+
.env
59+
.cache
60+
.pytest
61+
.bootstrap
62+
.appveyor.token
63+
*.bak
64+
*.log
65+
.vscode
66+
.python-version
67+
.nextflow*
68+
work
69+
outdir

cleanup-workdir/Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
FROM ubuntu:20.04
2+
3+
# Link the image to its source repository (key=value form; the space-separated form is legacy).
LABEL org.opencontainers.image.source=https://github.com/icgc-argo/data-processing-utility-tools
4+
5+
# Create an 'ubuntu' user and group (uid/gid 1000) with a home directory owned by that user.
RUN groupadd -g 1000 ubuntu && \
6+
useradd -l -u 1000 -g ubuntu ubuntu && \
7+
install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu
8+
9+
CMD ["/bin/bash"]

cleanup-workdir/main.nf

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (C) 2021, Ontario Institute for Cancer Research
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU Affero General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU Affero General Public License for more details.
15+
16+
You should have received a copy of the GNU Affero General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
Authors:
20+
Junjun Zhang
21+
*/
22+
23+
/********************************************************************/
24+
/* this block is auto-generated based on info from pkg.json where */
25+
/* changes can be made if needed, do NOT modify this block manually */
26+
nextflow.enable.dsl = 2
27+
version = '1.0.0' // package version
28+
29+
container = [
30+
'ghcr.io': 'ghcr.io/icgc-argo/data-processing-utility-tools.cleanup-workdir'
31+
]
32+
default_container_registry = 'ghcr.io'
33+
/********************************************************************/
34+
35+
36+
// universal params go here
37+
params.container_registry = ""
38+
params.container_version = ""
39+
params.container = ""
40+
41+
params.cpus = 1
42+
params.mem = 1 // GB
43+
params.publish_dir = "" // set to empty string will disable publishDir
44+
45+
// tool specific params go here, add / change as needed
46+
params.files_to_delete = "NO_FILE"
47+
params.virtual_dep_flag = true // default to true, ie, dep is always satisfied
48+
49+
50+
// Empties the directories that contain the given files, as long as those
// directories are located under workflow.workDir.
//
// For every space-separated entry in files_to_delete the script resolves
// symlinks (readlink -f), takes the parent directory and:
//   - prints "Not delete: <dir>/*" and skips it when the directory is not under workflow.workDir;
//   - otherwise removes every non-hidden entry in it and prints "Deleted: <dir>/*".
//
// Inputs:
//   files_to_delete  - path(s) whose containing directories are to be emptied
//   virtual_dep_flag - value that the script does not read; it lets a caller pass an
//                      upstream output so that this process is scheduled after it
// Output:
//   stdout - the "Deleted" / "Not delete" lines written by the script
process cleanupWorkdir {
51+
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
52+
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir
53+
54+
cpus params.cpus
55+
memory "${params.mem} GB"
56+
57+
input:
58+
path files_to_delete // more accurately, other non-hidden files in the same folder will be deleted as well
59+
val virtual_dep_flag // for specifying steps do not produce output files but produce values, set those values here
60+
61+
output:
62+
stdout
63+
64+
script:
65+
"""
66+
set -euxo pipefail
67+
68+
IFS=" "
69+
read -a files <<< "${files_to_delete}"
70+
for f in "\${files[@]}"
71+
do
72+
dir_to_rm=\$(dirname "\$(readlink -f "\$f")") # quoted so the resolved path is never word-split or globbed
73+
74+
if [[ "\$dir_to_rm" != "${workflow.workDir}"/* ]]; then # skip dir not under workdir, like from input file dir; the quoted prefix is matched literally
75+
echo "Not delete: \$dir_to_rm/*"
76+
continue
77+
fi
78+
79+
rm -fr -- "\$dir_to_rm"/* # delete all files and subdirs but not hidden ones
80+
echo "Deleted: \$dir_to_rm/*"
81+
done
82+
"""
83+
}
84+
85+
86+
// this provides an entry point for this main script, so it can be run directly without cloning the repo
87+
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
88+
// Entry workflow: calls cleanupWorkdir with the paths matched by params.files_to_delete
// and with params.virtual_dep_flag as the dependency flag.
workflow {
89+
cleanupWorkdir(
90+
Channel.fromPath(params.files_to_delete), // files whose containing directories are to be emptied
91+
params.virtual_dep_flag // defaults to true above, i.e. nothing to wait for
92+
)
93+
}

cleanup-workdir/nextflow.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
docker {
2+
enabled = true
3+
runOptions = '-u \$(id -u):\$(id -g)'
4+
}

cleanup-workdir/pkg.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"name": "cleanup-workdir",
3+
"version": "1.0.0",
4+
"description": "Process to cleanup workdir intermediate files",
5+
"main": "main.nf",
6+
"deprecated": false,
7+
"keywords": [
8+
"workflow",
9+
"utility",
10+
"temp file"
11+
],
12+
"repository": {
13+
"type": "git",
14+
"url": "https://github.com/icgc-argo/data-processing-utility-tools.git"
15+
},
16+
"container": {
17+
"registries": [
18+
{
19+
"registry": "ghcr.io",
20+
"type": "docker",
21+
"org": "icgc-argo",
22+
"default": true
23+
}
24+
]
25+
},
26+
"dependencies": [],
27+
"devDependencies": [],
28+
"contributors": [
29+
{
30+
"name": "Junjun Zhang",
31+
"email": "junjun.ca@gmail.com"
32+
}
33+
],
34+
"license": "GNU Affero General Public License v3",
35+
"bugReport": "https://github.com/icgc-argo/data-processing-utility-tools/issues",
36+
"homepage": "https://github.com/icgc-argo/data-processing-utility-tools#readme"
37+
}

cleanup-workdir/tests/checker.nf

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (C) 2021, Ontario Institute for Cancer Research
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU Affero General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU Affero General Public License for more details.
15+
16+
You should have received a copy of the GNU Affero General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
Authors:
20+
Junjun Zhang
21+
*/
22+
23+
nextflow.enable.dsl = 2
24+
25+
// universal params
26+
params.container_registry = ""
27+
params.container_version = ""
28+
params.container = ""
29+
30+
// tool specific params go here, add / change as needed
// NOTE(review): the workflow below reads params.file_name and params.file_size, which are
// not given defaults here, while input_file and expected_output are not read anywhere in
// the visible part of this script - confirm which set of parameters is intended.
31+
params.input_file = ""
32+
params.expected_output = ""
33+
34+
params.cpus = 1
35+
params.mem = 1 // GB
36+
37+
include { cleanupWorkdir } from '../main'
38+
39+
include {
40+
generateDummyFile as gFile1;
41+
generateDummyFile as gFile2;
42+
} from './generate-dummy-file.nf'
43+
44+
include {
45+
filesExist as fExist1;
46+
filesExist as fExist2;
47+
filesExist as fExist3;
48+
filesExist as fExist4;
49+
} from './files-exist.nf'
50+
51+
Channel.from(params.file_name).set{ file_name_ch }
52+
53+
54+
// Test workflow: generates the same file(s) in two separate tasks (gFile1, gFile2), runs
// cleanupWorkdir on the gFile1 outputs only, then runs the filesExist checks with
// 'not_exist' for the gFile1 outputs and 'exist' for the gFile2 outputs.
workflow {
55+
// generate the file(s) named in params.file_name
56+
gFile1(
57+
file_name_ch.flatten(),
58+
params.file_size
59+
)
60+
61+
// generate the same file(s) again in a second, independent task
62+
gFile2(
63+
file_name_ch.flatten(),
64+
params.file_size
65+
)
66+
67+
// check that the files from gFile1 exist
68+
fExist1(
69+
params.file_name,
70+
'exist',
71+
gFile1.out.file.collect(),
72+
true // no need to wait
73+
)
74+
75+
// check that the files from gFile2 exist
76+
fExist2(
77+
params.file_name,
78+
'exist',
79+
gFile2.out.file.collect(),
80+
true // no need to wait
81+
)
82+
83+
// perform cleanup in the gFile1 workdir
84+
cleanupWorkdir(
85+
gFile1.out.collect(),
86+
fExist1.out.collect() // flag enables waiting for fExist1 before cleaning up gFile1 workdir
87+
)
88+
89+
// check that the cleaned-up workdir from gFile1 no longer contains the previous files
90+
fExist3(
91+
gFile1.out.collect(),
92+
'not_exist',
93+
gFile1.out.collect(),
94+
cleanupWorkdir.out // wait until cleanup is done
95+
)
96+
97+
// check that the workdir from gFile2, which was not cleaned up, still has the expected files
98+
fExist4(
99+
gFile2.out.collect(),
100+
'exist',
101+
gFile2.out.collect(),
102+
cleanupWorkdir.out // wait until cleanup is done
103+
)
104+
}

0 commit comments

Comments
 (0)