Skip to content

Commit 29a0fd6

Browse files
authored
Merge pull request #151 from icgc-argo-workflows/payload-gen-qc@0.1.0
[release]
2 parents 4fe0ac4 + ef1378d commit 29a0fd6

28 files changed

Lines changed: 1341 additions & 0 deletions

payload-gen-qc/.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.gitignore
2+
.nextflow*
3+
tests
4+
work
5+
outdir

payload-gen-qc/Dockerfile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
FROM python:3.7.5-slim-buster
2+
3+
# Link the image to its source repository (OCI annotation), using the key="value" LABEL form
LABEL org.opencontainers.image.source="https://github.com/icgc-argo-workflows/data-processing-utility-tools"
4+
5+
# Install procps without recommended extras and remove the apt package lists in the same layer to keep the image small
RUN apt-get update && apt-get install -y --no-install-recommends procps && rm -rf /var/lib/apt/lists/*
6+
7+
RUN groupadd -g 1000 ubuntu && \
8+
useradd -l -u 1000 -g ubuntu ubuntu && \
9+
install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu
10+
11+
ENV PATH="/tools:${PATH}"
12+
13+
COPY *.py /tools/
14+
15+
WORKDIR /tools
16+
17+
USER ubuntu
18+
19+
ENTRYPOINT ["/usr/bin/env"]
20+
CMD ["/bin/bash"]

payload-gen-qc/README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Nextflow Package `payload-gen-qc`
2+
3+
A simple wrapper written in `nextflow` for the payload generation tool to generate ARGO Song payloads containing QC metrics files.
4+
5+
## Package development
6+
7+
The initial version of this package was created by the WorkFlow Package Manager CLI tool. Please refer to
8+
the [documentation](https://wfpm.readthedocs.io) for details on the development procedure including
9+
versioning, updating, CI testing and releasing.
10+
11+
12+
## Inputs
13+
### Required
14+
- `files_to_upload`: All files to upload
15+
- `metadata_analysis`: JSON file containing donor/sample/specimen/experiment/read_groups/files metadata for the input data
16+
- `wf_name`: Workflow name
17+
- `wf_version`: Workflow version
18+
19+
### Optional
20+
- `genome_annotation`: Genome annotation version
21+
- `genome_build`: Genome build version
22+
- `cpus`: Number of CPUs for running the tool
23+
- `mem`: Memory (in GB) for running the tool
24+
- `publish_dir`: Directory to which output files are published (an empty string disables publishing)
25+
26+
## Outputs
27+
- `payload`: Payload JSON file containing the metadata
28+
- `files_to_upload`: All files to upload with normalized name convention
29+
30+
## Usage
31+
32+
### Run the package directly
33+
34+
With inputs prepared, you should be able to run the package directly using the following command.
35+
Please replace the params file with a real one (with all required parameters and input files). Example
36+
params file(s) can be found in the `tests` folder.
37+
38+
```
39+
nextflow run icgc-argo-workflows/data-processing-utility-tools/payload-gen-qc/main.nf -r payload-gen-qc.v0.1.0 -params-file <your-params-json-file>
40+
```
41+
42+
### Import the package as a dependency
43+
44+
To import this package into another package as a dependency, please follow these steps at the
45+
importing package side:
46+
47+
1. add this package's URI `github.com/icgc-argo-workflows/data-processing-utility-tools/payload-gen-qc@0.1.0` in the `dependencies` list of the `pkg.json` file
48+
2. run `wfpm install` to install the dependency
49+
3. add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/payload-gen-qc@0.1.0/main.nf`

payload-gen-qc/main.nf

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/usr/bin/env nextflow
2+
3+
/*
4+
Copyright (C) 2021, Ontario Institute for Cancer Research
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU Affero General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU Affero General Public License for more details.
15+
16+
You should have received a copy of the GNU Affero General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
Authors:
20+
Linda Xiang
21+
*/
22+
23+
/********************************************************************/
24+
/* this block is auto-generated based on info from pkg.json where */
25+
/* changes can be made if needed, do NOT modify this block manually */
26+
nextflow.enable.dsl = 2
27+
version = '0.1.0' // package version
28+
29+
container = [
30+
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-qc'
31+
]
32+
default_container_registry = 'ghcr.io'
33+
/********************************************************************/
34+
35+
36+
// universal params go here
37+
params.container_registry = ""
38+
params.container_version = ""
39+
params.container = ""
40+
41+
params.cpus = 1
42+
params.mem = 1 // GB
43+
params.publish_dir = "" // set to empty string will disable publishDir
44+
45+
46+
// tool-specific params go here, add / change as needed
47+
params.files_to_upload = ""
48+
params.metadata_analysis = ""
49+
params.wf_name = ""
50+
params.wf_version = ""
51+
params.genome_annotation = ""
52+
params.genome_build = ""
53+
54+
55+
process payloadGenQc {
56+
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
57+
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false
58+
59+
cpus params.cpus
60+
memory "${params.mem} GB"
61+
62+
input: // input, make update as needed
63+
path files_to_upload
64+
path metadata_analysis
65+
val genome_annotation
66+
val genome_build
67+
val wf_name
68+
val wf_version
69+
70+
output: // output, make update as needed
71+
path "*.payload.json", emit: payload
72+
path "out/*", emit: files_to_upload
73+
74+
script:
75+
// add and initialize variables here as needed
76+
77+
"""
78+
main.py \
79+
-f ${files_to_upload} \
80+
-a ${metadata_analysis} \
81+
-g "${genome_annotation}" \
82+
-b "${genome_build}" \
83+
-w "${wf_name}" \
84+
-r ${workflow.runName} \
85+
-s ${workflow.sessionId} \
86+
-v "${wf_version}"
87+
88+
"""
89+
}
90+
91+
92+
// this provides an entry point for this main script, so it can be run directly without cloning the repo
93+
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> -params-file xxx
94+
workflow {
95+
payloadGenQc(
96+
Channel.fromPath(params.files_to_upload).collect(),
97+
file(params.metadata_analysis),
98+
params.genome_annotation,
99+
params.genome_build,
100+
params.wf_name,
101+
params.wf_version
102+
)
103+
}

0 commit comments

Comments
 (0)