Fetch Data #470
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs the fetch scripts under ./scripts/1-fetch/ for each enabled data
# source, either on the schedule below or when started manually.
name: Fetch Data

on:
  schedule:
    # Normal schedule (GitHub Actions evaluates cron expressions in UTC)
    # # at 03:15 on all days in first month of each quarter
    - cron: '15 3 * 1,4,7,10 *'
    # # at 03:15 on days 1-14 in second month of each quarter
    - cron: '15 3 1-14 2,5,8,11 *'
  # Allow the workflow to be started manually
  workflow_dispatch:

jobs:
  fetch:
    runs-on: ubuntu-latest

    steps:

      # CC Technology team members:
      #   See cc-quantifying-bot GitHub entry in Bitwarden for information on
      #   BOT_ secrets
      - name: Configure git
        # The secrets are passed as environment variables instead of being
        # expanded into the script text, so a value containing quotes, `$`,
        # or backticks cannot break or alter the shell commands.
        env:
          BOT_NAME: ${{ secrets.BOT_NAME }}
          BOT_EMAIL: ${{ secrets.BOT_EMAIL }}
        run: |
          git config --global init.defaultBranch main
          git config --global user.name "${BOT_NAME}"
          git config --global user.email "${BOT_EMAIL}"

      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          # Default fetch-depth is 1, however that value results in errors
          # when GitPython attempts to push changes:
          #   "failed to push some refs"
          fetch-depth: 0
          # Check out with the bot token rather than the default token
          token: ${{ secrets.BOT_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          # Quoted so the version is read as a string, not a float
          python-version: '3.11'

      - name: Install Python dependencies
        run: |
          pip install --upgrade pip pipenv

      - name: Sync Python modules
        # --system installs into the runner's Python instead of a virtualenv
        run: |
          pipenv sync --system

      # Fetch from arXiv disabled due to long run time (~6 hours)
      #
      # For now, data is fetched manually :/

      # Fetch from Europeana disabled due to API limitations
      # https://github.com/creativecommons/quantifying/issues/224

      # Fetch from GCS disabled due to Google blocking GitHub Action runners
      # # CC Technology team members:
      # #   See cc-quantifying-bot Google Workspace entry in Bitwarden for
      # #   information on GCS_ secrets
      # - name: Fetch from Google Custom Search (GCS)
      #   run: |
      #     ./scripts/1-fetch/gcs_fetch.py \
      #       --limit=100 --enable-save --enable-git
      #   env:
      #     GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }}
      #     GCS_CX: ${{ secrets.GCS_CX }}
      #
      # For now, data is fetched manually :/

      # NOTE(review): --enable-save / --enable-git presumably make each
      # script write its results and commit/push them -- confirm against the
      # scripts themselves.
      - name: Fetch from GitHub
        run: |
          ./scripts/1-fetch/github_fetch.py \
            --enable-save --enable-git
        env:
          GH_TOKEN: ${{ secrets.BOT_TOKEN }}

      # Fetch from Openverse disabled due to API limitations
      # https://github.com/creativecommons/quantifying/issues/184

      - name: Fetch from Smithsonian
        run: |
          ./scripts/1-fetch/smithsonian_fetch.py \
            --enable-save --enable-git
        env:
          DATA_GOV_API_KEY: ${{ secrets.DATA_GOV_API_KEY }}

      - name: Fetch from Wikipedia
        run: |
          ./scripts/1-fetch/wikipedia_fetch.py \
            --enable-save --enable-git