# Crawl and Process Reports (run #35)
# Workflow file for this run

# Runs the report pipeline scripts in order (fetch_report_links.py,
# download_reports.py, search_keyword_ripgrep_fast.py, update_html_json_list.py)
# and commits whatever files they changed back to the main branch.
name: Crawl and Process Reports

on:
  push:
    branches:
      - main
  workflow_dispatch:
  schedule:
    # 00:00 UTC on day-of-month 1, 4, 7, ..., 31. The */3 step restarts every
    # month, so the gap across a month boundary can be shorter than 3 days.
    - cron: '0 0 */3 * *'

# The last step pushes a commit, so the job token needs write access to
# repository contents.
permissions:
  contents: write

jobs:
  crawl-and-process:
    runs-on: ubuntu-latest
    # GitHub-hosted runners stop a job after 6 hours, so a larger value would
    # never take effect.
    timeout-minutes: 360
    defaults:
      run:
        # Explicitly selecting bash makes the runner invoke it with
        # `-eo pipefail`, so a failing script in `script | tee log` fails the
        # step instead of being masked by tee's exit status.
        shell: bash
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y python3 python3-pip
          pip install --upgrade pip
          pip install requests beautifulsoup4 feedparser PyPDF2

      - name: Download and install ripgrep
        run: |
          wget https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep_14.1.1-1_amd64.deb
          sudo dpkg -i ripgrep_14.1.1-1_amd64.deb

      - name: Verify ripgrep installation
        run: rg --version

      - name: Run fetch_report_links.py
        working-directory: automated
        run: |
          echo "Starting fetch_report_links.py"
          python3 fetch_report_links.py | tee fetch_report_links.log
          echo "fetch_report_links.py completed"

      - name: Run download_reports.py
        working-directory: automated
        run: |
          echo "Starting download_reports.py"
          python3 download_reports.py | tee download_reports.log
          echo "download_reports.py completed"

      # NOTE(review): unlike the two steps above, this step and the next run
      # from the repository root rather than `automated` - confirm that is
      # where these scripts live.
      - name: Run the keyword search script
        run: |
          echo "Starting search_keyword_ripgrep_fast.py --update"
          python3 search_keyword_ripgrep_fast.py --update | tee search_keyword_ripgrep_fast.log
          echo "search_keyword_ripgrep_fast.py completed"

      - name: Run the update html script
        run: |
          echo "Starting update_html_json_list.py"
          python3 update_html_json_list.py
          echo "update_html_json_list.py completed"

      - name: Commit and push all changes
        run: |
          echo "Preparing to commit all new and updated files"
          git config --global user.name "GitHub Actions"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add --all # Stage all new, modified, and deleted files
          # `git diff --cached --quiet` exits 0 when nothing is staged.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            # [skip ci] stops this commit from triggering push workflows.
            git commit -m "Update all changes [skip ci]"
            git push origin main
          fi