# Crawl and Process Reports #46
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what
# appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow name as shown in the Actions tab.
name: Crawl and Process Reports

# Triggers: pushes to main, manual runs from the Actions UI, and a schedule.
on:
  push:
    branches:
      - main
  workflow_dispatch:
  schedule:
    # Midnight UTC on days 1, 4, 7, ... of every month. The day-of-month step
    # restarts on the 1st, so the gap across a month boundary can be shorter
    # than three days (e.g. the 31st followed by the 1st).
    - cron: '0 0 */3 * *'
jobs:
  # Runs fetch_report_links.py, download_reports.py, the ripgrep keyword
  # search, and update_html_json_list.py in order, then commits and pushes
  # whatever changed in the working tree.
  crawl-and-process:
    runs-on: ubuntu-latest
    # GitHub-hosted runners stop any job after 6 hours (360 minutes), so a
    # larger value would never take effect.
    timeout-minutes: 360
    permissions:
      contents: write  # needed by the final "git push"
    defaults:
      run:
        # Naming the shell explicitly makes GitHub invoke bash with
        # "-eo pipefail". Without it, "python3 ... | tee ..." reports tee's
        # exit status and a crashed script would still look successful.
        shell: bash
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      # "python3 -m pip" guarantees the packages are installed for the same
      # interpreter that runs the scripts below.
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y python3 python3-pip
          python3 -m pip install --upgrade pip
          python3 -m pip install requests beautifulsoup4 feedparser PyPDF2

      # The package is downloaded into the runner's temp directory rather
      # than the checkout, so "git add --all" in the last step cannot stage
      # the .deb file.
      - name: Download and install ripgrep
        run: |
          wget -P "$RUNNER_TEMP" https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep_14.1.1-1_amd64.deb
          sudo dpkg -i "$RUNNER_TEMP/ripgrep_14.1.1-1_amd64.deb"

      - name: Verify ripgrep installation
        run: rg --version

      # Output is mirrored to a log file inside the checkout; unless it is
      # git-ignored, that log is part of what the last step commits.
      - name: Run fetch_report_links.py
        working-directory: automated
        run: |
          echo "Starting fetch_report_links.py"
          python3 fetch_report_links.py | tee fetch_report_links.log
          echo "fetch_report_links.py completed"

      - name: Run download_reports.py
        working-directory: automated
        run: |
          echo "Starting download_reports.py"
          python3 download_reports.py | tee download_reports.log
          echo "download_reports.py completed"

      # Runs from the repository root (no working-directory is set).
      # NOTE(review): this step was listed twice with identical contents;
      # it is kept once here. Restore the second copy if two passes were
      # intentional.
      - name: Run the keyword search script
        run: |
          echo "Starting search_keyword_ripgrep_fast.py --update"
          python3 search_keyword_ripgrep_fast.py --update | tee search_keyword_ripgrep_fast.log
          echo "search_keyword_ripgrep_fast.py completed"

      - name: Run the update html script
        run: |
          echo "Starting update_html_json_list.py"
          python3 update_html_json_list.py
          echo "update_html_json_list.py completed"

      # Commits only when something is staged. "[skip ci]" in the message
      # keeps the push from re-triggering this workflow's push event.
      # The e-mail is the github-actions[bot] noreply address; the value
      # previously shown here was an obfuscation placeholder, not an address.
      - name: Commit and push all changes
        run: |
          echo "Preparing to commit all new and updated files"
          git config --global user.name "GitHub Actions"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add --all # Stage all new, modified, and deleted files
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Update all changes [skip ci]"
            git push origin main
          fi