Skip to content

Commit 5d64b2c

Browse files
Mallori Harrell
authored and committed
Initial commit
0 parents  commit 5d64b2c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+8394
-0
lines changed

.github/dependabot.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
version: 2
2+
updates:
3+
- package-ecosystem: "pip"
4+
directory: "/requirements"
5+
schedule:
6+
interval: "weekly"
7+
8+
- package-ecosystem: "github-actions"
9+
# NOTE(robinson) - Workflow files stored in the
10+
# default location of `.github/workflows`
11+
directory: "/"
12+
schedule:
13+
interval: "weekly"

.github/workflows/ci.yml

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
pull_request:
7+
branches: [ main ]
8+
9+
env:
10+
PYTHON_VERSION: "3.8"
11+
PIPELINE_FAMILY: "general"
12+
13+
jobs:
14+
setup:
15+
runs-on: ubuntu-latest
16+
steps:
17+
- uses: actions/checkout@v3
18+
- uses: actions/cache@v3
19+
id: virtualenv-cache
20+
with:
21+
path: |
22+
.venv
23+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
24+
- name: Set up Python ${{ env.PYTHON_VERSION }}
25+
uses: actions/setup-python@v4
26+
with:
27+
python-version: ${{ env.PYTHON_VERSION }}
28+
- name: Setup virtual environment (no cache hit)
29+
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
30+
run: |
31+
python${{ env.PYTHON_VERSION }} -m venv .venv
32+
source .venv/bin/activate
33+
make install
34+
35+
lint:
36+
runs-on: ubuntu-latest
37+
needs: setup
38+
steps:
39+
- uses: actions/checkout@v3
40+
- uses: actions/cache@v3
41+
id: virtualenv-cache
42+
with:
43+
path: |
44+
.venv
45+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
46+
- name: Lint
47+
run: |
48+
source .venv/bin/activate
49+
make check
50+
51+
shellcheck:
52+
runs-on: ubuntu-latest
53+
steps:
54+
- uses: actions/checkout@v3  # same major version as every other job in this workflow
55+
- name: ShellCheck
56+
uses: ludeeus/action-shellcheck@2.0.0  # pinned release; `master` is a moving ref
57+
58+
test:
59+
runs-on: ubuntu-latest
60+
needs: [setup, lint]
61+
steps:
62+
- uses: actions/checkout@v3
63+
- uses: actions/cache@v3
64+
id: virtualenv-cache
65+
with:
66+
path: |
67+
.venv
68+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
69+
- name: Run core tests
70+
run: |
71+
source .venv/bin/activate
72+
sudo apt-get install --yes poppler-utils
73+
make install-nltk-models
74+
make test
75+
# NOTE(robinson) - Add check-coverage back in once we add real tests
76+
# make check-coverage
77+
make check-notebooks
78+
79+
changelog:
80+
runs-on: ubuntu-latest
81+
steps:
82+
- uses: actions/checkout@v3
83+
- if: github.ref != 'refs/heads/main'
84+
uses: dorny/paths-filter@v2
85+
id: changes
86+
with:
87+
filters: |
88+
src:
89+
- 'prepline_*/**'
90+
- 'pipeline-notebooks/**'
91+
92+
- if: steps.changes.outputs.src == 'true' && github.ref != 'refs/heads/main'
93+
uses: dangoslen/changelog-enforcer@v3
94+
95+
api_consistency:
96+
runs-on: ubuntu-latest
97+
needs: setup
98+
steps:
99+
- uses: actions/checkout@v3
100+
- uses: actions/cache@v3
101+
id: virtualenv-cache
102+
with:
103+
path: |
104+
.venv
105+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
106+
- name: API Consistency
107+
run: |
108+
source .venv/bin/activate
109+
make api-check

.gitignore

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
*.py,cover
51+
.hypothesis/
52+
.pytest_cache/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
target/
76+
77+
# Jupyter Notebook
78+
.ipynb_checkpoints
79+
80+
# IPython
81+
profile_default/
82+
ipython_config.py
83+
84+
# pyenv
85+
.python-version
86+
87+
# pipenv
88+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
90+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
91+
# install all needed dependencies.
92+
#Pipfile.lock
93+
94+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
95+
__pypackages__/
96+
97+
# Celery stuff
98+
celerybeat-schedule
99+
celerybeat.pid
100+
101+
# SageMath parsed files
102+
*.sage.py
103+
104+
# Environments
105+
.env
106+
.venv
107+
env/
108+
venv/
109+
ENV/
110+
env.bak/
111+
venv.bak/
112+
113+
# Spyder project settings
114+
.spyderproject
115+
.spyproject
116+
117+
# Rope project settings
118+
.ropeproject
119+
120+
# mkdocs documentation
121+
/site
122+
123+
# mypy
124+
.mypy_cache/
125+
.dmypy.json
126+
dmypy.json
127+
128+
# Pyre type checker
129+
.pyre/
130+
131+
# VSCode
132+
.vscode/
133+
134+
# Mac
135+
.DS_Store
136+
137+
nbs/
138+
139+
# Celery files that are created when the mercury dashboard is run
140+
celery.sqlite
141+
celerybeat-schedule.db
142+
143+
# temporarily generated files by project-specific Makefile
144+
tmp*

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
## 0.0.4
2+
3+
* Changed pipeline name to `pipeline-general`
4+
* Changed pipeline to handle a variety of documents not just emails
5+
6+
## 0.0.3
7+
8+
* Add emails pipeline Dockerfile
9+
10+
## 0.0.2
11+
12+
* Add pipeline notebook
13+
14+
## 0.0.1
15+
16+
* Initial pipeline setup

Dockerfile

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# syntax=docker/dockerfile:1
2+
3+
FROM centos:centos7.9.2009
4+
5+
# NOTE(crag): NB_USER ARG for mybinder.org compat:
6+
# https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html
7+
ARG NB_USER=notebook-user
8+
ARG NB_UID=1000
9+
ARG PIP_VERSION
10+
ARG PIPELINE_PACKAGE
11+
12+
RUN yum -y update && \
13+
yum -y install gcc openssl-devel bzip2-devel libffi-devel make git sqlite-devel && \
14+
curl -O https://www.python.org/ftp/python/3.8.15/Python-3.8.15.tgz && tar -xzf Python-3.8.15.tgz && \
15+
cd Python-3.8.15/ && ./configure --enable-optimizations && make altinstall && \
16+
cd .. && rm -rf Python-3.8.15* && \
17+
ln -s /usr/local/bin/python3.8 /usr/local/bin/python3
18+
19+
# create user with a home directory
20+
ENV USER=${NB_USER}
21+
ENV HOME=/home/${NB_USER}
22+
23+
RUN groupadd --gid ${NB_UID} ${NB_USER}
24+
RUN useradd --uid ${NB_UID} --gid ${NB_UID} ${NB_USER}
25+
USER ${NB_USER}
26+
WORKDIR ${HOME}
27+
ENV PYTHONPATH="${PYTHONPATH}:${HOME}"
28+
ENV PATH="/home/${NB_USER}/.local/bin:${PATH}"
29+
30+
COPY requirements/dev.txt requirements-dev.txt
31+
COPY requirements/base.txt requirements-base.txt
32+
COPY prepline_${PIPELINE_PACKAGE}/ prepline_${PIPELINE_PACKAGE}/
33+
COPY exploration-notebooks exploration-notebooks
34+
COPY pipeline-notebooks pipeline-notebooks
35+
36+
37+
RUN python3.8 -m pip install --no-cache -r requirements-base.txt \
38+
&& python3.8 -m pip install --no-cache -r requirements-dev.txt

0 commit comments

Comments
 (0)