Skip to content

Pull Request: Load Data from Local Parquet File & Expose API Endpoint #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
AWS_ACCESS_KEY_ID = 'AKIATDC35CBYPE4BW3U4'
AWS_SECRET_ACCESS_KEY = '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'
AWS_REGION = 'eu-west-1'
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/data
/playground
*.venv
.html
*.pyc
23 changes: 23 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
SHELL := /bin/bash

venv:
@echo "Creating virtual environment..."
@python -m venv venv
@echo "Virtual environment created successfully."

activate:
@echo "Activating virtual environment..."
@source venv/bin/activate
@echo "Virtual environment activated. Use 'deactivate' to exit."

install:
@echo "Installing dependencies..."
@pip install -r requirements.txt
@echo "Dependencies installed successfully."

format:
@echo "Formatting code..."
@black --line-length 79 --target-version py310 --skip-string-normalization --exclude venv,build,dist,docs .
@isort --profile black --line-length 79 --skip venv,build,dist,docs .
@pylint --max-line-length 79 --ignore=E501,W503 --exclude venv,build,dist,docs .
@echo "Code formatted successfully."
Empty file added api/__init__.py
Empty file.
Empty file added api/crud/__init__.py
Empty file.
124 changes: 124 additions & 0 deletions api/crud/flood.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""
Author: Ajeyomi Adedoyin Samuel
Email: [email protected]
Title: Flood Risk Intersect Checker

Description:
------------
This script Loads flood risk polygon data(parquet file), loads it using duckdb,
and check if the coordinate point(s) intersect with the flood-prone areas.

IT accepts Geoparquet with lon and lat of a location and applies spatial
analysis to determine whether input geographic points fall within any of the designated flood zones.

Modules:
--------
- duckdb
- typing
- shapely

Functions:
---------
get_flood_information(bucket, key, longitude, latitude)
Get a geographical coordinates as input and checks if they intersect with flood zones,
and returns the results as JSON.

Usage:
------
- Ensure you put the correct path to your parqet file
- Call `get_flood_information()` with either single or list of `longitude` and `latitude` values.

Note:
-----
- This script assumes that the geospatial data is readable by duckdb and contains a geometry column.
"""



import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

from .utils.duckdb import setup_spatial_extensions
from .utils.utils import validate_coordinates
import duckdb
import json
from shapely.geometry import Point
from core.config import FLOOD_DATA



def get_flood_information(
point_lon: float,
point_lat: float,
parquet_path:str=FLOOD_DATA,
buffer_degrees: float = 0.00001 # ~0.1m buffer for pre-filtering
) -> json:
"""Check if a coordinate intersect with flood zones"""

# validate the input coordinates
validate_coordinates(point_lon, point_lat)

# Setup spatial extensions
setup_spatial_extensions()

# Create point and bounding box
point_wkt = Point(point_lon, point_lat).wkt

# Bounding box for pre-filtering
bbox_min_lon = point_lon - buffer_degrees
bbox_max_lon = point_lon + buffer_degrees
bbox_min_lat = point_lat - buffer_degrees
bbox_max_lat = point_lat + buffer_degrees

print(f"Checking intersection for: {point_wkt}")
print(f"Using bounding box filter: [{bbox_min_lon:.6f}, {bbox_min_lat:.6f}, {bbox_max_lon:.6f}, {bbox_max_lat:.6f}]")

try:
print("Running optimized spatial query...")
result = duckdb.sql(f"""
SELECT 1
FROM (
SELECT geometry
FROM read_parquet('{parquet_path}')
)
WHERE
-- Fast bounding box pre-filter using geometry bounds
ST_XMin(geometry) <= {bbox_max_lon}
AND ST_XMax(geometry) >= {bbox_min_lon}
AND ST_YMin(geometry) <= {bbox_max_lat}
AND ST_YMax(geometry) >= {bbox_min_lat}
-- Perform expensive intersection
AND ST_Intersects(geometry, ST_GeomFromText('{point_wkt}'))
LIMIT 1
""").fetchone()

intersects = result is not None

result = {
"longitude": point_lon,
"latitude": point_lat,
"flood_zone": intersects
}

return result

except Exception as e:
raise RuntimeError(f"{e.__class__.__name__}: {str(e)}")


# if __name__ == "__main__":
# start_time = time.time()

# point_lon=12.772144
# point_lat=45.562546


# result = get_flood_information(
# point_lon=point_lon,
# point_lat=point_lat,
# parquet_path=FLOOD_DATA)

# elapsed = time.time() - start_time
# print(f"Total time taken: {elapsed:.2f}s - Result: {result}")

Empty file added api/crud/utils/__init__.py
Empty file.
16 changes: 16 additions & 0 deletions api/crud/utils/duckdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import duckdb

def setup_spatial_extensions():
"""Install and load spatial extension with performance settings"""
try:
duckdb.sql("INSTALL spatial;")
duckdb.sql("LOAD spatial;")

# Performance optimizations
duckdb.sql("SET threads=2;") # Use multiple threads
duckdb.sql("SET memory_limit='2GB';") # Increase memory limit
duckdb.sql("SET max_memory='2GB';")

print("Spatial extension loaded with performance settings")
except Exception as e:
print(f"Note: Spatial extension setup: {e}")
6 changes: 6 additions & 0 deletions api/crud/utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def validate_coordinates(longitude:float,
latitude:float) -> tuple:
"""Validate and normalize coordinate inputs"""
if isinstance(longitude, (float)) and isinstance(latitude, (float)):
return [longitude], [latitude]
raise ValueError("Coordinates must be floats")
Empty file added api/route/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions api/route/flood.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from fastapi import APIRouter, HTTPException
from api.crud.flood import get_flood_information
from schema.flood import FloodDataResponse

import sys
import os
sys.path.append(os.path.dirname(__file__))


flood_route = APIRouter()

@flood_route.get("/flood-risk",
response_model=FloodDataResponse,
status_code=200,
tags=["Flood Risk"],
description="Check if a particular location is flooded or not")
async def get_flood_info(longitude: float, latitude: float) -> FloodDataResponse:
"""Check if the latitudes and longitudes intersect with flood areas and return info as JSON"""
try:
data = get_flood_information(longitude, latitude)
if not data:
raise HTTPException(
status_code=404,
detail=f"No flood risk data found for location ({latitude}, {longitude})."
)
return data
except ValueError as ve:
raise HTTPException(status_code=400, detail=str(ve))
except Exception:
raise HTTPException(status_code=500, detail="Internal server error.")
Empty file added core/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions core/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
FLOOD_DATA = r'C:\Users\Admin\Desktop\ongoing_tasks\environtrust\data\flood.parquet'

Loading