Skip to content

Commit 2a4fa21

Browse files
authored
feat: improve PDF validation error handling with FileValidationError base class (#280)
1 parent dbf1c07 commit 2a4fa21

File tree

7 files changed

+39
-21
lines changed

7 files changed

+39
-21
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
## 0.37.3
2+
3+
### Enhancements
4+
* Improve PDF validation error handling by introducing FileValidationError base class for better error abstraction
5+
6+
### Features
7+
8+
### Fixes
9+
* Replace RequestError with PDFValidationError for invalid PDF files to provide more accurate error context
10+
111
## 0.37.0
212

313
### Enhancements

_test_unstructured_client/unit/test_split_pdf_hook.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from unittest.mock import MagicMock, patch
99

1010
import httpx
11-
from httpx import RequestError
1211
import pytest
1312
import requests
1413
from requests_toolbelt import MultipartDecoder
@@ -467,8 +466,8 @@ def test_unit_get_split_pdf_cache_tmp_data_dir_uses_dir_from_form_data(mock_path
467466
assert result == str(Path(mock_dir).resolve())
468467

469468

470-
def test_before_request_raises_request_error_when_pdf_check_fails():
471-
"""Test that before_request raises RequestError when pdf_utils.check_pdf throws PDFValidationError."""
469+
def test_before_request_raises_pdf_validation_error_when_pdf_check_fails():
470+
"""Test that before_request raises PDFValidationError when pdf_utils.check_pdf throws PDFValidationError."""
472471
hook = SplitPdfHook()
473472

474473
# Initialize the hook with a mock client
@@ -514,13 +513,12 @@ def test_before_request_raises_request_error_when_pdf_check_fails():
514513
mock_check_pdf.side_effect = pdf_utils.PDFValidationError(error_message)
515514
mock_get_base_url.return_value = "http://localhost:8888"
516515

517-
# Call the method under test and verify it raises RequestError
518-
with pytest.raises(RequestError) as exc_info:
516+
# Call the method under test and verify it raises PDFValidationError
517+
with pytest.raises(pdf_utils.PDFValidationError) as exc_info:
519518
hook.before_request(mock_hook_ctx, mock_request)
520519

521-
# Verify the exception has the correct message and request object
520+
# Verify the exception has the correct message
522521
assert str(exc_info.value) == error_message
523-
assert exc_info.value.request == mock_request
524522

525523
# Verify that the mocked functions were called as expected
526524
mock_get_fields.assert_called_once_with(mock_request)

gen.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ generation:
1414
oAuth2ClientCredentialsEnabled: false
1515
oAuth2PasswordEnabled: false
1616
python:
17-
version: 0.37.2
17+
version: 0.37.3
1818
additionalDependencies:
1919
dev:
2020
deepdiff: '>=6.0'

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "unstructured-client"
3-
version = "0.37.2"
3+
version = "0.37.3"
44
description = "Python Client SDK for Unstructured API"
55
authors = [{ name = "Unstructured" },]
66
readme = "README-PYPI.md"

src/unstructured_client/_hooks/custom/pdf_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pypdf.errors import FileNotDecryptedError, PdfReadError
99

1010
from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME
11+
from unstructured_client._hooks.custom.validation_errors import FileValidationError
1112

1213
logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
1314

@@ -17,12 +18,11 @@
1718
pdf_logger.setLevel(logging.ERROR)
1819

1920

20-
class PDFValidationError(Exception):
21-
"""Base exception for PDF validation errors."""
21+
class PDFValidationError(FileValidationError):
22+
"""Exception for PDF validation errors."""
2223

2324
def __init__(self, message: str):
24-
self.message = message
25-
super().__init__(self.message)
25+
super().__init__(message, file_type="PDF")
2626

2727

2828
def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:

src/unstructured_client/_hooks/custom/split_pdf_hook.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import aiofiles
1717
import httpx
1818
import nest_asyncio # type: ignore
19-
from httpx import AsyncClient, RequestError
19+
from httpx import AsyncClient
2020
from pypdf import PdfReader, PdfWriter
2121

2222
from unstructured_client._hooks.custom import form_utils, pdf_utils, request_utils
@@ -303,13 +303,7 @@ def before_request(
303303
if pdf is None:
304304
return request
305305

306-
try:
307-
pdf = pdf_utils.check_pdf(pdf)
308-
except pdf_utils.PDFValidationError as e:
309-
raise RequestError(
310-
message=e.message,
311-
request=request,
312-
) from e
306+
pdf = pdf_utils.check_pdf(pdf)
313307

314308
starting_page_number = form_utils.get_starting_page_number(
315309
form_data,
src/unstructured_client/_hooks/custom/validation_errors.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""File validation error classes for the Unstructured client."""
2+
3+
from typing import Optional
4+
5+
6+
class FileValidationError(Exception):
7+
"""Base exception for file validation errors.
8+
9+
This exception should be raised when a file fails validation
10+
checks before being processed by the API.
11+
"""
12+
13+
def __init__(self, message: str, file_type: Optional[str] = None):
14+
self.message: str = message
15+
self.file_type: Optional[str] = file_type
16+
super().__init__(self.message)

0 commit comments

Comments
 (0)