8
8
from unittest .mock import MagicMock , patch
9
9
10
10
import httpx
11
+ from httpx import RequestError
11
12
import pytest
12
13
import requests
13
14
from requests_toolbelt import MultipartDecoder
14
15
16
+ from _test_unstructured_client .unit_utils import sample_docs_path
15
17
from unstructured_client ._hooks .custom import form_utils , pdf_utils , request_utils
16
18
from unstructured_client ._hooks .custom .form_utils import (
17
19
FormData ,
29
31
SplitPdfHook ,
30
32
get_optimal_split_size , run_tasks ,
31
33
)
34
+ from unstructured_client ._hooks .types import BeforeRequestContext
32
35
from unstructured_client .models import shared
33
36
34
37
@@ -462,3 +465,64 @@ def test_unit_get_split_pdf_cache_tmp_data_dir_uses_dir_from_form_data(mock_path
462
465
mock_path .assert_called_once_with (mock_dir )
463
466
mock_path_instance .exists .assert_called_once ()
464
467
assert result == str (Path (mock_dir ).resolve ())
468
+
469
+
470
def test_before_request_raises_request_error_when_pdf_check_fails():
    """Check that before_request converts a PDFValidationError into RequestError.

    ``pdf_utils.check_pdf`` is patched to raise ``PDFValidationError``; the
    hook is then expected to raise ``httpx.RequestError`` carrying the same
    message and the request that was being processed.
    """
    split_hook = SplitPdfHook()
    split_hook.sdk_init(base_url="http://localhost:8888", client=MagicMock())

    # Hook context identifying the operation being executed.
    hook_ctx = MagicMock(operation_id="partition")

    # Outgoing request stub: multipart headers and a host on the URL.
    request = MagicMock(headers={"Content-Type": "multipart/form-data"})
    request.url.host = "localhost"

    # Parsed multipart fields, including the flag that requests PDF splitting.
    pdf_file = MagicMock()
    pdf_file.read.return_value = b"mock_pdf_content"
    form_fields = {
        "split_pdf_page": "true",
        "files": {
            "filename": "test.pdf",
            "content_type": "application/pdf",
            "file": pdf_file,
        },
    }

    pdf_reader = MagicMock()
    expected_message = "File does not appear to be a valid PDF."

    # Each collaborator is replaced where the test's patch target points;
    # return values and the failure are configured directly on the patches.
    with patch(
        "unstructured_client._hooks.custom.request_utils.get_multipart_stream_fields",
        return_value=form_fields,
    ) as get_fields_mock:
        with patch(
            "unstructured_client._hooks.custom.pdf_utils.read_pdf",
            return_value=pdf_reader,
        ) as read_pdf_mock:
            with patch(
                "unstructured_client._hooks.custom.pdf_utils.check_pdf",
                side_effect=pdf_utils.PDFValidationError(expected_message),
            ) as check_pdf_mock:
                with patch(
                    "unstructured_client._hooks.custom.request_utils.get_base_url",
                    return_value="http://localhost:8888",
                ):
                    with pytest.raises(RequestError) as raised:
                        split_hook.before_request(hook_ctx, request)

                    # The error must carry the validation message and the request.
                    assert str(raised.value) == expected_message
                    assert raised.value.request == request

                    # Each patched helper is called exactly once with the expected input.
                    get_fields_mock.assert_called_once_with(request)
                    read_pdf_mock.assert_called_once_with(pdf_file)
                    check_pdf_mock.assert_called_once_with(pdf_reader)
0 commit comments