[ENH] convert sample sizes (#919)

jdkent · web-flow · commit b838dbbd3afa · 2025-03-11T16:24:19.000-05:00
* allow for sample sizes to be converted

* formatting

* add more tests
diff --git a/nimare/io.py b/nimare/io.py
@@ -62,13 +62,45 @@ def _analysis_to_dict(study, analysis):
                 "z": [p.z for p in analysis.points] or [None],
             },
         }
-        sample_size = study.metadata.get("sample_size")
-        if sample_size:
-            result["metadata"]["sample_sizes"] = [sample_size]
+
+        sample_sizes = analysis.metadata.get("sample_sizes")
+        sample_size = None
+
+        # Validate sample sizes if present
+        if sample_sizes is not None and not isinstance(sample_sizes, (list, tuple)):
+            raise TypeError(
+                f"Expected sample_sizes to be list or tuple, but got {type(sample_sizes)}"
+            )
+
+        if not sample_sizes:
+            # Try to get single sample size from analysis or study metadata
+            sample_size = analysis.metadata.get("sample_size")
+            if sample_size is None:
+                sample_size = study.metadata.get("sample_size")
+
+            # Validate single sample size if present
+            if sample_size is not None and not isinstance(sample_size, (int, float)):
+                raise TypeError(f"Expected sample_size to be numeric, but got {type(sample_size)}")
+
+        # Add sample size info to result if available
+        if sample_sizes or sample_size is not None:
+            try:
+                result["metadata"]["sample_sizes"] = sample_sizes or [sample_size]
+            except TypeError as e:
+                raise TypeError(f"Error converting sample size data to list: {str(e)}") from e
+
+        # Handle annotations if present
         if analysis.annotations:
             result["labels"] = {}
-            for annotation in analysis.annotations.values():
-                result["labels"].update(annotation)
+            try:
+                for annotation in analysis.annotations.values():
+                    if not isinstance(annotation, dict):
+                        raise TypeError(
+                            f"Expected annotation to be dict, but got {type(annotation)}"
+                        )
+                    result["labels"].update(annotation)
+            except (TypeError, AttributeError) as e:
+                raise ValueError(f"Invalid annotation format: {str(e)}") from e
 
         return result
 
diff --git a/nimare/tests/test_io.py b/nimare/tests/test_io.py
@@ -22,6 +22,60 @@ def test_convert_nimads_to_dataset(example_nimads_studyset, example_nimads_annot
     assert isinstance(dset2, nimare.dataset.Dataset)
 
 
+def test_convert_nimads_to_dataset_sample_sizes(
+    example_nimads_studyset, example_nimads_annotation
+):
+    """Test conversion of nimads JSON to nimare dataset with a list of sample sizes."""
+    studyset = Studyset(example_nimads_studyset)
+    for study in studyset.studies:
+        for analysis in study.analyses:
+            analysis.metadata["sample_sizes"] = [2, 20]
+
+    dset = io.convert_nimads_to_dataset(studyset)
+
+    assert isinstance(dset, nimare.dataset.Dataset)
+    assert "sample_sizes" in dset.metadata.columns
+
+
+def test_convert_nimads_to_dataset_single_sample_size(
+    example_nimads_studyset, example_nimads_annotation
+):
+    """Test conversion of nimads JSON to nimare dataset with a single sample size value."""
+    studyset = Studyset(example_nimads_studyset)
+    for study in studyset.studies:
+        for analysis in study.analyses:
+            analysis.metadata["sample_size"] = 20
+
+    dset = io.convert_nimads_to_dataset(studyset)
+
+    assert isinstance(dset, nimare.dataset.Dataset)
+    assert "sample_sizes" in dset.metadata.columns
+
+
+def test_analysis_to_dict_invalid_sample_sizes_type(example_nimads_studyset):
+    """Test _analysis_to_dict raises TypeError when sample_sizes is not a list/tuple."""
+    studyset = Studyset(example_nimads_studyset)
+    # Set sample_sizes to an int rather than list/tuple
+    for study in studyset.studies:
+        for analysis in study.analyses:
+            analysis.metadata["sample_sizes"] = 5
+    with pytest.raises(TypeError):
+        # Trigger conversion which internally calls _analysis_to_dict
+        io.convert_nimads_to_dataset(studyset)
+
+
+def test_analysis_to_dict_invalid_annotations_format(example_nimads_studyset):
+    """Test _analysis_to_dict raises TypeError when annotations are in an invalid format."""
+    studyset = Studyset(example_nimads_studyset)
+    # Here we assume that the annotation is expected to be a dict
+    # Set annotation to an invalid format (e.g., a string)
+    for study in studyset.studies:
+        for analysis in study.analyses:
+            analysis.metadata["annotations"] = "invalid_format"
+    with pytest.raises(TypeError):
+        io.convert_nimads_to_dataset(studyset)
+
+
 def test_convert_sleuth_to_dataset_smoke():
     """Smoke test for Sleuth text file conversion."""
     sleuth_file = os.path.join(get_test_data_path(), "test_sleuth_file.txt")