Skip to content

Commit a881bcf

Browse files
committed
fix: resolve merge conflicts
2 parents 5501a83 + 331c6c5 commit a881bcf

File tree

5 files changed: +126 additions, -14 deletions (lines changed)

5 files changed: +126 additions, -14 deletions (lines changed)

data/src/data_utils/city_owned_properties.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,10 @@
44
import geopandas as gpd
55

66
from src.validation.base import ValidationResult, validate_output
7-
from src.validation.city_owned_properties import CityOwnedPropertiesOutputValidator
7+
from src.validation.city_owned_properties import (
8+
CityOwnedPropertiesOutputValidator,
9+
CityOwnedPropertiesInputValidator,
10+
)
811

912
from ..classes.loaders import EsriLoader
1013
from ..constants.services import CITY_OWNED_PROPERTIES_TO_LOAD
@@ -50,6 +53,7 @@ def city_owned_properties(
5053
esri_urls=CITY_OWNED_PROPERTIES_TO_LOAD,
5154
cols=["OPABRT", "AGENCY", "SIDEYARDELIGIBLE"],
5255
opa_col="opabrt",
56+
validator=CityOwnedPropertiesInputValidator(),
5357
)
5458

5559
city_owned_properties, input_validation = loader.load_or_fetch()

data/src/data_utils/council_dists.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,10 @@
44
import pandas as pd
55

66
from src.validation.base import ValidationResult, validate_output
7-
from src.validation.council_dists import CouncilDistrictsOutputValidator
7+
from src.validation.council_dists import (
8+
CouncilDistrictsInputValidator,
9+
CouncilDistrictsOutputValidator,
10+
)
811

912
from ..classes.loaders import EsriLoader
1013
from ..constants.services import COUNCIL_DISTRICTS_TO_LOAD
@@ -42,6 +45,7 @@ def council_dists(
4245
name="Council Districts",
4346
esri_urls=COUNCIL_DISTRICTS_TO_LOAD,
4447
cols=["district"],
48+
validator=CouncilDistrictsInputValidator(),
4549
input_crs="EPSG:4326", # Load in geographic coordinates since the data appears to be lat/lon
4650
)
4751

data/src/test/data_utils/test_data_utils.py

Lines changed: 71 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -112,11 +112,26 @@ def test_park_priority_basic_functionality(
112112
self.assertIn("park_priority", second_arg.columns)
113113
self.assertNotIn("parkneed", second_arg.columns)
114114

115-
# Check return values - focus on object identity and type, not exact equality
116-
self.assertIs(result_gdf, mock_spatial_join.return_value)
115+
# Check return values - use data equality instead of object identity
117116
self.assertIsInstance(result_gdf, gpd.GeoDataFrame)
118117
self.assertIsInstance(validation_result, ValidationResult)
119-
# Use the mock return value instead of comparing objects
118+
119+
# Check data equality for the GeoDataFrame
120+
pd.testing.assert_frame_equal(
121+
result_gdf.drop(columns=["geometry"]),
122+
expected_result_gdf.drop(columns=["geometry"]),
123+
check_dtype=False,
124+
)
125+
126+
# Check geometry column separately
127+
for i, (expected_geom, actual_geom) in enumerate(
128+
zip(expected_result_gdf.geometry, result_gdf.geometry)
129+
):
130+
self.assertTrue(
131+
expected_geom.equals(actual_geom), f"Geometry mismatch at index {i}"
132+
)
133+
134+
# Check validation result
120135
self.assertIs(validation_result, mock_validation_result)
121136

122137
@patch("src.data_utils.park_priority.EsriLoader")
@@ -176,8 +191,25 @@ def test_park_priority_column_renaming(
176191
self.assertNotIn("parkneed", second_arg.columns)
177192
self.assertEqual(second_arg["park_priority"].iloc[0], 5.0)
178193

179-
# Check return value - focus on object identity
180-
self.assertIs(result_gdf, mock_spatial_join.return_value)
194+
# Check return value - use data equality instead of object identity
195+
self.assertIsInstance(result_gdf, gpd.GeoDataFrame)
196+
197+
# Check data equality for the GeoDataFrame
198+
pd.testing.assert_frame_equal(
199+
result_gdf.drop(columns=["geometry"]),
200+
expected_result_gdf.drop(columns=["geometry"]),
201+
check_dtype=False,
202+
)
203+
204+
# Check geometry column separately
205+
for i, (expected_geom, actual_geom) in enumerate(
206+
zip(expected_result_gdf.geometry, result_gdf.geometry)
207+
):
208+
self.assertTrue(
209+
expected_geom.equals(actual_geom), f"Geometry mismatch at index {i}"
210+
)
211+
212+
# Check validation result
181213
self.assertIs(validation_result, mock_validation_result)
182214

183215
@patch("src.data_utils.park_priority.EsriLoader")
@@ -261,9 +293,25 @@ def test_park_priority_empty_data(self, mock_spatial_join, mock_esri_loader_clas
261293
result_gdf, validation_result = _park_priority_logic(input_gdf)
262294

263295
# Check that the function handles empty data gracefully
264-
self.assertIs(result_gdf, mock_spatial_join.return_value)
265296
self.assertIsInstance(result_gdf, gpd.GeoDataFrame)
266297
self.assertIsInstance(validation_result, ValidationResult)
298+
299+
# Check data equality for the GeoDataFrame
300+
pd.testing.assert_frame_equal(
301+
result_gdf.drop(columns=["geometry"]),
302+
input_gdf.drop(columns=["geometry"]),
303+
check_dtype=False,
304+
)
305+
306+
# Check geometry column separately
307+
for i, (expected_geom, actual_geom) in enumerate(
308+
zip(input_gdf.geometry, result_gdf.geometry)
309+
):
310+
self.assertTrue(
311+
expected_geom.equals(actual_geom), f"Geometry mismatch at index {i}"
312+
)
313+
314+
# Check validation result
267315
self.assertIs(validation_result, mock_validation_result)
268316

269317
# Check that spatial_join was still called (even with empty data)
@@ -324,8 +372,23 @@ def test_park_priority_return_format(
324372
# Check second element is ValidationResult
325373
self.assertIsInstance(validation_result, ValidationResult)
326374

327-
# Check the actual values - focus on object identity
328-
self.assertIs(result_gdf, mock_spatial_join.return_value)
375+
# Check the actual values - use data equality instead of object identity
376+
# Check data equality for the GeoDataFrame
377+
pd.testing.assert_frame_equal(
378+
result_gdf.drop(columns=["geometry"]),
379+
expected_result_gdf.drop(columns=["geometry"]),
380+
check_dtype=False,
381+
)
382+
383+
# Check geometry column separately
384+
for i, (expected_geom, actual_geom) in enumerate(
385+
zip(expected_result_gdf.geometry, result_gdf.geometry)
386+
):
387+
self.assertTrue(
388+
expected_geom.equals(actual_geom), f"Geometry mismatch at index {i}"
389+
)
390+
391+
# Check validation result
329392
self.assertIs(validation_result, mock_validation_result)
330393

331394
@pytest.mark.skip

data/src/validation/city_owned_properties.py

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,18 +29,43 @@
2929
coerce=True,
3030
)
3131

32+
# Expecting ~7,796 records returned (within ±20% tolerance).
33+
# This is checked in CityOwnedPropertiesInputSchema
34+
expected = 7796
35+
lower = int(expected * 0.8)
36+
upper = int(expected * 1.2)
37+
38+
CityOwnedPropertiesInputSchema = pa.DataFrameSchema(
39+
columns={
40+
"opa_id": pa.Column(pa.Int, checks=pa.Check(lambda s: s.dropna() != "")),
41+
"agency": pa.Column(pa.String, nullable=True),
42+
"sideyardeligible": pa.Column(
43+
pa.Category, nullable=True, checks=pa.Check.isin(["Yes", "No"])
44+
),
45+
"geometry": pa.Column("geometry"),
46+
},
47+
checks=pa.Check(lambda df: lower <= df.shape[0] <= upper),
48+
strict=True,
49+
)
50+
3251

3352
class CityOwnedPropertiesInputValidator(BaseValidator):
34-
"""Validator for city owned properties service input."""
53+
"""
54+
Validator for the city-owned properties dataset input.
55+
schema and _custom_validation() are used by validate() in the parent class.
56+
"""
3557

36-
schema = None # No schema validation for input
58+
schema = CityOwnedPropertiesInputSchema
3759

3860
def _custom_validation(self, gdf: gpd.GeoDataFrame):
3961
pass
4062

4163

4264
class CityOwnedPropertiesOutputValidator(BaseValidator):
43-
"""Validator for city owned properties service output."""
65+
"""
66+
Validator for the city-owned properties dataset output.
67+
schema and _custom_validation() are used by validate() in the parent class.
68+
"""
4469

4570
schema = CityOwnedPropertiesSchema
4671

data/src/validation/council_dists.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,27 @@
2626
coerce=True,
2727
)
2828

29+
CouncilDistrictsInputSchema = pa.DataFrameSchema(
30+
columns={
31+
"district": pa.Column(
32+
str,
33+
nullable=True,
34+
),
35+
"geometry": pa.Column("geometry"),
36+
},
37+
# district should contain 10 records of strings 1-10
38+
checks=pa.Check(
39+
lambda df: set(df["district"].dropna().unique())
40+
== {str(i) for i in range(1, 11)}
41+
),
42+
strict=True,
43+
)
44+
2945

3046
class CouncilDistrictsInputValidator(BaseValidator):
3147
"""Validator for council districts service input."""
3248

33-
schema = None # No schema validation for input
49+
schema = CouncilDistrictsInputSchema
3450

3551
def _custom_validation(self, gdf: gpd.GeoDataFrame, check_stats: bool = True):
3652
pass

0 commit comments

Comments
 (0)