Skip to content

Commit c334aa1

Browse files
authored
Merge pull request #2 from CHRISCARLON/v0.1.3
V0.1.3 tasks completed.
2 parents 6f1f5e3 + 5725fe3 commit c334aa1

File tree

10 files changed

+311
-81
lines changed

10 files changed

+311
-81
lines changed

.dockerignore

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Python artifacts
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
build/
8+
develop-eggs/
9+
dist/
10+
downloads/
11+
eggs/
12+
.eggs/
13+
lib/
14+
lib64/
15+
parts/
16+
sdist/
17+
var/
18+
wheels/
19+
*.egg-info/
20+
.installed.cfg
21+
*.egg
22+
23+
# Virtual environments
24+
.venv/
25+
venv/
26+
ENV/
27+
env/
28+
.env
29+
30+
# Testing and type checking
31+
.pytest_cache/
32+
.mypy_cache/
33+
.coverage
34+
htmlcov/
35+
.tox/
36+
.hypothesis/
37+
*.cover
38+
.coverage.*
39+
40+
# Development tools
41+
.ruff_cache/
42+
pyrightconfig.json
43+
.vscode/
44+
.idea/
45+
*.swp
46+
*.swo
47+
*~
48+
49+
# OS files
50+
.DS_Store
51+
.DS_Store?
52+
._*
53+
.Spotlight-V100
54+
.Trashes
55+
ehthumbs.db
56+
Thumbs.db
57+
58+
# Git
59+
.git/
60+
.gitignore
61+
.gitattributes
62+
63+
# Documentation
64+
*.md
65+
docs/
66+
LICENSE
67+
68+
# Build files
69+
Makefile
70+
makefile
71+
dockerfile
72+
Dockerfile
73+
74+
# Logs
75+
*.log
76+
logs/
77+
78+
# Package manager
79+
uv.lock
80+
poetry.lock
81+
Pipfile.lock
82+
83+
# Test files
84+
*_test.py
85+
test_*.py
86+
tests/

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@ wheels/
1313
.ruff_cache
1414
uv.lock
1515
pyrightconfig.json
16+
*.log
17+
.pytest_cache/
18+
.mypy_cache/

dockerfile

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,19 @@ FROM python:3.11-slim
22

33
WORKDIR /app
44

5-
ENV PYTHONPATH=/app/src
6-
ENV PYTHONUNBUFFERED=1
5+
ENV PYTHONPATH=/app/src \
6+
PYTHONUNBUFFERED=1 \
7+
PIP_NO_CACHE_DIR=1 \
8+
PIP_DISABLE_PIP_VERSION_CHECK=1
9+
710

811
RUN apt-get update && apt-get install -y \
912
gcc \
10-
&& rm -rf /var/lib/apt/lists/*
11-
13+
&& rm -rf /var/lib/apt/lists/* \
14+
&& apt-get clean
1215

1316
COPY pyproject.toml ./
14-
15-
RUN pip install .
17+
RUN pip install --no-cache-dir .
1618

1719
COPY src/ ./src/
1820

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "os-mcp"
3-
version = "0.1.2"
3+
version = "0.1.3"
44
description = "A Python MCP server that provides access to Ordnance Survey's DataHub APIs."
55
readme = "README.md"
66
requires-python = ">=3.11"

src/api_service/os_api.py

Lines changed: 71 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ async def _get_open_api_spec(self) -> OpenAPISpecification:
3737
spec = OpenAPISpecification(spec=response)
3838
return spec
3939
except Exception as e:
40-
logger.error(f"Error getting OpenAPI spec: {e}")
41-
raise e
40+
raise ValueError(f"Failed to get OpenAPI spec: {e}")
4241

4342
async def cache_openapi_spec(self) -> OpenAPISpecification:
4443
"""
@@ -111,11 +110,14 @@ async def _get_collections(self) -> CollectionsCache:
111110
response = await self.make_request("COLLECTIONS")
112111
collections_list = response.get("collections", [])
113112
filtered = self._filter_latest_collections(collections_list)
114-
logger.debug(f"Filtered collections: {filtered}")
113+
logger.debug(
114+
f"Filtered collections: {len(filtered)} collections"
115+
) # Don't log the actual data
115116
return CollectionsCache(collections=filtered, raw_response=response)
116117
except Exception as e:
117-
logger.error(f"Error getting collections: {e}")
118-
raise e
118+
sanitized_error = self._sanitise_api_key(str(e))
119+
logger.error(f"Error getting collections: {sanitized_error}")
120+
raise ValueError(f"Failed to get collections: {sanitized_error}")
119121

120122
async def cache_collections(self) -> CollectionsCache:
121123
"""
@@ -132,7 +134,8 @@ async def cache_collections(self) -> CollectionsCache:
132134
f"Collections successfully cached - {len(self._cached_collections.collections)} collections after filtering"
133135
)
134136
except Exception as e:
135-
raise ValueError(f"Failed to cache collections: {e}")
137+
sanitized_error = self._sanitise_api_key(str(e))
138+
raise ValueError(f"Failed to cache collections: {sanitized_error}")
136139
return self._cached_collections
137140

138141
async def initialise(self):
@@ -163,19 +166,59 @@ async def get_api_key(self) -> str:
163166
raise ValueError("OS_API_KEY environment variable is not set")
164167
return api_key
165168

166-
def _sanitize_response(self, data: Any) -> Any:
167-
"""Remove API keys from response URLs recursively"""
169+
def _sanitise_api_key(self, text: Any) -> str:
170+
"""Remove API keys from any text (URLs, error messages, etc.)"""
171+
if not isinstance(text, str):
172+
return text
173+
174+
patterns = [
175+
r"[?&]key=[^&\s]*",
176+
r"[?&]api_key=[^&\s]*",
177+
r"[?&]apikey=[^&\s]*",
178+
r"[?&]token=[^&\s]*",
179+
]
180+
181+
sanitized = text
182+
for pattern in patterns:
183+
sanitized = re.sub(pattern, "", sanitized, flags=re.IGNORECASE)
184+
185+
sanitized = re.sub(r"[?&]$", "", sanitized)
186+
sanitized = re.sub(r"&{2,}", "&", sanitized)
187+
sanitized = re.sub(r"\?&", "?", sanitized)
188+
189+
return sanitized
190+
191+
def _sanitise_response(self, data: Any) -> Any:
192+
"""Remove API keys from response data recursively"""
168193
if isinstance(data, dict):
194+
sanitized_dict = {}
169195
for key, value in data.items():
170-
if key == "href" and isinstance(value, str):
171-
data[key] = re.sub(r'[?&]key=[^&]*', '', value)
172-
data[key] = re.sub(r'[?&]$', '', data[key])
173-
data[key] = re.sub(r'&{2,}', '&', data[key])
196+
if isinstance(value, str) and any(
197+
url_indicator in key.lower()
198+
for url_indicator in ["href", "url", "link", "uri"]
199+
):
200+
sanitized_dict[key] = self._sanitise_api_key(value)
174201
elif isinstance(value, (dict, list)):
175-
data[key] = self._sanitize_response(value)
202+
sanitized_dict[key] = self._sanitise_response(value)
203+
else:
204+
sanitized_dict[key] = value
205+
return sanitized_dict
176206
elif isinstance(data, list):
177-
return [self._sanitize_response(item) for item in data]
178-
207+
return [self._sanitise_response(item) for item in data]
208+
elif isinstance(data, str):
209+
if any(
210+
indicator in data
211+
for indicator in [
212+
"http://",
213+
"https://",
214+
"key=",
215+
"api_key=",
216+
"apikey=",
217+
"token=",
218+
]
219+
):
220+
return self._sanitise_api_key(data)
221+
179222
return data
180223

181224
async def make_request(
@@ -217,14 +260,15 @@ async def make_request(
217260

218261
api_key = await self.get_api_key()
219262
request_params = params or {}
220-
request_params["key"] = api_key
263+
request_params["key"] = api_key
221264

222265
headers = {"User-Agent": self.user_agent, "Accept": "application/json"}
223266

224267
client_ip = getattr(self.session, "_source_address", None)
225268
client_info = f" from {client_ip}" if client_ip else ""
226269

227-
logger.info(f"Requesting URL: {endpoint_value}{client_info}")
270+
sanitized_url = self._sanitise_api_key(endpoint_value)
271+
logger.info(f"Requesting URL: {sanitized_url}{client_info}")
228272

229273
for attempt in range(1, max_retries + 1):
230274
try:
@@ -238,26 +282,29 @@ async def make_request(
238282
timeout=timeout,
239283
) as response:
240284
if response.status >= 400:
285+
# Sanitize error response text
286+
error_text = await response.text()
287+
sanitized_error = self._sanitise_api_key(error_text)
241288
error_message = (
242-
f"HTTP Error: {response.status} - {await response.text()}"
289+
f"HTTP Error: {response.status} - {sanitized_error}"
243290
)
244291
logger.error(f"Error: {error_message}")
245292
raise ValueError(error_message)
246293

247294
response_data = await response.json()
248-
249-
return self._sanitize_response(response_data)
295+
296+
return self._sanitise_response(response_data)
250297
except (aiohttp.ClientError, asyncio.TimeoutError) as e:
251298
if attempt == max_retries:
252-
error_message = (
253-
f"Request failed after {max_retries} attempts: {str(e)}"
254-
)
299+
sanitized_exception = self._sanitise_api_key(str(e))
300+
error_message = f"Request failed after {max_retries} attempts: {sanitized_exception}"
255301
logger.error(f"Error: {error_message}")
256302
raise ValueError(error_message)
257303
else:
258304
await asyncio.sleep(0.7)
259305
except Exception as e:
260-
error_message = f"Request failed: {str(e)}"
306+
sanitized_exception = self._sanitise_api_key(str(e))
307+
error_message = f"Request failed: {sanitized_exception}"
261308
logger.error(f"Error: {error_message}")
262309
raise ValueError(error_message)
263310
raise RuntimeError(

src/mcp_service/guardrails.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def __init__(self):
2626
r"(?i)leak confidential",
2727
r"(?i)reveal secrets",
2828
r"(?i)expose secrets",
29-
r"(?i)secrets.*contain",
29+
r"(?i)secrets.*contain",
3030
r"(?i)extract secrets",
3131
]
3232

0 commit comments

Comments
 (0)