Skip to content

Commit 06bdcbc

Browse files
committed
refactor: remove timestamp from embedding cache
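Embeddings are immutable: for a given provider, model, dimension count, and normalization setting, the same text always produces the same vector, so the cache needs neither invalidation nor timestamps. Removing the timestamp also simplifies the schema.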
1 parent 12eb472 commit 06bdcbc

File tree

5 files changed

+14
-80
lines changed

5 files changed

+14
-80
lines changed

src/Core/Embeddings/Cache/CachedEmbedding.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,4 @@ public sealed class CachedEmbedding
2222
/// Null if the provider did not return token count.
2323
/// </summary>
2424
public int? TokenCount { get; init; }
25-
26-
/// <summary>
27-
/// Timestamp when this embedding was stored in the cache.
28-
/// Useful for debugging and diagnostics.
29-
/// </summary>
30-
public required DateTimeOffset Timestamp { get; init; }
3125
}

src/Core/Embeddings/Cache/SqliteEmbeddingCache.cs

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,22 +23,21 @@ CREATE TABLE IF NOT EXISTS embeddings_cache (
2323
text_hash TEXT NOT NULL,
2424
vector BLOB NOT NULL,
2525
token_count INTEGER NULL,
26-
created_at TEXT NOT NULL,
2726
PRIMARY KEY (provider, model, dimensions, is_normalized, text_hash)
2827
);
2928
""";
3029

3130
private const string SelectSql = """
32-
SELECT vector, token_count, created_at FROM embeddings_cache
31+
SELECT vector, token_count FROM embeddings_cache
3332
WHERE provider = @provider AND model = @model AND dimensions = @dimensions
3433
AND is_normalized = @isNormalized AND text_hash = @textHash
3534
""";
3635

3736
private const string UpsertSql = """
38-
INSERT INTO embeddings_cache (provider, model, dimensions, is_normalized, text_length, text_hash, vector, token_count, created_at)
39-
VALUES (@provider, @model, @dimensions, @isNormalized, @textLength, @textHash, @vector, @tokenCount, @createdAt)
37+
INSERT INTO embeddings_cache (provider, model, dimensions, is_normalized, text_length, text_hash, vector, token_count)
38+
VALUES (@provider, @model, @dimensions, @isNormalized, @textLength, @textHash, @vector, @tokenCount)
4039
ON CONFLICT(provider, model, dimensions, is_normalized, text_hash)
41-
DO UPDATE SET vector = @vector, token_count = @tokenCount, created_at = @createdAt
40+
DO UPDATE SET vector = @vector, token_count = @tokenCount
4241
""";
4342

4443
private readonly SqliteConnection _connection;
@@ -136,16 +135,14 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
136135
var vector = BytesToFloatArray(vectorBlob);
137136

138137
int? tokenCount = reader["token_count"] == DBNull.Value ? null : Convert.ToInt32(reader["token_count"], CultureInfo.InvariantCulture);
139-
var createdAt = DateTimeOffset.Parse((string)reader["created_at"], CultureInfo.InvariantCulture);
140138

141139
this._logger.LogTrace("Cache hit for {Provider}/{Model} hash: {HashPrefix}..., dimensions: {Dimensions}",
142140
key.Provider, key.Model, key.TextHash[..Math.Min(16, key.TextHash.Length)], vector.Length);
143141

144142
return new CachedEmbedding
145143
{
146144
Vector = vector,
147-
TokenCount = tokenCount,
148-
Timestamp = createdAt
145+
TokenCount = tokenCount
149146
};
150147
}
151148
}
@@ -164,7 +161,6 @@ public async Task StoreAsync(EmbeddingCacheKey key, float[] vector, int? tokenCo
164161
}
165162

166163
var vectorBlob = FloatArrayToBytes(vector);
167-
var createdAt = DateTimeOffset.UtcNow.ToString("o", CultureInfo.InvariantCulture);
168164

169165
var command = this._connection.CreateCommand();
170166
await using (command.ConfigureAwait(false))
@@ -178,7 +174,6 @@ public async Task StoreAsync(EmbeddingCacheKey key, float[] vector, int? tokenCo
178174
command.Parameters.AddWithValue("@textHash", key.TextHash);
179175
command.Parameters.AddWithValue("@vector", vectorBlob);
180176
command.Parameters.AddWithValue("@tokenCount", tokenCount.HasValue ? tokenCount.Value : DBNull.Value);
181-
command.Parameters.AddWithValue("@createdAt", createdAt);
182177

183178
await command.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
184179

tests/Core.Tests/Embeddings/Cache/SqliteEmbeddingCacheTests.cs

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -79,26 +79,6 @@ public async Task StoreAsync_WithTokenCount_ShouldPreserveValue()
7979
Assert.Equal(tokenCount, result.TokenCount);
8080
}
8181

82-
[Fact]
83-
public async Task StoreAsync_ShouldSetTimestamp()
84-
{
85-
// Arrange
86-
using var cache = new SqliteEmbeddingCache(this._tempDbPath, CacheModes.ReadWrite, this._loggerMock.Object);
87-
var key = EmbeddingCacheKey.Create("OpenAI", "model", 1536, true, "test text");
88-
var vector = new float[] { 0.1f, 0.2f, 0.3f };
89-
var beforeStore = DateTimeOffset.UtcNow;
90-
91-
// Act
92-
await cache.StoreAsync(key, vector, tokenCount: null, CancellationToken.None).ConfigureAwait(false);
93-
var result = await cache.TryGetAsync(key, CancellationToken.None).ConfigureAwait(false);
94-
var afterStore = DateTimeOffset.UtcNow;
95-
96-
// Assert
97-
Assert.NotNull(result);
98-
Assert.True(result.Timestamp >= beforeStore.AddSeconds(-1)); // Allow 1 second tolerance
99-
Assert.True(result.Timestamp <= afterStore.AddSeconds(1));
100-
}
101-
10282
[Fact]
10383
public async Task StoreAsync_WithLargeVector_ShouldRoundTrip()
10484
{

tests/Core.Tests/Embeddings/CachedEmbeddingGeneratorTests.cs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ public async Task GenerateAsync_Single_WithCacheHit_ShouldReturnCachedVector()
5454
var cachedVector = new float[] { 0.1f, 0.2f, 0.3f };
5555
var cachedEmbedding = new CachedEmbedding
5656
{
57-
Vector = cachedVector,
58-
Timestamp = DateTimeOffset.UtcNow
57+
Vector = cachedVector
5958
};
6059

6160
this._cacheMock.Setup(x => x.Mode).Returns(CacheModes.ReadWrite);
@@ -220,7 +219,7 @@ public async Task GenerateAsync_Batch_AllCacheHits_ShouldNotCallInnerGenerator()
220219
var testKey = EmbeddingCacheKey.Create("OpenAI", "text-embedding-ada-002", 1536, true, kvp.Key);
221220
if (testKey.TextHash == key.TextHash)
222221
{
223-
return new CachedEmbedding { Vector = kvp.Value, Timestamp = DateTimeOffset.UtcNow };
222+
return new CachedEmbedding { Vector = kvp.Value };
224223
}
225224
}
226225

@@ -299,7 +298,7 @@ public async Task GenerateAsync_Batch_MixedHitsAndMisses_ShouldOnlyGenerateMisse
299298
{
300299
if (key.TextHash == cachedKey.TextHash)
301300
{
302-
return new CachedEmbedding { Vector = cachedVector, Timestamp = DateTimeOffset.UtcNow };
301+
return new CachedEmbedding { Vector = cachedVector };
303302
}
304303

305304
return null;
@@ -411,12 +410,12 @@ public async Task GenerateAsync_Batch_ShouldPreserveOrder()
411410
{
412411
if (key.TextHash == cachedB.TextHash)
413412
{
414-
return new CachedEmbedding { Vector = vectorB, Timestamp = DateTimeOffset.UtcNow };
413+
return new CachedEmbedding { Vector = vectorB };
415414
}
416415

417416
if (key.TextHash == cachedD.TextHash)
418417
{
419-
return new CachedEmbedding { Vector = vectorD, Timestamp = DateTimeOffset.UtcNow };
418+
return new CachedEmbedding { Vector = vectorD };
420419
}
421420

422421
return null;

tests/Core.Tests/Embeddings/CachedEmbeddingTests.cs

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace KernelMemory.Core.Tests.Embeddings;
55

66
/// <summary>
77
/// Tests for CachedEmbedding model to verify proper construction and immutability.
8-
/// CachedEmbedding stores the embedding vector, optional token count, and timestamp.
8+
/// CachedEmbedding stores the embedding vector and optional token count.
99
/// </summary>
1010
public sealed class CachedEmbeddingTests
1111
{
@@ -14,18 +14,15 @@ public void CachedEmbedding_WithRequiredProperties_ShouldBeCreated()
1414
{
1515
// Arrange
1616
var vector = new float[] { 0.1f, 0.2f, 0.3f };
17-
var timestamp = DateTimeOffset.UtcNow;
1817

1918
// Act
2019
var cached = new CachedEmbedding
2120
{
22-
Vector = vector,
23-
Timestamp = timestamp
21+
Vector = vector
2422
};
2523

2624
// Assert
2725
Assert.Equal(vector, cached.Vector);
28-
Assert.Equal(timestamp, cached.Timestamp);
2926
Assert.Null(cached.TokenCount);
3027
}
3128

@@ -34,14 +31,12 @@ public void CachedEmbedding_WithTokenCount_ShouldStoreValue()
3431
{
3532
// Arrange
3633
var vector = new float[] { 0.1f, 0.2f, 0.3f };
37-
var timestamp = DateTimeOffset.UtcNow;
3834
const int tokenCount = 42;
3935

4036
// Act
4137
var cached = new CachedEmbedding
4238
{
4339
Vector = vector,
44-
Timestamp = timestamp,
4540
TokenCount = tokenCount
4641
};
4742

@@ -54,13 +49,11 @@ public void CachedEmbedding_WithNullTokenCount_ShouldBeNull()
5449
{
5550
// Arrange
5651
var vector = new float[] { 0.1f, 0.2f, 0.3f };
57-
var timestamp = DateTimeOffset.UtcNow;
5852

5953
// Act
6054
var cached = new CachedEmbedding
6155
{
6256
Vector = vector,
63-
Timestamp = timestamp,
6457
TokenCount = null
6558
};
6659

@@ -73,13 +66,11 @@ public void CachedEmbedding_VectorShouldPreserveFloatPrecision()
7366
{
7467
// Arrange
7568
var vector = new float[] { 0.123456789f, -0.987654321f, float.MaxValue, float.MinValue };
76-
var timestamp = DateTimeOffset.UtcNow;
7769

7870
// Act
7971
var cached = new CachedEmbedding
8072
{
81-
Vector = vector,
82-
Timestamp = timestamp
73+
Vector = vector
8374
};
8475

8576
// Assert
@@ -99,40 +90,15 @@ public void CachedEmbedding_WithLargeVector_ShouldPreserveAllDimensions()
9990
vector[i] = (float)i / 1536;
10091
}
10192

102-
var timestamp = DateTimeOffset.UtcNow;
103-
10493
// Act
10594
var cached = new CachedEmbedding
10695
{
107-
Vector = vector,
108-
Timestamp = timestamp
96+
Vector = vector
10997
};
11098

11199
// Assert
112100
Assert.Equal(1536, cached.Vector.Length);
113101
Assert.Equal(0.0f, cached.Vector[0]);
114102
Assert.Equal(1535f / 1536, cached.Vector[1535], precision: 6);
115103
}
116-
117-
[Fact]
118-
public void CachedEmbedding_TimestampShouldBeDateTimeOffset()
119-
{
120-
// Arrange
121-
var vector = new float[] { 0.1f };
122-
var timestamp = new DateTimeOffset(2025, 12, 1, 10, 30, 0, TimeSpan.FromHours(-5));
123-
124-
// Act
125-
var cached = new CachedEmbedding
126-
{
127-
Vector = vector,
128-
Timestamp = timestamp
129-
};
130-
131-
// Assert
132-
Assert.Equal(timestamp.Year, cached.Timestamp.Year);
133-
Assert.Equal(timestamp.Month, cached.Timestamp.Month);
134-
Assert.Equal(timestamp.Day, cached.Timestamp.Day);
135-
Assert.Equal(timestamp.Hour, cached.Timestamp.Hour);
136-
Assert.Equal(timestamp.Offset, cached.Timestamp.Offset);
137-
}
138104
}

0 commit comments

Comments
 (0)