Skip to content

Commit 8513c2a

Browse files
authored
.Net: [MEVD] Cosmos NoSQL provider work on keys, partition keys and point reads (#13550)
This is a sizable cleanup/redo of large chunks of the Cosmos NoSQL provider; I've broken it down to three commits for easier reviewing. * Cosmos NoSQL collections can now have either string/Guid as their key, or CosmosNoSqlKey. In the former case, this configures the partition key to be the same as the string/Guid document key - this is a sensible default that's common (and encouraged) in Cosmos. All other scenarios require CosmosNoSqlKey: * A single partition key that isn't the document ID * A hierarchical partition key (more than one property) * No partition key (legacy/discouraged) * Note that CosmosNoSqlKey is only passed to GetAsync and DeleteAsync; on the user's .NET record type, the properties are there as usual (e.g. a string document Id property, an int partition key property). This introduces a discrepancy for the first time between the collection's TKey (CosmosNoSqlKey) and its actual key property on the .NET type (a string); this made some things trickier, but is the correct way to do things for composite key cases (we'd do the same if we implement composite keys for relational providers). * Added support for Cosmos hierarchical partition keys (2-3 properties instead of just one). * GetAsync no longer does a SQL query with a WHERE clause, but rather an efficient point read via ReadItem. The multiple-key overload does ReadManyItemsAsync, which again is far more efficient. * UpdateAsync and DeleteAsync now correctly supply the partition key in the call's request options, again making it much more efficient. * Various other cleanup and fixes all around.
1 parent 91f7956 commit 8513c2a

31 files changed

+873
-365
lines changed

dotnet/samples/Demos/VectorStoreRAG/Program.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@
8383
appConfig.CosmosMongoConfig.DatabaseName);
8484
break;
8585
case "CosmosNoSql":
86-
kernelBuilder.Services.AddCosmosNoSqlCollection<TextSnippet<string>>(
86+
kernelBuilder.Services.AddCosmosNoSqlCollection<string, TextSnippet<string>>(
8787
appConfig.RagConfig.CollectionName,
8888
appConfig.CosmosNoSqlConfig.ConnectionString,
8989
appConfig.CosmosNoSqlConfig.DatabaseName);
Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,96 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Text.Json;
6+
using System.Text.Json.Serialization;
7+
8+
namespace Microsoft.SemanticKernel.Connectors.CosmosNoSql;
9+
10+
/// <summary>
/// Converts <see cref="T:byte[]"/> values to and from JSON arrays of numbers,
/// rather than the base64-encoded string form.
/// </summary>
/// <remarks>
/// Cosmos DB's VectorDistance function requires vectors to be arrays of numbers,
/// not base64-encoded strings, which is why this converter exists.
/// </remarks>
internal sealed class ByteArrayJsonConverter : JsonConverter<byte[]>
{
    /// <summary>
    /// Reads a byte array from either a base64 string token or a JSON array of numbers.
    /// </summary>
    /// <param name="reader">The reader, positioned on the token that starts the value.</param>
    /// <param name="typeToConvert">The type being converted; not consulted.</param>
    /// <param name="options">The serializer options; not consulted.</param>
    /// <returns>The decoded bytes.</returns>
    /// <exception cref="JsonException">The current token is neither a string nor the start of an array.</exception>
    public override byte[] Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.String:
                // Base64 strings are still accepted for backward compatibility.
                return reader.GetBytesFromBase64();

            case JsonTokenType.StartArray:
                break;

            default:
                throw new JsonException($"Expected StartArray or String token, got {reader.TokenType}");
        }

        var bytes = new List<byte>();
        while (true)
        {
            // Stop when the input runs out or the closing bracket is reached.
            if (!reader.Read() || reader.TokenType == JsonTokenType.EndArray)
            {
                break;
            }

            bytes.Add(reader.GetByte());
        }

        return bytes.ToArray();
    }

    /// <summary>
    /// Writes the byte array as a JSON array with one number per byte.
    /// </summary>
    /// <param name="writer">The writer that receives the array.</param>
    /// <param name="value">The bytes to write.</param>
    /// <param name="options">The serializer options; not consulted.</param>
    public override void Write(Utf8JsonWriter writer, byte[] value, JsonSerializerOptions options)
    {
        writer.WriteStartArray();

        for (var index = 0; index < value.Length; index++)
        {
            writer.WriteNumberValue(value[index]);
        }

        writer.WriteEndArray();
    }
}
53+
54+
/// <summary>
/// Converts <see cref="ReadOnlyMemory{T}"/> of byte values to and from JSON arrays of numbers,
/// rather than the base64-encoded string form.
/// </summary>
/// <remarks>
/// Cosmos DB's VectorDistance function requires vectors to be arrays of numbers,
/// not base64-encoded strings, which is why this converter exists.
/// </remarks>
internal sealed class ReadOnlyMemoryByteJsonConverter : JsonConverter<ReadOnlyMemory<byte>>
{
    /// <summary>
    /// Reads the bytes from either a base64 string token or a JSON array of numbers.
    /// </summary>
    /// <param name="reader">The reader, positioned on the token that starts the value.</param>
    /// <param name="typeToConvert">The type being converted; not consulted.</param>
    /// <param name="options">The serializer options; not consulted.</param>
    /// <returns>A memory region wrapping a newly created array holding the decoded bytes.</returns>
    /// <exception cref="JsonException">The current token is neither a string nor the start of an array.</exception>
    public override ReadOnlyMemory<byte> Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.String:
                // Base64 strings are still accepted for backward compatibility.
                byte[] decoded = reader.GetBytesFromBase64();
                return new ReadOnlyMemory<byte>(decoded);

            case JsonTokenType.StartArray:
                break;

            default:
                throw new JsonException($"Expected StartArray or String token, got {reader.TokenType}");
        }

        var bytes = new List<byte>();
        while (true)
        {
            // Stop when the input runs out or the closing bracket is reached.
            if (!reader.Read() || reader.TokenType == JsonTokenType.EndArray)
            {
                break;
            }

            bytes.Add(reader.GetByte());
        }

        byte[] collected = bytes.ToArray();
        return new ReadOnlyMemory<byte>(collected);
    }

    /// <summary>
    /// Writes the memory contents as a JSON array with one number per byte.
    /// </summary>
    /// <param name="writer">The writer that receives the array.</param>
    /// <param name="value">The bytes to write.</param>
    /// <param name="options">The serializer options; not consulted.</param>
    public override void Write(Utf8JsonWriter writer, ReadOnlyMemory<byte> value, JsonSerializerOptions options)
    {
        writer.WriteStartArray();

        ReadOnlySpan<byte> span = value.Span;
        for (var index = 0; index < span.Length; index++)
        {
            writer.WriteNumberValue(span[index]);
        }

        writer.WriteEndArray();
    }
}

0 commit comments

Comments
 (0)