From bd37f7333d4ef807b632e7ed4cf16178ac493a7f Mon Sep 17 00:00:00 2001 From: neon-sunset Date: Wed, 25 Sep 2024 05:04:31 +0300 Subject: [PATCH] Further refinement - Document List operations - Document parallel operation exceptions - Fix bugs in List operations - Fix F# example - Fix tests build errors - Remove UnsafeAccessor use until .NET 9 - Factor out duplicated logic into helpers - Update readme/description --- README.md | 7 ++- example/Example.FSharp/Program.fs | 12 ++-- src/Constants.cs | 8 ++- src/Extensions.cs | 20 ++++-- src/Grpc/Converters.cs | 100 ++++++------------------------ src/Grpc/GrpcTransport.cs | 16 ++--- src/Index.cs | 89 +++++++++++++++++++------- src/Pinecone.csproj | 4 +- src/PineconeClient.cs | 23 +++---- src/Rest/RestTransport.cs | 15 +---- src/Rest/Types.cs | 31 +++++---- src/ThrowHelpers.cs | 2 +- src/Types/CollectionTypes.cs | 6 +- src/Types/IndexTypes.cs | 46 -------------- src/Types/VectorTypes.cs | 92 +++++++++++++++++++++++++++ test/DataTestBase.cs | 79 +++++++++++------------ test/DataTestFixtureBase.cs | 14 ++--- 17 files changed, 298 insertions(+), 266 deletions(-) diff --git a/README.md b/README.md index 73c8b96..d20081c 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,11 @@ In the absence of an official SDK, it provides first-class support for Pinecone - Standard operations on pod-based and serverless indexes - gRPC and REST transports for vector operations - Sparse-dense vectors -- Automatic batching and parallelization for upsert, fetch and delete operations -- Efficient vector serialization - Metadata support +- Efficient vector serialization - NativeAOT compatibility (e.g. for AWS Lambda) +- Automatic batching and parallelization for upsert, fetch and delete operations +- Preservation of data on partial failures for streaming and batched operations ## Installation @@ -123,4 +124,4 @@ await pinecone.DeleteCollection("myCollection"); ## Contributing -Contributions are welcome! Feel free open an issue or a PR. +Contributions are welcome! Feel free to open an issue or a PR. diff --git a/example/Example.FSharp/Program.fs b/example/Example.FSharp/Program.fs index fc96476..9d0b409 100644 --- a/example/Example.FSharp/Program.fs +++ b/example/Example.FSharp/Program.fs @@ -1,10 +1,14 @@ #nowarn "3391" -open Pinecone +open System open System.Collections.Generic +open Pinecone let createMetadata x = MetadataMap(x |> Seq.map (fun (k, m) -> KeyValuePair(k,m) )) +let getRandomVector size = + Array.init size (fun _ -> Random.Shared.NextSingle()) + let main = task { use pinecone = new PineconeClient("[api-key]") @@ -23,8 +27,8 @@ let main = task { use! index = pinecone.GetIndex(indexName) let tags = [|"tag1" ; "tag2"|] - let first = Vector(Id = "first", Values = Array.zeroCreate 1536, Metadata = createMetadata["new", true; "price", 50; "tags", tags]) - let second = Vector(Id = "second", Values = Array.zeroCreate 1536, Metadata = createMetadata["price", 50]) + let first = Vector(Id = "first", Values = getRandomVector 1536, Metadata = createMetadata["new", true; "price", 50; "tags", tags]) + let second = Vector(Id = "second", Values = getRandomVector 1536, Metadata = createMetadata["price", 50]) // Upsert vectors into the index let! _ = index.Upsert [|first; second|] @@ -36,7 +40,7 @@ let main = task { let priceRange = createMetadata["price", createMetadata["$gte", 75; "$lte", 125]] // Query the index by embedding and metadata filter - let! results = index.Query((Array.zeroCreate 1536), 3u, filter = priceRange, includeMetadata = true) + let! 
results = index.Query(getRandomVector 1536, 3u, filter = priceRange, includeMetadata = true) let metadata = results |> Seq.collect _.Metadata diff --git a/src/Constants.cs b/src/Constants.cs index edd8c2d..4dbb5dc 100644 --- a/src/Constants.cs +++ b/src/Constants.cs @@ -2,13 +2,15 @@ namespace Pinecone; -internal static class Constants +static class Constants { public const string RestApiKey = "Api-Key"; public const string GrpcApiKey = "api-key"; - public static readonly string Version = - typeof(Constants).Assembly.GetName().Version?.ToString(3) ?? "0.0.0"; + public const string ApiVersion = "2024-07"; + + public static readonly string UserAgent = + $"lang=C#; Pinecone.NET/{typeof(Constants).Assembly.GetName().Version?.ToString(3) ?? "0.0.0"}"; public static readonly HttpClientFactoryOptions RedactApiKeyOptions = new() { diff --git a/src/Extensions.cs b/src/Extensions.cs index 0bd42ed..02dcccf 100644 --- a/src/Extensions.cs +++ b/src/Extensions.cs @@ -2,10 +2,12 @@ using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using Grpc.Core; +using Microsoft.Extensions.Http.Logging; +using Microsoft.Extensions.Logging; namespace Pinecone; -internal static class Extensions +static class Extensions { internal static void AddPineconeHeaders(this HttpClient http, string apiKey) { @@ -14,20 +16,28 @@ internal static void AddPineconeHeaders(this HttpClient http, string apiKey) if (!headers.Contains(Constants.RestApiKey)) headers.Add(Constants.RestApiKey, apiKey); if (!headers.Contains("X-Pinecone-Api-Version")) - headers.Add("X-Pinecone-Api-Version", "2024-07"); + headers.Add("X-Pinecone-Api-Version", Constants.ApiVersion); if (!headers.Contains("User-Agent")) - headers.TryAddWithoutValidation("User-Agent", $"lang=C#; Pinecone.NET/{Constants.Version}"); + headers.TryAddWithoutValidation("User-Agent", Constants.UserAgent); } internal static Metadata WithPineconeProps(this Metadata metadata, string apiKey) { metadata.Add(Constants.GrpcApiKey, apiKey); - metadata.Add("X-Pinecone-Api-Version", "2024-07"); - metadata.Add("User-Agent", $"lang=C#; Pinecone.NET/{Constants.Version}"); + metadata.Add("X-Pinecone-Api-Version", Constants.ApiVersion); + metadata.Add("User-Agent", Constants.UserAgent); return metadata; } + internal static HttpMessageHandler CreateLoggingHandler(this ILoggerFactory factory) + { + return new LoggingHttpMessageHandler( + factory.CreateLogger(), + Constants.RedactApiKeyOptions) + { InnerHandler = new HttpClientHandler() }; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static ValueTask CheckStatusCode(this HttpResponseMessage response, CancellationToken ct, [CallerMemberName] string requestName = "") { diff --git a/src/Grpc/Converters.cs b/src/Grpc/Converters.cs index b59874a..b5ed318 100644 --- a/src/Grpc/Converters.cs +++ b/src/Grpc/Converters.cs @@ -1,13 +1,16 @@ using System.Reflection; -using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using Google.Protobuf.Collections; using Google.Protobuf.WellKnownTypes; namespace Pinecone.Grpc; -internal static class Converters +static class Converters { + static readonly Value NullValue = Value.ForNull(); + static readonly Value TrueValue = Value.ForBool(true); + static readonly Value FalseValue = Value.ForBool(false); + // gRPC types conversion to sane and usable ones public static Struct ToProtoStruct(this MetadataMap source) { @@ -23,10 +26,10 @@ public static Struct ToProtoStruct(this MetadataMap source) public static Value ToProtoValue(this MetadataValue source) => 
source.Inner switch
        {
            // This is terrible but such is life
-           null => Value.ForNull(),
+           null => NullValue,
            double num => Value.ForNumber(num),
            string str => Value.ForString(str),
-           bool boolean => Value.ForBool(boolean),
+           bool boolean => boolean ? TrueValue : FalseValue,
            MetadataMap nested => Value.ForStruct(nested.ToProtoStruct()),
            IEnumerable<MetadataValue> list => Value.ForList(list.Select(v => v.ToProtoValue()).ToArray()),
            _ => ThrowHelpers.ArgumentException($"Unsupported metadata type: {source.Inner!.GetType()}")
        };
@@ -119,75 +122,6 @@ Value.KindOneofCase.None or
         };
     }
 
-#if NET8_0_OR_GREATER
-    // These have to be duplicated because unsafe accessor does not support generics in .NET 8.
-    // This approach is, however, very useful as we completely bypass referencing reflection for NAOT.
-    public static ReadOnlyMemory<float> AsMemory(this RepeatedField<float> source)
-    {
-        return ArrayRef(source).AsMemory(0, source.Count);
-    }
-
-    public static void OverwriteWith(this RepeatedField<float> target, ReadOnlyMemory<float>? source)
-    {
-        if (source is null or { IsEmpty: true }) return;
-
-        float[] array;
-        int count;
-        if (MemoryMarshal.TryGetArray(source.Value, out var segment)
-            && segment.Offset is 0)
-        {
-            array = segment.Array!;
-            count = segment.Count;
-        }
-        else
-        {
-            array = source.Value.ToArray();
-            count = array.Length;
-        }
-
-        ArrayRef(target) = array;
-        CountRef(target) = count;
-    }
-
-    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "array")]
-    static extern ref float[] ArrayRef(RepeatedField<float> instance);
-
-    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "count")]
-    static extern ref int CountRef(RepeatedField<float> instance);
-
-    public static ReadOnlyMemory<uint> AsMemory(this RepeatedField<uint> source)
-    {
-        return ArrayRef(source).AsMemory(0, source.Count);
-    }
-
-    public static void OverwriteWith(this RepeatedField<uint> target, ReadOnlyMemory<uint>? source)
-    {
-        if (source is null or { IsEmpty: true }) return;
-
-        uint[] array;
-        int count;
-        if (MemoryMarshal.TryGetArray(source.Value, out var segment)
-            && segment.Offset is 0)
-        {
-            array = segment.Array!;
-            count = segment.Count;
-        }
-        else
-        {
-            array = source.Value.ToArray();
-            count = array.Length;
-        }
-
-        ArrayRef(target) = array;
-        CountRef(target) = count;
-    }
-
-    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "array")]
-    static extern ref uint[] ArrayRef(RepeatedField<uint> instance);
-
-    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "count")]
-    static extern ref int CountRef(RepeatedField<uint> instance);
-#else
     public static ReadOnlyMemory<T> AsMemory<T>(this RepeatedField<T> source)
         where T : unmanaged
     {
@@ -217,8 +151,19 @@ public static void OverwriteWith<T>(this RepeatedField<T> target, ReadOnlyMemory<T>? source)
         FieldAccessors<T>.SetCount(target, count);
     }
 
-    private static class FieldAccessors<T> where T : unmanaged
+    // The UnsafeAccessor path was removed because, as it turns out, support for
+    // the specified generics was added unintentionally and breaks on .NET 9.
+    // See https://github.com/dotnet/runtime/issues/108046
+    // TODO: Once .NET 9 is out, bring back the UnsafeAccessor path using the
+    // pattern described as the solution in the issue above.
+    static class FieldAccessors<T> where T : unmanaged
     {
+        static readonly FieldInfo ArrayField = typeof(RepeatedField<T>)
+            .GetField("array", BindingFlags.NonPublic | BindingFlags.Instance) ?? throw new NullReferenceException();
+
+        static readonly FieldInfo CountField = typeof(RepeatedField<T>)
+            .GetField("count", BindingFlags.NonPublic | BindingFlags.Instance) ?? throw new NullReferenceException();
+
         public static T[] GetArray(RepeatedField<T> instance)
         {
             return (T[])ArrayField.GetValue(instance)!;
@@ -233,12 +178,5 @@ public static void SetCount(RepeatedField<T> instance, int value)
         {
             CountField.SetValue(instance, value);
         }
-
-        static readonly FieldInfo ArrayField = typeof(RepeatedField<T>)
-            .GetField("array", BindingFlags.NonPublic | BindingFlags.Instance) ?? throw new NullReferenceException();
-
-        static readonly FieldInfo CountField = typeof(RepeatedField<T>)
-            .GetField("count", BindingFlags.NonPublic | BindingFlags.Instance) ?? throw new NullReferenceException();
     }
-#endif
 }
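For context on the change above: the cached-`FieldInfo` fallback in `FieldAccessors<T>` trades zero-overhead `UnsafeAccessor` field access for plain reflection that works on every currently supported runtime. A minimal, self-contained sketch of the same pattern (the `Counter` type and its `count` field are hypothetical, for illustration only):

```csharp
using System;
using System.Reflection;

class Counter
{
    int count = 41;               // private state we want to reach from outside
    public int Count => count;
}

static class CounterAccessor
{
    // Resolve the private field once and cache it; repeated GetField lookups
    // would otherwise dominate the cost of each access.
    static readonly FieldInfo CountField = typeof(Counter)
        .GetField("count", BindingFlags.NonPublic | BindingFlags.Instance)
        ?? throw new MissingFieldException(nameof(Counter), "count");

    public static int Get(Counter instance) => (int)CountField.GetValue(instance)!;
    public static void Set(Counter instance, int value) => CountField.SetValue(instance, value);
}

class Program
{
    static void Main()
    {
        var counter = new Counter();
        CounterAccessor.Set(counter, CounterAccessor.Get(counter) + 1);
        Console.WriteLine(counter.Count); // prints 42
    }
}
```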
diff --git a/src/Grpc/GrpcTransport.cs b/src/Grpc/GrpcTransport.cs
index 3207bd0..d4c6ca3 100644
--- a/src/Grpc/GrpcTransport.cs
+++ b/src/Grpc/GrpcTransport.cs
@@ -145,20 +145,20 @@ public async Task Update(
         string? indexNamespace = null,
         CancellationToken ct = default)
     {
-        var request = new ListRequest
-        {
-            Prefix = prefix ?? "",
-            Limit = limit ?? 0,
-            PaginationToken = paginationToken ?? "",
-            Namespace = indexNamespace ?? ""
-        };
+        var request = new ListRequest { Namespace = indexNamespace ?? "" };
+        if (prefix != null)
+            request.Prefix = prefix;
+        if (limit != null)
+            request.Limit = limit.Value;
+        if (paginationToken != null)
+            request.PaginationToken = paginationToken;
 
         using var call = Grpc.ListAsync(request, Metadata, cancellationToken: ct);
         var response = await call.ConfigureAwait(false);
 
         return (
             response.Vectors.Select(v => v.Id).ToArray(),
-            response.Pagination.Next,
+            response.Pagination?.Next,
             response.Usage.ReadUnits);
     }
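The Index.cs changes below wire the new List operations to the transports and document them. As a usage sketch of the streaming iterator (the index name, prefix, and read-units budget are illustrative; assumes an index obtained as in the README):

```csharp
using System;
using Pinecone;

using var pinecone = new PineconeClient("[api-key]");
using var index = await pinecone.GetIndex("myIndex");

// Stream IDs lazily: pagination happens inside the iterator, and enumeration
// stops early once the optional read-units budget is exhausted.
await foreach (var id in index.List(prefix: "doc-", pageSize: 100, readUnitsThreshold: 1000))
{
    Console.WriteLine(id);
}
```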
diff --git a/src/Index.cs b/src/Index.cs
index 7bca572..53ff4e2 100644
--- a/src/Index.cs
+++ b/src/Index.cs
@@ -53,7 +53,7 @@ public sealed partial record Index<TTransport> : IDisposable
     where TTransport : ITransport<TTransport>
 {
 #if NET6_0_OR_GREATER
-    const int BatchParallelism = 20;
+    const int BatchParallelism = 10;
 
     readonly ILogger? Logger;
 #endif
@@ -171,12 +171,11 @@ public Task<uint> Upsert(
         CancellationToken ct = default)
     {
 #if NET6_0_OR_GREATER
-        const int parallelism = 20;
         var batchSize = GetBatchSize();
 
         if (vectors.TryGetNonEnumeratedCount(out var count) && count > batchSize)
         {
-            return Upsert(vectors, batchSize, parallelism, indexNamespace, ct);
+            return Upsert(vectors, batchSize, BatchParallelism, indexNamespace, ct);
         }
 #endif
         return Transport.Upsert(vectors, indexNamespace, ct);
@@ -284,6 +283,17 @@ public Task Update(
         return Transport.Update(id, values, sparseValues, metadata, indexNamespace, ct);
     }
 
+    ///
+    /// An asynchronous iterator that lists vector IDs in the index using specified page size, prefix, and read units threshold.
+    /// The iterator terminates when all vectors have been listed or the read units threshold has been reached (if specified).
+    ///
+    /// The prefix to filter the IDs by.
+    ///
+    /// The number of IDs to fetch per request. When left unspecified, the page size determined by the server is used.
+    /// As of the current version, the supported range is 1 to 100. Changing this value may affect throughput, memory and read units consumption.
+    ///
+    /// The maximum number of read units to consume. The iterator will stop when the threshold is reached.
+    /// Namespace to list vectors from. If no namespace is provided, the operation applies to all namespaces.
     public async IAsyncEnumerable<string> List(
         string? prefix = null,
         uint? pageSize = null,
@@ -291,29 +301,42 @@ public async IAsyncEnumerable<string> List(
         string? indexNamespace = null,
         [EnumeratorCancellation] CancellationToken ct = default)
     {
-        uint readUnits;
-        string? next = null;
+        var readUnits = 0u;
+        var next = (string?)null;
         var threshold = readUnitsThreshold ?? uint.MaxValue;
         do
         {
-            (var ids, next, readUnits) = await ListPaginated(
-                prefix, pageSize, next, indexNamespace, ct).ConfigureAwait(false);
+            (var ids, next, var units) = await ListPaginated(
+                prefix,
+                pageSize,
+                next,
+                indexNamespace,
+                ct).ConfigureAwait(false);
+            readUnits += units;
             foreach (var id in ids)
                 yield return id;
         } while (next != null && readUnits < threshold);
     }
 
-    public Task<(string[] VectorIds, string? PaginationToken, uint ReadUnits)> ListPaginated(
-        string? prefix = null,
-        uint? pageSize = null,
-        string? paginationToken = null,
-        string? indexNamespace = null,
-        CancellationToken ct = default)
-    {
-        return Transport.List(prefix, pageSize, paginationToken, indexNamespace, ct);
-    }
-
+    ///
+    /// Lists all vector IDs in the index filtered by the specified arguments by paginating through the entire index and collecting the results.
+    ///
+    /// This method is useful when performing data export or any similar case where materializing the contents of the index is necessary.
+    /// Otherwise, you may want to use either the List or ListPaginated methods for more efficient listing.
+    ///
+    /// The prefix to filter the IDs by.
+    ///
+    /// The number of IDs to fetch per request. When left unspecified, the page size determined by the server is used.
+    /// As of the current version, the supported range is 1 to 100. Changing this value may affect throughput, memory and read units consumption.
+    ///
+    /// The optional token to resume the listing from a specific point. See ListOperationException.PaginationToken for more information.
+    /// Namespace to list vectors from. If no namespace is provided, the operation applies to all namespaces.
+    ///
+    /// Thrown when an error occurs during the listing operation. The exception contains the IDs of the vectors that were successfully listed,
+    /// the pagination token that can be used to resume the listing, and the number of read units consumed.
+    ///
     public async Task<(string[] VectorIds, uint ReadUnits)> ListAll(
         string? prefix = null,
+        uint pageSize = 100,
         string? paginationToken = null,
         string? indexNamespace = null,
         CancellationToken ct = default)
@@ -325,9 +348,14 @@
     {
         do
         {
-            (var ids, next, readUnits) = await ListPaginated(
-                prefix, null, next, indexNamespace, ct).ConfigureAwait(false);
+            (var ids, next, var units) = await ListPaginated(
+                prefix,
+                pageSize,
+                next,
+                indexNamespace,
+                ct).ConfigureAwait(false);
             pages.Add(ids);
+            readUnits += units;
         } while (next != null);
     }
     catch (Exception ex)
@@ -342,6 +370,26 @@
         return (pages is [var single] ? single : pages.SelectMany(p => p).ToArray(), readUnits);
     }
 
+    ///
+    /// Lists vector IDs in the index using specified page size, prefix, and optional pagination token.
+    ///
+    /// The prefix to filter the IDs by.
+    ///
+    /// The number of IDs to fetch per request. When left unspecified, the page size determined by the server is used.
+    /// As of the current version, the supported range is 1 to 100. Changing this value may affect throughput, memory and read units consumption.
+    ///
+    /// The pagination token to continue a previous listing operation.
+    /// Namespace to list vectors from. If no namespace is provided, the operation applies to all namespaces.
+    public Task<(string[] VectorIds, string? PaginationToken, uint ReadUnits)> ListPaginated(
+        string? prefix = null,
+        uint? pageSize = null,
+        string? paginationToken = null,
+        string?
indexNamespace = null, + CancellationToken ct = default) + { + return Transport.List(prefix, pageSize, paginationToken, indexNamespace, ct); + } + /// /// Looks up and returns vectors by ID. The returned vectors include the vector data and/or metadata. /// @@ -361,12 +409,11 @@ public async IAsyncEnumerable List( public Task> Fetch(IEnumerable ids, string? indexNamespace = null, CancellationToken ct = default) { #if NET6_0_OR_GREATER - const int parallelism = 20; var batchSize = GetBatchSize(); if (ids.TryGetNonEnumeratedCount(out var count) && count > batchSize) { - return Fetch(ids, batchSize, parallelism, indexNamespace, ct); + return Fetch(ids, batchSize, BatchParallelism, indexNamespace, ct); } #endif return Transport.Fetch(ids, indexNamespace, ct); diff --git a/src/Pinecone.csproj b/src/Pinecone.csproj index dabe503..a273bb8 100644 --- a/src/Pinecone.csproj +++ b/src/Pinecone.csproj @@ -9,7 +9,7 @@ Pinecone;PineconeIo;Vector;Database; README.md Pinecone.NET is a fully-fledged C# library for the Pinecone vector database. -In the absence of an official SDK, it provides first-class support for Pinecone in C# and F#. +This is a community library that provides first-class support for Pinecone in C# and F#. @@ -41,7 +41,7 @@ In the absence of an official SDK, it provides first-class support for Pinecone runtime; build; native; contentfiles; analyzers; buildtransitive all - + diff --git a/src/PineconeClient.cs b/src/PineconeClient.cs index f3c47d7..a721c63 100644 --- a/src/PineconeClient.cs +++ b/src/PineconeClient.cs @@ -1,7 +1,6 @@ using System.Diagnostics.CodeAnalysis; using System.Net.Http.Json; using System.Text.Encodings.Web; -using Microsoft.Extensions.Http.Logging; using Microsoft.Extensions.Logging; using Pinecone.Grpc; using Pinecone.Rest; @@ -22,7 +21,7 @@ public sealed class PineconeClient : IDisposable /// API key used to connect to Pinecone. /// The logger factory to be used. public PineconeClient(string apiKey, ILoggerFactory? loggerFactory = null) - : this(apiKey, new Uri($"https://api.pinecone.io"), loggerFactory) + : this(apiKey, new Uri("https://api.pinecone.io"), loggerFactory) { } @@ -37,25 +36,17 @@ public PineconeClient(string apiKey, Uri baseUrl, ILoggerFactory? loggerFactory ThrowHelpers.CheckNullOrWhiteSpace(apiKey); ThrowHelpers.CheckNull(baseUrl); - if (loggerFactory != null) - { - var handler = new LoggingHttpMessageHandler( - loggerFactory.CreateLogger(), - Constants.RedactApiKeyOptions) - { InnerHandler = new HttpClientHandler() }; - - Http = new(handler) { BaseAddress = baseUrl }; - LoggerFactory = loggerFactory; - } - - Http ??= new() { BaseAddress = baseUrl }; + Http = new(loggerFactory?.CreateLoggingHandler() + ?? new HttpClientHandler()) + { BaseAddress = baseUrl }; Http.AddPineconeHeaders(apiKey); + LoggerFactory = loggerFactory; } /// /// Creates a new instance of the class. /// - /// API key used to connect to Pinecone. + /// /// API key used to connect to Pinecone. /// /// HTTP client used to connect to Pinecone. /// The logger factory to be used. public PineconeClient(string apiKey, HttpClient client, ILoggerFactory? loggerFactory = null) @@ -272,7 +263,7 @@ public async Task ListCollections(CancellationToken ct = de { return (await Http .GetFromJsonAsync("/collections", ClientContext.Default.ListCollectionsResult, ct) - .ConfigureAwait(false))?.Collections ?? []; + .ConfigureAwait(false)).Collections ?? 
[]; } /// diff --git a/src/Rest/RestTransport.cs b/src/Rest/RestTransport.cs index f62dac5..63b649e 100644 --- a/src/Rest/RestTransport.cs +++ b/src/Rest/RestTransport.cs @@ -2,7 +2,6 @@ using System.Net.Http.Json; using System.Text; using System.Text.Encodings.Web; -using Microsoft.Extensions.Http.Logging; using Microsoft.Extensions.Logging; namespace Pinecone.Rest; @@ -16,17 +15,9 @@ public RestTransport(string host, string apiKey, ILoggerFactory? loggerFactory) ThrowHelpers.CheckNullOrWhiteSpace(host); ThrowHelpers.CheckNullOrWhiteSpace(apiKey); - if (loggerFactory != null) - { - var handler = new LoggingHttpMessageHandler( - loggerFactory.CreateLogger(), - Constants.RedactApiKeyOptions) - { InnerHandler = new HttpClientHandler() }; - - Http = new(handler) { BaseAddress = new($"https://{host}") }; - } - - Http ??= new() { BaseAddress = new($"https://{host}") }; + Http = new(loggerFactory?.CreateLoggingHandler() + ?? new HttpClientHandler()) + { BaseAddress = new($"https://{host}") }; Http.AddPineconeHeaders(apiKey); } diff --git a/src/Rest/Types.cs b/src/Rest/Types.cs index 9cbfa36..01101af 100644 --- a/src/Rest/Types.cs +++ b/src/Rest/Types.cs @@ -2,12 +2,12 @@ namespace Pinecone.Rest; -internal sealed record ListIndexesResult +sealed record ListIndexesResult { public required IndexDetails[] Indexes { get; init; } } -internal sealed record CreateIndexRequest +sealed record CreateIndexRequest { public required string Name { get; init; } public required uint Dimension { get; init; } @@ -15,7 +15,7 @@ internal sealed record CreateIndexRequest public required IndexSpec Spec { get; init; } } -internal readonly record struct ConfigureIndexRequest +readonly record struct ConfigureIndexRequest { public int? Replicas { get; init; } @@ -23,18 +23,23 @@ internal readonly record struct ConfigureIndexRequest public string? PodType { get; init; } } -internal readonly record struct DescribeStatsRequest +readonly record struct DescribeStatsRequest { public MetadataMap? Filter { get; init; } } -internal readonly record struct CreateCollectionRequest +readonly record struct CreateCollectionRequest { public required string Name { get; init; } public required string Source { get; init; } } -internal record QueryRequest +readonly record struct ListCollectionsResult +{ + public required CollectionDetails[] Collections { get; init; } +} + +record QueryRequest { public string? Id { get; set; } public ReadOnlyMemory? Vector { get; set; } @@ -46,24 +51,24 @@ internal record QueryRequest public required bool IncludeMetadata { get; init; } } -internal readonly record struct QueryResponse +readonly record struct QueryResponse { public required ScoredVector[] Matches { get; init; } public required string Namespace { get; init; } } -internal readonly record struct UpsertRequest +readonly record struct UpsertRequest { public required IEnumerable Vectors { get; init; } public required string Namespace { get; init; } } -internal readonly record struct UpsertResponse +readonly record struct UpsertResponse { public required uint UpsertedCount { get; init; } } -internal record UpdateRequest +record UpdateRequest { public required string Id { get; init; } public ReadOnlyMemory? Values { get; init; } @@ -72,7 +77,7 @@ internal record UpdateRequest public required string Namespace { get; init; } } -internal readonly record struct ListResponse +readonly record struct ListResponse { public readonly record struct ListVector(string Id); public readonly record struct ListPagination(string? 
Next);
 
 
 }
 
-internal readonly record struct FetchResponse
+readonly record struct FetchResponse
 {
     public required Dictionary<string, Vector> Vectors { get; init; }
     public required string Namespace { get; init; }
 }
 
-internal readonly record struct DeleteRequest
+readonly record struct DeleteRequest
 {
     public string[]? Ids { get; init; }
     public required bool DeleteAll { get; init; }
diff --git a/src/ThrowHelpers.cs b/src/ThrowHelpers.cs
index ed7ca75..2e61af4 100644
--- a/src/ThrowHelpers.cs
+++ b/src/ThrowHelpers.cs
@@ -5,7 +5,7 @@
 
 namespace Pinecone;
 
-internal static class ThrowHelpers
+static class ThrowHelpers
 {
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     internal static void CheckGreaterThan(
diff --git a/src/Types/CollectionTypes.cs b/src/Types/CollectionTypes.cs
index fe66192..29129b5 100644
--- a/src/Types/CollectionTypes.cs
+++ b/src/Types/CollectionTypes.cs
@@ -2,11 +2,6 @@
 
 namespace Pinecone;
 
-public record ListCollectionsResult
-{
-    public required CollectionDetails[] Collections { get; init; }
-}
-
 public record CollectionDetails
 {
     public required string Name { get; init; }
@@ -18,6 +13,7 @@ public record CollectionDetails
     public required string Environment { get; init; }
 }
 
+[JsonConverter(typeof(JsonStringEnumConverter))]
 public enum CollectionStatus
 {
     Initializing = 0,
diff --git a/src/Types/IndexTypes.cs b/src/Types/IndexTypes.cs
index b36ffc8..369129a 100644
--- a/src/Types/IndexTypes.cs
+++ b/src/Types/IndexTypes.cs
@@ -221,49 +221,3 @@ public readonly record struct IndexNamespace
     ///
     public required uint VectorCount { get; init; }
 }
-
-public class ParallelOperationException<T>(
-    string message,
-    T partialResult,
-    Exception[] exceptions
-) : AggregateException(message, exceptions)
-{
-    public T PartialResult { get; } = partialResult;
-}
-
-public class ListOperationException(
-    Exception inner,
-    string[] vectorIds,
-    string? paginationToken,
-    uint readUnits
-) : Exception(inner.Message, inner)
-{
-    public string[] VectorIds { get; } = vectorIds;
-    public string? PaginationToken { get; } = paginationToken;
-    public uint ReadUnits { get; } = readUnits;
-}
-
-public class ParallelUpsertException(
-    uint upserted,
-    string message,
-    string[] failedBatchVectorIds,
-    Exception[] exceptions
-) : ParallelOperationException<uint>(message, upserted, exceptions)
-{
-    public string[] FailedBatchVectorIds { get; } = failedBatchVectorIds;
-}
-
-public class ParallelFetchException(
-    Dictionary<string, Vector> fetched,
-    string message,
-    Exception[] exceptions
-) : ParallelOperationException<Dictionary<string, Vector>>(message, fetched, exceptions);
-
-public class ParallelDeleteException(
-    string message,
-    string[] failedBatchVectorIds,
-    Exception[] exceptions
-) : AggregateException(message, exceptions)
-{
-    public string[] FailedBatchVectorIds { get; } = failedBatchVectorIds;
-}
\ No newline at end of file
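The exception types removed above move to VectorTypes.cs (next file) and gain documentation. A sketch of the resume pattern that `ListOperationException` enables (the index name is illustrative):

```csharp
using System;
using System.Collections.Generic;
using Pinecone;

using var pinecone = new PineconeClient("[api-key]");
using var index = await pinecone.GetIndex("myIndex");

var collected = new List<string>();
string? resumeToken = null;
while (true)
{
    try
    {
        var (ids, _) = await index.ListAll(paginationToken: resumeToken);
        collected.AddRange(ids);
        break;
    }
    catch (ListOperationException e)
    {
        // Keep what was already read, then resume where the listing stopped.
        collected.AddRange(e.VectorIds);
        if (e.PaginationToken is null) throw; // nothing to resume from
        resumeToken = e.PaginationToken;
    }
}
```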
diff --git a/src/Types/VectorTypes.cs b/src/Types/VectorTypes.cs
index 6cd438b..f21617f 100644
--- a/src/Types/VectorTypes.cs
+++ b/src/Types/VectorTypes.cs
@@ -185,3 +185,95 @@ public static bool TryCreate(T? value, out MetadataValue metadataValue)
     public static implicit operator MetadataValue(List? value) => new(value?.Select(v => (MetadataValue)v));
     public static implicit operator MetadataValue(List? value) => new(value?.Select(v => (MetadataValue)v));
 }
+
+///
+/// An exception that occurs when a List operation fails.
+///
+/// It contains the vector IDs that were successfully read before the operation failed,
+/// and the pagination token that can be used to resume and/or retry the operation.
+///
+public class ListOperationException(
+    Exception inner,
+    string[] vectorIds,
+    string? paginationToken,
+    uint readUnits
+) : Exception(inner.Message, inner)
+{
+    ///
+    /// The IDs of the vectors that were successfully read before the operation failed.
+    ///
+    public string[] VectorIds { get; } = vectorIds;
+
+    ///
+    /// The pagination token that can be passed to ListAll to resume or retry the operation
+    /// where it left off, or to ListPaginated to continue paginating manually.
+    ///
+    public string? PaginationToken { get; } = paginationToken;
+
+    ///
+    /// The number of read units consumed by the operation.
+    ///
+    public uint ReadUnits { get; } = readUnits;
+}
+
+///
+/// An exception that occurs when one or more parallel batch upserts fail in the scope of
+/// an Upsert operation.
+///
+/// It contains the vector count that was successfully upserted before the operation failed,
+/// the IDs of the vectors from the batches that could not be upserted, and the exceptions that caused the failure.
+///
+public class ParallelUpsertException(
+    uint upserted,
+    string message,
+    string[] failedBatchVectorIds,
+    Exception[] exceptions
+) : AggregateException(message, exceptions)
+{
+    ///
+    /// The number of vectors that were successfully upserted before the operation failed.
+    ///
+    public uint Upserted { get; } = upserted;
+
+    ///
+    /// The IDs of the vectors from the batches that failed to upsert.
+    ///
+    public string[] FailedBatchVectorIds { get; } = failedBatchVectorIds;
+}
+
+///
+/// An exception that occurs when one or more parallel batch fetches fail in the scope of
+/// a Fetch operation.
+///
+/// It contains the vectors that were successfully fetched before the operation failed,
+/// and the exceptions that caused the failure.
+///
+public class ParallelFetchException(
+    Dictionary<string, Vector> fetched,
+    string message,
+    Exception[] exceptions
+) : AggregateException(message, exceptions)
+{
+    ///
+    /// The vectors that were successfully fetched before the operation failed.
+    ///
+    public Dictionary<string, Vector> Fetched { get; } = fetched;
+}
+
+///
+/// An exception that occurs when one or more parallel batch delete operations fail in the scope of
+/// a Delete operation.
+///
+/// It contains the IDs of the vectors from the batches that could not be deleted, and the exceptions that caused the failure.
+///
+public class ParallelDeleteException(
+    string message,
+    string[] failedBatchVectorIds,
+    Exception[] exceptions
+) : AggregateException(message, exceptions)
+{
+    ///
+    /// The IDs of the vectors from the batches that could not be deleted.
+ /// + public string[] FailedBatchVectorIds { get; } = failedBatchVectorIds; +} diff --git a/test/DataTestBase.cs b/test/DataTestBase.cs index 669533f..b14683c 100644 --- a/test/DataTestBase.cs +++ b/test/DataTestBase.cs @@ -17,14 +17,14 @@ public async Task Basic_query() var x = 0.314f; var results = await Fixture.Index.Query( - [x * 0.1f, x * 0.2f, x * 0.3f, x * 0.4f, x * 0.5f, x * 0.6f, x * 0.7f, x * 0.8f], + new[] { x * 0.1f, x * 0.2f, x * 0.3f, x * 0.4f, x * 0.5f, x * 0.6f, x * 0.7f, x * 0.8f }, topK: 20); Assert.Equal(10, results.Length); results = await Fixture.Index.Query( - [0.7f, 7.7f, 77.7f, 777.7f, 7777.7f, 77777.7f, 777777.7f, 7777777.7f], + new[] { 0.7f, 7.7f, 77.7f, 777.7f, 7777.7f, 77777.7f, 777777.7f, 7777777.7f }, topK: 10, indexNamespace: "namespace1"); @@ -49,17 +49,17 @@ public async Task Query_with_basic_metadata_filter() ["type"] = "number set" }; - var result = await Fixture.Index.Query([3, 4, 5, 6, 7, 8, 9, 10], topK: 5, filter); + var result = await Fixture.Index.Query(new[] { 3f, 4, 5, 6, 7, 8, 9, 10 }, topK: 5, filter); Assert.Equal(3, result.Length); var ordered = result.OrderBy(x => x.Id).ToList(); Assert.Equal("metadata-vector-1", ordered[0].Id); - Assert.Equal([2, 3, 5, 7, 11, 13, 17, 19], ordered[0].Values); + Assert.Equal(new[] { 2f, 3, 5, 7, 11, 13, 17, 19 }, ordered[0].Values); Assert.Equal("metadata-vector-2", ordered[1].Id); - Assert.Equal([0, 1, 1, 2, 3, 5, 8, 13], ordered[1].Values); + Assert.Equal(new[] { 0f, 1, 1, 2, 3, 5, 8, 13 }, ordered[1].Values); Assert.Equal("metadata-vector-3", ordered[2].Id); - Assert.Equal([2, 1, 3, 4, 7, 11, 18, 29], ordered[2].Values); + Assert.Equal(new[] { 2f, 1, 3, 4, 7, 11, 18, 29 }, ordered[2].Values); } [PineconeFact] @@ -70,11 +70,11 @@ public async Task Query_include_metadata_in_result() ["subtype"] = "fibo" }; - var result = await Fixture.Index.Query([3, 4, 5, 6, 7, 8, 9, 10], topK: 5, filter, includeMetadata: true); + var result = await Fixture.Index.Query(new[] { 3f, 4, 5, 6, 7, 8, 9, 10 }, topK: 5, filter, includeMetadata: true); Assert.Single(result); Assert.Equal("metadata-vector-2", result[0].Id); - Assert.Equal([0, 1, 1, 2, 3, 5, 8, 13], result[0].Values); + Assert.Equal(new[] { 0f, 1, 1, 2, 3, 5, 8, 13 }, result[0].Values); var metadata = result[0].Metadata; Assert.NotNull(metadata); @@ -95,15 +95,15 @@ public async Task Query_with_metadata_filter_composite() ["overhyped"] = false }; - var result = await Fixture.Index.Query([3, 4, 5, 6, 7, 8, 9, 10], topK: 5, filter); + var result = await Fixture.Index.Query(new[] { 3f, 4, 5, 6, 7, 8, 9, 10 }, topK: 5, filter); Assert.Equal(2, result.Length); var ordered = result.OrderBy(x => x.Id).ToList(); Assert.Equal("metadata-vector-1", ordered[0].Id); - Assert.Equal([2, 3, 5, 7, 11, 13, 17, 19], ordered[0].Values); + Assert.Equal(new[] { 2f, 3, 5, 7, 11, 13, 17, 19 }, ordered[0].Values); Assert.Equal("metadata-vector-3", ordered[1].Id); - Assert.Equal([2, 1, 3, 4, 7, 11, 18, 29], ordered[1].Values); + Assert.Equal(new[] { 2f, 1, 3, 4, 7, 11, 18, 29 }, ordered[1].Values); } [PineconeFact] @@ -114,15 +114,15 @@ public async Task Query_with_metadata_list_contains() ["rank"] = new MetadataMap() { ["$in"] = new int[] { 12, 3 } } }; - var result = await Fixture.Index.Query([3, 4, 5, 6, 7, 8, 9, 10], topK: 10, filter, includeMetadata: true); + var result = await Fixture.Index.Query(new[] { 3f, 4, 5, 6, 7, 8, 9, 10 }, topK: 10, filter, includeMetadata: true); Assert.Equal(2, result.Length); var ordered = result.OrderBy(x => x.Id).ToList(); 
Assert.Equal("metadata-vector-1", ordered[0].Id); - Assert.Equal([2, 3, 5, 7, 11, 13, 17, 19], ordered[0].Values); + Assert.Equal(new[] { 2f, 3, 5, 7, 11, 13, 17, 19 }, ordered[0].Values); Assert.Equal("metadata-vector-3", ordered[1].Id); - Assert.Equal([2, 1, 3, 4, 7, 11, 18, 29], ordered[1].Values); + Assert.Equal(new[] { 2f, 1, 3, 4, 7, 11, 18, 29 }, ordered[1].Values); } [PineconeFact] @@ -144,15 +144,15 @@ public async Task Query_with_metadata_complex() } }; - var result = await Fixture.Index.Query([3, 4, 5, 6, 7, 8, 9, 10], topK: 10, filter, includeMetadata: true); + var result = await Fixture.Index.Query(new[] { 3f, 4, 5, 6, 7, 8, 9, 10 }, topK: 10, filter, includeMetadata: true); Assert.Equal(2, result.Length); var ordered = result.OrderBy(x => x.Id).ToList(); Assert.Equal("metadata-vector-1", ordered[0].Id); - Assert.Equal([2, 3, 5, 7, 11, 13, 17, 19], ordered[0].Values); + Assert.Equal(new[] { 2f, 3, 5, 7, 11, 13, 17, 19 }, ordered[0].Values); Assert.Equal("metadata-vector-3", ordered[1].Id); - Assert.Equal([2, 1, 3, 4, 7, 11, 18, 29], ordered[1].Values); + Assert.Equal(new[] { 2f, 1, 3, 4, 7, 11, 18, 29 }, ordered[1].Values); } [PineconeFact] @@ -165,11 +165,11 @@ public async Task Basic_fetch() Assert.Equal("basic-vector-1", orderedResults[0].Key); Assert.Equal("basic-vector-1", orderedResults[0].Value.Id); - Assert.Equal([0.5f, 1.0f, 1.5f, 2.0f, 2.5f, 3.0f, 3.5f, 4.0f], orderedResults[0].Value.Values.AsSpan()); + Assert.Equal(new[] { 0.5f, 1.0f, 1.5f, 2.0f, 2.5f, 3.0f, 3.5f, 4.0f }, orderedResults[0].Value.Values); Assert.Equal("basic-vector-3", orderedResults[1].Key); Assert.Equal("basic-vector-3", orderedResults[1].Value.Id); - Assert.Equal([1.5f, 3.0f, 4.5f, 6.0f, 7.5f, 9.0f, 10.5f, 12.0f], orderedResults[1].Value.Values.AsSpan()); + Assert.Equal(new[] { 1.5f, 3.0f, 4.5f, 6.0f, 7.5f, 9.0f, 10.5f, 12.0f }, orderedResults[1].Value.Values); } [PineconeFact] @@ -182,10 +182,10 @@ public async Task Fetch_sparse_vector() var resultVector = results["sparse-1"]; Assert.Equal("sparse-1", resultVector.Id); - Assert.Equal([5, 10, 15, 20, 25, 30, 35, 40], resultVector.Values.AsSpan()); + Assert.Equal(new[] { 5f, 10, 15, 20, 25, 30, 35, 40 }, resultVector.Values); Assert.NotNull(resultVector.SparseValues); - Assert.Equal([1, 4], resultVector.SparseValues.Value.Indices); - Assert.Equal([0.2f, 0.5f], resultVector.SparseValues.Value.Values.AsSpan()); + Assert.Equal(new[] { 1u, 4u }, resultVector.SparseValues.Value.Indices); + Assert.Equal(new[] { 0.2f, 0.5f }, resultVector.SparseValues.Value.Values); } [PineconeFact] @@ -194,17 +194,18 @@ public async Task Basic_vector_upsert_update_delete() var testNamespace = "upsert-update-delete-namespace"; var newVectors = new Vector[] { - new() { Id = "update-vector-id-1", Values = [1, 3, 5, 7, 9, 11, 13, 15] }, - new() { Id = "update-vector-id-2", Values = [2, 3, 5, 7, 11, 13, 17, 19] }, - new() { Id = "update-vector-id-3", Values = [2, 1, 3, 4, 7, 11, 18, 29] }, + new() { Id = "update-vector-id-1", Values = new[] { 1f, 3, 5, 7, 9, 11, 13, 15 } }, + new() { Id = "update-vector-id-2", Values = new[] { 2f, 3, 5, 7, 11, 13, 17, 19 } }, + new() { Id = "update-vector-id-3", Values = new[] { 2f, 1, 3, 4, 7, 11, 18, 29 } }, }; await Fixture.InsertAndWait(newVectors, testNamespace); var initialFetch = await Fixture.Index.Fetch(["update-vector-id-2"], testNamespace); var vector = initialFetch["update-vector-id-2"]; - vector.Values[0] = 23; - await Fixture.Index.Update(vector, testNamespace); + var values = vector.Values.ToArray(); + values[0] = 23; + await 
Fixture.Index.Update(vector with { Values = values }, testNamespace); Vector updatedVector; var attemptCount = 0; @@ -214,10 +215,10 @@ public async Task Basic_vector_upsert_update_delete() attemptCount++; var finalFetch = await Fixture.Index.Fetch(["update-vector-id-2"], testNamespace); updatedVector = finalFetch["update-vector-id-2"]; - } while (updatedVector.Values[0] != 23 && attemptCount < DataTestFixtureBase.MaxAttemptCount); + } while (updatedVector.Values.Span[0] != 23 && attemptCount < DataTestFixtureBase.MaxAttemptCount); Assert.Equal("update-vector-id-2", updatedVector.Id); - Assert.Equal([23, 3, 5, 7, 11, 13, 17, 19], updatedVector.Values); + Assert.Equal(new[] { 23f, 3, 5, 7, 11, 13, 17, 19 }, updatedVector.Values); await Fixture.DeleteAndWait(["update-vector-id-1"], testNamespace); @@ -233,14 +234,14 @@ public async Task Upsert_on_existing_vector_makes_an_update() var testNamespace = "upsert-on-existing"; var newVectors = new Vector[] { - new() { Id = "update-vector-id-1", Values = [1, 3, 5, 7, 9, 11, 13, 15] }, - new() { Id = "update-vector-id-2", Values = [2, 3, 5, 7, 11, 13, 17, 19] }, - new() { Id = "update-vector-id-3", Values = [2, 1, 3, 4, 7, 11, 18, 29] }, + new() { Id = "update-vector-id-1", Values = new[] { 1f, 3, 5, 7, 9, 11, 13, 15 } }, + new() { Id = "update-vector-id-2", Values = new[] { 2f, 3, 5, 7, 11, 13, 17, 19 } }, + new() { Id = "update-vector-id-3", Values = new[] { 2f, 1, 3, 4, 7, 11, 18, 29 } }, }; await Fixture.InsertAndWait(newVectors, testNamespace); - var newExistingVector = new Vector() { Id = "update-vector-id-3", Values = [0, 1, 1, 2, 3, 5, 8, 13] }; + var newExistingVector = new Vector() { Id = "update-vector-id-3", Values = new[] { 0f, 1, 1, 2, 3, 5, 8, 13 } }; await Fixture.Index.Upsert([newExistingVector], testNamespace); @@ -252,10 +253,10 @@ public async Task Upsert_on_existing_vector_makes_an_update() attemptCount++; var finalFetch = await Fixture.Index.Fetch(["update-vector-id-3"], testNamespace); updatedVector = finalFetch["update-vector-id-3"]; - } while (updatedVector.Values[0] != 0 && attemptCount < DataTestFixtureBase.MaxAttemptCount); + } while (updatedVector.Values.Span[0] != 0 && attemptCount < DataTestFixtureBase.MaxAttemptCount); Assert.Equal("update-vector-id-3", updatedVector.Id); - Assert.Equal([0, 1, 1, 2, 3, 5, 8, 13], updatedVector.Values); + Assert.Equal(new[] { 0f, 1, 1, 2, 3, 5, 8, 13 }, updatedVector.Values); } [PineconeFact] @@ -264,9 +265,9 @@ public async Task Delete_all_vectors_from_namespace() var testNamespace = "delete-all-namespace"; var newVectors = new Vector[] { - new() { Id = "delete-all-vector-id-1", Values = [1, 3, 5, 7, 9, 11, 13, 15] }, - new() { Id = "delete-all-vector-id-2", Values = [2, 3, 5, 7, 11, 13, 17, 19] }, - new() { Id = "delete-all-vector-id-3", Values = [2, 1, 3, 4, 7, 11, 18, 29] }, + new() { Id = "delete-all-vector-id-1", Values = new[] { 1f, 3, 5, 7, 9, 11, 13, 15 } }, + new() { Id = "delete-all-vector-id-2", Values = new[] { 2f, 3, 5, 7, 11, 13, 17, 19 } }, + new() { Id = "delete-all-vector-id-3", Values = new[] { 2f, 1, 3, 4, 7, 11, 18, 29 } }, }; await Fixture.InsertAndWait(newVectors, testNamespace); @@ -307,7 +308,7 @@ public async Task Logging_is_properly_wired() Assert.Contains($"[Pinecone.PineconeClient | Trace]: List collections started.", logOutput); Assert.Contains(logOutput, x => x.StartsWith("[Pinecone.PineconeClient | Debug]: List collections completed - collections found: ")); - await loggingIndex.Query([0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f], topK: 2); + await 
loggingIndex.Query(new[] { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f }, topK: 2); Assert.Contains($"[Pinecone.Index | Trace]: Query index '{Fixture.IndexName}' based on vector values started.", logOutput); Assert.Contains($"[Pinecone.Index | Debug]: Query index '{Fixture.IndexName}' based on vector values completed.", logOutput); diff --git a/test/DataTestFixtureBase.cs b/test/DataTestFixtureBase.cs index 1a57c79..bb63930 100644 --- a/test/DataTestFixtureBase.cs +++ b/test/DataTestFixtureBase.cs @@ -38,7 +38,7 @@ private async Task AddSampleDataAsync() var basicVectors = Enumerable.Range(1, 5).Select(i => new Vector { Id = "basic-vector-" + i, - Values = [i * 0.5f, i * 1.0f, i * 1.5f, i * 2.0f, i * 2.5f, i * 3.0f, i * 3.5f, i * 4.0f], + Values = new[] { i * 0.5f, i * 1.0f, i * 1.5f, i * 2.0f, i * 2.5f, i * 3.0f, i * 3.5f, i * 4.0f }, }).ToList(); await InsertAndWait(basicVectors); @@ -46,7 +46,7 @@ private async Task AddSampleDataAsync() var customNamespaceVectors = Enumerable.Range(1, 3).Select(i => new Vector { Id = "custom-namespace-vector-" + i, - Values = [i * 1.1f, i * 2.2f, i * 3.3f, i * 4.4f, i * 5.5f, i * 6.6f, i * 7.7f, i * 8.8f], + Values = new[] { i * 1.1f, i * 2.2f, i * 3.3f, i * 4.4f, i * 5.5f, i * 6.6f, i * 7.7f, i * 8.8f }, }).ToList(); await InsertAndWait(customNamespaceVectors, "namespace1"); @@ -78,17 +78,17 @@ private async Task AddSampleDataAsync() var metadataVectors = new Vector[] { - new() { Id = "metadata-vector-1", Values = [2, 3, 5, 7, 11, 13, 17, 19], Metadata = metadata1 }, - new() { Id = "metadata-vector-2", Values = [0, 1, 1, 2, 3, 5, 8, 13], Metadata = metadata2 }, - new() { Id = "metadata-vector-3", Values = [2, 1, 3, 4, 7, 11, 18, 29], Metadata = metadata3 }, + new() { Id = "metadata-vector-1", Values = new[] { 2f, 3, 5, 7, 11, 13, 17, 19 }, Metadata = metadata1 }, + new() { Id = "metadata-vector-2", Values = new[] { 0f, 1, 1, 2, 3, 5, 8, 13 }, Metadata = metadata2 }, + new() { Id = "metadata-vector-3", Values = new[] { 2f, 1, 3, 4, 7, 11, 18, 29 }, Metadata = metadata3 }, }; await InsertAndWait(metadataVectors); var sparseVectors = new Vector[] { - new() { Id = "sparse-1", Values = [5, 10, 15, 20, 25, 30, 35, 40], SparseValues = new() { Indices = [1, 4], Values = [0.2f, 0.5f] } }, - new() { Id = "sparse-2", Values = [15, 110, 115, 120, 125, 130, 135, 140], SparseValues = new() { Indices = [2, 3], Values = [0.5f, 0.8f] } }, + new() { Id = "sparse-1", Values = new[] { 5f, 10, 15, 20, 25, 30, 35, 40 }, SparseValues = new() { Indices = new[] { 1u, 4u }, Values = new[] { 0.2f, 0.5f } } }, + new() { Id = "sparse-2", Values = new[] { 15f, 110, 115, 120, 125, 130, 135, 140 }, SparseValues = new() { Indices = new[] { 2u, 3u }, Values = new[] { 0.5f, 0.8f } } }, }; await InsertAndWait(sparseVectors);
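A closing note on the partial-failure behavior documented above: when a batched operation fails part-way, the thrown exception carries everything needed to retry only the failed portion. A sketch of the retry pattern (index name, sizes, and IDs are illustrative; assumes the upsert is large enough to be split into parallel batches):

```csharp
using System;
using System.Linq;
using Pinecone;

using var pinecone = new PineconeClient("[api-key]");
using var index = await pinecone.GetIndex("myIndex");

var vectors = Enumerable.Range(0, 1_000)
    .Select(i => new Vector
    {
        Id = $"v-{i}",
        Values = Enumerable.Range(0, 1536).Select(_ => Random.Shared.NextSingle()).ToArray(),
    })
    .ToArray();

try
{
    await index.Upsert(vectors);
}
catch (ParallelUpsertException e)
{
    // Batches are independent: vectors outside FailedBatchVectorIds are already
    // stored, so only the failed batches need to be sent again.
    var failed = e.FailedBatchVectorIds.ToHashSet();
    await index.Upsert(vectors.Where(v => failed.Contains(v.Id)));
}
```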