From 685eb3b9c208d57948e4ae3e95c03e7797e101aa Mon Sep 17 00:00:00 2001
From: Martin Evans
Date: Sun, 6 Aug 2023 20:29:38 +0100
Subject: [PATCH] Replaced `nint` with `float[]?` in Model params, which is
 much more user-friendly!

---
 LLama/Abstractions/IModelParams.cs |  2 +-
 LLama/Common/ModelParams.cs        |  7 +++---
 LLama/Utils.cs                     | 35 +++++++++++++++---------------
 3 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs
index 40c5432b7..fdc911521 100644
--- a/LLama/Abstractions/IModelParams.cs
+++ b/LLama/Abstractions/IModelParams.cs
@@ -93,7 +93,7 @@ public interface IModelParams
     /// <summary>
     /// how split tensors should be distributed across GPUs
    /// </summary>
-    nint TensorSplits { get; set; }
+    float[]? TensorSplits { get; set; }
 
     /// <summary>
     /// Grouped-Query Attention
diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index 72c779379..5cb810783 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -1,14 +1,13 @@
 using LLama.Abstractions;
 using System;
-using System.Collections.Generic;
-using System.Text;
 
 namespace LLama.Common
 {
     /// <summary>
     /// The parameters for initializing a LLama model.
     /// </summary>
-    public class ModelParams : IModelParams
+    public class ModelParams
+        : IModelParams
     {
         /// <summary>
         /// Model context size (n_ctx)
@@ -85,7 +84,7 @@ public class ModelParams : IModelParams
         /// <summary>
         /// how split tensors should be distributed across GPUs
         /// </summary>
-        public nint TensorSplits { get; set; }
+        public float[]? TensorSplits { get; set; }
 
         /// <summary>
         /// Grouped-Query Attention
diff --git a/LLama/Utils.cs b/LLama/Utils.cs
index 1454693fd..7f05c1c7e 100644
--- a/LLama/Utils.cs
+++ b/LLama/Utils.cs
@@ -15,8 +15,13 @@ public static class Utils
     {
         public static SafeLLamaContextHandle InitLLamaContextFromModelParams(IModelParams @params)
         {
-            var lparams = NativeApi.llama_context_default_params();
+            if (!File.Exists(@params.ModelPath))
+                throw new FileNotFoundException($"The model file does not exist: {@params.ModelPath}");
+
+            if (@params.TensorSplits != null && @params.TensorSplits.Length != 1)
+                throw new ArgumentException("Multi-GPU support is not yet supported by llama.cpp or LLamaSharp.");
 
+            var lparams = NativeApi.llama_context_default_params();
             lparams.n_ctx = @params.ContextSize;
             lparams.n_batch = @params.BatchSize;
             lparams.main_gpu = @params.MainGpu;
@@ -34,27 +39,21 @@ public static SafeLLamaContextHandle InitLLamaContextFromModelParams(IModelParam
             lparams.rope_freq_scale = @params.RopeFrequencyScale;
             lparams.mul_mat_q = @params.MulMatQ;
 
-            /*
-            if (@params.TensorSplits.Length != 1)
-            {
-                throw new ArgumentException("Currently multi-gpu support is not supported by " +
-                    "both llama.cpp and LLamaSharp.");
-            }*/
-
-            lparams.tensor_split = @params.TensorSplits;
-
-            if (!File.Exists(@params.ModelPath))
+            unsafe
             {
-                throw new FileNotFoundException($"The model file does not exist: {@params.ModelPath}");
-            }
+                fixed (float* splits = @params.TensorSplits)
+                {
+                    lparams.tensor_split = (nint)splits;
 
-            var model = SafeLlamaModelHandle.LoadFromFile(@params.ModelPath, lparams);
-            var ctx = SafeLLamaContextHandle.Create(model, lparams);
+                    var model = SafeLlamaModelHandle.LoadFromFile(@params.ModelPath, lparams);
+                    var ctx = SafeLLamaContextHandle.Create(model, lparams);
 
-            if (!string.IsNullOrEmpty(@params.LoraAdapter))
-                model.ApplyLoraFromFile(@params.LoraAdapter, @params.LoraBase, @params.Threads);
+                    if (!string.IsNullOrEmpty(@params.LoraAdapter))
+                        model.ApplyLoraFromFile(@params.LoraAdapter, @params.LoraBase, @params.Threads);
 
-            return ctx;
+                    return ctx;
+                }
+            }
         }
 
         public static IEnumerable<llama_token> Tokenize(SafeLLamaContextHandle ctx, string text, bool add_bos, Encoding encoding)
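
Marshalling note: the `fixed` statement above makes the null case work for
free. The C# language specification defines that fixing a null (or
zero-length) array yields a null pointer, so leaving `TensorSplits` unset
passes `tensor_split = 0` to llama.cpp, which then falls back to its default
split. A self-contained sketch of that behaviour (compile with `-unsafe`;
`FixedNullDemo` is just an illustrative name, not part of this patch):

    using System;

    class FixedNullDemo
    {
        static unsafe void Main()
        {
            float[]? splits = null;

            // Fixing a null array yields a null pointer (C# spec, fixed
            // statement), so an unset TensorSplits reaches native code as 0.
            fixed (float* ptr = splits)
            {
                Console.WriteLine((nint)ptr); // prints 0
            }
        }
    }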
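
Caller-side, the property now takes a plain array instead of a raw pointer.
A minimal usage sketch (assumptions: the `ModelParams` constructor shown and
the model path are illustrative and may differ between LLamaSharp versions):

    using LLama;
    using LLama.Common;

    // Single-element split: arrays with more than one element are currently
    // rejected, and null keeps llama.cpp's default behaviour.
    var @params = new ModelParams("models/llama-7b.ggml.bin")
    {
        TensorSplits = new float[] { 1.0f },
    };

    var ctx = Utils.InitLLamaContextFromModelParams(@params);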