From 10f88ebd0e982fab8349d9816631bfaae7e179ce Mon Sep 17 00:00:00 2001
From: zombieguy
Date: Wed, 16 Aug 2023 00:09:52 +0100
Subject: [PATCH 1/2] Potential fix for .Net Framework issues (#103)

* Added a bool to sbyte Utils converter

As an attempt to avoid using any MarshalAs attribute for .Net Framework
support, this Utils method takes a bool value and returns an sbyte: 1 for
true, 0 for false.

* Changed all bool "MarshalAs" types to sbytes

Changed all previous bool fields with "MarshalAs" attributes to sbytes and
changed all their setters to use the Utils.BoolToSignedByte() converter
method.

* Fixed Utils bool converter & added sbyte to bool

Improved the Utils bool converter to simply cast to an sbyte, removing the
unneeded sbyte array, and added an sbyte to bool converter for the reverse
direction, assuming any value above 0 is true and that no bools are packed
into the single-byte integer.

* bool to & from sbyte conversions via properties

All 1-byte bools are now handled where they "sit", via public properties
that perform the conversions, so all external code can keep communicating
with the data exactly as it did before.
---
 LLama/Native/LLamaContextParams.cs       | 65 ++++++++++++++++++------
 LLama/Native/LLamaModelQuantizeParams.cs | 16 ++++--
 LLama/Native/LLamaTokenDataArray.cs      |  8 ++-
 LLama/OldVersion/Utils.cs                |  1 +
 LLama/Utils.cs                           | 21 ++++++++
 5 files changed, 89 insertions(+), 22 deletions(-)

diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index 0ede4e76e..5fd900d7a 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -32,7 +32,7 @@ public struct LLamaContextParams
         /// <summary>
         /// rms norm epsilon (TEMP - will be moved to model hparams)
         /// </summary>
-        public float rms_norm_eps;
+        public float rms_norm_eps;
         /// <summary>
         /// number of layers to store in VRAM
         /// </summary>
@@ -76,49 +76,82 @@ public struct LLamaContextParams
         /// <summary>
         /// if true, reduce VRAM usage at the cost of performance
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool low_vram;
+        public bool low_vram
+        {
+            get => Utils.SignedByteToBool(_low_vram);
+            set => _low_vram = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _low_vram;

         /// <summary>
         /// if true, use experimental mul_mat_q kernels
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool mul_mat_q;
+        public bool mul_mat_q
+        {
+            get => Utils.SignedByteToBool(_mul_mat_q);
+            set => _mul_mat_q = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _mul_mat_q;

         /// <summary>
         /// use fp16 for KV cache
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool f16_kv;
+        public bool f16_kv
+        {
+            get => Utils.SignedByteToBool(_f16_kv);
+            set => _f16_kv = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _f16_kv;

         /// <summary>
         /// the llama_eval() call computes all logits, not just the last one
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool logits_all;
+        public bool logits_all
+        {
+            get => Utils.SignedByteToBool(_logits_all);
+            set => _logits_all = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _logits_all;

         /// <summary>
         /// only load the vocabulary, no weights
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool vocab_only;
+        public bool vocab_only
+        {
+            get => Utils.SignedByteToBool(_vocab_only);
+            set => _vocab_only = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _vocab_only;

         /// <summary>
         /// use mmap if possible
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool use_mmap;
+        public bool use_mmap
+        {
+            get => Utils.SignedByteToBool(_use_mmap);
+            set => _use_mmap = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _use_mmap;

         /// <summary>
         /// force system to keep model in RAM
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool use_mlock;
+        public bool use_mlock
+        {
+            get => Utils.SignedByteToBool(_use_mlock);
+            set => _use_mlock = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _use_mlock;

         /// <summary>
         /// embedding mode only
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool embedding;
+        public bool embedding
+        {
+            get => Utils.SignedByteToBool(_embedding);
+            set => _embedding = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _embedding;
     }
 }
diff --git a/LLama/Native/LLamaModelQuantizeParams.cs b/LLama/Native/LLamaModelQuantizeParams.cs
index f23c1d2ed..53887ac06 100644
--- a/LLama/Native/LLamaModelQuantizeParams.cs
+++ b/LLama/Native/LLamaModelQuantizeParams.cs
@@ -20,13 +20,21 @@ public struct LLamaModelQuantizeParams
         /// <summary>
         /// allow quantizing non-f32/f16 tensors
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool allow_requantize;
+        public bool allow_requantize
+        {
+            get => Utils.SignedByteToBool(_allow_requantize);
+            set => _allow_requantize = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _allow_requantize;

         /// <summary>
         /// quantize output.weight
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool quantize_output_tensor;
+        public bool quantize_output_tensor
+        {
+            get => Utils.SignedByteToBool(_quantize_output_tensor);
+            set => _quantize_output_tensor = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _quantize_output_tensor;
     }
 }
diff --git a/LLama/Native/LLamaTokenDataArray.cs b/LLama/Native/LLamaTokenDataArray.cs
index 6e2c4a46b..b90235d25 100644
--- a/LLama/Native/LLamaTokenDataArray.cs
+++ b/LLama/Native/LLamaTokenDataArray.cs
@@ -51,8 +51,12 @@ public struct LLamaTokenDataArrayNative
         /// <summary>
         /// Indicates if the items in the array are sorted
         /// </summary>
-        [MarshalAs(UnmanagedType.I1)]
-        public bool sorted;
+        public bool sorted
+        {
+            get => Utils.SignedByteToBool(_sorted);
+            set => _sorted = Utils.BoolToSignedByte(value);
+        }
+        private sbyte _sorted;

         /// <summary>
         /// Create a new LLamaTokenDataArrayNative around the data in the LLamaTokenDataArray
diff --git a/LLama/OldVersion/Utils.cs b/LLama/OldVersion/Utils.cs
index df8adddd7..eb0986dd4 100644
--- a/LLama/OldVersion/Utils.cs
+++ b/LLama/OldVersion/Utils.cs
@@ -82,5 +82,6 @@ public static unsafe string PtrToStringUTF8(IntPtr ptr)
             return Encoding.UTF8.GetString(bytes.ToArray());
 #endif
         }
+
     }
 }
diff --git a/LLama/Utils.cs b/LLama/Utils.cs
index de363a3ed..52f5c394b 100644
--- a/LLama/Utils.cs
+++ b/LLama/Utils.cs
@@ -90,5 +90,26 @@ public static string PtrToString(IntPtr ptr, Encoding encoding)
             }
 #endif
         }
+
+        /// <summary>
+        /// Converts a bool "value" to a signed byte of "1" for true and "0" for false to be compatible with a 1 byte C-style bool.
+        /// </summary>
+        /// <param name="value"></param>
+        /// <returns></returns>
+        public static sbyte BoolToSignedByte(bool value)
+        {
+            return value ? (sbyte)1 : (sbyte)0;
+        }
+
+        /// <summary>
+        /// Converts a sbyte "value" to a C# bool.
+        /// </summary>
+        /// <param name="value"></param>
+        /// <returns></returns>
+        public static bool SignedByteToBool(sbyte value)
+        {
+            return value > 0 ? true : false;
+        }
+        }
+
     }
 }

From 81a26a27ec7dc88d74c1a3d7ac925eb42c89ce1f Mon Sep 17 00:00:00 2001
From: Shaun Cook
Date: Wed, 16 Aug 2023 14:34:29 +0100
Subject: [PATCH 2/2] Add missing semi-colon to README sample code

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2bb6a17f3..b4abc5fe0 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ LLamaSharp provides two ways to run inference: `LLamaExecutor` and `ChatSession`
 using LLama.Common;
 using LLama;

-string modelPath = "" // change it to your own model path
+string modelPath = ""; // change it to your own model path
 var prompt = "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.\r\n\r\nUser: Hello, Bob.\r\nBob: Hello. How may I help you today?\r\nUser: Please tell me the largest city in Europe.\r\nBob: Sure. The largest city in Europe is Moscow, the capital of Russia.\r\nUser:"; // use the "chat-with-bob" prompt here.

 // Initialize a chat session
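
A minimal sketch (not part of either patch above) of the pattern PATCH 1/2 introduces: a private sbyte backing field keeps each flag one byte wide and the struct blittable, while a public bool property converts on the way in and out, so no [MarshalAs(UnmanagedType.I1)] attribute is needed. The ExampleParams struct and Demo class below are hypothetical stand-ins for the real LLamaContextParams and calling code; only the converter signatures mirror the patch.

using System;
using System.Runtime.InteropServices;

internal static class Utils
{
    // 1-byte C-style bool: 1 == true, 0 == false (mirrors the helpers added in LLama/Utils.cs).
    public static sbyte BoolToSignedByte(bool value) => value ? (sbyte)1 : (sbyte)0;
    public static bool SignedByteToBool(sbyte value) => value > 0;
}

[StructLayout(LayoutKind.Sequential)]
internal struct ExampleParams // hypothetical stand-in for LLamaContextParams
{
    // The native side only ever sees this single signed byte...
    private sbyte _use_mmap;

    // ...while managed callers read and write an ordinary bool.
    public bool use_mmap
    {
        get => Utils.SignedByteToBool(_use_mmap);
        set => _use_mmap = Utils.BoolToSignedByte(value);
    }
}

internal static class Demo
{
    private static void Main()
    {
        var p = new ExampleParams { use_mmap = true };
        // Properties do not affect layout: the struct is exactly one byte,
        // so it marshals unchanged on .NET Framework without any MarshalAs attribute.
        Console.WriteLine(Marshal.SizeOf<ExampleParams>()); // 1
        Console.WriteLine(p.use_mmap);                      // True
    }
}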