diff --git a/.github/prepare_release.sh b/.github/prepare_release.sh
index 3fc2e3937..e44099978 100755
--- a/.github/prepare_release.sh
+++ b/.github/prepare_release.sh
@@ -22,13 +22,19 @@ fi
 mkdir ./temp;
 mkdir ./temp/runtimes;
-cp ./LLama/runtimes/*.* ./temp/runtimes/;
+# For sure it could be done better but cp -R did not work on osx
+mkdir ./temp/runtimes/osx-arm64
+mkdir ./temp/runtimes/osx-x64
+cp ./LLama/runtimes/*.* ./temp/runtimes/;
+cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
+cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
 cp ./LLama/runtimes/build/*.* ./temp/;
 
 # get the current version
 cd temp;
 dotnet add package LLamaSharp;
 version=$(dotnet list temp.csproj package | grep LLamaSharp);
+# TODO: This didn't work on osx... we need a solution
 read -ra arr <<< "$version"
 version="${arr[-1]}"
 echo "The latest version: $version";
@@ -71,7 +77,7 @@ cd temp
 nuget pack LLamaSharp.Backend.Cpu.nuspec -version $updated_version
 nuget pack LLamaSharp.Backend.Cuda11.nuspec -version $updated_version
 nuget pack LLamaSharp.Backend.Cuda12.nuspec -version $updated_version
-nuget pack LLamaSharp.Backend.MacMetal.nuspec -version $updated_version
+
 cd ..
 
 exit 0
diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index 26de28b54..5dda197cd 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -6,9 +6,9 @@ on:
       cublas:
         type: boolean
         description: Build CUBLAS binaries
-      macos:
+      osx:
         type: boolean
-        description: Build MacOS binaries
+        description: Build OSX binaries
   push:
     branches: [cron_job]
   #schedule:
@@ -145,8 +145,10 @@ jobs:
       fail-fast: true
       matrix:
         include:
-          - build: 'metal'
+          - build: 'arm64'
            defines: '-DCMAKE_OSX_ARCHITECTURES=arm64'
+          - build: 'x64'
+            defines: '-DLLAMA_METAL=OFF -DCMAKE_OSX_ARCHITECTURES=x86_64'
     runs-on: macos-latest
     steps:
       - uses: actions/checkout@v3
@@ -167,7 +169,7 @@ jobs:
         uses: actions/upload-artifact@v3
         with:
           path: ./build/libllama.dylib
-          name: llama-bin-macos-${{ matrix.build }}.dylib
+          name: llama-bin-osx-${{ matrix.build }}.dylib
       - name: Upload Metal
         uses: actions/upload-artifact@v3
         with:
@@ -210,9 +212,13 @@ jobs:
       - name: Rearrange MacOS files
         if: ${{ github.event.inputs.macos }}
         run: |
-          mkdir deps/macos-metal
-          cp artifacts/llama-bin-macos-metal.dylib/libllama.dylib deps/macos-metal/libllama.dylib
-          cp artifacts/ggml-metal.metal/ggml-metal.metal deps/macos-metal/ggml-metal.metal
+          mkdir deps/osx-arm64
+          mkdir deps/osx-x64
+
+          cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
+          cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
+          cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
+
 
       - name: Rearrange CUDA files
         if: ${{ github.event.inputs.cublas }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1c08e6e57..eb0e936f1 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,14 +12,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        build: [linux-release, windows-release]
+        build: [linux-release, windows-release, osx-release]
         include:
           - build: linux-release
             os: ubuntu-latest
             config: release
-          # - build: macos-release
-          #   os: macos-latest
-          #   config: release
+          - build: osx-release
+            os: macos-latest
+            config: release
           - build: windows-release
             os: windows-2019
             config: release
diff --git a/LLama.Examples/NewVersion/GetEmbeddings.cs b/LLama.Examples/NewVersion/GetEmbeddings.cs
index 1e5b19be3..fe9e3ea80 100644
--- a/LLama.Examples/NewVersion/GetEmbeddings.cs
+++ b/LLama.Examples/NewVersion/GetEmbeddings.cs
@@ -4,7 +4,7 @@ namespace LLama.Examples.NewVersion
 {
     public class GetEmbeddings
     {
-        public static Task Run()
+        public static void Run()
         {
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -23,7 +23,6 @@ public static Task Run()
                 Console.WriteLine(string.Join(", ", embedder.GetEmbeddings(text)));
                 Console.WriteLine();
             }
-            return Task.CompletedTask;
         }
     }
 }
diff --git a/LLama.Examples/NewVersion/QuantizeModel.cs b/LLama.Examples/NewVersion/QuantizeModel.cs
index 456d89290..71966af8f 100644
--- a/LLama.Examples/NewVersion/QuantizeModel.cs
+++ b/LLama.Examples/NewVersion/QuantizeModel.cs
@@ -2,7 +2,7 @@
 {
     public class QuantizeModel
     {
-        public static Task Run()
+        public static void Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
@@ -21,8 +21,6 @@ public static Task Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
-
-            return Task.CompletedTask;
         }
     }
 }
diff --git a/LLama.Examples/NewVersion/TestRunner.cs b/LLama.Examples/NewVersion/TestRunner.cs
index c89cba305..a21a2eed4 100644
--- a/LLama.Examples/NewVersion/TestRunner.cs
+++ b/LLama.Examples/NewVersion/TestRunner.cs
@@ -1,54 +1,109 @@
-using System.Linq.Expressions;
-using Spectre.Console;
-
-namespace LLama.Examples.NewVersion
+namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {
-        static Dictionary<string, Func<Task>> Examples = new Dictionary<string, Func<Task>>
-        {
-            {"Run a chat session without stripping the role names.", () => ChatSessionWithRoleName.Run()},
-            {"Run a chat session with the role names stripped.",()=> ChatSessionStripRoleName.Run()},
-            {"Interactive mode chat by using executor.",()=> InteractiveModeExecute.Run()},
-            {"Instruct mode chat by using executor.",()=> InstructModeExecute.Run()},
-            {"Stateless mode chat by using executor.",()=> StatelessModeExecute.Run()},
-            {"Load and save chat session.",()=> SaveAndLoadSession.Run()},
-            {"Load and save state of model and executor.",()=> LoadAndSaveState.Run()},
-            {"Get embeddings from LLama model.",()=> GetEmbeddings.Run()},
-            {"Quantize the model.",()=> QuantizeModel.Run()},
-            {"Automatic conversation.",()=> TalkToYourself.Run()},
-            {"Constrain response to json format using grammar.",()=> GrammarJsonResponse.Run()},
-            {"Semantic Kernel Prompt.",()=> SemanticKernelPrompt.Run()},
-            {"Semantic Kernel Chat.",()=> SemanticKernelChat.Run()},
-            {"Semantic Kernel Memory.",()=> SemanticKernelMemory.Run()},
-            {"Coding Assistant.",()=> CodingAssistant.Run()},
-            {"Batch Decoding.",()=> BatchedDecoding.Run()},
-            {"SK Kernel Memory.",()=> KernelMemory.Run()},
-            {"Exit", ()=> Task.CompletedTask}
-        };
         public static async Task Run()
         {
-            AnsiConsole.Write(new Rule("LLamaSharp Examples"));
+            Console.WriteLine("================LLamaSharp Examples (New Version)==================\n");
+
+            Console.WriteLine("Please input a number to choose an example to run:");
+            Console.WriteLine("0: Run a chat session without stripping the role names.");
+            Console.WriteLine("1: Run a chat session with the role names stripped.");
+            Console.WriteLine("2: Interactive mode chat by using executor.");
+            Console.WriteLine("3: Instruct mode chat by using executor.");
+            Console.WriteLine("4: Stateless mode chat by using executor.");
+            Console.WriteLine("5: Load and save chat session.");
+            Console.WriteLine("6: Load and save state of model and executor.");
+            Console.WriteLine("7: Get embeddings from LLama model.");
+            Console.WriteLine("8: Quantize the model.");
+            Console.WriteLine("9: Automatic conversation.");
conversation."); + Console.WriteLine("10: Constrain response to json format using grammar."); + Console.WriteLine("11: Semantic Kernel Prompt."); + Console.WriteLine("12: Semantic Kernel Chat."); + Console.WriteLine("13: Semantic Kernel Memory."); + Console.WriteLine("14: Coding Assistant."); + Console.WriteLine("15: Batch Decoding."); + Console.WriteLine("16: SK Kernel Memory."); while (true) { - var choice = AnsiConsole.Prompt( - new SelectionPrompt() - .Title("Please choose[green] an example[/] to run: ") - .AddChoices(Examples.Keys)); + Console.Write("\nYour choice: "); + int choice = int.Parse(Console.ReadLine()); - - if (Examples.TryGetValue(choice, out var example)) + if (choice == 0) { - if (choice == "Exit") - { - break; - } - AnsiConsole.Write(new Rule(choice)); - await example(); + await ChatSessionWithRoleName.Run(); } - - AnsiConsole.Clear(); + else if (choice == 1) + { + await ChatSessionStripRoleName.Run(); + } + else if (choice == 2) + { + await InteractiveModeExecute.Run(); + } + else if (choice == 3) + { + await InstructModeExecute.Run(); + } + else if (choice == 4) + { + await StatelessModeExecute.Run(); + } + else if (choice == 5) + { + await SaveAndLoadSession.Run(); + } + else if (choice == 6) + { + await LoadAndSaveState.Run(); + } + else if (choice == 7) + { + GetEmbeddings.Run(); + } + else if (choice == 8) + { + QuantizeModel.Run(); + } + else if (choice == 9) + { + await TalkToYourself.Run(); + } + else if (choice == 10) + { + await GrammarJsonResponse.Run(); + } + else if (choice == 11) + { + await SemanticKernelPrompt.Run(); + } + else if (choice == 12) + { + await SemanticKernelChat.Run(); + } + else if (choice == 13) + { + await SemanticKernelMemory.Run(); + } + else if (choice == 14) + { + await CodingAssistant.Run(); + } + else if (choice == 15) + { + await BatchedDecoding.Run(); + } + else if (choice == 16) + { + await KernelMemory.Run(); + } + else + { + Console.WriteLine("Cannot parse your choice. Please select again."); + continue; + } + break; } } } diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs index 2f9caffdc..182ace002 100644 --- a/LLama.Web/Common/ModelOptions.cs +++ b/LLama.Web/Common/ModelOptions.cs @@ -18,9 +18,9 @@ public class ModelOptions public int MaxInstances { get; set; } /// - /// Model context size (n_ctx). Null to use value from model. + /// Model context size (n_ctx) /// - public uint? ContextSize { get; set; } + public uint ContextSize { get; set; } = 512; /// /// the GPU that is used for scratch and small tensors diff --git a/LLama/Abstractions/IContextParams.cs b/LLama/Abstractions/IContextParams.cs index d9811cdce..0f129217f 100644 --- a/LLama/Abstractions/IContextParams.cs +++ b/LLama/Abstractions/IContextParams.cs @@ -9,9 +9,9 @@ namespace LLama.Abstractions; public interface IContextParams { /// - /// Model context size (n_ctx). Null to use value from model file. + /// Model context size (n_ctx) /// - uint? ContextSize { get; set; } + uint ContextSize { get; set; } /// /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch) diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs index 37fb1cf51..d4577a475 100644 --- a/LLama/Common/FixedSizeQueue.cs +++ b/LLama/Common/FixedSizeQueue.cs @@ -43,7 +43,7 @@ public FixedSizeQueue(int size) /// public FixedSizeQueue(int size, IEnumerable data) { -#if NET6_0_OR_GREATER +#if !NETSTANDARD2_0 // Try to check the size without enumerating the entire IEnumerable. 
             // in which case we'll have to check later
             if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index a736ccbde..8bf59fa53 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -13,60 +13,92 @@ namespace LLama.Common
     public record ModelParams
         : ILLamaParams
     {
-        /// <inheritdoc />
-        public uint? ContextSize { get; set; }
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Model context size (n_ctx)
+        /// </summary>
+        public uint ContextSize { get; set; } = 512;
+        /// <summary>
+        /// the GPU that is used for scratch and small tensors
+        /// </summary>
         public int MainGpu { get; set; } = 0;
-        /// <inheritdoc />
+        /// <summary>
+        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
+        /// </summary>
         public int GpuLayerCount { get; set; } = 20;
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Seed for the random number generator (seed)
+        /// </summary>
         public uint Seed { get; set; } = 0xFFFFFFFF;
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Use f16 instead of f32 for memory kv (memory_f16)
+        /// </summary>
         public bool UseFp16Memory { get; set; } = true;
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Use mmap for faster loads (use_mmap)
+        /// </summary>
         public bool UseMemorymap { get; set; } = true;
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Use mlock to keep model in memory (use_mlock)
+        /// </summary>
         public bool UseMemoryLock { get; set; }
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Compute perplexity over the prompt (perplexity)
+        /// </summary>
         public bool Perplexity { get; set; }
-
-        /// <inheritdoc />
+        /// <summary>
+        /// Model path (model)
+        /// </summary>
         public string ModelPath { get; set; }
 
-        /// <inheritdoc />
+        /// <summary>
+        /// List of LoRAs to apply
+        /// </summary>
         public AdapterCollection LoraAdapters { get; set; } = new();
 
-        /// <inheritdoc />
+        /// <summary>
+        /// base model path for the lora adapter (lora_base)
+        /// </summary>
         public string LoraBase { get; set; } = string.Empty;
 
-        /// <inheritdoc />
+        /// <summary>
+        /// Number of threads (null = autodetect) (n_threads)
+        /// </summary>
         public uint? Threads { get; set; }
 
-        /// <inheritdoc />
+        /// <summary>
+        /// Number of threads to use for batch processing (null = autodetect) (n_threads)
+        /// </summary>
         public uint? BatchThreads { get; set; }
 
-        /// <inheritdoc />
+        /// <summary>
+        /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
+        /// </summary>
         public uint BatchSize { get; set; } = 512;
 
-        /// <inheritdoc />
+        /// <summary>
+        /// Whether to use embedding mode. (embedding) Note that if this is set to true,
+        /// The LLamaModel won't produce text response anymore.
+        /// </summary>
         public bool EmbeddingMode { get; set; }
 
-        /// <inheritdoc />
+        /// <summary>
+        /// how split tensors should be distributed across GPUs.
+        /// </summary>
+        /// "[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.
         [JsonConverter(typeof(TensorSplitsCollectionConverter))]
         public TensorSplitsCollection TensorSplits { get; set; } = new();
 
-        /// <inheritdoc />
-        public float? RopeFrequencyBase { get; set; }
+        /// <summary>
+        /// RoPE base frequency
+        /// </summary>
+        public float? RopeFrequencyBase { get; set; }
+
+        /// <summary>
+        /// RoPE frequency scaling factor
+        /// </summary>
+        public float? RopeFrequencyScale { get; set; }
-
-        /// <inheritdoc />
-        public float? RopeFrequencyScale { get; set; }
         /// <inheritdoc />
         public float? YarnExtrapolationFactor { get; set; }
@@ -91,10 +123,15 @@ public record ModelParams
 
         /// <inheritdoc />
         public bool MulMatQ { get; set; }
 
-        /// <inheritdoc />
+
+        /// <summary>
+        /// Load vocab only (no weights)
+        /// </summary>
         public bool VocabOnly { get; set; }
 
-        /// <inheritdoc />
+        /// <summary>
+        /// The encoding to use to convert text for the model
+        /// </summary>
         [JsonConverter(typeof(EncodingConverter))]
         public Encoding Encoding { get; set; } = Encoding.UTF8;
diff --git a/LLama/Extensions/DictionaryExtensions.cs b/LLama/Extensions/DictionaryExtensions.cs
index 1af0e9e1f..a39ed7e8b 100644
--- a/LLama/Extensions/DictionaryExtensions.cs
+++ b/LLama/Extensions/DictionaryExtensions.cs
@@ -9,8 +9,6 @@ public static TValue GetValueOrDefault<TKey, TValue>(this IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
         {
             return GetValueOrDefaultImpl(dictionary, key, defaultValue);
         }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
 #endif
 
         internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
diff --git a/LLama/Extensions/EncodingExtensions.cs b/LLama/Extensions/EncodingExtensions.cs
index 5005b16c1..e88d83a70 100644
--- a/LLama/Extensions/EncodingExtensions.cs
+++ b/LLama/Extensions/EncodingExtensions.cs
@@ -15,8 +15,6 @@ public static int GetCharCount(this Encoding encoding, ReadOnlySpan<byte> bytes)
     {
         return GetCharCountImpl(encoding, bytes);
     }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
 #endif
 
     internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
diff --git a/LLama/Extensions/IContextParamsExtensions.cs b/LLama/Extensions/IContextParamsExtensions.cs
index bb029c162..16716b531 100644
--- a/LLama/Extensions/IContextParamsExtensions.cs
+++ b/LLama/Extensions/IContextParamsExtensions.cs
@@ -21,7 +21,7 @@ public static class IContextParamsExtensions
         public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
         {
             result = NativeApi.llama_context_default_params();
-            result.n_ctx = @params.ContextSize ?? 0;
+            result.n_ctx = @params.ContextSize;
             result.n_batch = @params.BatchSize;
             result.seed = @params.Seed;
             result.f16_kv = @params.UseFp16Memory;
diff --git a/LLama/Extensions/IEnumerableExtensions.cs b/LLama/Extensions/IEnumerableExtensions.cs
index 17428d297..9e01feb85 100644
--- a/LLama/Extensions/IEnumerableExtensions.cs
+++ b/LLama/Extensions/IEnumerableExtensions.cs
@@ -10,8 +10,6 @@ public static IEnumerable<T> TakeLast<T>(this IEnumerable<T> source, int count)
     {
         return TakeLastImpl(source, count);
     }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
 #endif
 
     internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)
diff --git a/LLama/Extensions/KeyValuePairExtensions.cs b/LLama/Extensions/KeyValuePairExtensions.cs
index 233195ed0..6e12654de 100644
--- a/LLama/Extensions/KeyValuePairExtensions.cs
+++ b/LLama/Extensions/KeyValuePairExtensions.cs
@@ -19,7 +19,5 @@ public static void Deconstruct<TKey, TValue>(this System.Collections.Generic.KeyValuePair<TKey, TValue> pair, out TKey first, out TValue second)
         first = pair.Key;
         second = pair.Value;
     }
-#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
-#error Target framework not supported!
 #endif
 }
\ No newline at end of file
diff --git a/LLama/Extensions/ListExtensions.cs b/LLama/Extensions/ListExtensions.cs
index eb30a07a0..11a1d4f00 100644
--- a/LLama/Extensions/ListExtensions.cs
+++ b/LLama/Extensions/ListExtensions.cs
@@ -5,7 +5,7 @@ namespace LLama.Extensions
 {
     internal static class ListExtensions
     {
-#if !NET6_0_OR_GREATER
+#if NETSTANDARD2_0
         public static void EnsureCapacity<T>(this List<T> list, int capacity)
         {
             if (list.Capacity < capacity)
diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
index 8910f1551..c14f0ffab 100644
--- a/LLama/LLamaSharp.Runtime.targets
+++ b/LLama/LLamaSharp.Runtime.targets
@@ -27,13 +27,17 @@
       PreserveNewest
       libllama-cuda12.so
-
+
       PreserveNewest
-      libllama.dylib
+      runtimes/osx-arm64/libllama.dylib
-
+
       PreserveNewest
-      ggml-metal.metal
-
+      runtimes/osx-arm64/ggml-metal.metal
+
+
+      PreserveNewest
+      runtimes/osx-x64/libllama.dylib
+
\ No newline at end of file
diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index c0f2afa29..f1ba569d1 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -22,7 +22,7 @@ public struct LLamaContextParams
         public uint seed;
 
         /// <summary>
-        /// text context, 0 = from model
+        /// text context
         /// </summary>
         public uint n_ctx;
 
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index e3b182bd4..fc4086783 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -79,7 +79,9 @@ private static IntPtr TryLoadLibrary()
 
             if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
             {
-                return IntPtr.Zero;
+                return TryLoad("runtimes/osx-arm64/libllama.dylib", System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported)
+                    ?? TryLoad("runtimes/osx-x64/libllama.dylib")
+                    ?? IntPtr.Zero;
             }
 #endif
 
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
index 739eb9086..29466a1fe 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
@@ -19,10 +19,9 @@
-
-
-
-
+
+
+
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.MacMetal.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.MacMetal.nuspec
deleted file mode 100644
index 0ba9c508a..000000000
--- a/LLama/runtimes/build/LLamaSharp.Backend.MacMetal.nuspec
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-
-    LLamaSharp.Backend.MacMetal
-    $version$
-    LLamaSharp.Backend.MacMetal, the backend for LLamaSharp on MACOS.
-    llama.cpp Authors
-    false
-    MIT
-    icon512.png
-    https://github.com/SciSharp/LLamaSharp
-    LLamaSharp.Backend.MacMetal is a backend for LLamaSharp to use MAC with GPU support.
-
-    Copyright 2023 The llama.cpp Authors. All rights reserved.
-    LLamaSharp LLama LLM GPT AI ChatBot SciSharp
-
-
-
-
-
-
-
-
-
-
-
diff --git a/LLama/runtimes/build/LLamaSharpBackend.props b/LLama/runtimes/build/LLamaSharpBackend.props
index 7e3db26e5..786e89056 100644
--- a/LLama/runtimes/build/LLamaSharpBackend.props
+++ b/LLama/runtimes/build/LLamaSharpBackend.props
@@ -25,12 +25,6 @@
       false
       %(Filename)%(Extension)
-
-
-      PreserveNewest
-      false
-      %(Filename)%(Extension)
-
diff --git a/LLama/runtimes/ggml-metal.metal b/LLama/runtimes/osx-arm64/ggml-metal.metal
similarity index 100%
rename from LLama/runtimes/ggml-metal.metal
rename to LLama/runtimes/osx-arm64/ggml-metal.metal
diff --git a/LLama/runtimes/libllama.dylib b/LLama/runtimes/osx-arm64/libllama.dylib
similarity index 100%
rename from LLama/runtimes/libllama.dylib
rename to LLama/runtimes/osx-arm64/libllama.dylib
diff --git a/LLama/runtimes/osx-x64/libllama.dylib b/LLama/runtimes/osx-x64/libllama.dylib
new file mode 100644
index 000000000..37eb3cd43
Binary files /dev/null and b/LLama/runtimes/osx-x64/libllama.dylib differ
diff --git a/README.md b/README.md
index 74d5aee67..216db1249 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 
 **The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on
 local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enough GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**
 
 **Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**