Merge pull request #258 from SignalRT/RuntimeDetection
Runtime detection MacOS
AsakusaRinne authored Nov 12, 2023
2 parents c2be012 + 0a2b0ab commit ed479d1
Showing 26 changed files with 213 additions and 147 deletions.
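
The substance of this merge is per-architecture native binaries for macOS: an osx-arm64 build (Metal-enabled, shipped with ggml-metal.metal) and an osx-x64 build (Metal off), laid out under `runtimes/<rid>/` so the right `libllama.dylib` can be chosen at load time. As a rough sketch of the idea — this is an assumption for illustration, not LLamaSharp's actual loader code — runtime selection might look like:

```csharp
using System;
using System.Runtime.InteropServices;

// Illustrative sketch only: pick the runtimes/<rid> folder that matches the
// current macOS architecture. The folder names mirror the ones this commit
// creates; the selection logic is an assumption, not LLamaSharp's loader.
static string GetOsxRuntimeFolder() =>
    RuntimeInformation.OSArchitecture == Architecture.Arm64
        ? "runtimes/osx-arm64"   // Apple Silicon: Metal build plus ggml-metal.metal
        : "runtimes/osx-x64";    // Intel: built with -DLLAMA_METAL=OFF

if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
    Console.WriteLine($"Native library folder: {GetOsxRuntimeFolder()}");
```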
10 changes: 8 additions & 2 deletions .github/prepare_release.sh
@@ -22,13 +22,19 @@ fi
 
 mkdir ./temp;
 mkdir ./temp/runtimes;
-cp ./LLama/runtimes/*.* ./temp/runtimes/;
+# For sure it could be done better but cp -R did not work on osx
+mkdir ./temp/runtimes/osx-arm64
+mkdir ./temp/runtimes/osx-x64
+cp ./LLama/runtimes/*.* ./temp/runtimes/;
+cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
+cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
 cp ./LLama/runtimes/build/*.* ./temp/;
 
 # get the current version
 cd temp;
 dotnet add package LLamaSharp;
 version=$(dotnet list temp.csproj package | grep LLamaSharp);
+# TODO: This didn´t work on osx...we need a solution
 read -ra arr <<< "$version"
 version="${arr[-1]}"
 echo "The latest version: $version";
@@ -71,7 +77,7 @@ cd temp
 nuget pack LLamaSharp.Backend.Cpu.nuspec -version $updated_version
 nuget pack LLamaSharp.Backend.Cuda11.nuspec -version $updated_version
 nuget pack LLamaSharp.Backend.Cuda12.nuspec -version $updated_version
-nuget pack LLamaSharp.Backend.MacMetal.nuspec -version $updated_version
+
 
 cd ..
 exit 0
20 changes: 13 additions & 7 deletions .github/workflows/compile.yml
@@ -6,9 +6,9 @@ on:
     cublas:
       type: boolean
       description: Build CUBLAS binaries
-    macos:
+    osx:
       type: boolean
-      description: Build MacOS binaries
+      description: Build OSX binaries
   push:
     branches: [cron_job]
   #schedule:
@@ -145,8 +145,10 @@ jobs:
       fail-fast: true
       matrix:
         include:
-          - build: 'metal'
+          - build: 'arm64'
            defines: '-DCMAKE_OSX_ARCHITECTURES=arm64'
+          - build: 'x64'
+            defines: '-DLLAMA_METAL=OFF -DCMAKE_OSX_ARCHITECTURES=x86_64'
     runs-on: macos-latest
     steps:
       - uses: actions/checkout@v3
@@ -167,7 +169,7 @@
         uses: actions/upload-artifact@v3
         with:
           path: ./build/libllama.dylib
-          name: llama-bin-macos-${{ matrix.build }}.dylib
+          name: llama-bin-osx-${{ matrix.build }}.dylib
       - name: Upload Metal
         uses: actions/upload-artifact@v3
         with:
@@ -210,9 +212,13 @@ jobs:
       - name: Rearrange MacOS files
         if: ${{ github.event.inputs.macos }}
         run: |
-          mkdir deps/macos-metal
-          cp artifacts/llama-bin-macos-metal.dylib/libllama.dylib deps/macos-metal/libllama.dylib
-          cp artifacts/ggml-metal.metal/ggml-metal.metal deps/macos-metal/ggml-metal.metal
+          mkdir deps/osx-arm64
+          mkdir deps/osx-x64
+          cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
+          cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
+          cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
       - name: Rearrange CUDA files
         if: ${{ github.event.inputs.cublas }}
8 changes: 4 additions & 4 deletions .github/workflows/main.yml
@@ -12,14 +12,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        build: [linux-release, windows-release]
+        build: [linux-release, windows-release, osx-release]
         include:
           - build: linux-release
             os: ubuntu-latest
             config: release
-          # - build: macos-release
-          #   os: macos-latest
-          #   config: release
+          - build: osx-release
+            os: macos-latest
+            config: release
           - build: windows-release
             os: windows-2019
             config: release
3 changes: 1 addition & 2 deletions LLama.Examples/NewVersion/GetEmbeddings.cs
@@ -4,7 +4,7 @@ namespace LLama.Examples.NewVersion
 {
     public class GetEmbeddings
     {
-        public static Task Run()
+        public static void Run()
         {
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -23,7 +23,6 @@ public static Task Run()
                 Console.WriteLine(string.Join(", ", embedder.GetEmbeddings(text)));
                 Console.WriteLine();
             }
-            return Task.CompletedTask;
         }
     }
 }
4 changes: 1 addition & 3 deletions LLama.Examples/NewVersion/QuantizeModel.cs
@@ -2,7 +2,7 @@
 {
     public class QuantizeModel
     {
-        public static Task Run()
+        public static void Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
@@ -21,8 +21,6 @@ public static Task Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
-
-            return Task.CompletedTask;
         }
     }
 }
135 changes: 95 additions & 40 deletions LLama.Examples/NewVersion/TestRunner.cs
@@ -1,54 +1,109 @@
 using System.Linq.Expressions;
-using Spectre.Console;
 
-namespace LLama.Examples.NewVersion
+namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {
-        static Dictionary<string, Func<Task>> Examples = new Dictionary<string, Func<Task>>
-        {
-            {"Run a chat session without stripping the role names.", () => ChatSessionWithRoleName.Run()},
-            {"Run a chat session with the role names stripped.",()=> ChatSessionStripRoleName.Run()},
-            {"Interactive mode chat by using executor.",()=> InteractiveModeExecute.Run()},
-            {"Instruct mode chat by using executor.",()=> InstructModeExecute.Run()},
-            {"Stateless mode chat by using executor.",()=> StatelessModeExecute.Run()},
-            {"Load and save chat session.",()=> SaveAndLoadSession.Run()},
-            {"Load and save state of model and executor.",()=> LoadAndSaveState.Run()},
-            {"Get embeddings from LLama model.",()=> GetEmbeddings.Run()},
-            {"Quantize the model.",()=> QuantizeModel.Run()},
-            {"Automatic conversation.",()=> TalkToYourself.Run()},
-            {"Constrain response to json format using grammar.",()=> GrammarJsonResponse.Run()},
-            {"Semantic Kernel Prompt.",()=> SemanticKernelPrompt.Run()},
-            {"Semantic Kernel Chat.",()=> SemanticKernelChat.Run()},
-            {"Semantic Kernel Memory.",()=> SemanticKernelMemory.Run()},
-            {"Coding Assistant.",()=> CodingAssistant.Run()},
-            {"Batch Decoding.",()=> BatchedDecoding.Run()},
-            {"SK Kernel Memory.",()=> KernelMemory.Run()},
-            {"Exit", ()=> Task.CompletedTask}
-        };
         public static async Task Run()
         {
-            AnsiConsole.Write(new Rule("LLamaSharp Examples"));
+            Console.WriteLine("================LLamaSharp Examples (New Version)==================\n");
 
+            Console.WriteLine("Please input a number to choose an example to run:");
+            Console.WriteLine("0: Run a chat session without stripping the role names.");
+            Console.WriteLine("1: Run a chat session with the role names stripped.");
+            Console.WriteLine("2: Interactive mode chat by using executor.");
+            Console.WriteLine("3: Instruct mode chat by using executor.");
+            Console.WriteLine("4: Stateless mode chat by using executor.");
+            Console.WriteLine("5: Load and save chat session.");
+            Console.WriteLine("6: Load and save state of model and executor.");
+            Console.WriteLine("7: Get embeddings from LLama model.");
+            Console.WriteLine("8: Quantize the model.");
+            Console.WriteLine("9: Automatic conversation.");
+            Console.WriteLine("10: Constrain response to json format using grammar.");
+            Console.WriteLine("11: Semantic Kernel Prompt.");
+            Console.WriteLine("12: Semantic Kernel Chat.");
+            Console.WriteLine("13: Semantic Kernel Memory.");
+            Console.WriteLine("14: Coding Assistant.");
+            Console.WriteLine("15: Batch Decoding.");
+            Console.WriteLine("16: SK Kernel Memory.");
 
             while (true)
             {
-                var choice = AnsiConsole.Prompt(
-                    new SelectionPrompt<string>()
-                        .Title("Please choose[green] an example[/] to run: ")
-                        .AddChoices(Examples.Keys));
+                Console.Write("\nYour choice: ");
+                int choice = int.Parse(Console.ReadLine());
 
+
-                if (Examples.TryGetValue(choice, out var example))
+                if (choice == 0)
                 {
-                    if (choice == "Exit")
-                    {
-                        break;
-                    }
-                    AnsiConsole.Write(new Rule(choice));
-                    await example();
+                    await ChatSessionWithRoleName.Run();
                 }
-
-                AnsiConsole.Clear();
+                else if (choice == 1)
+                {
+                    await ChatSessionStripRoleName.Run();
+                }
+                else if (choice == 2)
+                {
+                    await InteractiveModeExecute.Run();
+                }
+                else if (choice == 3)
+                {
+                    await InstructModeExecute.Run();
+                }
+                else if (choice == 4)
+                {
+                    await StatelessModeExecute.Run();
+                }
+                else if (choice == 5)
+                {
+                    await SaveAndLoadSession.Run();
+                }
+                else if (choice == 6)
+                {
+                    await LoadAndSaveState.Run();
+                }
+                else if (choice == 7)
+                {
+                    GetEmbeddings.Run();
+                }
+                else if (choice == 8)
+                {
+                    QuantizeModel.Run();
+                }
+                else if (choice == 9)
+                {
+                    await TalkToYourself.Run();
+                }
+                else if (choice == 10)
+                {
+                    await GrammarJsonResponse.Run();
+                }
+                else if (choice == 11)
+                {
+                    await SemanticKernelPrompt.Run();
+                }
+                else if (choice == 12)
+                {
+                    await SemanticKernelChat.Run();
+                }
+                else if (choice == 13)
+                {
+                    await SemanticKernelMemory.Run();
+                }
+                else if (choice == 14)
+                {
+                    await CodingAssistant.Run();
+                }
+                else if (choice == 15)
+                {
+                    await BatchedDecoding.Run();
+                }
+                else if (choice == 16)
+                {
+                    await KernelMemory.Run();
+                }
+                else
+                {
+                    Console.WriteLine("Cannot parse your choice. Please select again.");
+                    continue;
+                }
+                break;
             }
         }
     }
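
The two variants differ in dispatch style: the removed one is table-driven (a `Dictionary<string, Func<Task>>` fed to Spectre.Console's `SelectionPrompt`), while the added one parses an integer and walks an `if`/`else` chain. As a minimal sketch — with hypothetical stub methods standing in for the real example classes — the now-synchronous examples (`GetEmbeddings.Run`, `QuantizeModel.Run`) can still fit the table-driven style by wrapping them:

```csharp
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

// Stub methods standing in for the real classes under LLama.Examples.NewVersion.
static Task ChatSessionRun() => Task.CompletedTask;        // an async example
static void GetEmbeddingsRun() { /* synchronous work */ }  // a now-void example

// Table-driven dispatch: wrapping the synchronous examples gives everything
// the same Func<Task> shape, so one loop (or menu) can run them all.
var examples = new Dictionary<string, Func<Task>>
{
    ["Chat session"] = ChatSessionRun,
    ["Get embeddings"] = () => { GetEmbeddingsRun(); return Task.CompletedTask; },
};

foreach (var (name, run) in examples)
{
    Console.WriteLine($"Running: {name}");
    await run();
}
```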
4 changes: 2 additions & 2 deletions LLama.Web/Common/ModelOptions.cs
@@ -18,9 +18,9 @@ public class ModelOptions
     public int MaxInstances { get; set; }
 
     /// <summary>
-    /// Model context size (n_ctx). Null to use value from model.
+    /// Model context size (n_ctx)
    /// </summary>
-    public uint? ContextSize { get; set; }
+    public uint ContextSize { get; set; } = 512;
 
     /// <summary>
     /// the GPU that is used for scratch and small tensors
4 changes: 2 additions & 2 deletions LLama/Abstractions/IContextParams.cs
@@ -9,9 +9,9 @@ namespace LLama.Abstractions;
 public interface IContextParams
 {
     /// <summary>
-    /// Model context size (n_ctx). Null to use value from model file.
+    /// Model context size (n_ctx)
     /// </summary>
-    uint? ContextSize { get; set; }
+    uint ContextSize { get; set; }
 
     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
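
Interface and implementation move in lockstep here: the nullable `ContextSize` (where `null` meant "use the model file's own `n_ctx`") is replaced by a plain `uint` defaulting to 512. A sketch of the fallback the nullable form allows, where `nCtxFromModel` is a hypothetical stand-in for the value read from the model file:

```csharp
using System;

// With uint?, null defers to the model's own context length; the
// non-nullable version instead bakes in a default of 512.
// `nCtxFromModel` is hypothetical, for illustration only.
static uint ResolveContextSize(uint? configured, uint nCtxFromModel)
    => configured ?? nCtxFromModel;

Console.WriteLine(ResolveContextSize(null, 4096));  // 4096: falls back to the model value
Console.WriteLine(ResolveContextSize(2048, 4096));  // 2048: an explicit setting wins
```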
2 changes: 1 addition & 1 deletion LLama/Common/FixedSizeQueue.cs
@@ -43,7 +43,7 @@ public FixedSizeQueue(int size)
     /// <param name="data"></param>
     public FixedSizeQueue(int size, IEnumerable<T> data)
     {
-#if NET6_0_OR_GREATER
+#if !NETSTANDARD2_0
         // Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
         // in which case we'll have to check later
         if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
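
`Enumerable.TryGetNonEnumeratedCount` shipped in .NET 6, so assuming the project targets only `netstandard2.0` plus modern .NET, the two `#if` guards compile the same code; the point of the call is to reject oversized inputs without consuming the sequence. A small sketch of the same pattern:

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

const int capacity = 10;
IEnumerable<int> data = Enumerable.Range(0, 100);

// Cheap pre-check: materialised collections (and some LINQ sources) report
// their count without being enumerated; purely lazy sequences return false
// here and have to be size-checked during enumeration instead.
if (data.TryGetNonEnumeratedCount(out var count) && count > capacity)
    Console.WriteLine($"Rejecting input up front: {count} items > capacity {capacity}");
```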