From 7a89ff0dddf2787709fb01755ed745812d67b173 Mon Sep 17 00:00:00 2001
From: Sergio Lopez <slp@redhat.com>
Date: Sat, 7 Sep 2024 00:54:43 +0200
Subject: [PATCH 1/3] Add patches for setting memory model

These patches allow setting the memory model to TSO on guests running
on Apple Silicon machines. Other systems are unaffected.

Signed-off-by: Sergio Lopez <slp@redhat.com>
---
 ...ctl-Introduce-PR_-SET-GET-_MEM_MODEL.patch | 123 +++++++++
 ...PR_-GET-SET-_MEM_MODEL-for-always-TS.patch | 233 ++++++++++++++++++
 ...scaffolding-to-add-ACTLR_EL1-to-thre.patch | 141 +++++++++++
 ...Apple-IMPDEF-TSO-memory-model-contro.patch | 181 ++++++++++++++
 4 files changed, 678 insertions(+)
 create mode 100644 patches/0016-prctl-Introduce-PR_-SET-GET-_MEM_MODEL.patch
 create mode 100644 patches/0017-arm64-Implement-PR_-GET-SET-_MEM_MODEL-for-always-TS.patch
 create mode 100644 patches/0018-arm64-Introduce-scaffolding-to-add-ACTLR_EL1-to-thre.patch
 create mode 100644 patches/0019-arm64-Implement-Apple-IMPDEF-TSO-memory-model-contro.patch

diff --git a/patches/0016-prctl-Introduce-PR_-SET-GET-_MEM_MODEL.patch b/patches/0016-prctl-Introduce-PR_-SET-GET-_MEM_MODEL.patch
new file mode 100644
index 0000000..8050d8a
--- /dev/null
+++ b/patches/0016-prctl-Introduce-PR_-SET-GET-_MEM_MODEL.patch
@@ -0,0 +1,123 @@
+From e7f3885e24d060ae36aeccde44c03894ff4e3d6b Mon Sep 17 00:00:00 2001
+From: Hector Martin <marcan@marcan.st>
+Date: Thu, 11 Apr 2024 09:51:20 +0900
+Subject: [PATCH 1/4] prctl: Introduce PR_{SET,GET}_MEM_MODEL
+
+On some architectures, it is possible to query and/or change the CPU
+memory model. This allows userspace to switch to a stricter memory model
+for performance reasons, such as when emulating code for another
+architecture where that model is the default.
+
+Introduce two prctls to allow userspace to query and set the memory
+model for a thread. Two models are initially defined:
+
+- PR_SET_MEM_MODEL_DEFAULT requests the default memory model for the
+  architecture.
+- PR_SET_MEM_MODEL_TSO requests the x86 TSO memory model.
+
+PR_SET_MEM_MODEL is allowed to set a stricter memory model than
+requested if available, in which case it will return successfully. If
+the requested memory model cannot be fulfilled, it will return an error.
+The memory model that was actually set can be queried by a subsequent
+call to PR_GET_MEM_MODEL.
+
+Examples:
+- On a CPU without support for a memory model at least as strong as
+  TSO, PR_SET_MEM_MODEL(PR_SET_MEM_MODEL_TSO) fails.
+- On a CPU with runtime-configurable TSO support, PR_SET_MEM_MODEL can
+  toggle the memory model between DEFAULT and TSO at will.
+- On a CPU where the only memory model is at least as strict as TSO,
+  PR_GET_MEM_MODEL will return PR_SET_MEM_MODEL_DEFAULT, and
+  PR_SET_MEM_MODEL(PR_SET_MEM_MODEL_TSO) will return success but leave
+  the memory model at PR_SET_MEM_MODEL_DEFAULT. This implies that the
+  default is in fact at least as strict as TSO.
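+
+As an illustration (not part of this patch), userspace might use the
+interface as follows; the constants mirror the uapi values added below,
+and the fallback defines are only needed with older headers:
+
+	#include <stdio.h>
+	#include <sys/prctl.h>
+
+	#ifndef PR_SET_MEM_MODEL
+	#define PR_GET_MEM_MODEL	0x6d4d444c
+	#define PR_SET_MEM_MODEL	0x4d4d444c
+	#define PR_SET_MEM_MODEL_DEFAULT	0
+	#define PR_SET_MEM_MODEL_TSO		1
+	#endif
+
+	int main(void)
+	{
+		/* Request TSO; this fails with EINVAL where unsupported. */
+		if (prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0))
+			perror("PR_SET_MEM_MODEL");
+
+		/* Query the memory model actually in effect. */
+		printf("mem model: %d\n", prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0));
+		return 0;
+	}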
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+Reviewed-by: Neal Gompa <neal@gompa.dev>
+---
+ include/linux/memory_ordering_model.h | 11 +++++++++++
+ include/uapi/linux/prctl.h            |  5 +++++
+ kernel/sys.c                          | 21 +++++++++++++++++++++
+ 3 files changed, 37 insertions(+)
+ create mode 100644 include/linux/memory_ordering_model.h
+
+diff --git a/include/linux/memory_ordering_model.h b/include/linux/memory_ordering_model.h
+new file mode 100644
+index 000000000..267a12ca6
+--- /dev/null
++++ b/include/linux/memory_ordering_model.h
+@@ -0,0 +1,11 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __ASM_MEMORY_ORDERING_MODEL_H
++#define __ASM_MEMORY_ORDERING_MODEL_H
++
++/* Arch hooks to implement the PR_{GET,SET}_MEM_MODEL prctls */
++
++struct task_struct;
++int arch_prctl_mem_model_get(struct task_struct *t);
++int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val);
++
++#endif
+diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
+index 370ed14b1..961216093 100644
+--- a/include/uapi/linux/prctl.h
++++ b/include/uapi/linux/prctl.h
+@@ -306,4 +306,9 @@ struct prctl_mm_map {
+ # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK	0xc
+ # define PR_RISCV_V_VSTATE_CTRL_MASK		0x1f
+ 
++#define PR_GET_MEM_MODEL	0x6d4d444c
++#define PR_SET_MEM_MODEL	0x4d4d444c
++# define PR_SET_MEM_MODEL_DEFAULT	0
++# define PR_SET_MEM_MODEL_TSO		1
++
+ #endif /* _LINUX_PRCTL_H */
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 7a4ae6d5a..54cff3b34 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -45,6 +45,7 @@
+ #include
+ #include
+ #include
++#include <linux/memory_ordering_model.h>
+ 
+ #include
+ #include
+@@ -2429,6 +2430,16 @@ static int prctl_get_auxv(void __user *addr, unsigned long len)
+ 	return sizeof(mm->saved_auxv);
+ }
+ 
++int __weak arch_prctl_mem_model_get(struct task_struct *t)
++{
++	return -EINVAL;
++}
++
++int __weak arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
++{
++	return -EINVAL;
++}
++
+ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ 		unsigned long, arg4, unsigned long, arg5)
+ {
+@@ -2744,6 +2755,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ 	case PR_RISCV_V_GET_CONTROL:
+ 		error = RISCV_V_GET_CONTROL();
+ 		break;
++	case PR_GET_MEM_MODEL:
++		if (arg2 || arg3 || arg4 || arg5)
++			return -EINVAL;
++		error = arch_prctl_mem_model_get(me);
++		break;
++	case PR_SET_MEM_MODEL:
++		if (arg3 || arg4 || arg5)
++			return -EINVAL;
++		error = arch_prctl_mem_model_set(me, arg2);
++		break;
+ 	default:
+ 		error = -EINVAL;
+ 		break;
+-- 
+2.45.1
+
diff --git a/patches/0017-arm64-Implement-PR_-GET-SET-_MEM_MODEL-for-always-TS.patch b/patches/0017-arm64-Implement-PR_-GET-SET-_MEM_MODEL-for-always-TS.patch
new file mode 100644
index 0000000..5cfad0a
--- /dev/null
+++ b/patches/0017-arm64-Implement-PR_-GET-SET-_MEM_MODEL-for-always-TS.patch
@@ -0,0 +1,233 @@
+From 37e470e60bc5c0dad986f211ff05629bed0b9d47 Mon Sep 17 00:00:00 2001
+From: Sergio Lopez <slp@redhat.com>
+Date: Mon, 6 May 2024 16:47:51 +0200
+Subject: [PATCH 2/4] arm64: Implement PR_{GET,SET}_MEM_MODEL for always-TSO
+ CPUs
+
+Some ARM64 implementations are known to always use the TSO memory model.
+Add trivial support for the PR_{GET,SET}_MEM_MODEL prctl, which allows
+userspace to learn this fact.
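+
+On such parts, the (illustrative) userspace-visible behaviour is:
+
+	prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0)     == 0
+	prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_DEFAULT, 0, 0, 0) == 0
+	prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0) == PR_SET_MEM_MODEL_DEFAULT
+
+i.e. requesting TSO succeeds, but the reported model stays DEFAULT,
+which on these CPUs is already at least as strict as TSO.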
+
+Known TSO implementations:
+- Nvidia Denver
+- Nvidia Carmel
+- Fujitsu A64FX
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+Reviewed-by: Neal Gompa <neal@gompa.dev>
+---
+ arch/arm64/Kconfig                    |  9 +++++++
+ arch/arm64/include/asm/cpufeature.h   |  4 +++
+ arch/arm64/kernel/Makefile            |  2 +-
+ arch/arm64/kernel/cpufeature.c        | 11 ++++----
+ arch/arm64/kernel/cpufeature_impdef.c | 38 +++++++++++++++++++++++++++
+ arch/arm64/kernel/process.c           | 24 +++++++++++++++++
+ arch/arm64/tools/cpucaps              |  1 +
+ 7 files changed, 83 insertions(+), 6 deletions(-)
+ create mode 100644 arch/arm64/kernel/cpufeature_impdef.c
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index b5df38c2a..dade81d8f 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -2160,6 +2160,15 @@ config ARM64_DEBUG_PRIORITY_MASKING
+ 	  If unsure, say N
+ endif # ARM64_PSEUDO_NMI
+ 
++config ARM64_MEMORY_MODEL_CONTROL
++	bool "Runtime memory model control"
++	help
++	  Some ARM64 CPUs support runtime switching of the CPU memory
++	  model, which can be useful to emulate other CPU architectures
++	  which have different memory models. Say Y to enable support
++	  for the PR_SET_MEM_MODEL/PR_GET_MEM_MODEL prctl() calls on
++	  CPUs with this feature.
++
+ config RELOCATABLE
+ 	bool "Build a relocatable kernel image" if EXPERT
+ 	select ARCH_HAS_RELR
+diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
+index 5bba39376..f83f951be 100644
+--- a/arch/arm64/include/asm/cpufeature.h
++++ b/arch/arm64/include/asm/cpufeature.h
+@@ -924,6 +924,10 @@ extern struct arm64_ftr_override arm64_sw_feature_override;
+ u32 get_kvm_ipa_limit(void);
+ void dump_cpu_features(void);
+ 
++void __init init_cpucap_indirect_list_impdef(void);
++void __init init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps);
++bool cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry);
++
+ #endif /* __ASSEMBLY__ */
+ 
+ #endif
+diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
+index d95b3d6b4..2a86fc69c 100644
+--- a/arch/arm64/kernel/Makefile
++++ b/arch/arm64/kernel/Makefile
+@@ -34,7 +34,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o	\
+ 			   cpufeature.o alternative.o cacheinfo.o	\
+ 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
+ 			   syscall.o proton-pack.o idreg-override.o idle.o	\
+-			   patching.o
++			   patching.o cpufeature_impdef.o
+ 
+ obj-$(CONFIG_COMPAT)			+= sys32.o signal32.o			\
+ 					   sys_compat.o
+diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
+index 444a73c2e..2f3b99ea5 100644
+--- a/arch/arm64/kernel/cpufeature.c
++++ b/arch/arm64/kernel/cpufeature.c
+@@ -965,7 +965,7 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
+ extern const struct arm64_cpu_capabilities arm64_errata[];
+ static const struct arm64_cpu_capabilities arm64_features[];
+ 
+-static void __init
++void __init
+ init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
+ {
+ 	for (; caps->matches; caps++) {
+@@ -1066,6 +1066,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
+ 	 * handle the boot CPU below.
+ 	 */
+ 	init_cpucap_indirect_list();
++	init_cpucap_indirect_list_impdef();
+ 
+ 	/*
+ 	 * Detect and enable early CPU capabilities based on the boot CPU,
+@@ -1437,8 +1438,8 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
+ 	return true;
+ }
+ 
+-static bool
+-feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
++bool
++cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
+ {
+ 	int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+ 						    entry->field_width,
+@@ -1474,14 +1475,14 @@ has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+ 	if (!mask)
+ 		return false;
+ 
+-	return feature_matches(val, entry);
++	return cpufeature_matches(val, entry);
+ }
+ 
+ static bool
+ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+ {
+ 	u64 val = read_scoped_sysreg(entry, scope);
+-	return feature_matches(val, entry);
++	return cpufeature_matches(val, entry);
+ }
+ 
+ const struct cpumask *system_32bit_el0_cpumask(void)
+diff --git a/arch/arm64/kernel/cpufeature_impdef.c b/arch/arm64/kernel/cpufeature_impdef.c
+new file mode 100644
+index 000000000..bb04a8e3d
+--- /dev/null
++++ b/arch/arm64/kernel/cpufeature_impdef.c
+@@ -0,0 +1,38 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Contains implementation-defined CPU feature definitions.
++ */
++
++#include <asm/cpufeature.h>
++
++#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
++static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope)
++{
++	/* List of CPUs that always use the TSO memory model */
++	static const struct midr_range fixed_tso_list[] = {
++		MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER),
++		MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
++		MIDR_ALL_VERSIONS(MIDR_FUJITSU_A64FX),
++		{ /* sentinel */ }
++	};
++
++	return is_midr_in_range_list(read_cpuid_id(), fixed_tso_list);
++}
++#endif
++
++static const struct arm64_cpu_capabilities arm64_impdef_features[] = {
++#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
++	{
++		.desc = "TSO memory model (Fixed)",
++		.capability = ARM64_HAS_TSO_FIXED,
++		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
++		.matches = has_tso_fixed,
++	},
++#endif
++	{},
++};
++
++void __init init_cpucap_indirect_list_impdef(void)
++{
++	init_cpucap_indirect_list_from_array(arm64_impdef_features);
++}
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index 0fcc4eb1a..2f37a56a4 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -41,6 +41,7 @@
+ #include
+ #include
+ #include
++#include <linux/memory_ordering_model.h>
+ 
+ #include
+ #include
+@@ -516,6 +517,25 @@ void update_sctlr_el1(u64 sctlr)
+ 	isb();
+ }
+ 
++#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
++int arch_prctl_mem_model_get(struct task_struct *t)
++{
++	return PR_SET_MEM_MODEL_DEFAULT;
++}
++
++int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
++{
++	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_FIXED) &&
++	    val == PR_SET_MEM_MODEL_TSO)
++		return 0;
++
++	if (val == PR_SET_MEM_MODEL_DEFAULT)
++		return 0;
++
++	return -EINVAL;
++}
++#endif
++
+ /*
+  * Thread switching.
+  */
+@@ -654,6 +674,10 @@ void arch_setup_new_exec(void)
+ 		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
+ 					 PR_SPEC_ENABLE);
+ 	}
++
++#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
++	arch_prctl_mem_model_set(current, PR_SET_MEM_MODEL_DEFAULT);
++#endif
+ }
+ 
+ #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
+index 5511bee15..419810c21 100644
+--- a/arch/arm64/tools/cpucaps
++++ b/arch/arm64/tools/cpucaps
+@@ -50,6 +50,7 @@ HAS_STAGE2_FWB
+ HAS_TCR2
+ HAS_TIDCP1
+ HAS_TLB_RANGE
++HAS_TSO_FIXED
+ HAS_VIRT_HOST_EXTN
+ HAS_WFXT
+ HW_DBM
+-- 
+2.45.1
+
diff --git a/patches/0018-arm64-Introduce-scaffolding-to-add-ACTLR_EL1-to-thre.patch b/patches/0018-arm64-Introduce-scaffolding-to-add-ACTLR_EL1-to-thre.patch
new file mode 100644
index 0000000..21c1197
--- /dev/null
+++ b/patches/0018-arm64-Introduce-scaffolding-to-add-ACTLR_EL1-to-thre.patch
@@ -0,0 +1,141 @@
+From c3564a75278be85e3e585fad1b1834a631b6e0e9 Mon Sep 17 00:00:00 2001
+From: Hector Martin <marcan@marcan.st>
+Date: Thu, 11 Apr 2024 09:51:22 +0900
+Subject: [PATCH 3/4] arm64: Introduce scaffolding to add ACTLR_EL1 to thread
+ state
+
+Some CPUs expose IMPDEF features in ACTLR_EL1 that can be meaningfully
+controlled per-thread (like TSO control on Apple cores). Add the basic
+scaffolding to save/restore this register as part of context switching.
+
+This mechanism is disabled by default, both by a config symbol and via
+a runtime check, which ensures it is never triggered unless the system
+is known to need it for some feature (which also implies that the
+layout of ACTLR_EL1 is uniform between all CPU core types).
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+Reviewed-by: Neal Gompa <neal@gompa.dev>
+---
+ arch/arm64/Kconfig                  |  3 +++
+ arch/arm64/include/asm/cpufeature.h |  5 +++++
+ arch/arm64/include/asm/processor.h  |  3 +++
+ arch/arm64/kernel/process.c         | 25 +++++++++++++++++++++++++
+ arch/arm64/kernel/setup.c           |  8 ++++++++
+ 5 files changed, 44 insertions(+)
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index dade81d8f..c6c8bb46b 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -409,6 +409,9 @@ config KASAN_SHADOW_OFFSET
+ config UNWIND_TABLES
+ 	bool
+ 
++config ARM64_ACTLR_STATE
++	bool
++
+ source "arch/arm64/Kconfig.platforms"
+ 
+ menu "Kernel Features"
+diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
+index f83f951be..8c5d5a03b 100644
+--- a/arch/arm64/include/asm/cpufeature.h
++++ b/arch/arm64/include/asm/cpufeature.h
+@@ -908,6 +908,11 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
+ 	return 8;
+ }
+ 
++static __always_inline bool system_has_actlr_state(void)
++{
++	return false;
++}
++
+ s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
+ struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
+ 
+diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
+index e5bc54522..e1ca89202 100644
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -179,6 +179,9 @@ struct thread_struct {
+ 	u64			sctlr_user;
+ 	u64			svcr;
+ 	u64			tpidr2_el0;
++#ifdef CONFIG_ARM64_ACTLR_STATE
++	u64			actlr;
++#endif
+ };
+ 
+ static inline unsigned int thread_get_vl(struct thread_struct *thread,
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index 2f37a56a4..235c965eb 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -375,6 +375,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+ 		if (system_supports_tpidr2())
+ 			p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ 
++#ifdef CONFIG_ARM64_ACTLR_STATE
++	if (system_has_actlr_state())
++		p->thread.actlr = read_sysreg(actlr_el1);
++#endif
++
+ 	if (stack_start) {
+ 		if (is_compat_thread(task_thread_info(p)))
+ 			childregs->compat_sp = stack_start;
+@@ -536,6 +541,25 @@ int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+ }
+ #endif
+ 
++#ifdef CONFIG_ARM64_ACTLR_STATE
++/*
++ * IMPDEF control register ACTLR_EL1 handling. Some CPUs use this to
++ * expose features that can be controlled by userspace.
++ */
++static void actlr_thread_switch(struct task_struct *next)
++{
++	if (!system_has_actlr_state())
++		return;
++
++	current->thread.actlr = read_sysreg(actlr_el1);
++	write_sysreg(next->thread.actlr, actlr_el1);
++}
++#else
++static inline void actlr_thread_switch(struct task_struct *next)
++{
++}
++#endif
++
+ /*
+  * Thread switching.
+  */
+@@ -553,6 +577,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
+ 	ssbs_thread_switch(next);
+ 	erratum_1418040_thread_switch(next);
+ 	ptrauth_thread_switch_user(next);
++	actlr_thread_switch(next);
+ 
+ 	/*
+ 	 * Complete any pending TLB or cache maintenance on this CPU in case
+diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
+index 417a8a86b..4580297d0 100644
+--- a/arch/arm64/kernel/setup.c
++++ b/arch/arm64/kernel/setup.c
+@@ -382,6 +382,14 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
+ 	 */
+ 	init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
+ #endif
++#ifdef CONFIG_ARM64_ACTLR_STATE
++	/* Store the boot CPU ACTLR_EL1 value as the default. This will only
++	 * be actually restored during context switching iff the platform is
++	 * known to use ACTLR_EL1 for exposable features and its layout is
++	 * known to be the same on all CPUs.
++	 */
++	init_task.thread.actlr = read_sysreg(actlr_el1);
++#endif
+ 
+ 	if (boot_args[1] || boot_args[2] || boot_args[3]) {
+ 		pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
+-- 
+2.45.1
+
diff --git a/patches/0019-arm64-Implement-Apple-IMPDEF-TSO-memory-model-contro.patch b/patches/0019-arm64-Implement-Apple-IMPDEF-TSO-memory-model-contro.patch
new file mode 100644
index 0000000..fbb3afd
--- /dev/null
+++ b/patches/0019-arm64-Implement-Apple-IMPDEF-TSO-memory-model-contro.patch
@@ -0,0 +1,181 @@
+From bab76e17f7b70036d6c9d85efbd77d503a0743dc Mon Sep 17 00:00:00 2001
+From: Hector Martin <marcan@marcan.st>
+Date: Thu, 11 Apr 2024 09:51:23 +0900
+Subject: [PATCH 4/4] arm64: Implement Apple IMPDEF TSO memory model control
+
+Apple CPUs may implement the TSO memory model as an optional
+configurable mode. This allows x86 emulators to simplify their
+load/store handling, greatly increasing performance.
+
+Expose this via the prctl PR_SET_MEM_MODEL_TSO mechanism. We use the
+Apple IMPDEF AIDR_EL1 register to check for the availability of TSO
+mode, and enable this codepath on all CPUs with an Apple implementer.
+
+This relies on the ACTLR_EL1 thread state scaffolding introduced
+earlier.
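+
+For example, an x86 emulator might enable TSO per emulation thread
+(a sketch, not part of this patch; run_x86_block() is a hypothetical
+stand-in for the emulator's own dispatch loop):
+
+	#include <stdbool.h>
+	#include <sys/prctl.h>
+
+	extern void run_x86_block(bool host_is_tso);
+
+	static void emu_thread_start(void)
+	{
+		/* Prefer hardware TSO; fall back to barriers on EINVAL. */
+		bool tso = !prctl(PR_SET_MEM_MODEL,
+				  PR_SET_MEM_MODEL_TSO, 0, 0, 0);
+
+		run_x86_block(tso);
+	}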
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+Reviewed-by: Neal Gompa <neal@gompa.dev>
+---
+ arch/arm64/Kconfig                        |  2 ++
+ arch/arm64/include/asm/apple_cpufeature.h | 15 +++++++++++++++
+ arch/arm64/include/asm/cpufeature.h       |  3 ++-
+ arch/arm64/kernel/cpufeature_impdef.c     | 23 +++++++++++++++++++++++
+ arch/arm64/kernel/process.c               | 22 ++++++++++++++++++++++
+ arch/arm64/tools/cpucaps                  |  1 +
+ 6 files changed, 65 insertions(+), 1 deletion(-)
+ create mode 100644 arch/arm64/include/asm/apple_cpufeature.h
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index c6c8bb46b..b4a54e3e2 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -2165,6 +2165,8 @@ endif # ARM64_PSEUDO_NMI
+ 
+ config ARM64_MEMORY_MODEL_CONTROL
+ 	bool "Runtime memory model control"
++	default ARCH_APPLE
++	select ARM64_ACTLR_STATE
+ 	help
+ 	  Some ARM64 CPUs support runtime switching of the CPU memory
+ 	  model, which can be useful to emulate other CPU architectures
+diff --git a/arch/arm64/include/asm/apple_cpufeature.h b/arch/arm64/include/asm/apple_cpufeature.h
+new file mode 100644
+index 000000000..4370d91ff
+--- /dev/null
++++ b/arch/arm64/include/asm/apple_cpufeature.h
+@@ -0,0 +1,15 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#ifndef __ASM_APPLE_CPUFEATURES_H
++#define __ASM_APPLE_CPUFEATURES_H
++
++#include <linux/bits.h>
++#include <asm/sysreg.h>
++
++#define AIDR_APPLE_TSO_SHIFT	9
++#define AIDR_APPLE_TSO		BIT(9)
++
++#define ACTLR_APPLE_TSO_SHIFT	1
++#define ACTLR_APPLE_TSO		BIT(1)
++
++#endif
+diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
+index 8c5d5a03b..fb2e732c4 100644
+--- a/arch/arm64/include/asm/cpufeature.h
++++ b/arch/arm64/include/asm/cpufeature.h
+@@ -910,7 +910,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
+ 
+ static __always_inline bool system_has_actlr_state(void)
+ {
+-	return false;
++	return IS_ENABLED(CONFIG_ARM64_ACTLR_STATE) &&
++		alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE);
+ }
+ 
+ s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
+diff --git a/arch/arm64/kernel/cpufeature_impdef.c b/arch/arm64/kernel/cpufeature_impdef.c
+index bb04a8e3d..9325d1eb1 100644
+--- a/arch/arm64/kernel/cpufeature_impdef.c
++++ b/arch/arm64/kernel/cpufeature_impdef.c
+@@ -4,8 +4,21 @@
+  */
+ 
+ #include <asm/cpufeature.h>
++#include <asm/apple_cpufeature.h>
+ 
+ #ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
++static bool has_apple_feature(const struct arm64_cpu_capabilities *entry, int scope)
++{
++	u64 val;
++	WARN_ON(scope != SCOPE_SYSTEM);
++
++	if (read_cpuid_implementor() != ARM_CPU_IMP_APPLE)
++		return false;
++
++	val = read_sysreg(aidr_el1);
++	return cpufeature_matches(val, entry);
++}
++
+ static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope)
+ {
+ 	/* List of CPUs that always use the TSO memory model */
+@@ -22,6 +35,16 @@ static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope)
+ 
+ static const struct arm64_cpu_capabilities arm64_impdef_features[] = {
+ #ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
++	{
++		.desc = "TSO memory model (Apple)",
++		.capability = ARM64_HAS_TSO_APPLE,
++		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
++		.matches = has_apple_feature,
++		.field_pos = AIDR_APPLE_TSO_SHIFT,
++		.field_width = 1,
++		.sign = FTR_UNSIGNED,
++		.min_field_value = 1,
++	},
+ 	{
+ 		.desc = "TSO memory model (Fixed)",
+ 		.capability = ARM64_HAS_TSO_FIXED,
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index 235c965eb..9cb54aa3b 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -44,6 +44,7 @@
+ #include
+ 
+ #include
++#include <asm/apple_cpufeature.h>
+ #include
+ #include
+ #include
+@@ -525,6 +526,10 @@
+ #ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+ int arch_prctl_mem_model_get(struct task_struct *t)
+ {
++	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE) &&
++	    t->thread.actlr & ACTLR_APPLE_TSO)
++		return PR_SET_MEM_MODEL_TSO;
++
+ 	return PR_SET_MEM_MODEL_DEFAULT;
+ }
+ 
+@@ -534,6 +539,23 @@ int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+ 	    val == PR_SET_MEM_MODEL_TSO)
+ 		return 0;
+ 
++	if (alternative_has_cap_unlikely(ARM64_HAS_TSO_APPLE)) {
++		WARN_ON(!system_has_actlr_state());
++
++		switch (val) {
++		case PR_SET_MEM_MODEL_TSO:
++			t->thread.actlr |= ACTLR_APPLE_TSO;
++			break;
++		case PR_SET_MEM_MODEL_DEFAULT:
++			t->thread.actlr &= ~ACTLR_APPLE_TSO;
++			break;
++		default:
++			return -EINVAL;
++		}
++		write_sysreg(t->thread.actlr, actlr_el1);
++		return 0;
++	}
++
+ 	if (val == PR_SET_MEM_MODEL_DEFAULT)
+ 		return 0;
+ 
+diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
+index 419810c21..f887e1742 100644
+--- a/arch/arm64/tools/cpucaps
++++ b/arch/arm64/tools/cpucaps
+@@ -50,6 +50,7 @@ HAS_STAGE2_FWB
+ HAS_TCR2
+ HAS_TIDCP1
+ HAS_TLB_RANGE
++HAS_TSO_APPLE
+ HAS_TSO_FIXED
+ HAS_VIRT_HOST_EXTN
+ HAS_WFXT
+-- 
+2.45.1
+

From 50b4eb77e8b4c8a0bf49aed9835c15604280a78d Mon Sep 17 00:00:00 2001
From: Sergio Lopez <slp@redhat.com>
Date: Sat, 7 Sep 2024 00:57:35 +0200
Subject: [PATCH 2/3] aarch64: enable ARM64_MEMORY_MODEL_CONTROL

Enable ARM64_MEMORY_MODEL_CONTROL, introduced by the patches added in
the previous commit, so guests on Apple Silicon machines can set the
memory model as desired.

Signed-off-by: Sergio Lopez <slp@redhat.com>
---
 config-libkrunfw_aarch64 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config-libkrunfw_aarch64 b/config-libkrunfw_aarch64
index 2f70172..59923b2 100644
--- a/config-libkrunfw_aarch64
+++ b/config-libkrunfw_aarch64
@@ -483,6 +483,7 @@ CONFIG_ARM64_EPAN=y
 CONFIG_ARM64_SVE=y
 CONFIG_ARM64_SME=y
 # CONFIG_ARM64_PSEUDO_NMI is not set
+CONFIG_ARM64_MEMORY_MODEL_CONTROL=y
 CONFIG_RELOCATABLE=y
 # CONFIG_RANDOMIZE_BASE is not set
 CONFIG_CC_HAVE_STACKPROTECTOR_SYSREG=y

From 26b90da34c56cb3dc84ce15048c626095dc5c91c Mon Sep 17 00:00:00 2001
From: Sergio Lopez <slp@redhat.com>
Date: Sat, 7 Sep 2024 00:59:49 +0200
Subject: [PATCH 3/3] Bump release to 4.3.1

This release just adds the patches for setting the memory model on
Apple Silicon guests and enables it.

Signed-off-by: Sergio Lopez <slp@redhat.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index df3c206..24a5264 100644
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,8 @@ KERNEL_PATCHES = $(shell find patches/ -name "0*.patch" | sort)
 KERNEL_C_BUNDLE = kernel.c
 
 ABI_VERSION = 4
-FULL_VERSION = 4.3.0
-TIMESTAMP = "Mon Aug 5 10:53:49 CEST 2024"
+FULL_VERSION = 4.3.1
+TIMESTAMP = "Sat Sep 7 00:58:59 CEST 2024"
 
 KERNEL_FLAGS = KBUILD_BUILD_TIMESTAMP=$(TIMESTAMP)
 KERNEL_FLAGS += KBUILD_BUILD_USER=root