Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add downstream patches for setting memory model on guests running on Apple Silicon #64

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ KERNEL_PATCHES = $(shell find patches/ -name "0*.patch" | sort)
KERNEL_C_BUNDLE = kernel.c

ABI_VERSION = 4
FULL_VERSION = 4.3.0
TIMESTAMP = "Mon Aug 5 10:53:49 CEST 2024"
FULL_VERSION = 4.3.1
TIMESTAMP = "Sat Sep 7 00:58:59 CEST 2024"

KERNEL_FLAGS = KBUILD_BUILD_TIMESTAMP=$(TIMESTAMP)
KERNEL_FLAGS += KBUILD_BUILD_USER=root
Expand Down
1 change: 1 addition & 0 deletions config-libkrunfw_aarch64
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ CONFIG_ARM64_EPAN=y
CONFIG_ARM64_SVE=y
CONFIG_ARM64_SME=y
# CONFIG_ARM64_PSEUDO_NMI is not set
CONFIG_ARM64_MEMORY_MODEL_CONTROL=y
CONFIG_RELOCATABLE=y
# CONFIG_RANDOMIZE_BASE is not set
CONFIG_CC_HAVE_STACKPROTECTOR_SYSREG=y
Expand Down
123 changes: 123 additions & 0 deletions patches/0016-prctl-Introduce-PR_-SET-GET-_MEM_MODEL.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
From e7f3885e24d060ae36aeccde44c03894ff4e3d6b Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 11 Apr 2024 09:51:20 +0900
Subject: [PATCH 1/4] prctl: Introduce PR_{SET,GET}_MEM_MODEL

On some architectures, it is possible to query and/or change the CPU
memory model. This allows userspace to switch to a stricter memory model
for performance reasons, such as when emulating code for another
architecture where that model is the default.

Introduce two prctls to allow userspace to query and set the memory
model for a thread. Two models are initially defined:

- PR_SET_MEM_MODEL_DEFAULT requests the default memory model for the
architecture.
- PR_SET_MEM_MODEL_TSO requests the x86 TSO memory model.

PR_SET_MEM_MODEL is allowed to set a stricter memory model than
requested if available, in which case it will return successfully. If
the requested memory model cannot be fulfilled, it will return an error.
The memory model that was actually set can be queried by a subsequent
call to PR_GET_MEM_MODEL.

Examples:
- On a CPU with no support for a memory model at least as strong as
TSO, PR_SET_MEM_MODEL(PR_SET_MEM_MODEL_TSO) fails.
- On a CPU with runtime-configurable TSO support, PR_SET_MEM_MODEL can
toggle the memory model between DEFAULT and TSO at will.
- On a CPU where the only memory model is at least as strict as TSO,
PR_GET_MEM_MODEL will return PR_SET_MEM_MODEL_DEFAULT, and
PR_SET_MEM_MODEL(PR_SET_MEM_MODEL_TSO) will return success but leave
the memory model at PR_SET_MEM_MODEL_DEFAULT. This implies that the
default is in fact at least as strict as TSO.

Signed-off-by: Hector Martin <marcan@marcan.st>
Reviewed-by: Neal Gompa <neal@gompa.dev>
---
include/linux/memory_ordering_model.h | 11 +++++++++++
include/uapi/linux/prctl.h | 5 +++++
kernel/sys.c | 21 +++++++++++++++++++++
3 files changed, 37 insertions(+)
create mode 100644 include/linux/memory_ordering_model.h

diff --git a/include/linux/memory_ordering_model.h b/include/linux/memory_ordering_model.h
new file mode 100644
index 000000000..267a12ca6
--- /dev/null
+++ b/include/linux/memory_ordering_model.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MEMORY_ORDERING_MODEL_H
+#define __ASM_MEMORY_ORDERING_MODEL_H
+
+/* Arch hooks to implement the PR_{GET,SET}_MEM_MODEL prctls */
+
+struct task_struct;
+int arch_prctl_mem_model_get(struct task_struct *t);
+int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val);
+
+#endif
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 370ed14b1..961216093 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -306,4 +306,9 @@ struct prctl_mm_map {
# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f

+#define PR_GET_MEM_MODEL 0x6d4d444c
+#define PR_SET_MEM_MODEL 0x4d4d444c
+# define PR_SET_MEM_MODEL_DEFAULT 0
+# define PR_SET_MEM_MODEL_TSO 1
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 7a4ae6d5a..54cff3b34 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -45,6 +45,7 @@
#include <linux/version.h>
#include <linux/ctype.h>
#include <linux/syscall_user_dispatch.h>
+#include <linux/memory_ordering_model.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -2429,6 +2430,16 @@ static int prctl_get_auxv(void __user *addr, unsigned long len)
return sizeof(mm->saved_auxv);
}

+int __weak arch_prctl_mem_model_get(struct task_struct *t)
+{
+ return -EINVAL;
+}
+
+int __weak arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+{
+ return -EINVAL;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2744,6 +2755,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_RISCV_V_GET_CONTROL:
error = RISCV_V_GET_CONTROL();
break;
+ case PR_GET_MEM_MODEL:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_mem_model_get(me);
+ break;
+ case PR_SET_MEM_MODEL:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_mem_model_set(me, arg2);
+ break;
default:
error = -EINVAL;
break;
--
2.45.1

Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
From 37e470e60bc5c0dad986f211ff05629bed0b9d47 Mon Sep 17 00:00:00 2001
From: Sergio Lopez <slp@redhat.com>
Date: Mon, 6 May 2024 16:47:51 +0200
Subject: [PATCH 2/4] arm64: Implement PR_{GET,SET}_MEM_MODEL for always-TSO
CPUs

Some ARM64 implementations are known to always use the TSO memory model.
Add trivial support for the PR_{GET,SET}_MEM_MODEL prctl, which allows
userspace to learn this fact.

Known TSO implementations:
- Nvidia Denver
- Nvidia Carmel
- Fujitsu A64FX

Signed-off-by: Hector Martin <marcan@marcan.st>
Reviewed-by: Neal Gompa <neal@gompa.dev>
---
arch/arm64/Kconfig | 9 +++++++
arch/arm64/include/asm/cpufeature.h | 4 +++
arch/arm64/kernel/Makefile | 2 +-
arch/arm64/kernel/cpufeature.c | 11 ++++----
arch/arm64/kernel/cpufeature_impdef.c | 38 +++++++++++++++++++++++++++
arch/arm64/kernel/process.c | 24 +++++++++++++++++
arch/arm64/tools/cpucaps | 1 +
7 files changed, 83 insertions(+), 6 deletions(-)
create mode 100644 arch/arm64/kernel/cpufeature_impdef.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b5df38c2a..dade81d8f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2160,6 +2160,15 @@ config ARM64_DEBUG_PRIORITY_MASKING
If unsure, say N
endif # ARM64_PSEUDO_NMI

+config ARM64_MEMORY_MODEL_CONTROL
+ bool "Runtime memory model control"
+ help
+ Some ARM64 CPUs support runtime switching of the CPU memory
+ model, which can be useful to emulate other CPU architectures
+ which have different memory models. Say Y to enable support
+ for the PR_SET_MEM_MODEL/PR_GET_MEM_MODEL prctl() calls on
+ CPUs with this feature.
+
config RELOCATABLE
bool "Build a relocatable kernel image" if EXPERT
select ARCH_HAS_RELR
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 5bba39376..f83f951be 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -924,6 +924,10 @@ extern struct arm64_ftr_override arm64_sw_feature_override;
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);

+void __init init_cpucap_indirect_list_impdef(void);
+void __init init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps);
+bool cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry);
+
#endif /* __ASSEMBLY__ */

#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d95b3d6b4..2a86fc69c 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o idreg-override.o idle.o \
- patching.o
+ patching.o cpufeature_impdef.o

obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 444a73c2e..2f3b99ea5 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -965,7 +965,7 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
extern const struct arm64_cpu_capabilities arm64_errata[];
static const struct arm64_cpu_capabilities arm64_features[];

-static void __init
+void __init
init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
{
for (; caps->matches; caps++) {
@@ -1066,6 +1066,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
* handle the boot CPU below.
*/
init_cpucap_indirect_list();
+ init_cpucap_indirect_list_impdef();

/*
* Detect and enable early CPU capabilities based on the boot CPU,
@@ -1437,8 +1438,8 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
return true;
}

-static bool
-feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
+bool
+cpufeature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
{
int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
entry->field_width,
@@ -1474,14 +1475,14 @@ has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
if (!mask)
return false;

- return feature_matches(val, entry);
+ return cpufeature_matches(val, entry);
}

static bool
has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
{
u64 val = read_scoped_sysreg(entry, scope);
- return feature_matches(val, entry);
+ return cpufeature_matches(val, entry);
}

const struct cpumask *system_32bit_el0_cpumask(void)
diff --git a/arch/arm64/kernel/cpufeature_impdef.c b/arch/arm64/kernel/cpufeature_impdef.c
new file mode 100644
index 000000000..bb04a8e3d
--- /dev/null
+++ b/arch/arm64/kernel/cpufeature_impdef.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Contains implementation-defined CPU feature definitions.
+ */
+
+#include <asm/cpufeature.h>
+
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+static bool has_tso_fixed(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ /* List of CPUs that always use the TSO memory model */
+ static const struct midr_range fixed_tso_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+ MIDR_ALL_VERSIONS(MIDR_FUJITSU_A64FX),
+ { /* sentinel */ }
+ };
+
+ return is_midr_in_range_list(read_cpuid_id(), fixed_tso_list);
+}
+#endif
+
+static const struct arm64_cpu_capabilities arm64_impdef_features[] = {
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+ {
+ .desc = "TSO memory model (Fixed)",
+ .capability = ARM64_HAS_TSO_FIXED,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_tso_fixed,
+ },
+#endif
+ {},
+};
+
+void __init init_cpucap_indirect_list_impdef(void)
+{
+ init_cpucap_indirect_list_from_array(arm64_impdef_features);
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0fcc4eb1a..2f37a56a4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -41,6 +41,7 @@
#include <linux/thread_info.h>
#include <linux/prctl.h>
#include <linux/stacktrace.h>
+#include <linux/memory_ordering_model.h>

#include <asm/alternative.h>
#include <asm/compat.h>
@@ -516,6 +517,25 @@ void update_sctlr_el1(u64 sctlr)
isb();
}

+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+int arch_prctl_mem_model_get(struct task_struct *t)
+{
+ return PR_SET_MEM_MODEL_DEFAULT;
+}
+
+int arch_prctl_mem_model_set(struct task_struct *t, unsigned long val)
+{
+ if (alternative_has_cap_unlikely(ARM64_HAS_TSO_FIXED) &&
+ val == PR_SET_MEM_MODEL_TSO)
+ return 0;
+
+ if (val == PR_SET_MEM_MODEL_DEFAULT)
+ return 0;
+
+ return -EINVAL;
+}
+#endif
+
/*
* Thread switching.
*/
@@ -654,6 +674,10 @@ void arch_setup_new_exec(void)
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
PR_SPEC_ENABLE);
}
+
+#ifdef CONFIG_ARM64_MEMORY_MODEL_CONTROL
+ arch_prctl_mem_model_set(current, PR_SET_MEM_MODEL_DEFAULT);
+#endif
}

#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 5511bee15..419810c21 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -50,6 +50,7 @@ HAS_STAGE2_FWB
HAS_TCR2
HAS_TIDCP1
HAS_TLB_RANGE
+HAS_TSO_FIXED
HAS_VIRT_HOST_EXTN
HAS_WFXT
HW_DBM
--
2.45.1

Loading
Loading