Xen 4.17.4-pre-patchset-1HEAD 4.17.4-pre-patchset-1 main

Signed-off-by: Tomáš Mózes <hydrapolic@gmail.com>
author: Tomáš Mózes <hydrapolic@gmail.com> 2024-04-05 08:59:40 +0200
committer: Tomáš Mózes <hydrapolic@gmail.com> 2024-04-05 08:59:40 +0200
commit: d0ce95087288b30e5e211bac8e9a0817f2effcf5 (patch)
tree: ce2e128cfdf8d491a494d6583979bc5330db21e2
parent: Xen 4.17.4-pre-patchset-0 (diff)
download: xen-upstream-patches-main.tar.gz
xen-upstream-patches-main.tar.bz2
xen-upstream-patches-main.zip
68 files changed, 6591 insertions, 22 deletions
diff --git a/0001-update-Xen-version-to-4.17.4-pre.patch b/0001-update-Xen-version-to-4.17.4-pre.patch
index b532743..e1070c9 100644
--- a/0001-update-Xen-version-to-4.17.4-pre.patch
+++ b/0001-update-Xen-version-to-4.17.4-pre.patch
@@ -1,7 +1,7 @@
 From 4f6e9d4327eb5252f1e8cac97a095d8b8485dadb Mon Sep 17 00:00:00 2001
 From: Jan Beulich <jbeulich@suse.com>
 Date: Tue, 30 Jan 2024 14:36:44 +0100
-Subject: [PATCH 01/10] update Xen version to 4.17.4-pre
+Subject: [PATCH 01/67] update Xen version to 4.17.4-pre
 
 ---
  xen/Makefile | 2 +-
@@ -21,5 +21,5 @@ index a46e6330db..dd0b004e1c 100644
  -include xen-version
  
 -- 
-2.43.0
+2.44.0
 
diff --git a/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch b/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch
index d91802f..bafad55 100644
--- a/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch
+++ b/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch
@@ -1,7 +1,7 @@
 From f9e1ed51bdba31017ea17e1819eb2ade6b5c8615 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
 Date: Tue, 30 Jan 2024 14:37:39 +0100
-Subject: [PATCH 02/10] pci: fail device assignment if phantom functions cannot
+Subject: [PATCH 02/67] pci: fail device assignment if phantom functions cannot
  be assigned
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
@@ -87,5 +87,5 @@ index 07d1986d33..8c62b14d19 100644
      else if ( d == dom_io )
          pdev->quarantine = true;
 -- 
-2.43.0
+2.44.0
 
diff --git a/0003-VT-d-Fix-else-vs-endif-misplacement.patch b/0003-VT-d-Fix-else-vs-endif-misplacement.patch
index 2e7f78d..622fa18 100644
--- a/0003-VT-d-Fix-else-vs-endif-misplacement.patch
+++ b/0003-VT-d-Fix-else-vs-endif-misplacement.patch
@@ -1,7 +1,7 @@
 From 6b1864afc14d484cdbc9754ce3172ac3dc189846 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Tue, 30 Jan 2024 14:38:38 +0100
-Subject: [PATCH 03/10] VT-d: Fix "else" vs "#endif" misplacement
+Subject: [PATCH 03/67] VT-d: Fix "else" vs "#endif" misplacement
 
 In domain_pgd_maddr() the "#endif" is misplaced with respect to "else".  This
 generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body
@@ -66,5 +66,5 @@ index b4c11a6b48..908b3ba6ee 100644
          if ( !hd->arch.vtd.pgd_maddr )
          {
 -- 
-2.43.0
+2.44.0
 
diff --git a/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch b/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch
index f1289aa..fa90a46 100644
--- a/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch
+++ b/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch
@@ -1,7 +1,7 @@
 From abcc32f0634627fe21117a48bd10e792bfbdd6dc Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
 Date: Fri, 2 Feb 2024 08:01:09 +0100
-Subject: [PATCH 04/10] x86/amd: Extend CPU erratum #1474 fix to more affected
+Subject: [PATCH 04/67] x86/amd: Extend CPU erratum #1474 fix to more affected
  models
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
@@ -119,5 +119,5 @@ index 29ae97e7c0..3d85e9797d 100644
 -presmp_initcall(zen2_c6_errata_check);
 +presmp_initcall(amd_check_erratum_1474);
 -- 
-2.43.0
+2.44.0
 
diff --git a/0005-CirrusCI-drop-FreeBSD-12.patch b/0005-CirrusCI-drop-FreeBSD-12.patch
index cca7bb0..dac712b 100644
--- a/0005-CirrusCI-drop-FreeBSD-12.patch
+++ b/0005-CirrusCI-drop-FreeBSD-12.patch
@@ -1,7 +1,7 @@
 From 0ef1fb43ddd61b3c4c953e833e012ac21ad5ca0f Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
 Date: Fri, 2 Feb 2024 08:01:50 +0100
-Subject: [PATCH 05/10] CirrusCI: drop FreeBSD 12
+Subject: [PATCH 05/67] CirrusCI: drop FreeBSD 12
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -35,5 +35,5 @@ index 7e0beb200d..63f3afb104 100644
    name: 'FreeBSD 13'
    freebsd_instance:
 -- 
-2.43.0
+2.44.0
 
diff --git a/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch b/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch
index dc64ad6..ce07803 100644
--- a/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch
+++ b/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch
@@ -1,7 +1,7 @@
 From d0ad2cc5eac1b5d3cfd14204d377ce2384f52607 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
 Date: Fri, 2 Feb 2024 08:02:20 +0100
-Subject: [PATCH 06/10] x86/intel: ensure Global Performance Counter Control is
+Subject: [PATCH 06/67] x86/intel: ensure Global Performance Counter Control is
  setup correctly
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
@@ -70,5 +70,5 @@ index b40ac696e6..96723b5d44 100644
  
  	if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
 -- 
-2.43.0
+2.44.0
 
diff --git a/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch b/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch
index a1937a7..2100acc 100644
--- a/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch
+++ b/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch
@@ -1,7 +1,7 @@
 From eca5416f9b0e179de9553900de8de660ab09199d Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Fri, 2 Feb 2024 08:02:51 +0100
-Subject: [PATCH 07/10] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT
+Subject: [PATCH 07/67] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT
 
 When receiving an INIT, a prior bugfix tried to ignore the INIT and continue
 onwards.
@@ -61,5 +61,5 @@ index 072288a5ef..31f4a861c6 100644
          break;
      case EXIT_REASON_TRIPLE_FAULT:
 -- 
-2.43.0
+2.44.0
 
diff --git a/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch b/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch
index 12c2d59..3af45e8 100644
--- a/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch
+++ b/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch
@@ -1,7 +1,7 @@
 From 7bd612727df792671e44152a8205f0cf821ad984 Mon Sep 17 00:00:00 2001
 From: Andrew Cooper <andrew.cooper3@citrix.com>
 Date: Fri, 2 Feb 2024 08:03:26 +0100
-Subject: [PATCH 08/10] x86/vmx: Disallow the use of inactivity states
+Subject: [PATCH 08/67] x86/vmx: Disallow the use of inactivity states
 
 Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and
 enter the vCPU.  Luckily for us, nested-virt is explicitly unsupported for
@@ -122,5 +122,5 @@ index 78404e42b3..0af021d5f5 100644
  #define VMX_MISC_CR3_TARGET                     0x01ff0000
  #define VMX_MISC_VMWRITE_ALL                    0x20000000
 -- 
-2.43.0
+2.44.0
 
diff --git a/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch b/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch
index 9ee7104..f33d27d 100644
--- a/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch
+++ b/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch
@@ -1,7 +1,7 @@
 From afb85cf1e8f165abf88de9d8a6df625692a753b1 Mon Sep 17 00:00:00 2001
 From: Michal Orzel <michal.orzel@amd.com>
 Date: Fri, 2 Feb 2024 08:04:07 +0100
-Subject: [PATCH 09/10] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps
+Subject: [PATCH 09/67] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps
  to $(targets)
 
 At the moment, trying to run xencov read/reset (calling SYSCTL_coverage_op
@@ -66,5 +66,5 @@ index 75aaefa2e3..4d14fd61ba 100644
 -extra-y += libfdt-temp.o $(LIBFDT_OBJS)
 +targets += libfdt-temp.o $(LIBFDT_OBJS)
 -- 
-2.43.0
+2.44.0
 
diff --git a/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch b/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch
index ba99063..9b3b9a0 100644
--- a/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch
+++ b/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch
@@ -1,7 +1,7 @@
 From 091466ba55d1e2e75738f751818ace2e3ed08ccf Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
 Date: Fri, 2 Feb 2024 08:04:33 +0100
-Subject: [PATCH 10/10] x86/p2m-pt: fix off by one in entry check assert
+Subject: [PATCH 10/67] x86/p2m-pt: fix off by one in entry check assert
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -32,5 +32,5 @@ index eaba2b0fb4..f02ebae372 100644
                new == p2m_mmio_dm )
          ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN));
 -- 
-2.43.0
+2.44.0
 
diff --git a/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch b/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch
new file mode 100644
index 0000000..6bf11d9
--- /dev/null
+++ b/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch
@@ -0,0 +1,67 @@
+From 61da71968ea44964fd1dd2e449b053c77eb83139 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Cyril=20R=C3=A9bert=20=28zithro=29?= <slack@rabbit.lu>
+Date: Tue, 27 Feb 2024 14:06:53 +0100
+Subject: [PATCH 11/67] tools/xentop: fix sorting bug for some columns
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Sort doesn't work on columns VBD_OO, VBD_RD, VBD_WR and VBD_RSECT.
+Fix by adjusting variable names in compare functions.
+Bug fix only. No functional change.
+
+Fixes: 91c3e3dc91d6 ("tools/xentop: Display '-' when stats are not available.")
+Signed-off-by: Cyril Rébert (zithro) <slack@rabbit.lu>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: 29f17d837421f13c0e0010802de1b2d51d2ded4a
+master date: 2024-02-05 17:58:23 +0000
+---
+ tools/xentop/xentop.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c
+index 950e8935c4..545bd5e96d 100644
+--- a/tools/xentop/xentop.c
++++ b/tools/xentop/xentop.c
+@@ -684,7 +684,7 @@ static int compare_vbd_oo(xenstat_domain *domain1, xenstat_domain *domain2)
+ 	unsigned long long dom1_vbd_oo = 0, dom2_vbd_oo = 0;
+ 
+ 	tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom1_vbd_oo);
+-	tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom2_vbd_oo);
++	tot_vbd_reqs(domain2, FIELD_VBD_OO, &dom2_vbd_oo);
+ 
+ 	return -compare(dom1_vbd_oo, dom2_vbd_oo);
+ }
+@@ -711,9 +711,9 @@ static int compare_vbd_rd(xenstat_domain *domain1, xenstat_domain *domain2)
+ 	unsigned long long dom1_vbd_rd = 0, dom2_vbd_rd = 0;
+ 
+ 	tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom1_vbd_rd);
+-	tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom2_vbd_rd);
++	tot_vbd_reqs(domain2, FIELD_VBD_RD, &dom2_vbd_rd);
+ 
+-	return -compare(dom1_vbd_rd, dom1_vbd_rd);
++	return -compare(dom1_vbd_rd, dom2_vbd_rd);
+ }
+ 
+ /* Prints number of total VBD READ requests statistic */
+@@ -738,7 +738,7 @@ static int compare_vbd_wr(xenstat_domain *domain1, xenstat_domain *domain2)
+ 	unsigned long long dom1_vbd_wr = 0, dom2_vbd_wr = 0;
+ 
+ 	tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom1_vbd_wr);
+-	tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom2_vbd_wr);
++	tot_vbd_reqs(domain2, FIELD_VBD_WR, &dom2_vbd_wr);
+ 
+ 	return -compare(dom1_vbd_wr, dom2_vbd_wr);
+ }
+@@ -765,7 +765,7 @@ static int compare_vbd_rsect(xenstat_domain *domain1, xenstat_domain *domain2)
+ 	unsigned long long dom1_vbd_rsect = 0, dom2_vbd_rsect = 0;
+ 
+ 	tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom1_vbd_rsect);
+-	tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom2_vbd_rsect);
++	tot_vbd_reqs(domain2, FIELD_VBD_RSECT, &dom2_vbd_rsect);
+ 
+ 	return -compare(dom1_vbd_rsect, dom2_vbd_rsect);
+ }
+-- 
+2.44.0
+
diff --git a/0012-amd-vi-fix-IVMD-memory-type-checks.patch b/0012-amd-vi-fix-IVMD-memory-type-checks.patch
new file mode 100644
index 0000000..f38e39e
--- /dev/null
+++ b/0012-amd-vi-fix-IVMD-memory-type-checks.patch
@@ -0,0 +1,53 @@
+From 463aaf3fbf62d24e898ae0c2ba53d85ca0f94d3f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 27 Feb 2024 14:07:12 +0100
+Subject: [PATCH 12/67] amd-vi: fix IVMD memory type checks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The current code that parses the IVMD blocks is relaxed with regard to the
+restriction that such unity regions should always fall into memory ranges
+marked as reserved in the memory map.
+
+However the type checks for the IVMD addresses are inverted, and as a result
+IVMD ranges falling into RAM areas are accepted.  Note that having such ranges
+in the first place is a firmware bug, as IVMD should always fall into reserved
+ranges.
+
+Fixes: ed6c77ebf0c1 ('AMD/IOMMU: check / convert IVMD ranges for being / to be reserved')
+Reported-by: Ox <oxjo@proton.me>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Tested-by: oxjo <oxjo@proton.me>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 83afa313583019d9f159c122cecf867735d27ec5
+master date: 2024-02-06 11:56:13 +0100
+---
+ xen/drivers/passthrough/amd/iommu_acpi.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c
+index 3b577c9b39..3a7045c39b 100644
+--- a/xen/drivers/passthrough/amd/iommu_acpi.c
++++ b/xen/drivers/passthrough/amd/iommu_acpi.c
+@@ -426,9 +426,14 @@ static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block)
+                 return -EIO;
+             }
+ 
+-            /* Types which won't be handed out are considered good enough. */
+-            if ( !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI |
+-                           RAM_TYPE_UNUSABLE)) )
++            /*
++             * Types which aren't RAM are considered good enough.
++             * Note that a page being partially RESERVED, ACPI or UNUSABLE will
++             * force Xen into assuming the whole page as having that type in
++             * practice.
++             */
++            if ( type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI |
++                         RAM_TYPE_UNUSABLE) )
+                 continue;
+ 
+             AMD_IOMMU_ERROR("IVMD: page at %lx can't be converted\n", addr);
+-- 
+2.44.0
+
diff --git a/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch b/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch
new file mode 100644
index 0000000..2a14354
--- /dev/null
+++ b/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch
@@ -0,0 +1,86 @@
+From 415f770d23f9fcbc02436560fa6583dcd8e1343f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Petr=20Bene=C5=A1?= <w1benny@gmail.com>
+Date: Tue, 27 Feb 2024 14:07:45 +0100
+Subject: [PATCH 13/67] x86/hvm: Fix fast singlestep state persistence
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This patch addresses an issue where the fast singlestep setting would persist
+despite xc_domain_debug_control being called with XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF.
+Specifically, if fast singlestep was enabled in a VMI session and that session
+stopped before the MTF trap occurred, the fast singlestep setting remained
+active even though MTF itself was disabled.  This led to a situation where, upon
+starting a new VMI session, the first event to trigger an EPT violation would
+cause the corresponding EPT event callback to be skipped due to the lingering
+fast singlestep setting.
+
+The fix ensures that the fast singlestep setting is properly reset when
+disabling single step debugging operations.
+
+Signed-off-by: Petr Beneš <w1benny@gmail.com>
+Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com>
+master commit: 897def94b56175ce569673a05909d2f223e1e749
+master date: 2024-02-12 09:37:58 +0100
+---
+ xen/arch/x86/hvm/hvm.c | 34 ++++++++++++++++++++++++----------
+ 1 file changed, 24 insertions(+), 10 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index d6c6ab8897..558dc3eddc 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -5153,26 +5153,40 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
+ 
+ int hvm_debug_op(struct vcpu *v, int32_t op)
+ {
+-    int rc;
++    int rc = 0;
+ 
+     switch ( op )
+     {
+         case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
+         case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
+-            rc = -EOPNOTSUPP;
+             if ( !cpu_has_monitor_trap_flag )
+-                break;
+-            rc = 0;
+-            vcpu_pause(v);
+-            v->arch.hvm.single_step =
+-                (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON);
+-            vcpu_unpause(v); /* guest will latch new state */
++                return -EOPNOTSUPP;
+             break;
+         default:
+-            rc = -ENOSYS;
+-            break;
++            return -ENOSYS;
++    }
++
++    vcpu_pause(v);
++
++    switch ( op )
++    {
++    case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
++        v->arch.hvm.single_step = true;
++        break;
++
++    case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
++        v->arch.hvm.single_step = false;
++        v->arch.hvm.fast_single_step.enabled = false;
++        v->arch.hvm.fast_single_step.p2midx = 0;
++        break;
++
++    default: /* Excluded above */
++        ASSERT_UNREACHABLE();
++        return -ENOSYS;
+     }
+ 
++    vcpu_unpause(v); /* guest will latch new state */
++
+     return rc;
+ }
+ 
+-- 
+2.44.0
+
diff --git a/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch b/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch
new file mode 100644
index 0000000..6536674
--- /dev/null
+++ b/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch
@@ -0,0 +1,63 @@
+From b3ae0e6201495216b12157bd8b2382b28fdd7dae Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 27 Feb 2024 14:08:20 +0100
+Subject: [PATCH 14/67] x86/HVM: tidy state on hvmemul_map_linear_addr()'s
+ error path
+
+While in the vast majority of cases failure of the function will not
+be followed by re-invocation with the same emulation context, a few
+very specific insns - involving multiple independent writes, e.g. ENTER
+and PUSHA - exist where this can happen. Since failure of the function
+only signals to the caller that it ought to try an MMIO write instead,
+such failure also cannot be assumed to result in wholesale failure of
+emulation of the current insn. Instead we have to maintain internal
+state such that another invocation of the function with the same
+emulation context remains possible. To achieve that we need to reset MFN
+slots after putting page references on the error path.
+
+Note that all of this affects debugging code only, in causing an
+assertion to trigger (higher up in the function). There's otherwise no
+misbehavior - such a "leftover" slot would simply be overwritten by new
+contents in a release build.
+
+Also extend the related unmap() assertion, to further check for MFN 0.
+
+Fixes: 8cbd4fb0b7ea ("x86/hvm: implement hvmemul_write() using real mappings")
+Reported-by: Manuel Andreas <manuel.andreas@tum.de>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Paul Durrant <paul@xen.org>
+master commit: e72f951df407bc3be82faac64d8733a270036ba1
+master date: 2024-02-13 09:36:14 +0100
+---
+ xen/arch/x86/hvm/emulate.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
+index 275451dd36..27928dc3f3 100644
+--- a/xen/arch/x86/hvm/emulate.c
++++ b/xen/arch/x86/hvm/emulate.c
+@@ -697,7 +697,12 @@ static void *hvmemul_map_linear_addr(
+  out:
+     /* Drop all held references. */
+     while ( mfn-- > hvmemul_ctxt->mfn )
++    {
+         put_page(mfn_to_page(*mfn));
++#ifndef NDEBUG /* Clean slot for a subsequent map()'s error checking. */
++        *mfn = _mfn(0);
++#endif
++    }
+ 
+     return err;
+ }
+@@ -719,7 +724,7 @@ static void hvmemul_unmap_linear_addr(
+ 
+     for ( i = 0; i < nr_frames; i++ )
+     {
+-        ASSERT(mfn_valid(*mfn));
++        ASSERT(mfn_x(*mfn) && mfn_valid(*mfn));
+         paging_mark_dirty(currd, *mfn);
+         put_page(mfn_to_page(*mfn));
+ 
+-- 
+2.44.0
+
diff --git a/0015-build-Replace-which-with-command-v.patch b/0015-build-Replace-which-with-command-v.patch
new file mode 100644
index 0000000..57f21d4
--- /dev/null
+++ b/0015-build-Replace-which-with-command-v.patch
@@ -0,0 +1,57 @@
+From 1330a5fe44ca91f98857b53fe8bbe06522d9db27 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Tue, 27 Feb 2024 14:08:50 +0100
+Subject: [PATCH 15/67] build: Replace `which` with `command -v`
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The `which` command is not standard, may not exist on the build host,
+or may not behave as expected by the build system. It is recommended
+to use `command -v` to find out if a command exists and to get its path,
+and it's part of a POSIX shell standard (at least, it seems to be
+mandatory since IEEE Std 1003.1-2008, but was optional before).
+
+Fixes: c8a8645f1efe ("xen/build: Automatically locate a suitable python interpreter")
+Fixes: 3b47bcdb6d38 ("xen/build: Use a distro version of figlet")
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: f93629b18b528a5ab1b1092949c5420069c7226c
+master date: 2024-02-19 12:45:48 +0100
+---
+ xen/Makefile | 4 ++--
+ xen/build.mk | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index dd0b004e1c..7ea13a6791 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -25,8 +25,8 @@ export XEN_BUILD_HOST	:= $(shell hostname)
+ endif
+ 
+ # Best effort attempt to find a python interpreter, defaulting to Python 3 if
+-# available.  Fall back to just `python` if `which` is nowhere to be found.
+-PYTHON_INTERPRETER	:= $(word 1,$(shell which python3 python python2 2>/dev/null) python)
++# available.  Fall back to just `python`.
++PYTHON_INTERPRETER	:= $(word 1,$(shell command -v python3 || command -v python || command -v python2) python)
+ export PYTHON		?= $(PYTHON_INTERPRETER)
+ 
+ export CHECKPOLICY	?= checkpolicy
+diff --git a/xen/build.mk b/xen/build.mk
+index 9ecb104f1e..b489f77b7c 100644
+--- a/xen/build.mk
++++ b/xen/build.mk
+@@ -1,6 +1,6 @@
+ quiet_cmd_banner = BANNER  $@
+ define cmd_banner
+-    if which figlet >/dev/null 2>&1 ; then \
++    if command -v figlet >/dev/null 2>&1 ; then \
+ 	echo " Xen $(XEN_FULLVERSION)" | figlet -f $< > $@.tmp; \
+     else \
+ 	echo " Xen $(XEN_FULLVERSION)" > $@.tmp; \
+-- 
+2.44.0
+
diff --git a/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch b/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch
new file mode 100644
index 0000000..f75e07c
--- /dev/null
+++ b/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch
@@ -0,0 +1,50 @@
+From b9745280736ee526374873aa3c4142596e2ba10b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+ <marmarek@invisiblethingslab.com>
+Date: Tue, 27 Feb 2024 14:09:19 +0100
+Subject: [PATCH 16/67] libxl: Disable relocating memory for qemu-xen in
+ stubdomain too
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+According to comments (and experiments) qemu-xen cannot handle memory
+relocation done by hvmloader. The code was already disabled when running
+qemu-xen in dom0 (see libxl__spawn_local_dm()), but it was missed when
+adding qemu-xen support to stubdomain. Adjust libxl__spawn_stub_dm() to
+be consistent in this regard.
+
+Reported-by: Neowutran <xen@neowutran.ovh>
+Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
+Acked-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: 97883aa269f6745a6ded232be3a855abb1297e0d
+master date: 2024-02-22 11:48:22 +0100
+---
+ tools/libs/light/libxl_dm.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index 14b593110f..ed620a9d8e 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -2432,6 +2432,16 @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss)
+                         "%s",
+                         libxl_bios_type_to_string(guest_config->b_info.u.hvm.bios));
+     }
++    /* Disable relocating memory to make the MMIO hole larger
++     * unless we're running qemu-traditional and vNUMA is not
++     * configured. */
++    libxl__xs_printf(gc, XBT_NULL,
++                     libxl__sprintf(gc, "%s/hvmloader/allow-memory-relocate",
++                                    libxl__xs_get_dompath(gc, guest_domid)),
++                     "%d",
++                     guest_config->b_info.device_model_version
++                        == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL &&
++                     !libxl__vnuma_configured(&guest_config->b_info));
+     ret = xc_domain_set_target(ctx->xch, dm_domid, guest_domid);
+     if (ret<0) {
+         LOGED(ERROR, guest_domid, "setting target domain %d -> %d",
+-- 
+2.44.0
+
diff --git a/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch b/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch
new file mode 100644
index 0000000..1bb3aa8
--- /dev/null
+++ b/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch
@@ -0,0 +1,58 @@
+From ea869977271f93945451908be9b6117ffd1fb02d Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 27 Feb 2024 14:09:37 +0100
+Subject: [PATCH 17/67] build: make sure build fails when running kconfig fails
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Because of using "-include", failure to (re)build auto.conf (with
+auto.conf.cmd produced as a secondary target) won't stop make from
+continuing the build. Arrange for it being possible to drop the - from
+Rules.mk, requiring that the include be skipped for tools-only targets.
+Note that relying on the inclusion in those cases wouldn't be correct
+anyway, as it might be a stale file (yet to be rebuilt) which would be
+included, while during initial build, the file would be absent
+altogether.
+
+Fixes: 8d4c17a90b0a ("xen/build: silence make warnings about missing auto.conf*")
+Reported-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: d34e5fa2e8db19f23081f46a3e710bb122130691
+master date: 2024-02-22 11:52:47 +0100
+---
+ xen/Makefile | 1 +
+ xen/Rules.mk | 4 +++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/xen/Makefile b/xen/Makefile
+index 7ea13a6791..bac3684a36 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -374,6 +374,7 @@ $(KCONFIG_CONFIG): tools_fixdep
+ # This exploits the 'multi-target pattern rule' trick.
+ # The syncconfig should be executed only once to make all the targets.
+ include/config/%.conf include/config/%.conf.cmd: $(KCONFIG_CONFIG)
++	$(Q)rm -f include/config/auto.conf
+ 	$(Q)$(MAKE) $(build)=tools/kconfig syncconfig
+ 
+ ifeq ($(CONFIG_DEBUG),y)
+diff --git a/xen/Rules.mk b/xen/Rules.mk
+index 8af3dd7277..d759cccee3 100644
+--- a/xen/Rules.mk
++++ b/xen/Rules.mk
+@@ -15,7 +15,9 @@ srcdir := $(srctree)/$(src)
+ PHONY := __build
+ __build:
+ 
+--include $(objtree)/include/config/auto.conf
++ifneq ($(firstword $(subst /, ,$(obj))),tools)
++include $(objtree)/include/config/auto.conf
++endif
+ 
+ include $(XEN_ROOT)/Config.mk
+ include $(srctree)/scripts/Kbuild.include
+-- 
+2.44.0
+
diff --git a/0018-x86emul-add-missing-EVEX.R-checks.patch b/0018-x86emul-add-missing-EVEX.R-checks.patch
new file mode 100644
index 0000000..12e7702
--- /dev/null
+++ b/0018-x86emul-add-missing-EVEX.R-checks.patch
@@ -0,0 +1,50 @@
+From 16f2e47eb1207d866f95cf694a60a7ceb8f96a36 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 27 Feb 2024 14:09:55 +0100
+Subject: [PATCH 18/67] x86emul: add missing EVEX.R' checks
+
+EVEX.R' is not ignored in 64-bit code when encoding a GPR or mask
+register. While for mask registers suitable checks are in place (there
+also covering EVEX.R), they were missing for the few cases where in
+EVEX-encoded instructions ModR/M.reg encodes a GPR. While for VPEXTRW
+the bit is replaced before an emulation stub is invoked, for
+VCVT{,T}{S,D,H}2{,U}SI this actually would have led to #UD from inside
+an emulation stub, in turn raising #UD to the guest, but accompanied by
+log messages indicating something's wrong in Xen nevertheless.
+
+Fixes: 001bd91ad864 ("x86emul: support AVX512{F,BW,DQ} extract insns")
+Fixes: baf4a376f550 ("x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: cb319824bfa8d3c9ea0410cc71daaedc3e11aa2a
+master date: 2024-02-22 11:54:07 +0100
+---
+ xen/arch/x86/x86_emulate/x86_emulate.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
+index 0c0336f737..995670cbc8 100644
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -6829,7 +6829,8 @@ x86_emulate(
+     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
+     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */
+     CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */
+-        generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
++        generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R ||
++                               evex.opmsk ||
+                                (ea.type != OP_REG && evex.brs)),
+                               EXC_UD);
+         host_and_vcpu_must_have(avx512f);
+@@ -10705,7 +10706,7 @@ x86_emulate(
+         goto pextr;
+ 
+     case X86EMUL_OPC_EVEX_66(0x0f, 0xc5):   /* vpextrw $imm8,xmm,reg */
+-        generate_exception_if(ea.type != OP_REG, EXC_UD);
++        generate_exception_if(ea.type != OP_REG || !evex.R, EXC_UD);
+         /* Convert to alternative encoding: We want to use a memory operand. */
+         evex.opcx = ext_0f3a;
+         b = 0x15;
+-- 
+2.44.0
+
diff --git a/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch b/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch
new file mode 100644
index 0000000..1676f7a
--- /dev/null
+++ b/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch
@@ -0,0 +1,36 @@
+From f6b12792542e372f36a71ea4c2563e6dd6e4fa57 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 27 Feb 2024 14:10:24 +0100
+Subject: [PATCH 19/67] xen/livepatch: fix norevert test hook setup typo
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The test code has a typo in using LIVEPATCH_APPLY_HOOK() instead of
+LIVEPATCH_REVERT_HOOK().
+
+Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: f0622dd4fd6ae6ddb523a45d89ed9b8f3a9a8f36
+master date: 2024-02-26 10:13:46 +0100
+---
+ xen/test/livepatch/xen_action_hooks_norevert.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c
+index 3e21ade6ab..c173855192 100644
+--- a/xen/test/livepatch/xen_action_hooks_norevert.c
++++ b/xen/test/livepatch/xen_action_hooks_norevert.c
+@@ -120,7 +120,7 @@ static void post_revert_hook(livepatch_payload_t *payload)
+     printk(KERN_DEBUG "%s: Hook done.\n", __func__);
+ }
+ 
+-LIVEPATCH_APPLY_HOOK(revert_hook);
++LIVEPATCH_REVERT_HOOK(revert_hook);
+ 
+ LIVEPATCH_PREAPPLY_HOOK(pre_apply_hook);
+ LIVEPATCH_POSTAPPLY_HOOK(post_apply_hook);
+-- 
+2.44.0
+
diff --git a/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch b/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch
new file mode 100644
index 0000000..b47d9ee
--- /dev/null
+++ b/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch
@@ -0,0 +1,38 @@
+From 229e8a72ee4cde5698aaf42cc59ae57446dce60f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 27 Feb 2024 14:10:39 +0100
+Subject: [PATCH 20/67] xen/cmdline: fix printf format specifier in
+ no_config_param()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+'*' sets the width field, which is the minimum number of characters to output,
+but what we want in no_config_param() is the precision instead, which is '.*'
+as it imposes a maximum limit on the output.
+
+Fixes: 68d757df8dd2 ('x86/pv: Options to disable and/or compile out 32bit PV support')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: ef101f525173cf51dc70f4c77862f6f10a8ddccf
+master date: 2024-02-26 10:17:40 +0100
+---
+ xen/include/xen/param.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/include/xen/param.h b/xen/include/xen/param.h
+index 93c3fe7cb7..e02e49635c 100644
+--- a/xen/include/xen/param.h
++++ b/xen/include/xen/param.h
+@@ -191,7 +191,7 @@ static inline void no_config_param(const char *cfg, const char *param,
+ {
+     int len = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s);
+ 
+-    printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%*s' setting\n",
++    printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%.*s' setting\n",
+            cfg, param, len, s);
+ }
+ 
+-- 
+2.44.0
+
diff --git a/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch b/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch
new file mode 100644
index 0000000..ab050dd
--- /dev/null
+++ b/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch
@@ -0,0 +1,141 @@
+From 1aafe054e7d1efbf8e8482a9cdd4be5753b79e2f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 27 Feb 2024 14:11:04 +0100
+Subject: [PATCH 21/67] x86/altcall: use a union as register type for function
+ parameters on clang
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The current code for alternative calls uses the caller parameter types as the
+types for the register variables that serve as function parameters:
+
+uint8_t foo;
+[...]
+alternative_call(myfunc, foo);
+
+Would expand roughly into:
+
+register uint8_t a1_ asm("rdi") = foo;
+register unsigned long a2_ asm("rsi");
+[...]
+asm volatile ("call *%c[addr](%%rip)"...);
+
+However with -O2 clang will generate incorrect code, given the following
+example:
+
+unsigned int func(uint8_t t)
+{
+    return t;
+}
+
+static void bar(uint8_t b)
+{
+    int ret_;
+    register uint8_t di asm("rdi") = b;
+    register unsigned long si asm("rsi");
+    register unsigned long dx asm("rdx");
+    register unsigned long cx asm("rcx");
+    register unsigned long r8 asm("r8");
+    register unsigned long r9 asm("r9");
+    register unsigned long r10 asm("r10");
+    register unsigned long r11 asm("r11");
+
+    asm volatile ( "call %c[addr]"
+                   : "+r" (di), "=r" (si), "=r" (dx),
+                     "=r" (cx), "=r" (r8), "=r" (r9),
+                     "=r" (r10), "=r" (r11), "=a" (ret_)
+                   : [addr] "i" (&(func)), "g" (func)
+                   : "memory" );
+}
+
+void foo(unsigned int a)
+{
+    bar(a);
+}
+
+Clang generates the following assembly code:
+
+func:                                   # @func
+        movl    %edi, %eax
+        retq
+foo:                                    # @foo
+        callq   func
+        retq
+
+Note the truncation of the unsigned int parameter 'a' of foo() to uint8_t when
+passed into bar() is lost.  clang doesn't zero extend the parameters in the
+callee when required, as the psABI mandates.
+
+The above can be worked around by using a union when defining the register
+variables, so that `di` becomes:
+
+register union {
+    uint8_t e;
+    unsigned long r;
+} di asm("rdi") = { .e = b };
+
+Which results in the following code being generated for `foo()`:
+
+foo:                                    # @foo
+        movzbl  %dil, %edi
+        callq   func
+        retq
+
+So the truncation is no longer lost.  Apply such workaround only when built
+with clang.
+
+Reported-by: Matthew Grooms <mgrooms@shrew.net>
+Link: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=277200
+Link: https://github.com/llvm/llvm-project/issues/12579
+Link: https://github.com/llvm/llvm-project/issues/82598
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: 2ce562b2a413cbdb2e1128989ed1722290a27c4e
+master date: 2024-02-26 10:18:01 +0100
+---
+ xen/arch/x86/include/asm/alternative.h | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
+index a7a82c2c03..bcb1dc94f4 100644
+--- a/xen/arch/x86/include/asm/alternative.h
++++ b/xen/arch/x86/include/asm/alternative.h
+@@ -167,9 +167,34 @@ extern void alternative_branches(void);
+ #define ALT_CALL_arg5 "r8"
+ #define ALT_CALL_arg6 "r9"
+ 
++#ifdef CONFIG_CC_IS_CLANG
++/*
++ * Use a union with an unsigned long in order to prevent clang from
++ * skipping a possible truncation of the value.  By using the union any
++ * truncation is carried before the call instruction, in turn covering
++ * for ABI-non-compliance in that the necessary clipping / extension of
++ * the value is supposed to be carried out in the callee.
++ *
++ * Note this behavior is not mandated by the standard, and hence could
++ * stop being a viable workaround, or worse, could cause a different set
++ * of code-generation issues in future clang versions.
++ *
++ * This has been reported upstream:
++ * https://github.com/llvm/llvm-project/issues/12579
++ * https://github.com/llvm/llvm-project/issues/82598
++ */
++#define ALT_CALL_ARG(arg, n)                                            \
++    register union {                                                    \
++        typeof(arg) e;                                                  \
++        unsigned long r;                                                \
++    } a ## n ## _ asm ( ALT_CALL_arg ## n ) = {                         \
++        .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); })   \
++    }
++#else
+ #define ALT_CALL_ARG(arg, n) \
+     register typeof(arg) a ## n ## _ asm ( ALT_CALL_arg ## n ) = \
+         ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); })
++#endif
+ #define ALT_CALL_NO_ARG(n) \
+     register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n )
+ 
+-- 
+2.44.0
+
diff --git a/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch b/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch
new file mode 100644
index 0000000..ce01c1a
--- /dev/null
+++ b/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch
@@ -0,0 +1,57 @@
+From 91650010815f3da0834bc9781c4359350d1162a5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 27 Feb 2024 14:11:40 +0100
+Subject: [PATCH 22/67] x86/spec: fix BRANCH_HARDEN option to only be set when
+ build-enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The current logic to handle the BRANCH_HARDEN option will report it as enabled
+even when build-time disabled. Fix this by only allowing the option to be set
+when support for it is built into Xen.
+
+Fixes: 2d6f36daa086 ('x86/nospec: Introduce CONFIG_SPECULATIVE_HARDEN_BRANCH')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3
+master date: 2024-02-26 16:06:42 +0100
+---
+ xen/arch/x86/spec_ctrl.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 56e07d7536..661716d695 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -62,7 +62,8 @@ int8_t __initdata opt_psfd = -1;
+ int8_t __ro_after_init opt_ibpb_ctxt_switch = -1;
+ int8_t __read_mostly opt_eager_fpu = -1;
+ int8_t __read_mostly opt_l1d_flush = -1;
+-static bool __initdata opt_branch_harden = true;
++static bool __initdata opt_branch_harden =
++    IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH);
+ 
+ bool __initdata bsp_delay_spec_ctrl;
+ uint8_t __read_mostly default_xen_spec_ctrl;
+@@ -280,7 +281,16 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+         else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
+             opt_l1d_flush = val;
+         else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 )
+-            opt_branch_harden = val;
++        {
++            if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) )
++                opt_branch_harden = val;
++            else
++            {
++                no_config_param("SPECULATIVE_HARDEN_BRANCH", "spec-ctrl", s,
++                                ss);
++                rc = -EINVAL;
++            }
++        }
+         else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
+             opt_srb_lock = val;
+         else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 )
+-- 
+2.44.0
+
diff --git a/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch b/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch
new file mode 100644
index 0000000..e23a764
--- /dev/null
+++ b/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch
@@ -0,0 +1,212 @@
+From 49f77602373b58b7bbdb40cea2b49d2f88d4003d Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 27 Feb 2024 14:12:11 +0100
+Subject: [PATCH 23/67] x86: account for shadow stack in exception-from-stub
+ recovery
+
+Dealing with exceptions raised from within emulation stubs involves
+discarding return address (replaced by exception related information).
+Such discarding of course also requires removing the corresponding entry
+from the shadow stack.
+
+Also amend the comment in fixup_exception_return(), to further clarify
+why use of ptr[1] can't be an out-of-bounds access.
+
+This is CVE-2023-46841 / XSA-451.
+
+Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 91f5f7a9154919a765c3933521760acffeddbf28
+master date: 2024-02-27 13:49:22 +0100
+---
+ xen/arch/x86/extable.c             | 20 ++++++----
+ xen/arch/x86/include/asm/uaccess.h |  3 +-
+ xen/arch/x86/traps.c               | 63 +++++++++++++++++++++++++++---
+ 3 files changed, 71 insertions(+), 15 deletions(-)
+
+diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c
+index 6758ba1dca..dd9583f2a5 100644
+--- a/xen/arch/x86/extable.c
++++ b/xen/arch/x86/extable.c
+@@ -86,26 +86,29 @@ search_one_extable(const struct exception_table_entry *first,
+ }
+ 
+ unsigned long
+-search_exception_table(const struct cpu_user_regs *regs)
++search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra)
+ {
+     const struct virtual_region *region = find_text_region(regs->rip);
+     unsigned long stub = this_cpu(stubs.addr);
+ 
+     if ( region && region->ex )
++    {
++        *stub_ra = 0;
+         return search_one_extable(region->ex, region->ex_end, regs->rip);
++    }
+ 
+     if ( regs->rip >= stub + STUB_BUF_SIZE / 2 &&
+          regs->rip < stub + STUB_BUF_SIZE &&
+          regs->rsp > (unsigned long)regs &&
+          regs->rsp < (unsigned long)get_cpu_info() )
+     {
+-        unsigned long retptr = *(unsigned long *)regs->rsp;
++        unsigned long retaddr = *(unsigned long *)regs->rsp, fixup;
+ 
+-        region = find_text_region(retptr);
+-        retptr = region && region->ex
+-                 ? search_one_extable(region->ex, region->ex_end, retptr)
+-                 : 0;
+-        if ( retptr )
++        region = find_text_region(retaddr);
++        fixup = region && region->ex
++                ? search_one_extable(region->ex, region->ex_end, retaddr)
++                : 0;
++        if ( fixup )
+         {
+             /*
+              * Put trap number and error code on the stack (in place of the
+@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_user_regs *regs)
+             };
+ 
+             *(unsigned long *)regs->rsp = token.raw;
+-            return retptr;
++            *stub_ra = retaddr;
++            return fixup;
+         }
+     }
+ 
+diff --git a/xen/arch/x86/include/asm/uaccess.h b/xen/arch/x86/include/asm/uaccess.h
+index 684fccd95c..74bb222c03 100644
+--- a/xen/arch/x86/include/asm/uaccess.h
++++ b/xen/arch/x86/include/asm/uaccess.h
+@@ -421,7 +421,8 @@ union stub_exception_token {
+     unsigned long raw;
+ };
+ 
+-extern unsigned long search_exception_table(const struct cpu_user_regs *regs);
++extern unsigned long search_exception_table(const struct cpu_user_regs *regs,
++                                            unsigned long *stub_ra);
+ extern void sort_exception_tables(void);
+ extern void sort_exception_table(struct exception_table_entry *start,
+                                  const struct exception_table_entry *stop);
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index 06c4f3868b..7599bee361 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -856,7 +856,7 @@ void do_unhandled_trap(struct cpu_user_regs *regs)
+ }
+ 
+ static void fixup_exception_return(struct cpu_user_regs *regs,
+-                                   unsigned long fixup)
++                                   unsigned long fixup, unsigned long stub_ra)
+ {
+     if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
+     {
+@@ -873,7 +873,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
+             /*
+              * Search for %rip.  The shstk currently looks like this:
+              *
+-             *   ...  [Likely pointed to by SSP]
++             *   tok  [Supervisor token, == &tok | BUSY, only with FRED inactive]
++             *   ...  [Pointed to by SSP for most exceptions, empty in IST cases]
+              *   %cs  [== regs->cs]
+              *   %rip [== regs->rip]
+              *   SSP  [Likely points to 3 slots higher, above %cs]
+@@ -891,7 +892,56 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
+              */
+             if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
+             {
++                unsigned long primary_shstk =
++                    (ssp & ~(STACK_SIZE - 1)) +
++                    (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8;
++
+                 wrss(fixup, ptr);
++
++                if ( !stub_ra )
++                    goto shstk_done;
++
++                /*
++                 * Stub recovery ought to happen only when the outer context
++                 * was on the main shadow stack.  We need to also "pop" the
++                 * stub's return address from the interrupted context's shadow
++                 * stack.  That is,
++                 * - if we're still on the main stack, we need to move the
++                 *   entire stack (up to and including the exception frame)
++                 *   up by one slot, incrementing the original SSP in the
++                 *   exception frame,
++                 * - if we're on an IST stack, we need to increment the
++                 *   original SSP.
++                 */
++                BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT);
++
++                if ( (ssp ^ primary_shstk) >> PAGE_SHIFT )
++                {
++                    /*
++                     * We're on an IST stack.  First make sure the two return
++                     * addresses actually match.  Then increment the interrupted
++                     * context's SSP.
++                     */
++                    BUG_ON(stub_ra != *(unsigned long*)ptr[-1]);
++                    wrss(ptr[-1] + 8, &ptr[-1]);
++                    goto shstk_done;
++                }
++
++                /* Make sure the two return addresses actually match. */
++                BUG_ON(stub_ra != ptr[2]);
++
++                /* Move exception frame, updating SSP there. */
++                wrss(ptr[1], &ptr[2]); /* %cs */
++                wrss(ptr[0], &ptr[1]); /* %rip */
++                wrss(ptr[-1] + 8, &ptr[0]); /* SSP */
++
++                /* Move all newer entries. */
++                while ( --ptr != _p(ssp) )
++                    wrss(ptr[-1], &ptr[0]);
++
++                /* Finally account for our own stack having shifted up. */
++                asm volatile ( "incsspd %0" :: "r" (2) );
++
+                 goto shstk_done;
+             }
+         }
+@@ -912,7 +962,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs,
+ 
+ static bool extable_fixup(struct cpu_user_regs *regs, bool print)
+ {
+-    unsigned long fixup = search_exception_table(regs);
++    unsigned long stub_ra = 0;
++    unsigned long fixup = search_exception_table(regs, &stub_ra);
+ 
+     if ( unlikely(fixup == 0) )
+         return false;
+@@ -926,7 +977,7 @@ static bool extable_fixup(struct cpu_user_regs *regs, bool print)
+                vector_name(regs->entry_vector), regs->error_code,
+                _p(regs->rip), _p(regs->rip), _p(fixup));
+ 
+-    fixup_exception_return(regs, fixup);
++    fixup_exception_return(regs, fixup, stub_ra);
+     this_cpu(last_extable_addr) = regs->rip;
+ 
+     return true;
+@@ -1214,7 +1265,7 @@ void do_invalid_op(struct cpu_user_regs *regs)
+         void (*fn)(struct cpu_user_regs *) = bug_ptr(bug);
+ 
+         fn(regs);
+-        fixup_exception_return(regs, (unsigned long)eip);
++        fixup_exception_return(regs, (unsigned long)eip, 0);
+         return;
+     }
+ 
+@@ -1235,7 +1286,7 @@ void do_invalid_op(struct cpu_user_regs *regs)
+     case BUGFRAME_warn:
+         printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno);
+         show_execution_state(regs);
+-        fixup_exception_return(regs, (unsigned long)eip);
++        fixup_exception_return(regs, (unsigned long)eip, 0);
+         return;
+ 
+     case BUGFRAME_bug:
+-- 
+2.44.0
+
diff --git a/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch b/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch
new file mode 100644
index 0000000..7bdd651
--- /dev/null
+++ b/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch
@@ -0,0 +1,52 @@
+From 6cbccc4071ef49a8c591ecaddfdcb1cc26d28411 Mon Sep 17 00:00:00 2001
+From: Michal Orzel <michal.orzel@amd.com>
+Date: Thu, 8 Feb 2024 11:43:39 +0100
+Subject: [PATCH 24/67] xen/arm: Fix UBSAN failure in start_xen()
+
+When running Xen on arm32, in a scenario where Xen is loaded at an address
+such as boot_phys_offset >= 2GB, UBSAN reports the following:
+
+(XEN) UBSAN: Undefined behaviour in arch/arm/setup.c:739:58
+(XEN) pointer operation underflowed 00200000 to 86800000
+(XEN) Xen WARN at common/ubsan/ubsan.c:172
+(XEN) ----[ Xen-4.19-unstable  arm32  debug=y ubsan=y  Not tainted ]----
+...
+(XEN) Xen call trace:
+(XEN)    [<0031b4c0>] ubsan.c#ubsan_epilogue+0x18/0xf0 (PC)
+(XEN)    [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 (LR)
+(XEN)    [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4
+(XEN)    [<004d15a8>] start_xen+0xe0/0xbe0
+(XEN)    [<0020007c>] head.o#primary_switched+0x4/0x30
+
+The failure is reported for the following line:
+(paddr_t)(uintptr_t)(_start + boot_phys_offset)
+
+This occurs because the compiler treats (ptr + size) with size bigger than
+PTRDIFF_MAX as undefined behavior. To address this, switch to macro
+virt_to_maddr(), given the future plans to eliminate boot_phys_offset.
+
+Signed-off-by: Michal Orzel <michal.orzel@amd.com>
+Reviewed-by: Luca Fancellu <luca.fancellu@arm.com>
+Tested-by: Luca Fancellu <luca.fancellu@arm.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit e11f5766503c0ff074b4e0f888bbfc931518a169)
+---
+ xen/arch/arm/setup.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
+index 4395640019..9ee19c2bc1 100644
+--- a/xen/arch/arm/setup.c
++++ b/xen/arch/arm/setup.c
+@@ -1025,7 +1025,7 @@ void __init start_xen(unsigned long boot_phys_offset,
+ 
+     /* Register Xen's load address as a boot module. */
+     xen_bootmodule = add_boot_module(BOOTMOD_XEN,
+-                             (paddr_t)(uintptr_t)(_start + boot_phys_offset),
++                             virt_to_maddr(_start),
+                              (paddr_t)(uintptr_t)(_end - _start), false);
+     BUG_ON(!xen_bootmodule);
+ 
+-- 
+2.44.0
+
diff --git a/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch b/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch
new file mode 100644
index 0000000..28e489b
--- /dev/null
+++ b/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch
@@ -0,0 +1,67 @@
+From 9c0d518eb8dc69430e6a8d767bd101dad19b846a Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 5 Mar 2024 11:56:31 +0100
+Subject: [PATCH 25/67] x86/HVM: hide SVM/VMX when their enabling is prohibited
+ by firmware
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+... or we fail to enable the functionality on the BSP for other reasons.
+The only place where hardware announcing the feature is recorded is the
+raw CPU policy/featureset.
+
+Inspired by https://lore.kernel.org/all/20230921114940.957141-1-pbonzini@redhat.com/.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 0b5f149338e35a795bf609ce584640b0977f9e6c
+master date: 2024-01-09 14:06:34 +0100
+---
+ xen/arch/x86/hvm/svm/svm.c  |  1 +
+ xen/arch/x86/hvm/vmx/vmcs.c | 17 +++++++++++++++++
+ 2 files changed, 18 insertions(+)
+
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index fd32600ae3..3c17464550 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -1669,6 +1669,7 @@ const struct hvm_function_table * __init start_svm(void)
+ 
+     if ( _svm_cpu_up(true) )
+     {
++        setup_clear_cpu_cap(X86_FEATURE_SVM);
+         printk("SVM: failed to initialise.\n");
+         return NULL;
+     }
+diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
+index bcbecc6945..b5ecc51b43 100644
+--- a/xen/arch/x86/hvm/vmx/vmcs.c
++++ b/xen/arch/x86/hvm/vmx/vmcs.c
+@@ -2163,6 +2163,23 @@ int __init vmx_vmcs_init(void)
+ 
+     if ( !ret )
+         register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1);
++    else
++    {
++        setup_clear_cpu_cap(X86_FEATURE_VMX);
++
++        /*
++         * _vmx_vcpu_up() may have made it past feature identification.
++         * Make sure all dependent features are off as well.
++         */
++        vmx_basic_msr              = 0;
++        vmx_pin_based_exec_control = 0;
++        vmx_cpu_based_exec_control = 0;
++        vmx_secondary_exec_control = 0;
++        vmx_vmexit_control         = 0;
++        vmx_vmentry_control        = 0;
++        vmx_ept_vpid_cap           = 0;
++        vmx_vmfunc                 = 0;
++    }
+ 
+     return ret;
+ }
+-- 
+2.44.0
+
diff --git a/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch b/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch
new file mode 100644
index 0000000..4b051ea
--- /dev/null
+++ b/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch
@@ -0,0 +1,86 @@
+From b75bee183210318150e678e14b35224d7c73edb6 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 5 Mar 2024 11:57:02 +0100
+Subject: [PATCH 26/67] xen/sched: Fix UB shift in compat_set_timer_op()
+
+Tamas reported this UBSAN failure from fuzzing:
+
+  (XEN) ================================================================================
+  (XEN) UBSAN: Undefined behaviour in common/sched/compat.c:48:37
+  (XEN) left shift of negative value -2147425536
+  (XEN) ----[ Xen-4.19-unstable  x86_64  debug=y ubsan=y  Not tainted ]----
+  ...
+  (XEN) Xen call trace:
+  (XEN)    [<ffff82d040307c1c>] R ubsan.c#ubsan_epilogue+0xa/0xd9
+  (XEN)    [<ffff82d040308afb>] F __ubsan_handle_shift_out_of_bounds+0x11a/0x1c5
+  (XEN)    [<ffff82d040307758>] F compat_set_timer_op+0x41/0x43
+  (XEN)    [<ffff82d04040e4cc>] F hvm_do_multicall_call+0x77f/0xa75
+  (XEN)    [<ffff82d040519462>] F arch_do_multicall_call+0xec/0xf1
+  (XEN)    [<ffff82d040261567>] F do_multicall+0x1dc/0xde3
+  (XEN)    [<ffff82d04040d2b3>] F hvm_hypercall+0xa00/0x149a
+  (XEN)    [<ffff82d0403cd072>] F vmx_vmexit_handler+0x1596/0x279c
+  (XEN)    [<ffff82d0403d909b>] F vmx_asm_vmexit_handler+0xdb/0x200
+
+Left-shifting any negative value is strictly undefined behaviour in C, and
+the two parameters here come straight from the guest.
+
+The fuzzer happened to choose lo 0xf, hi 0x8000e300.
+
+Switch everything to be unsigned values, making the shift well defined.
+
+As GCC documents:
+
+  As an extension to the C language, GCC does not use the latitude given in
+  C99 and C11 only to treat certain aspects of signed '<<' as undefined.
+  However, -fsanitize=shift (and -fsanitize=undefined) will diagnose such
+  cases.
+
+this was deemed not to need an XSA.
+
+Note: The unsigned -> signed conversion for do_set_timer_op()'s s_time_t
+parameter is also well defined.  C makes it implementation defined, and GCC
+defines it as reduction modulo 2^N to be within range of the new type.
+
+Fixes: 2942f45e09fb ("Enable compatibility mode operation for HYPERVISOR_sched_op and HYPERVISOR_set_timer_op.")
+Reported-by: Tamas K Lengyel <tamas@tklengyel.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: ae6d4fd876765e6d623eec67d14f5d0464be09cb
+master date: 2024-02-01 19:52:44 +0000
+---
+ xen/common/sched/compat.c    | 4 ++--
+ xen/include/hypercall-defs.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/common/sched/compat.c b/xen/common/sched/compat.c
+index 040b4caca2..b827fdecb8 100644
+--- a/xen/common/sched/compat.c
++++ b/xen/common/sched/compat.c
+@@ -39,9 +39,9 @@ static int compat_poll(struct compat_sched_poll *compat)
+ 
+ #include "core.c"
+ 
+-int compat_set_timer_op(u32 lo, s32 hi)
++int compat_set_timer_op(uint32_t lo, uint32_t hi)
+ {
+-    return do_set_timer_op(((s64)hi << 32) | lo);
++    return do_set_timer_op(((uint64_t)hi << 32) | lo);
+ }
+ 
+ /*
+diff --git a/xen/include/hypercall-defs.c b/xen/include/hypercall-defs.c
+index 1896121074..c442dee284 100644
+--- a/xen/include/hypercall-defs.c
++++ b/xen/include/hypercall-defs.c
+@@ -127,7 +127,7 @@ xenoprof_op(int op, void *arg)
+ 
+ #ifdef CONFIG_COMPAT
+ prefix: compat
+-set_timer_op(uint32_t lo, int32_t hi)
++set_timer_op(uint32_t lo, uint32_t hi)
+ multicall(multicall_entry_compat_t *call_list, uint32_t nr_calls)
+ memory_op(unsigned int cmd, void *arg)
+ #ifdef CONFIG_IOREQ_SERVER
+-- 
+2.44.0
+
diff --git a/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch b/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch
new file mode 100644
index 0000000..845247a
--- /dev/null
+++ b/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch
@@ -0,0 +1,54 @@
+From 76ea2aab3652cc34e474de0905f0a9cd4df7d087 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 11:57:41 +0100
+Subject: [PATCH 27/67] x86/spec: print the built-in SPECULATIVE_HARDEN_*
+ options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Just like it's done for INDIRECT_THUNK and SHADOW_PAGING.
+
+Reported-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 6e9507f7d51fe49df8bc70f83e49ce06c92e4e54
+master date: 2024-02-27 14:57:52 +0100
+---
+ xen/arch/x86/spec_ctrl.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 661716d695..93f1cf3bb5 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -488,13 +488,25 @@ static void __init print_details(enum ind_thunk thunk)
+            (e21a & cpufeat_mask(X86_FEATURE_SBPB))           ? " SBPB"           : "");
+ 
+     /* Compiled-in support which pertains to mitigations. */
+-    if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) )
++    if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ||
++         IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) ||
++         IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ||
++         IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) )
+         printk("  Compiled-in support:"
+ #ifdef CONFIG_INDIRECT_THUNK
+                " INDIRECT_THUNK"
+ #endif
+ #ifdef CONFIG_SHADOW_PAGING
+                " SHADOW_PAGING"
++#endif
++#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY
++               " HARDEN_ARRAY"
++#endif
++#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH
++               " HARDEN_BRANCH"
++#endif
++#ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS
++               " HARDEN_GUEST_ACCESS"
+ #endif
+                "\n");
+ 
+-- 
+2.44.0
+
diff --git a/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch b/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch
new file mode 100644
index 0000000..dfbf516
--- /dev/null
+++ b/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch
@@ -0,0 +1,67 @@
+From 693455c3c370e535eb6cd065800ff91e147815fa Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 11:58:04 +0100
+Subject: [PATCH 28/67] x86/spec: fix INDIRECT_THUNK option to only be set when
+ build-enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Attempt to provide a more helpful error message when the user attempts to set
+spec-ctrl=bti-thunk option but the support is build-time disabled.
+
+While there also adjust the command line documentation to mention
+CONFIG_INDIRECT_THUNK instead of INDIRECT_THUNK.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 8441fa806a3b778867867cd0159fa1722e90397e
+master date: 2024-02-27 14:58:20 +0100
+---
+ docs/misc/xen-command-line.pandoc | 10 +++++-----
+ xen/arch/x86/spec_ctrl.c          |  7 ++++++-
+ 2 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 05f613c71c..2006697226 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2378,11 +2378,11 @@ guests to use.
+   performance reasons dom0 is unprotected by default.  If it is necessary to
+   protect dom0 too, boot with `spec-ctrl=ibpb-entry`.
+ 
+-If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to
+-select which of the thunks gets patched into the `__x86_indirect_thunk_%reg`
+-locations.  The default thunk is `retpoline` (generally preferred), with the
+-alternatives being `jmp` (a `jmp *%reg` gadget, minimal overhead), and
+-`lfence` (an `lfence; jmp *%reg` gadget).
++If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be
++used to select which of the thunks gets patched into the
++`__x86_indirect_thunk_%reg` locations.  The default thunk is `retpoline`
++(generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget,
++minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget).
+ 
+ On hardware supporting IBRS (Indirect Branch Restricted Speculation), the
+ `ibrs=` option can be used to force or prevent Xen using the feature itself.
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 93f1cf3bb5..098fa3184d 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -253,7 +253,12 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+         {
+             s += 10;
+ 
+-            if ( !cmdline_strcmp(s, "retpoline") )
++            if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) )
++            {
++                no_config_param("INDIRECT_THUNK", "spec-ctrl", s - 10, ss);
++                rc = -EINVAL;
++            }
++            else if ( !cmdline_strcmp(s, "retpoline") )
+                 opt_thunk = THUNK_RETPOLINE;
+             else if ( !cmdline_strcmp(s, "lfence") )
+                 opt_thunk = THUNK_LFENCE;
+-- 
+2.44.0
+
diff --git a/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch b/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch
new file mode 100644
index 0000000..71e6633
--- /dev/null
+++ b/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch
@@ -0,0 +1,50 @@
+From 0ce25b46ab2fb53a1b58f7682ca14971453f4f2c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 11:58:36 +0100
+Subject: [PATCH 29/67] x86/spec: do not print thunk option selection if not
+ built-in
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Since the thunk built-in enable is printed as part of the "Compiled-in
+support:" line, avoid printing anything in "Xen settings:" if the thunk is
+disabled at build time.
+
+Note the BTI-Thunk option printing is also adjusted to print a colon in the
+same way the other options on the line do.
+
+Requested-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 576528a2a742069af203e90c613c5c93e23c9755
+master date: 2024-02-27 14:58:40 +0100
+---
+ xen/arch/x86/spec_ctrl.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 098fa3184d..25a18ac598 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -516,11 +516,12 @@ static void __init print_details(enum ind_thunk thunk)
+                "\n");
+ 
+     /* Settings for Xen's protection, irrespective of guests. */
+-    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
+-           thunk == THUNK_NONE      ? "N/A" :
+-           thunk == THUNK_RETPOLINE ? "RETPOLINE" :
+-           thunk == THUNK_LFENCE    ? "LFENCE" :
+-           thunk == THUNK_JMP       ? "JMP" : "?",
++    printk("  Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
++           thunk != THUNK_NONE      ? "BTI-Thunk: " : "",
++           thunk == THUNK_NONE      ? "" :
++           thunk == THUNK_RETPOLINE ? "RETPOLINE, " :
++           thunk == THUNK_LFENCE    ? "LFENCE, " :
++           thunk == THUNK_JMP       ? "JMP, " : "?, ",
+            (!boot_cpu_has(X86_FEATURE_IBRSB) &&
+             !boot_cpu_has(X86_FEATURE_IBRS))         ? "No" :
+            (default_xen_spec_ctrl & SPEC_CTRL_IBRS)  ? "IBRS+" :  "IBRS-",
+-- 
+2.44.0
+
diff --git a/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch b/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch
new file mode 100644
index 0000000..f521ecc
--- /dev/null
+++ b/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch
@@ -0,0 +1,159 @@
+From b11917de0cd261a878beaf50c18a689bde0b2f50 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 11:59:26 +0100
+Subject: [PATCH 30/67] xen/livepatch: register livepatch regions when loaded
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Currently livepatch regions are registered as virtual regions only after the
+livepatch has been applied.
+
+This can lead to issues when using the pre-apply or post-revert hooks, as at
+that point the livepatch is not in the virtual regions list.  If a livepatch
+pre-apply hook contains a WARN() it would trigger a hypervisor crash, as the
+code to handle the bug frame won't be able to find the instruction pointer that
+triggered the #UD in any of the registered virtual regions, and hence crash.
+
+Fix this by adding the livepatch payloads as virtual regions as soon as loaded,
+and only remove them once the payload is unloaded.  This requires some changes
+to the virtual regions code, as the removal of the virtual regions is no longer
+done in stop machine context, and hence an RCU barrier is added in order to
+make sure there are no users of the virtual region after it's been removed from
+the list.
+
+Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: a57b4074ab39bee78b6c116277f0a9963bd8e687
+master date: 2024-02-28 16:57:25 +0000
+---
+ xen/common/livepatch.c      |  4 ++--
+ xen/common/virtual_region.c | 44 ++++++++++++++-----------------------
+ 2 files changed, 19 insertions(+), 29 deletions(-)
+
+diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
+index c2ae84d18b..537e9f33e4 100644
+--- a/xen/common/livepatch.c
++++ b/xen/common/livepatch.c
+@@ -1015,6 +1015,7 @@ static int build_symbol_table(struct payload *payload,
+ static void free_payload(struct payload *data)
+ {
+     ASSERT(spin_is_locked(&payload_lock));
++    unregister_virtual_region(&data->region);
+     list_del(&data->list);
+     payload_cnt--;
+     payload_version++;
+@@ -1114,6 +1115,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload)
+         INIT_LIST_HEAD(&data->list);
+         INIT_LIST_HEAD(&data->applied_list);
+ 
++        register_virtual_region(&data->region);
+         list_add_tail(&data->list, &payload_list);
+         payload_cnt++;
+         payload_version++;
+@@ -1330,7 +1332,6 @@ static inline void apply_payload_tail(struct payload *data)
+      * The applied_list is iterated by the trap code.
+      */
+     list_add_tail_rcu(&data->applied_list, &applied_list);
+-    register_virtual_region(&data->region);
+ 
+     data->state = LIVEPATCH_STATE_APPLIED;
+ }
+@@ -1376,7 +1377,6 @@ static inline void revert_payload_tail(struct payload *data)
+      * The applied_list is iterated by the trap code.
+      */
+     list_del_rcu(&data->applied_list);
+-    unregister_virtual_region(&data->region);
+ 
+     data->reverted = true;
+     data->state = LIVEPATCH_STATE_CHECKED;
+diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
+index 5f89703f51..9f12c30efe 100644
+--- a/xen/common/virtual_region.c
++++ b/xen/common/virtual_region.c
+@@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = {
+ };
+ 
+ /*
+- * RCU locking. Additions are done either at startup (when there is only
+- * one CPU) or when all CPUs are running without IRQs.
+- *
+- * Deletions are bit tricky. We do it when Live Patch (all CPUs running
+- * without IRQs) or during bootup (when clearing the init).
+- *
+- * Hence we use list_del_rcu (which sports an memory fence) and a spinlock
+- * on deletion.
++ * RCU locking. Modifications to the list must be done in exclusive mode, and
++ * hence need to hold the spinlock.
+  *
+  * All readers of virtual_region_list MUST use list_for_each_entry_rcu.
+  */
+@@ -58,41 +52,36 @@ const struct virtual_region *find_text_region(unsigned long addr)
+ 
+ void register_virtual_region(struct virtual_region *r)
+ {
+-    ASSERT(!local_irq_is_enabled());
++    unsigned long flags;
+ 
++    spin_lock_irqsave(&virtual_region_lock, flags);
+     list_add_tail_rcu(&r->list, &virtual_region_list);
++    spin_unlock_irqrestore(&virtual_region_lock, flags);
+ }
+ 
+-static void remove_virtual_region(struct virtual_region *r)
++/*
++ * Suggest inline so when !CONFIG_LIVEPATCH the function is not left
++ * unreachable after init code is removed.
++ */
++static void inline remove_virtual_region(struct virtual_region *r)
+ {
+     unsigned long flags;
+ 
+     spin_lock_irqsave(&virtual_region_lock, flags);
+     list_del_rcu(&r->list);
+     spin_unlock_irqrestore(&virtual_region_lock, flags);
+-    /*
+-     * We do not need to invoke call_rcu.
+-     *
+-     * This is due to the fact that on the deletion we have made sure
+-     * to use spinlocks (to guard against somebody else calling
+-     * unregister_virtual_region) and list_deletion spiced with
+-     * memory barrier.
+-     *
+-     * That protects us from corrupting the list as the readers all
+-     * use list_for_each_entry_rcu which is safe against concurrent
+-     * deletions.
+-     */
+ }
+ 
++#ifdef CONFIG_LIVEPATCH
+ void unregister_virtual_region(struct virtual_region *r)
+ {
+-    /* Expected to be called from Live Patch - which has IRQs disabled. */
+-    ASSERT(!local_irq_is_enabled());
+-
+     remove_virtual_region(r);
++
++    /* Assert that no CPU might be using the removed region. */
++    rcu_barrier();
+ }
+ 
+-#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86)
++#ifdef CONFIG_X86
+ void relax_virtual_region_perms(void)
+ {
+     const struct virtual_region *region;
+@@ -116,7 +105,8 @@ void tighten_virtual_region_perms(void)
+                                  PAGE_HYPERVISOR_RX);
+     rcu_read_unlock(&rcu_virtual_region_lock);
+ }
+-#endif
++#endif /* CONFIG_X86 */
++#endif /* CONFIG_LIVEPATCH */
+ 
+ void __init unregister_init_virtual_region(void)
+ {
+-- 
+2.44.0
+
diff --git a/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch b/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch
new file mode 100644
index 0000000..c778639
--- /dev/null
+++ b/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch
@@ -0,0 +1,149 @@
+From c54cf903b06fb1933fad053cc547580c92c856ea Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 11:59:35 +0100
+Subject: [PATCH 31/67] xen/livepatch: search for symbols in all loaded
+ payloads
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When checking if an address belongs to a patch, or when resolving a symbol,
+take into account all loaded livepatch payloads, even if not applied.
+
+This is required in order for the pre-apply and post-revert hooks to work
+properly, or else Xen won't detect the instruction pointer belonging to those
+hooks as being part of the currently active text.
+
+Move the RCU handling to be used for payload_list instead of applied_list, as
+now the calls from trap code will iterate over the payload_list.
+
+Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: d2daa40fb3ddb8f83e238e57854bd878924cde90
+master date: 2024-02-28 16:57:25 +0000
+---
+ xen/common/livepatch.c | 49 +++++++++++++++---------------------------
+ 1 file changed, 17 insertions(+), 32 deletions(-)
+
+diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
+index 537e9f33e4..a129ab9973 100644
+--- a/xen/common/livepatch.c
++++ b/xen/common/livepatch.c
+@@ -36,13 +36,14 @@
+  * caller in schedule_work.
+  */
+ static DEFINE_SPINLOCK(payload_lock);
+-static LIST_HEAD(payload_list);
+-
+ /*
+- * Patches which have been applied. Need RCU in case we crash (and then
+- * traps code would iterate via applied_list) when adding entries on the list.
++ * Need RCU in case we crash (and then traps code would iterate via
++ * payload_list) when adding entries on the list.
+  */
+-static DEFINE_RCU_READ_LOCK(rcu_applied_lock);
++static DEFINE_RCU_READ_LOCK(rcu_payload_lock);
++static LIST_HEAD(payload_list);
++
++/* Patches which have been applied. Only modified from stop machine context. */
+ static LIST_HEAD(applied_list);
+ 
+ static unsigned int payload_cnt;
+@@ -111,12 +112,8 @@ bool_t is_patch(const void *ptr)
+     const struct payload *data;
+     bool_t r = 0;
+ 
+-    /*
+-     * Only RCU locking since this list is only ever changed during apply
+-     * or revert context. And in case it dies there we need an safe list.
+-     */
+-    rcu_read_lock(&rcu_applied_lock);
+-    list_for_each_entry_rcu ( data, &applied_list, applied_list )
++    rcu_read_lock(&rcu_payload_lock);
++    list_for_each_entry_rcu ( data, &payload_list, list )
+     {
+         if ( (ptr >= data->rw_addr &&
+               ptr < (data->rw_addr + data->rw_size)) ||
+@@ -130,7 +127,7 @@ bool_t is_patch(const void *ptr)
+         }
+ 
+     }
+-    rcu_read_unlock(&rcu_applied_lock);
++    rcu_read_unlock(&rcu_payload_lock);
+ 
+     return r;
+ }
+@@ -166,12 +163,8 @@ static const char *cf_check livepatch_symbols_lookup(
+     const void *va = (const void *)addr;
+     const char *n = NULL;
+ 
+-    /*
+-     * Only RCU locking since this list is only ever changed during apply
+-     * or revert context. And in case it dies there we need an safe list.
+-     */
+-    rcu_read_lock(&rcu_applied_lock);
+-    list_for_each_entry_rcu ( data, &applied_list, applied_list )
++    rcu_read_lock(&rcu_payload_lock);
++    list_for_each_entry_rcu ( data, &payload_list, list )
+     {
+         if ( va < data->text_addr ||
+              va >= (data->text_addr + data->text_size) )
+@@ -200,7 +193,7 @@ static const char *cf_check livepatch_symbols_lookup(
+         n = data->symtab[best].name;
+         break;
+     }
+-    rcu_read_unlock(&rcu_applied_lock);
++    rcu_read_unlock(&rcu_payload_lock);
+ 
+     return n;
+ }
+@@ -1016,7 +1009,8 @@ static void free_payload(struct payload *data)
+ {
+     ASSERT(spin_is_locked(&payload_lock));
+     unregister_virtual_region(&data->region);
+-    list_del(&data->list);
++    list_del_rcu(&data->list);
++    rcu_barrier();
+     payload_cnt--;
+     payload_version++;
+     free_payload_data(data);
+@@ -1116,7 +1110,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload)
+         INIT_LIST_HEAD(&data->applied_list);
+ 
+         register_virtual_region(&data->region);
+-        list_add_tail(&data->list, &payload_list);
++        list_add_tail_rcu(&data->list, &payload_list);
+         payload_cnt++;
+         payload_version++;
+     }
+@@ -1327,11 +1321,7 @@ static int apply_payload(struct payload *data)
+ 
+ static inline void apply_payload_tail(struct payload *data)
+ {
+-    /*
+-     * We need RCU variant (which has barriers) in case we crash here.
+-     * The applied_list is iterated by the trap code.
+-     */
+-    list_add_tail_rcu(&data->applied_list, &applied_list);
++    list_add_tail(&data->applied_list, &applied_list);
+ 
+     data->state = LIVEPATCH_STATE_APPLIED;
+ }
+@@ -1371,12 +1361,7 @@ static int revert_payload(struct payload *data)
+ 
+ static inline void revert_payload_tail(struct payload *data)
+ {
+-
+-    /*
+-     * We need RCU variant (which has barriers) in case we crash here.
+-     * The applied_list is iterated by the trap code.
+-     */
+-    list_del_rcu(&data->applied_list);
++    list_del(&data->applied_list);
+ 
+     data->reverted = true;
+     data->state = LIVEPATCH_STATE_CHECKED;
+-- 
+2.44.0
+
diff --git a/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch b/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch
new file mode 100644
index 0000000..76af9ef
--- /dev/null
+++ b/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch
@@ -0,0 +1,186 @@
+From 5564323f643715f9d364df88e0eb9c7d6fd2c22b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 11:59:43 +0100
+Subject: [PATCH 32/67] xen/livepatch: fix norevert test attempt to open-code
+ revert
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The purpose of the norevert test is to install a dummy handler that replaces
+the internal Xen revert code, and then perform the revert in the post-revert
+hook.  For that purpose the usage of the previous common_livepatch_revert() is
+not enough, as that just reverts specific functions, but not the whole state of
+the payload.
+
+Remove both common_livepatch_{apply,revert}() and instead expose
+revert_payload{,_tail}() in order to perform the patch revert from the
+post-revert hook.
+
+Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: cdae267ce10d04d71d1687b5701ff2911a96b6dc
+master date: 2024-02-28 16:57:25 +0000
+---
+ xen/common/livepatch.c                        | 41 +++++++++++++++++--
+ xen/include/xen/livepatch.h                   | 32 ++-------------
+ .../livepatch/xen_action_hooks_norevert.c     | 22 +++-------
+ 3 files changed, 46 insertions(+), 49 deletions(-)
+
+diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
+index a129ab9973..a5068a2217 100644
+--- a/xen/common/livepatch.c
++++ b/xen/common/livepatch.c
+@@ -1310,7 +1310,22 @@ static int apply_payload(struct payload *data)
+     ASSERT(!local_irq_is_enabled());
+ 
+     for ( i = 0; i < data->nfuncs; i++ )
+-        common_livepatch_apply(&data->funcs[i], &data->fstate[i]);
++    {
++        const struct livepatch_func *func = &data->funcs[i];
++        struct livepatch_fstate *state = &data->fstate[i];
++
++        /* If the action has been already executed on this function, do nothing. */
++        if ( state->applied == LIVEPATCH_FUNC_APPLIED )
++        {
++            printk(XENLOG_WARNING LIVEPATCH
++                   "%s: %s has been already applied before\n",
++                   __func__, func->name);
++            continue;
++        }
++
++        arch_livepatch_apply(func, state);
++        state->applied = LIVEPATCH_FUNC_APPLIED;
++    }
+ 
+     arch_livepatch_revive();
+ 
+@@ -1326,7 +1341,7 @@ static inline void apply_payload_tail(struct payload *data)
+     data->state = LIVEPATCH_STATE_APPLIED;
+ }
+ 
+-static int revert_payload(struct payload *data)
++int revert_payload(struct payload *data)
+ {
+     unsigned int i;
+     int rc;
+@@ -1341,7 +1356,25 @@ static int revert_payload(struct payload *data)
+     }
+ 
+     for ( i = 0; i < data->nfuncs; i++ )
+-        common_livepatch_revert(&data->funcs[i], &data->fstate[i]);
++    {
++        const struct livepatch_func *func = &data->funcs[i];
++        struct livepatch_fstate *state = &data->fstate[i];
++
++        /*
++         * If the apply action hasn't been executed on this function, do
++         * nothing.
++         */
++        if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED )
++        {
++            printk(XENLOG_WARNING LIVEPATCH
++                   "%s: %s has not been applied before\n",
++                   __func__, func->name);
++            continue;
++        }
++
++        arch_livepatch_revert(func, state);
++        state->applied = LIVEPATCH_FUNC_NOT_APPLIED;
++    }
+ 
+     /*
+      * Since we are running with IRQs disabled and the hooks may call common
+@@ -1359,7 +1392,7 @@ static int revert_payload(struct payload *data)
+     return 0;
+ }
+ 
+-static inline void revert_payload_tail(struct payload *data)
++void revert_payload_tail(struct payload *data)
+ {
+     list_del(&data->applied_list);
+ 
+diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h
+index 537d3d58b6..c9ee58fd37 100644
+--- a/xen/include/xen/livepatch.h
++++ b/xen/include/xen/livepatch.h
+@@ -136,35 +136,11 @@ void arch_livepatch_post_action(void);
+ void arch_livepatch_mask(void);
+ void arch_livepatch_unmask(void);
+ 
+-static inline void common_livepatch_apply(const struct livepatch_func *func,
+-                                          struct livepatch_fstate *state)
+-{
+-    /* If the action has been already executed on this function, do nothing. */
+-    if ( state->applied == LIVEPATCH_FUNC_APPLIED )
+-    {
+-        printk(XENLOG_WARNING LIVEPATCH "%s: %s has been already applied before\n",
+-                __func__, func->name);
+-        return;
+-    }
+-
+-    arch_livepatch_apply(func, state);
+-    state->applied = LIVEPATCH_FUNC_APPLIED;
+-}
++/* Only for testing purposes. */
++struct payload;
++int revert_payload(struct payload *data);
++void revert_payload_tail(struct payload *data);
+ 
+-static inline void common_livepatch_revert(const struct livepatch_func *func,
+-                                           struct livepatch_fstate *state)
+-{
+-    /* If the apply action hasn't been executed on this function, do nothing. */
+-    if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED )
+-    {
+-        printk(XENLOG_WARNING LIVEPATCH "%s: %s has not been applied before\n",
+-                __func__, func->name);
+-        return;
+-    }
+-
+-    arch_livepatch_revert(func, state);
+-    state->applied = LIVEPATCH_FUNC_NOT_APPLIED;
+-}
+ #else
+ 
+ /*
+diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c
+index c173855192..c5fbab1746 100644
+--- a/xen/test/livepatch/xen_action_hooks_norevert.c
++++ b/xen/test/livepatch/xen_action_hooks_norevert.c
+@@ -96,26 +96,14 @@ static int revert_hook(livepatch_payload_t *payload)
+ 
+ static void post_revert_hook(livepatch_payload_t *payload)
+ {
+-    int i;
++    unsigned long flags;
+ 
+     printk(KERN_DEBUG "%s: Hook starting.\n", __func__);
+ 
+-    for (i = 0; i < payload->nfuncs; i++)
+-    {
+-        const struct livepatch_func *func = &payload->funcs[i];
+-        struct livepatch_fstate *fstate = &payload->fstate[i];
+-
+-        BUG_ON(revert_cnt != 1);
+-        BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED);
+-
+-        /* Outside of quiesce zone: MAY TRIGGER HOST CRASH/UNDEFINED BEHAVIOR */
+-        arch_livepatch_quiesce();
+-        common_livepatch_revert(payload);
+-        arch_livepatch_revive();
+-        BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED);
+-
+-        printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name);
+-    }
++    local_irq_save(flags);
++    BUG_ON(revert_payload(payload));
++    revert_payload_tail(payload);
++    local_irq_restore(flags);
+ 
+     printk(KERN_DEBUG "%s: Hook done.\n", __func__);
+ }
+-- 
+2.44.0
+
diff --git a/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch b/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch
new file mode 100644
index 0000000..76803c6
--- /dev/null
+++ b/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch
@@ -0,0 +1,43 @@
+From a59106b27609b6ae2873bd6755949b1258290872 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 11:59:51 +0100
+Subject: [PATCH 33/67] xen/livepatch: properly build the noapply and norevert
+ tests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+It seems the build variables for those tests were copy-pasted from
+xen_action_hooks_marker-objs and not adjusted to use the correct source files.
+
+Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: e579677095782c7dec792597ba8b037b7d716b32
+master date: 2024-02-28 16:57:25 +0000
+---
+ xen/test/livepatch/Makefile | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xen/test/livepatch/Makefile b/xen/test/livepatch/Makefile
+index c258ab0b59..d987a8367f 100644
+--- a/xen/test/livepatch/Makefile
++++ b/xen/test/livepatch/Makefile
+@@ -118,12 +118,12 @@ xen_action_hooks_marker-objs := xen_action_hooks_marker.o xen_hello_world_func.o
+ $(obj)/xen_action_hooks_noapply.o: $(obj)/config.h
+ 
+ extra-y += xen_action_hooks_noapply.livepatch
+-xen_action_hooks_noapply-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o
++xen_action_hooks_noapply-objs := xen_action_hooks_noapply.o xen_hello_world_func.o note.o xen_note.o
+ 
+ $(obj)/xen_action_hooks_norevert.o: $(obj)/config.h
+ 
+ extra-y += xen_action_hooks_norevert.livepatch
+-xen_action_hooks_norevert-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o
++xen_action_hooks_norevert-objs := xen_action_hooks_norevert.o xen_hello_world_func.o note.o xen_note.o
+ 
+ EXPECT_BYTES_COUNT := 8
+ CODE_GET_EXPECT=$(shell $(OBJDUMP) -d --insn-width=1 $(1) | sed -n -e '/<'$(2)'>:$$/,/^$$/ p' | tail -n +2 | head -n $(EXPECT_BYTES_COUNT) | awk '{$$0=$$2; printf "%s", substr($$0,length-1)}' | sed 's/.\{2\}/0x&,/g' | sed 's/^/{/;s/,$$/}/g')
+-- 
+2.44.0
+
diff --git a/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch b/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch
new file mode 100644
index 0000000..7f23a73
--- /dev/null
+++ b/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch
@@ -0,0 +1,39 @@
+From c4ee68eda9937743527fff41f4ede0f6a3228080 Mon Sep 17 00:00:00 2001
+From: Jason Andryuk <jandryuk@gmail.com>
+Date: Tue, 5 Mar 2024 12:00:30 +0100
+Subject: [PATCH 34/67] libxl: Fix segfault in device_model_spawn_outcome
+
+libxl__spawn_qdisk_backend() explicitly sets guest_config to NULL when
+starting QEMU (the usual launch through libxl__spawn_local_dm() has a
+guest_config though).
+
+Bail early on a NULL guest_config/d_config.  This skips the QMP queries
+for chardevs and VNC, but this xenpv QEMU instance isn't expected to
+provide those - only qdisk (or 9pfs backends after an upcoming change).
+
+Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
+Acked-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: d4f3d35f043f6ef29393166b0dd131c8102cf255
+master date: 2024-02-29 08:18:38 +0100
+---
+ tools/libs/light/libxl_dm.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index ed620a9d8e..29b43ed20a 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -3172,8 +3172,8 @@ static void device_model_spawn_outcome(libxl__egc *egc,
+ 
+     /* Check if spawn failed */
+     if (rc) goto out;
+-
+-    if (d_config->b_info.device_model_version
++    /* d_config is NULL for xl devd/libxl__spawn_qemu_xenpv_backend(). */
++    if (d_config && d_config->b_info.device_model_version
+             == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
+         rc = libxl__ev_time_register_rel(ao, &dmss->timeout,
+                                          devise_model_postconfig_timeout,
+-- 
+2.44.0
+
diff --git a/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch b/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch
new file mode 100644
index 0000000..177c73b
--- /dev/null
+++ b/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch
@@ -0,0 +1,197 @@
+From 2f49d9f89c14519d4cb1e06ab8370cf4ba50fab7 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 5 Mar 2024 12:00:47 +0100
+Subject: [PATCH 35/67] x86/altcall: always use a temporary parameter stashing
+ variable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The usage in ALT_CALL_ARG() on clang of:
+
+register union {
+    typeof(arg) e;
+    const unsigned long r;
+} ...
+
+When `arg` is the first argument to alternative_{,v}call() and
+const_vlapic_vcpu() is used results in clang 3.5.0 complaining with:
+
+arch/x86/hvm/vlapic.c:141:47: error: non-const static data member must be initialized out of line
+         alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) )
+
+Work around this by pulling `arg1` into a local variable, like it's done for
+further arguments (arg2, arg3...)
+
+Originally arg1 wasn't pulled into a variable because, for the a1_ register
+local variable, the possible clobbering as a result of operators on other
+variables doesn't matter:
+
+https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+Note clang version 3.8.1 seems to already be fixed and doesn't require the
+workaround, but since it's harmless do it uniformly everywhere.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: c20850540ad6a32f4fc17bde9b01c92b0df18bf0
+master date: 2024-02-29 08:21:49 +0100
+---
+ xen/arch/x86/include/asm/alternative.h | 36 +++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 12 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
+index bcb1dc94f4..fa04481316 100644
+--- a/xen/arch/x86/include/asm/alternative.h
++++ b/xen/arch/x86/include/asm/alternative.h
+@@ -253,21 +253,24 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_vcall1(func, arg) ({           \
+-    ALT_CALL_ARG(arg, 1);                          \
++    typeof(arg) v1_ = (arg);                       \
++    ALT_CALL_ARG(v1_, 1);                          \
+     ALT_CALL_NO_ARG2;                              \
+     (void)sizeof(func(arg));                       \
+     (void)alternative_callN(1, int, func);         \
+ })
+ 
+ #define alternative_call1(func, arg) ({            \
+-    ALT_CALL_ARG(arg, 1);                          \
++    typeof(arg) v1_ = (arg);                       \
++    ALT_CALL_ARG(v1_, 1);                          \
+     ALT_CALL_NO_ARG2;                              \
+     alternative_callN(1, typeof(func(arg)), func); \
+ })
+ 
+ #define alternative_vcall2(func, arg1, arg2) ({           \
++    typeof(arg1) v1_ = (arg1);                            \
+     typeof(arg2) v2_ = (arg2);                            \
+-    ALT_CALL_ARG(arg1, 1);                                \
++    ALT_CALL_ARG(v1_, 1);                                 \
+     ALT_CALL_ARG(v2_, 2);                                 \
+     ALT_CALL_NO_ARG3;                                     \
+     (void)sizeof(func(arg1, arg2));                       \
+@@ -275,17 +278,19 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_call2(func, arg1, arg2) ({            \
++    typeof(arg1) v1_ = (arg1);                            \
+     typeof(arg2) v2_ = (arg2);                            \
+-    ALT_CALL_ARG(arg1, 1);                                \
++    ALT_CALL_ARG(v1_, 1);                                 \
+     ALT_CALL_ARG(v2_, 2);                                 \
+     ALT_CALL_NO_ARG3;                                     \
+     alternative_callN(2, typeof(func(arg1, arg2)), func); \
+ })
+ 
+ #define alternative_vcall3(func, arg1, arg2, arg3) ({    \
++    typeof(arg1) v1_ = (arg1);                           \
+     typeof(arg2) v2_ = (arg2);                           \
+     typeof(arg3) v3_ = (arg3);                           \
+-    ALT_CALL_ARG(arg1, 1);                               \
++    ALT_CALL_ARG(v1_, 1);                                \
+     ALT_CALL_ARG(v2_, 2);                                \
+     ALT_CALL_ARG(v3_, 3);                                \
+     ALT_CALL_NO_ARG4;                                    \
+@@ -294,9 +299,10 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_call3(func, arg1, arg2, arg3) ({     \
++    typeof(arg1) v1_ = (arg1);                            \
+     typeof(arg2) v2_ = (arg2);                           \
+     typeof(arg3) v3_ = (arg3);                           \
+-    ALT_CALL_ARG(arg1, 1);                               \
++    ALT_CALL_ARG(v1_, 1);                                \
+     ALT_CALL_ARG(v2_, 2);                                \
+     ALT_CALL_ARG(v3_, 3);                                \
+     ALT_CALL_NO_ARG4;                                    \
+@@ -305,10 +311,11 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \
++    typeof(arg1) v1_ = (arg1);                              \
+     typeof(arg2) v2_ = (arg2);                              \
+     typeof(arg3) v3_ = (arg3);                              \
+     typeof(arg4) v4_ = (arg4);                              \
+-    ALT_CALL_ARG(arg1, 1);                                  \
++    ALT_CALL_ARG(v1_, 1);                                   \
+     ALT_CALL_ARG(v2_, 2);                                   \
+     ALT_CALL_ARG(v3_, 3);                                   \
+     ALT_CALL_ARG(v4_, 4);                                   \
+@@ -318,10 +325,11 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_call4(func, arg1, arg2, arg3, arg4) ({  \
++    typeof(arg1) v1_ = (arg1);                              \
+     typeof(arg2) v2_ = (arg2);                              \
+     typeof(arg3) v3_ = (arg3);                              \
+     typeof(arg4) v4_ = (arg4);                              \
+-    ALT_CALL_ARG(arg1, 1);                                  \
++    ALT_CALL_ARG(v1_, 1);                                   \
+     ALT_CALL_ARG(v2_, 2);                                   \
+     ALT_CALL_ARG(v3_, 3);                                   \
+     ALT_CALL_ARG(v4_, 4);                                   \
+@@ -332,11 +340,12 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \
++    typeof(arg1) v1_ = (arg1);                                    \
+     typeof(arg2) v2_ = (arg2);                                    \
+     typeof(arg3) v3_ = (arg3);                                    \
+     typeof(arg4) v4_ = (arg4);                                    \
+     typeof(arg5) v5_ = (arg5);                                    \
+-    ALT_CALL_ARG(arg1, 1);                                        \
++    ALT_CALL_ARG(v1_, 1);                                         \
+     ALT_CALL_ARG(v2_, 2);                                         \
+     ALT_CALL_ARG(v3_, 3);                                         \
+     ALT_CALL_ARG(v4_, 4);                                         \
+@@ -347,11 +356,12 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({  \
++    typeof(arg1) v1_ = (arg1);                                    \
+     typeof(arg2) v2_ = (arg2);                                    \
+     typeof(arg3) v3_ = (arg3);                                    \
+     typeof(arg4) v4_ = (arg4);                                    \
+     typeof(arg5) v5_ = (arg5);                                    \
+-    ALT_CALL_ARG(arg1, 1);                                        \
++    ALT_CALL_ARG(v1_, 1);                                         \
+     ALT_CALL_ARG(v2_, 2);                                         \
+     ALT_CALL_ARG(v3_, 3);                                         \
+     ALT_CALL_ARG(v4_, 4);                                         \
+@@ -363,12 +373,13 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
++    typeof(arg1) v1_ = (arg1);                                          \
+     typeof(arg2) v2_ = (arg2);                                          \
+     typeof(arg3) v3_ = (arg3);                                          \
+     typeof(arg4) v4_ = (arg4);                                          \
+     typeof(arg5) v5_ = (arg5);                                          \
+     typeof(arg6) v6_ = (arg6);                                          \
+-    ALT_CALL_ARG(arg1, 1);                                              \
++    ALT_CALL_ARG(v1_, 1);                                               \
+     ALT_CALL_ARG(v2_, 2);                                               \
+     ALT_CALL_ARG(v3_, 3);                                               \
+     ALT_CALL_ARG(v4_, 4);                                               \
+@@ -379,12 +390,13 @@ extern void alternative_branches(void);
+ })
+ 
+ #define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({  \
++    typeof(arg1) v1_ = (arg1);                                          \
+     typeof(arg2) v2_ = (arg2);                                          \
+     typeof(arg3) v3_ = (arg3);                                          \
+     typeof(arg4) v4_ = (arg4);                                          \
+     typeof(arg5) v5_ = (arg5);                                          \
+     typeof(arg6) v6_ = (arg6);                                          \
+-    ALT_CALL_ARG(arg1, 1);                                              \
++    ALT_CALL_ARG(v1_, 1);                                               \
+     ALT_CALL_ARG(v2_, 2);                                               \
+     ALT_CALL_ARG(v3_, 3);                                               \
+     ALT_CALL_ARG(v4_, 4);                                               \
+-- 
+2.44.0
+
diff --git a/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch b/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
new file mode 100644
index 0000000..b91ff52
--- /dev/null
+++ b/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
@@ -0,0 +1,102 @@
+From 54dacb5c02cba4676879ed077765734326b78e39 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 5 Mar 2024 12:01:22 +0100
+Subject: [PATCH 36/67] x86/cpu-policy: Allow for levelling of VERW side
+ effects
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool.  Allow this, by
+having them unconditionally set in max, with the host values reflected in
+default.  Annotate the bits as having special properties.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: de17162cafd27f2865a3102a2ec0f386a02ed03d
+master date: 2024-03-01 20:14:19 +0000
+---
+ xen/arch/x86/cpu-policy.c                   | 24 +++++++++++++++++++++
+ xen/arch/x86/include/asm/cpufeature.h       |  1 +
+ xen/include/public/arch-x86/cpufeatureset.h |  4 ++--
+ 3 files changed, 27 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index f0f2c8a1c0..7b875a7221 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -435,6 +435,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+         __set_bit(X86_FEATURE_RSBA, fs);
+         __set_bit(X86_FEATURE_RRSBA, fs);
+ 
++        /*
++         * These bits indicate that the VERW instruction may have gained
++         * scrubbing side effects.  With pooling, they mean "you might migrate
++         * somewhere where scrubbing is necessary", and may need exposing on
++         * unaffected hardware.  This is fine, because the VERW instruction
++         * has been around since the 286.
++         */
++        __set_bit(X86_FEATURE_MD_CLEAR, fs);
++        __set_bit(X86_FEATURE_FB_CLEAR, fs);
++
+         /*
+          * The Gather Data Sampling microcode mitigation (August 2023) has an
+          * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
+@@ -469,6 +479,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+              cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) )
+             __clear_bit(X86_FEATURE_RDRAND, fs);
+ 
++        /*
++         * These bits indicate that the VERW instruction may have gained
++         * scrubbing side effects.  The max policy has them set for migration
++         * reasons, so reset the default policy back to the host values in
++         * case we're unaffected.
++         */
++        __clear_bit(X86_FEATURE_MD_CLEAR, fs);
++        if ( cpu_has_md_clear )
++            __set_bit(X86_FEATURE_MD_CLEAR, fs);
++
++        __clear_bit(X86_FEATURE_FB_CLEAR, fs);
++        if ( cpu_has_fb_clear )
++            __set_bit(X86_FEATURE_FB_CLEAR, fs);
++
+         /*
+          * The Gather Data Sampling microcode mitigation (August 2023) has an
+          * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
+diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
+index 9ef7756593..ec824e8954 100644
+--- a/xen/arch/x86/include/asm/cpufeature.h
++++ b/xen/arch/x86/include/asm/cpufeature.h
+@@ -136,6 +136,7 @@
+ #define cpu_has_avx512_4fmaps   boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
+ #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT)
+ #define cpu_has_srbds_ctrl      boot_cpu_has(X86_FEATURE_SRBDS_CTRL)
++#define cpu_has_md_clear        boot_cpu_has(X86_FEATURE_MD_CLEAR)
+ #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)
+ #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
+ #define cpu_has_serialize       boot_cpu_has(X86_FEATURE_SERIALIZE)
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index 94d211df2f..aec1407613 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -260,7 +260,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A  AVX512 Multiply Accumulation Single
+ XEN_CPUFEATURE(FSRM,          9*32+ 4) /*A  Fast Short REP MOVS */
+ XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a  VP2INTERSECT{D,Q} insns */
+ XEN_CPUFEATURE(SRBDS_CTRL,    9*32+ 9) /*   MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */
+-XEN_CPUFEATURE(MD_CLEAR,      9*32+10) /*A  VERW clears microarchitectural buffers */
++XEN_CPUFEATURE(MD_CLEAR,      9*32+10) /*!A VERW clears microarchitectural buffers */
+ XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */
+ XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
+ XEN_CPUFEATURE(SERIALIZE,     9*32+14) /*A  SERIALIZE insn */
+@@ -321,7 +321,7 @@ XEN_CPUFEATURE(DOITM,              16*32+12) /*   Data Operand Invariant Timing
+ XEN_CPUFEATURE(SBDR_SSDP_NO,       16*32+13) /*A  No Shared Buffer Data Read or Sideband Stale Data Propagation */
+ XEN_CPUFEATURE(FBSDP_NO,           16*32+14) /*A  No Fill Buffer Stale Data Propagation */
+ XEN_CPUFEATURE(PSDP_NO,            16*32+15) /*A  No Primary Stale Data Propagation */
+-XEN_CPUFEATURE(FB_CLEAR,           16*32+17) /*A  Fill Buffers cleared by VERW */
++XEN_CPUFEATURE(FB_CLEAR,           16*32+17) /*!A Fill Buffers cleared by VERW */
+ XEN_CPUFEATURE(FB_CLEAR_CTRL,      16*32+18) /*   MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */
+ XEN_CPUFEATURE(RRSBA,              16*32+19) /*!  Restricted RSB Alternative */
+ XEN_CPUFEATURE(BHI_NO,             16*32+20) /*A  No Branch History Injection  */
+-- 
+2.44.0
+
diff --git a/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch b/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch
new file mode 100644
index 0000000..a46f913
--- /dev/null
+++ b/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch
@@ -0,0 +1,99 @@
+From 1e9808227c10717228969e924cab49cad4af6265 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 12 Mar 2024 12:08:48 +0100
+Subject: [PATCH 37/67] hvmloader/PCI: skip huge BARs in certain calculations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+BARs of size 2Gb and up can't possibly fit below 4Gb: Both the bottom of
+the lower 2Gb range and the top of the higher 2Gb range have special
+purpose. Don't even have them influence whether to (perhaps) relocate
+low RAM.
+
+Reported-by: Neowutran <xen@neowutran.ovh>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 57acad12a09ffa490e870ebe17596aad858f0191
+master date: 2024-03-06 10:19:29 +0100
+---
+ tools/firmware/hvmloader/pci.c | 28 ++++++++++++++++++++--------
+ 1 file changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c
+index 257a6feb61..c3c61ca060 100644
+--- a/tools/firmware/hvmloader/pci.c
++++ b/tools/firmware/hvmloader/pci.c
+@@ -33,6 +33,13 @@ uint32_t pci_mem_start = HVM_BELOW_4G_MMIO_START;
+ const uint32_t pci_mem_end = RESERVED_MEMBASE;
+ uint64_t pci_hi_mem_start = 0, pci_hi_mem_end = 0;
+ 
++/*
++ * BARs larger than this value are put in 64-bit space unconditionally.  That
++ * is, such BARs also don't play into the determination of how big the lowmem
++ * MMIO hole needs to be.
++ */
++#define BAR_RELOC_THRESH GB(1)
++
+ enum virtual_vga virtual_vga = VGA_none;
+ unsigned long igd_opregion_pgbase = 0;
+ 
+@@ -286,9 +293,11 @@ void pci_setup(void)
+             bars[i].bar_reg = bar_reg;
+             bars[i].bar_sz  = bar_sz;
+ 
+-            if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) ==
+-                  PCI_BASE_ADDRESS_SPACE_MEMORY) ||
+-                 (bar_reg == PCI_ROM_ADDRESS) )
++            if ( is_64bar && bar_sz > BAR_RELOC_THRESH )
++                bar64_relocate = 1;
++            else if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) ==
++                       PCI_BASE_ADDRESS_SPACE_MEMORY) ||
++                      (bar_reg == PCI_ROM_ADDRESS) )
+                 mmio_total += bar_sz;
+ 
+             nr_bars++;
+@@ -367,7 +376,7 @@ void pci_setup(void)
+             pci_mem_start = hvm_info->low_mem_pgend << PAGE_SHIFT;
+     }
+ 
+-    if ( mmio_total > (pci_mem_end - pci_mem_start) )
++    if ( mmio_total > (pci_mem_end - pci_mem_start) || bar64_relocate )
+     {
+         printf("Low MMIO hole not large enough for all devices,"
+                " relocating some BARs to 64-bit\n");
+@@ -430,7 +439,8 @@ void pci_setup(void)
+ 
+         /*
+          * Relocate to high memory if the total amount of MMIO needed
+-         * is more than the low MMIO available.  Because devices are
++         * is more than the low MMIO available or BARs bigger than
++         * BAR_RELOC_THRESH are present.  Because devices are
+          * processed in order of bar_sz, this will preferentially
+          * relocate larger devices to high memory first.
+          *
+@@ -446,8 +456,9 @@ void pci_setup(void)
+          *   the code here assumes it to be.)
+          * Should either of those two conditions change, this code will break.
+          */
+-        using_64bar = bars[i].is_64bar && bar64_relocate
+-            && (mmio_total > (mem_resource.max - mem_resource.base));
++        using_64bar = bars[i].is_64bar && bar64_relocate &&
++            (mmio_total > (mem_resource.max - mem_resource.base) ||
++             bar_sz > BAR_RELOC_THRESH);
+         bar_data = pci_readl(devfn, bar_reg);
+ 
+         if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
+@@ -467,7 +478,8 @@ void pci_setup(void)
+                 resource = &mem_resource;
+                 bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
+             }
+-            mmio_total -= bar_sz;
++            if ( bar_sz <= BAR_RELOC_THRESH )
++                mmio_total -= bar_sz;
+         }
+         else
+         {
+-- 
+2.44.0
+
diff --git a/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch b/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch
new file mode 100644
index 0000000..66b4db3
--- /dev/null
+++ b/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch
@@ -0,0 +1,41 @@
+From 1f94117bec55a7b934fed3dfd3529db624eb441f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 12 Mar 2024 12:08:59 +0100
+Subject: [PATCH 38/67] x86/mm: fix detection of last L1 entry in
+ modify_xen_mappings_lite()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The current logic to detect when to switch to the next L1 table is incorrectly
+using l2_table_offset() in order to notice when the last entry on the current
+L1 table has been reached.
+
+It should instead use l1_table_offset() to check whether the index has wrapped
+to point to the first entry, and so the next L1 table should be used.
+
+Fixes: 8676092a0f16 ('x86/livepatch: Fix livepatch application when CET is active')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 7c81558208de7858251b62f168a449be84305595
+master date: 2024-03-11 11:09:42 +0000
+---
+ xen/arch/x86/mm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index e884a6fdbd..330c4abcd1 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5963,7 +5963,7 @@ void init_or_livepatch modify_xen_mappings_lite(
+ 
+                 v += 1UL << L1_PAGETABLE_SHIFT;
+ 
+-                if ( l2_table_offset(v) == 0 )
++                if ( l1_table_offset(v) == 0 )
+                     break;
+             }
+ 
+-- 
+2.44.0
+
diff --git a/0039-x86-entry-Introduce-EFRAME_-constants.patch b/0039-x86-entry-Introduce-EFRAME_-constants.patch
new file mode 100644
index 0000000..c280286
--- /dev/null
+++ b/0039-x86-entry-Introduce-EFRAME_-constants.patch
@@ -0,0 +1,314 @@
+From e691f99f17198906f813b85dcabafe5addb9a57a Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Sat, 27 Jan 2024 17:52:09 +0000
+Subject: [PATCH 39/67] x86/entry: Introduce EFRAME_* constants
+
+restore_all_guest() does a lot of manipulation of the stack after popping the
+GPRs, and uses raw %rsp displacements to do so.  Also, almost all entrypaths
+use raw %rsp displacements prior to pushing GPRs.
+
+Provide better mnemonics, to aid readability and reduce the chance of errors
+when editing.
+
+No functional change.  The resulting binary is identical.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035)
+---
+ xen/arch/x86/x86_64/asm-offsets.c  | 17 ++++++++
+ xen/arch/x86/x86_64/compat/entry.S |  2 +-
+ xen/arch/x86/x86_64/entry.S        | 70 +++++++++++++++---------------
+ 3 files changed, 53 insertions(+), 36 deletions(-)
+
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index 287dac101a..31fa63b77f 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -51,6 +51,23 @@ void __dummy__(void)
+     OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es);
+     BLANK();
+ 
++    /*
++     * EFRAME_* is for the entry/exit logic where %rsp is pointing at
++     * UREGS_error_code and GPRs are still/already guest values.
++     */
++#define OFFSET_EF(sym, mem)                                             \
++    DEFINE(sym, offsetof(struct cpu_user_regs, mem) -                   \
++                offsetof(struct cpu_user_regs, error_code))
++
++    OFFSET_EF(EFRAME_entry_vector,    entry_vector);
++    OFFSET_EF(EFRAME_rip,             rip);
++    OFFSET_EF(EFRAME_cs,              cs);
++    OFFSET_EF(EFRAME_eflags,          eflags);
++    OFFSET_EF(EFRAME_rsp,             rsp);
++    BLANK();
++
++#undef OFFSET_EF
++
+     OFFSET(VCPU_processor, struct vcpu, processor);
+     OFFSET(VCPU_domain, struct vcpu, domain);
+     OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index 253bb1688c..7c211314d8 100644
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -15,7 +15,7 @@ ENTRY(entry_int82)
+         ENDBR64
+         ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+         pushq $0
+-        movl  $HYPERCALL_VECTOR, 4(%rsp)
++        movl  $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp)
+         SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
+ 
+         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 585b0c9551..412cbeb3ec 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -190,15 +190,15 @@ restore_all_guest:
+         SPEC_CTRL_EXIT_TO_PV    /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+ 
+         RESTORE_ALL
+-        testw $TRAP_syscall,4(%rsp)
++        testw $TRAP_syscall, EFRAME_entry_vector(%rsp)
+         jz    iret_exit_to_guest
+ 
+-        movq  24(%rsp),%r11           # RFLAGS
++        mov   EFRAME_eflags(%rsp), %r11
+         andq  $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11
+         orq   $X86_EFLAGS_IF,%r11
+ 
+         /* Don't use SYSRET path if the return address is not canonical. */
+-        movq  8(%rsp),%rcx
++        mov   EFRAME_rip(%rsp), %rcx
+         sarq  $47,%rcx
+         incl  %ecx
+         cmpl  $1,%ecx
+@@ -213,20 +213,20 @@ restore_all_guest:
+         ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
+ #endif
+ 
+-        movq  8(%rsp), %rcx           # RIP
+-        cmpw  $FLAT_USER_CS32,16(%rsp)# CS
+-        movq  32(%rsp),%rsp           # RSP
++        mov   EFRAME_rip(%rsp), %rcx
++        cmpw  $FLAT_USER_CS32, EFRAME_cs(%rsp)
++        mov   EFRAME_rsp(%rsp), %rsp
+         je    1f
+         sysretq
+ 1:      sysretl
+ 
+         ALIGN
+ .Lrestore_rcx_iret_exit_to_guest:
+-        movq  8(%rsp), %rcx           # RIP
++        mov   EFRAME_rip(%rsp), %rcx
+ /* No special register assumptions. */
+ iret_exit_to_guest:
+-        andl  $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp)
+-        orl   $X86_EFLAGS_IF,24(%rsp)
++        andl  $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp)
++        orl   $X86_EFLAGS_IF, EFRAME_eflags(%rsp)
+         addq  $8,%rsp
+ .Lft0:  iretq
+         _ASM_PRE_EXTABLE(.Lft0, handle_exception)
+@@ -257,7 +257,7 @@ ENTRY(lstar_enter)
+         pushq $FLAT_KERNEL_CS64
+         pushq %rcx
+         pushq $0
+-        movl  $TRAP_syscall, 4(%rsp)
++        movl  $TRAP_syscall, EFRAME_entry_vector(%rsp)
+         SAVE_ALL
+ 
+         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -294,7 +294,7 @@ ENTRY(cstar_enter)
+         pushq $FLAT_USER_CS32
+         pushq %rcx
+         pushq $0
+-        movl  $TRAP_syscall, 4(%rsp)
++        movl  $TRAP_syscall, EFRAME_entry_vector(%rsp)
+         SAVE_ALL
+ 
+         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved)
+         pushq $3 /* ring 3 null cs */
+         pushq $0 /* null rip */
+         pushq $0
+-        movl  $TRAP_syscall, 4(%rsp)
++        movl  $TRAP_syscall, EFRAME_entry_vector(%rsp)
+         SAVE_ALL
+ 
+         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap)
+         ENDBR64
+         ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+         pushq $0
+-        movl  $0x80, 4(%rsp)
++        movl  $0x80, EFRAME_entry_vector(%rsp)
+         SAVE_ALL
+ 
+         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -649,7 +649,7 @@ ret_from_intr:
+         .section .init.text, "ax", @progbits
+ ENTRY(early_page_fault)
+         ENDBR64
+-        movl  $TRAP_page_fault, 4(%rsp)
++        movl  $TRAP_page_fault, EFRAME_entry_vector(%rsp)
+         SAVE_ALL
+         movq  %rsp, %rdi
+         call  do_early_page_fault
+@@ -716,7 +716,7 @@ ENTRY(common_interrupt)
+ 
+ ENTRY(page_fault)
+         ENDBR64
+-        movl  $TRAP_page_fault,4(%rsp)
++        movl  $TRAP_page_fault, EFRAME_entry_vector(%rsp)
+ /* No special register assumptions. */
+ GLOBAL(handle_exception)
+         ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+@@ -892,90 +892,90 @@ FATAL_exception_with_ints_disabled:
+ ENTRY(divide_error)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_divide_error,4(%rsp)
++        movl  $TRAP_divide_error, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(coprocessor_error)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_copro_error,4(%rsp)
++        movl  $TRAP_copro_error, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(simd_coprocessor_error)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_simd_error,4(%rsp)
++        movl  $TRAP_simd_error, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(device_not_available)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_no_device,4(%rsp)
++        movl  $TRAP_no_device, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(debug)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_debug,4(%rsp)
++        movl  $TRAP_debug, EFRAME_entry_vector(%rsp)
+         jmp   handle_ist_exception
+ 
+ ENTRY(int3)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_int3,4(%rsp)
++        movl  $TRAP_int3, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(overflow)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_overflow,4(%rsp)
++        movl  $TRAP_overflow, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(bounds)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_bounds,4(%rsp)
++        movl  $TRAP_bounds, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(invalid_op)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_invalid_op,4(%rsp)
++        movl  $TRAP_invalid_op, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(invalid_TSS)
+         ENDBR64
+-        movl  $TRAP_invalid_tss,4(%rsp)
++        movl  $TRAP_invalid_tss, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(segment_not_present)
+         ENDBR64
+-        movl  $TRAP_no_segment,4(%rsp)
++        movl  $TRAP_no_segment, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(stack_segment)
+         ENDBR64
+-        movl  $TRAP_stack_error,4(%rsp)
++        movl  $TRAP_stack_error, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(general_protection)
+         ENDBR64
+-        movl  $TRAP_gp_fault,4(%rsp)
++        movl  $TRAP_gp_fault, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(alignment_check)
+         ENDBR64
+-        movl  $TRAP_alignment_check,4(%rsp)
++        movl  $TRAP_alignment_check, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(entry_CP)
+         ENDBR64
+-        movl  $X86_EXC_CP, 4(%rsp)
++        movl  $X86_EXC_CP, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+ ENTRY(double_fault)
+         ENDBR64
+-        movl  $TRAP_double_fault,4(%rsp)
++        movl  $TRAP_double_fault, EFRAME_entry_vector(%rsp)
+         /* Set AC to reduce chance of further SMAP faults */
+         ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
+         SAVE_ALL
+@@ -1001,7 +1001,7 @@ ENTRY(double_fault)
+ ENTRY(nmi)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_nmi,4(%rsp)
++        movl  $TRAP_nmi, EFRAME_entry_vector(%rsp)
+ handle_ist_exception:
+         ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+         SAVE_ALL
+@@ -1134,7 +1134,7 @@ handle_ist_exception:
+ ENTRY(machine_check)
+         ENDBR64
+         pushq $0
+-        movl  $TRAP_machine_check,4(%rsp)
++        movl  $TRAP_machine_check, EFRAME_entry_vector(%rsp)
+         jmp   handle_ist_exception
+ 
+ /* No op trap handler.  Required for kexec crash path. */
+@@ -1171,7 +1171,7 @@ autogen_stubs: /* Automatically generated stubs. */
+ 1:
+         ENDBR64
+         pushq $0
+-        movb  $vec,4(%rsp)
++        movb  $vec, EFRAME_entry_vector(%rsp)
+         jmp   common_interrupt
+ 
+         entrypoint 1b
+@@ -1185,7 +1185,7 @@ autogen_stubs: /* Automatically generated stubs. */
+         test  $8,%spl        /* 64bit exception frames are 16 byte aligned, but the word */
+         jz    2f             /* size is 8 bytes.  Check whether the processor gave us an */
+         pushq $0             /* error code, and insert an empty one if not.              */
+-2:      movb  $vec,4(%rsp)
++2:      movb  $vec, EFRAME_entry_vector(%rsp)
+         jmp   handle_exception
+ 
+         entrypoint 1b
+-- 
+2.44.0
+
diff --git a/0040-x86-Resync-intel-family.h-from-Linux.patch b/0040-x86-Resync-intel-family.h-from-Linux.patch
new file mode 100644
index 0000000..84e0304
--- /dev/null
+++ b/0040-x86-Resync-intel-family.h-from-Linux.patch
@@ -0,0 +1,98 @@
+From abc43cf5a6579f1aa0decf0a2349cdd2d2473117 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 27 Feb 2024 16:07:39 +0000
+Subject: [PATCH 40/67] x86: Resync intel-family.h from Linux
+
+From v6.8-rc6
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 195e75371b13c4f7ecdf7b5c50aed0d02f2d7ce8)
+---
+ xen/arch/x86/include/asm/intel-family.h | 38 ++++++++++++++++++++++---
+ 1 file changed, 34 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/intel-family.h b/xen/arch/x86/include/asm/intel-family.h
+index ffc49151be..b65e9c46b9 100644
+--- a/xen/arch/x86/include/asm/intel-family.h
++++ b/xen/arch/x86/include/asm/intel-family.h
+@@ -26,6 +26,9 @@
+  *		_G	- parts with extra graphics on
+  *		_X	- regular server parts
+  *		_D	- micro server parts
++ *		_N,_P	- other mobile parts
++ *		_H	- premium mobile parts
++ *		_S	- other client parts
+  *
+  *		Historical OPTDIFFs:
+  *
+@@ -37,6 +40,9 @@
+  * their own names :-(
+  */
+ 
++/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
++#define INTEL_FAM6_ANY			X86_MODEL_ANY
++
+ #define INTEL_FAM6_CORE_YONAH		0x0E
+ 
+ #define INTEL_FAM6_CORE2_MEROM		0x0F
+@@ -93,8 +99,6 @@
+ #define INTEL_FAM6_ICELAKE_L		0x7E	/* Sunny Cove */
+ #define INTEL_FAM6_ICELAKE_NNPI		0x9D	/* Sunny Cove */
+ 
+-#define INTEL_FAM6_LAKEFIELD		0x8A	/* Sunny Cove / Tremont */
+-
+ #define INTEL_FAM6_ROCKETLAKE		0xA7	/* Cypress Cove */
+ 
+ #define INTEL_FAM6_TIGERLAKE_L		0x8C	/* Willow Cove */
+@@ -102,12 +106,31 @@
+ 
+ #define INTEL_FAM6_SAPPHIRERAPIDS_X	0x8F	/* Golden Cove */
+ 
++#define INTEL_FAM6_EMERALDRAPIDS_X	0xCF
++
++#define INTEL_FAM6_GRANITERAPIDS_X	0xAD
++#define INTEL_FAM6_GRANITERAPIDS_D	0xAE
++
++/* "Hybrid" Processors (P-Core/E-Core) */
++
++#define INTEL_FAM6_LAKEFIELD		0x8A	/* Sunny Cove / Tremont */
++
+ #define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
+ #define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
+ 
+-#define INTEL_FAM6_RAPTORLAKE		0xB7
++#define INTEL_FAM6_RAPTORLAKE		0xB7	/* Raptor Cove / Enhanced Gracemont */
++#define INTEL_FAM6_RAPTORLAKE_P		0xBA
++#define INTEL_FAM6_RAPTORLAKE_S		0xBF
++
++#define INTEL_FAM6_METEORLAKE		0xAC
++#define INTEL_FAM6_METEORLAKE_L		0xAA
++
++#define INTEL_FAM6_ARROWLAKE_H		0xC5
++#define INTEL_FAM6_ARROWLAKE		0xC6
++
++#define INTEL_FAM6_LUNARLAKE_M		0xBD
+ 
+-/* "Small Core" Processors (Atom) */
++/* "Small Core" Processors (Atom/E-Core) */
+ 
+ #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */
+ #define INTEL_FAM6_ATOM_BONNELL_MID	0x26 /* Silverthorne, Lincroft */
+@@ -134,6 +157,13 @@
+ #define INTEL_FAM6_ATOM_TREMONT		0x96 /* Elkhart Lake */
+ #define INTEL_FAM6_ATOM_TREMONT_L	0x9C /* Jasper Lake */
+ 
++#define INTEL_FAM6_ATOM_GRACEMONT	0xBE /* Alderlake N */
++
++#define INTEL_FAM6_ATOM_CRESTMONT_X	0xAF /* Sierra Forest */
++#define INTEL_FAM6_ATOM_CRESTMONT	0xB6 /* Grand Ridge */
++
++#define INTEL_FAM6_ATOM_DARKMONT_X	0xDD /* Clearwater Forest */
++
+ /* Xeon Phi */
+ 
+ #define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
+-- 
+2.44.0
+
diff --git a/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch b/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch
new file mode 100644
index 0000000..871f10f
--- /dev/null
+++ b/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch
@@ -0,0 +1,146 @@
+From 77f2bec134049aba29b9b459f955022722d10847 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 23 Jun 2023 11:32:00 +0100
+Subject: [PATCH 41/67] x86/vmx: Perform VERW flushing later in the VMExit path
+
+Broken out of the following patch because this change is subtle enough on its
+own.  See it for the rationale of why we're moving VERW.
+
+As for how, extend the trick already used to hold one condition in
+flags (RESUME vs LAUNCH) through the POPing of GPRs.
+
+Move the MOV CR earlier.  Intel specify flags to be undefined across it.
+
+Encode the two conditions we want using SF and PF.  See the code comment for
+exactly how.
+
+Leave a comment to explain the lack of any content around
+SPEC_CTRL_EXIT_TO_VMX, but leave the block in place.  Sod's law says if we
+delete it, we'll need to reintroduce it.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f)
+---
+ xen/arch/x86/hvm/vmx/entry.S             | 36 +++++++++++++++++++++---
+ xen/arch/x86/include/asm/asm_defns.h     |  8 ++++++
+ xen/arch/x86/include/asm/spec_ctrl_asm.h |  7 +++++
+ xen/arch/x86/x86_64/asm-offsets.c        |  1 +
+ 4 files changed, 48 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
+index 5f5de45a13..cdde76e138 100644
+--- a/xen/arch/x86/hvm/vmx/entry.S
++++ b/xen/arch/x86/hvm/vmx/entry.S
+@@ -87,17 +87,39 @@ UNLIKELY_END(realmode)
+ 
+         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+         /* SPEC_CTRL_EXIT_TO_VMX   Req: %rsp=regs/cpuinfo              Clob:    */
+-        DO_SPEC_CTRL_COND_VERW
++        /*
++         * All speculation safety work happens to be elsewhere.  VERW is after
++         * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left
++         * to the MSR load list.
++         */
+ 
+         mov  VCPU_hvm_guest_cr2(%rbx),%rax
++        mov  %rax, %cr2
++
++        /*
++         * We need to perform two conditional actions (VERW, and Resume vs
++         * Launch) after popping GPRs.  With some cunning, we can encode both
++         * of these in eflags together.
++         *
++         * Parity is only calculated over the bottom byte of the answer, while
++         * Sign is simply the top bit.
++         *
++         * Therefore, the final OR instruction ends up producing:
++         *   SF = VCPU_vmx_launched
++         *   PF = !SCF_verw
++         */
++        BUILD_BUG_ON(SCF_verw & ~0xff)
++        movzbl VCPU_vmx_launched(%rbx), %ecx
++        shl  $31, %ecx
++        movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax
++        and  $SCF_verw, %eax
++        or   %eax, %ecx
+ 
+         pop  %r15
+         pop  %r14
+         pop  %r13
+         pop  %r12
+         pop  %rbp
+-        mov  %rax,%cr2
+-        cmpb $0,VCPU_vmx_launched(%rbx)
+         pop  %rbx
+         pop  %r11
+         pop  %r10
+@@ -108,7 +130,13 @@ UNLIKELY_END(realmode)
+         pop  %rdx
+         pop  %rsi
+         pop  %rdi
+-        je   .Lvmx_launch
++
++        jpe  .L_skip_verw
++        /* VERW clobbers ZF, but preserves all others, including SF. */
++        verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp)
++.L_skip_verw:
++
++        jns  .Lvmx_launch
+ 
+ /*.Lvmx_resume:*/
+         VMRESUME
+diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h
+index d9431180cf..abc6822b08 100644
+--- a/xen/arch/x86/include/asm/asm_defns.h
++++ b/xen/arch/x86/include/asm/asm_defns.h
+@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp");
+ 
+ #ifdef __ASSEMBLY__
+ 
++.macro BUILD_BUG_ON condstr, cond:vararg
++        .if \cond
++        .error "Condition \"\condstr\" not satisfied"
++        .endif
++.endm
++/* preprocessor macro to make error message more user friendly */
++#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond
++
+ #ifdef HAVE_AS_QUOTED_SYM
+ #define SUBSECTION_LBL(tag)                        \
+         .ifndef .L.tag;                            \
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index f4b8b9d956..ca9cb0f5dd 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -164,6 +164,13 @@
+ #endif
+ .endm
+ 
++/*
++ * Helper to improve the readibility of stack dispacements with %rsp in
++ * unusual positions.  Both @field and @top_of_stack should be constants from
++ * the same object.  @top_of_stack should be where %rsp is currently pointing.
++ */
++#define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
++
+ .macro DO_SPEC_CTRL_COND_VERW
+ /*
+  * Requires %rsp=cpuinfo
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index 31fa63b77f..a4e94d6930 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -135,6 +135,7 @@ void __dummy__(void)
+ #endif
+ 
+     OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
++    OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code);
+     OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
+     OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+     OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset);
+-- 
+2.44.0
+
diff --git a/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch b/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch
new file mode 100644
index 0000000..ac78acd
--- /dev/null
+++ b/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch
@@ -0,0 +1,209 @@
+From 76af773de5d3e68b7140cc9c5343be6746c9101c Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Sat, 27 Jan 2024 18:20:56 +0000
+Subject: [PATCH 42/67] x86/spec-ctrl: Perform VERW flushing later in exit
+ paths
+
+On parts vulnerable to RFDS, VERW's side effects are extended to scrub all
+non-architectural entries in various Physical Register Files.  To remove all
+of Xen's values, the VERW must be after popping the GPRs.
+
+Rework SPEC_CTRL_COND_VERW to default to a CPUINFO_error_code %rsp position,
+but with overrides for other contexts.  Identify that it clobbers eflags; this
+is particularly relevant for the SYSRET path.
+
+For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a
+shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to
+issue the VERW.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7)
+---
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++--------
+ xen/arch/x86/x86_64/asm-offsets.c        | 13 +++++++--
+ xen/arch/x86/x86_64/compat/entry.S       |  6 ++++
+ xen/arch/x86/x86_64/entry.S              | 21 +++++++++++++-
+ 4 files changed, 61 insertions(+), 15 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index ca9cb0f5dd..97a97b2b82 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -171,16 +171,23 @@
+  */
+ #define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
+ 
+-.macro DO_SPEC_CTRL_COND_VERW
++.macro SPEC_CTRL_COND_VERW \
++    scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \
++    sel=STK_REL(CPUINFO_verw_sel,        CPUINFO_error_code)
+ /*
+- * Requires %rsp=cpuinfo
++ * Requires \scf and \sel as %rsp-relative expressions
++ * Clobbers eflags
++ *
++ * VERW needs to run after guest GPRs have been restored, where only %rsp is
++ * good to use.  Default to expecting %rsp pointing at CPUINFO_error_code.
++ * Contexts where this is not true must provide an alternative \scf and \sel.
+  *
+  * Issue a VERW for its flushing side effect, if indicated.  This is a Spectre
+  * v1 gadget, but the IRET/VMEntry is serialising.
+  */
+-    testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp)
++    testb $SCF_verw, \scf(%rsp)
+     jz .L\@_verw_skip
+-    verw CPUINFO_verw_sel(%rsp)
++    verw \sel(%rsp)
+ .L\@_verw_skip:
+ .endm
+ 
+@@ -298,8 +305,6 @@
+  */
+     ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
+ 
+-    DO_SPEC_CTRL_COND_VERW
+-
+     ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+ .endm
+ 
+@@ -379,7 +384,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+  */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+- * Requires %r12=ist_exit, %r14=stack_end
++ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs
+  * Clobbers %rax, %rbx, %rcx, %rdx
+  */
+     movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx
+@@ -407,11 +412,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+     test %r12, %r12
+     jz .L\@_skip_ist_exit
+ 
+-    /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */
+-    testb $SCF_verw, %bl
+-    jz .L\@_skip_verw
+-    verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
+-.L\@_skip_verw:
++    /*
++     * Stash SCF and verw_sel above eflags in the case of an IST_exit.  The
++     * VERW logic needs to run after guest GPRs have been restored; i.e. where
++     * we cannot use %r12 or %r14 for the purposes they have here.
++     *
++     * When the CPU pushed this exception frame, it zero-extended eflags.
++     * Therefore it is safe for the VERW logic to look at the stashed SCF
++     * outside of the ist_exit condition.  Also, this stashing won't influence
++     * any other restore_all_guest() paths.
++     */
++    or $(__HYPERVISOR_DS32 << 16), %ebx
++    mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */
+ 
+     ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+ 
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index a4e94d6930..4cd5938d7b 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -55,14 +55,22 @@ void __dummy__(void)
+      * EFRAME_* is for the entry/exit logic where %rsp is pointing at
+      * UREGS_error_code and GPRs are still/already guest values.
+      */
+-#define OFFSET_EF(sym, mem)                                             \
++#define OFFSET_EF(sym, mem, ...)                                        \
+     DEFINE(sym, offsetof(struct cpu_user_regs, mem) -                   \
+-                offsetof(struct cpu_user_regs, error_code))
++                offsetof(struct cpu_user_regs, error_code) __VA_ARGS__)
+ 
+     OFFSET_EF(EFRAME_entry_vector,    entry_vector);
+     OFFSET_EF(EFRAME_rip,             rip);
+     OFFSET_EF(EFRAME_cs,              cs);
+     OFFSET_EF(EFRAME_eflags,          eflags);
++
++    /*
++     * These aren't real fields.  They're spare space, used by the IST
++     * exit-to-xen path.
++     */
++    OFFSET_EF(EFRAME_shadow_scf,      eflags, +4);
++    OFFSET_EF(EFRAME_shadow_sel,      eflags, +6);
++
+     OFFSET_EF(EFRAME_rsp,             rsp);
+     BLANK();
+ 
+@@ -136,6 +144,7 @@ void __dummy__(void)
+ 
+     OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
+     OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code);
++    OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip);
+     OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
+     OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+     OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset);
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index 7c211314d8..3b2fbcd873 100644
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest)
+         SPEC_CTRL_EXIT_TO_PV    /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+ 
+         RESTORE_ALL adj=8 compat=1
++
++        /* Account for ev/ec having already been popped off the stack. */
++        SPEC_CTRL_COND_VERW \
++            scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \
++            sel=STK_REL(CPUINFO_verw_sel,        CPUINFO_rip)
++
+ .Lft0:  iretq
+         _ASM_PRE_EXTABLE(.Lft0, handle_exception)
+ 
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 412cbeb3ec..ef517e2945 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -214,6 +214,9 @@ restore_all_guest:
+ #endif
+ 
+         mov   EFRAME_rip(%rsp), %rcx
++
++        SPEC_CTRL_COND_VERW     /* Req: %rsp=eframe                    Clob: efl */
++
+         cmpw  $FLAT_USER_CS32, EFRAME_cs(%rsp)
+         mov   EFRAME_rsp(%rsp), %rsp
+         je    1f
+@@ -227,6 +230,9 @@ restore_all_guest:
+ iret_exit_to_guest:
+         andl  $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp)
+         orl   $X86_EFLAGS_IF, EFRAME_eflags(%rsp)
++
++        SPEC_CTRL_COND_VERW     /* Req: %rsp=eframe                    Clob: efl */
++
+         addq  $8,%rsp
+ .Lft0:  iretq
+         _ASM_PRE_EXTABLE(.Lft0, handle_exception)
+@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3)
+ UNLIKELY_END(exit_cr3)
+ 
+         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+-        SPEC_CTRL_EXIT_TO_XEN     /* Req: %r12=ist_exit %r14=end, Clob: abcd */
++        SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */
+ 
+         RESTORE_ALL adj=8
++
++        /*
++         * When the CPU pushed this exception frame, it zero-extended eflags.
++         * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of
++         * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs,
++         * and we're at a random place on the stack, not in a CPUFINFO block.
++         *
++         * Account for ev/ec having already been popped off the stack.
++         */
++        SPEC_CTRL_COND_VERW \
++            scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \
++            sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip)
++
+         iretq
+ 
+ ENTRY(common_interrupt)
+-- 
+2.44.0
+
diff --git a/0043-x86-spec-ctrl-Rename-VERW-related-options.patch b/0043-x86-spec-ctrl-Rename-VERW-related-options.patch
new file mode 100644
index 0000000..38edc15
--- /dev/null
+++ b/0043-x86-spec-ctrl-Rename-VERW-related-options.patch
@@ -0,0 +1,248 @@
+From d55d52961d13d4fcd1441fcfca98f690e687b941 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Mon, 12 Feb 2024 17:50:43 +0000
+Subject: [PATCH 43/67] x86/spec-ctrl: Rename VERW related options
+
+VERW is going to be used for a 3rd purpose, and the existing nomenclature
+didn't survive the Stale MMIO issues terribly well.
+
+Rename the command line option from `md-clear=` to `verw=`.  This is more
+consistent with other options which tend to be named based on what they're
+doing, not which feature enumeration they use behind the scenes.  Retain
+`md-clear=` as a deprecated alias.
+
+Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio},
+which has a side effect of making spec_ctrl_init_domain() rather clearer to
+follow.
+
+No functional change.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4)
+---
+ docs/misc/xen-command-line.pandoc | 15 ++++----
+ xen/arch/x86/spec_ctrl.c          | 62 ++++++++++++++++---------------
+ 2 files changed, 40 insertions(+), 37 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 2006697226..d909ec94fe 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2324,7 +2324,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ 
+ ### spec-ctrl (x86)
+ > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>,
+->              {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
++>              {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
+ >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ >              eager-fpu,l1d-flush,branch-harden,srb-lock,
+ >              unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
+@@ -2349,7 +2349,7 @@ in place for guests to use.
+ 
+ Use of a positive boolean value for either of these options is invalid.
+ 
+-The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options
++The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options
+ offer fine grained control over the primitives by Xen.  These impact Xen's
+ ability to protect itself, and/or Xen's ability to virtualise support for
+ guests to use.
+@@ -2366,11 +2366,12 @@ guests to use.
+   guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
+ * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
+   Return Address Stack on entry to Xen and on idle.
+-* `md-clear=` offers control over whether to use VERW to flush
+-  microarchitectural buffers on idle and exit from Xen.  *Note: For
+-  compatibility with development versions of this fix, `mds=` is also accepted
+-  on Xen 4.12 and earlier as an alias.  Consult vendor documentation in
+-  preference to here.*
++* `verw=` offers control over whether to use VERW for its scrubbing side
++  effects at appropriate privilege transitions.  The exact side effects are
++  microarchitecture and microcode specific.  *Note: `md-clear=` is accepted as
++  a deprecated alias.  For compatibility with development versions of XSA-297,
++  `mds=` is also accepted on Xen 4.12 and earlier as an alias.  Consult vendor
++  documentation in preference to here.*
+ * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction
+   Barrier) is used on entry to Xen.  This is used by default on hardware
+   vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 25a18ac598..e12ec9930c 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -37,8 +37,8 @@ static bool __initdata opt_msr_sc_pv = true;
+ static bool __initdata opt_msr_sc_hvm = true;
+ static int8_t __initdata opt_rsb_pv = -1;
+ static bool __initdata opt_rsb_hvm = true;
+-static int8_t __ro_after_init opt_md_clear_pv = -1;
+-static int8_t __ro_after_init opt_md_clear_hvm = -1;
++static int8_t __ro_after_init opt_verw_pv = -1;
++static int8_t __ro_after_init opt_verw_hvm = -1;
+ 
+ static int8_t __ro_after_init opt_ibpb_entry_pv = -1;
+ static int8_t __ro_after_init opt_ibpb_entry_hvm = -1;
+@@ -78,7 +78,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination.
+ 
+ static int8_t __initdata opt_srb_lock = -1;
+ static bool __initdata opt_unpriv_mmio;
+-static bool __ro_after_init opt_fb_clear_mmio;
++static bool __ro_after_init opt_verw_mmio;
+ static int8_t __initdata opt_gds_mit = -1;
+ static int8_t __initdata opt_div_scrub = -1;
+ 
+@@ -120,8 +120,8 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+         disable_common:
+             opt_rsb_pv = false;
+             opt_rsb_hvm = false;
+-            opt_md_clear_pv = 0;
+-            opt_md_clear_hvm = 0;
++            opt_verw_pv = 0;
++            opt_verw_hvm = 0;
+             opt_ibpb_entry_pv = 0;
+             opt_ibpb_entry_hvm = 0;
+             opt_ibpb_entry_dom0 = false;
+@@ -152,14 +152,14 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+         {
+             opt_msr_sc_pv = val;
+             opt_rsb_pv = val;
+-            opt_md_clear_pv = val;
++            opt_verw_pv = val;
+             opt_ibpb_entry_pv = val;
+         }
+         else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
+         {
+             opt_msr_sc_hvm = val;
+             opt_rsb_hvm = val;
+-            opt_md_clear_hvm = val;
++            opt_verw_hvm = val;
+             opt_ibpb_entry_hvm = val;
+         }
+         else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 )
+@@ -204,21 +204,22 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+                 break;
+             }
+         }
+-        else if ( (val = parse_boolean("md-clear", s, ss)) != -1 )
++        else if ( (val = parse_boolean("verw", s, ss)) != -1 ||
++                  (val = parse_boolean("md-clear", s, ss)) != -1 )
+         {
+             switch ( val )
+             {
+             case 0:
+             case 1:
+-                opt_md_clear_pv = opt_md_clear_hvm = val;
++                opt_verw_pv = opt_verw_hvm = val;
+                 break;
+ 
+             case -2:
+-                s += strlen("md-clear=");
++                s += (*s == 'v') ? strlen("verw=") : strlen("md-clear=");
+                 if ( (val = parse_boolean("pv", s, ss)) >= 0 )
+-                    opt_md_clear_pv = val;
++                    opt_verw_pv = val;
+                 else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
+-                    opt_md_clear_hvm = val;
++                    opt_verw_hvm = val;
+                 else
+             default:
+                     rc = -EINVAL;
+@@ -540,8 +541,8 @@ static void __init print_details(enum ind_thunk thunk)
+            opt_srb_lock                              ? " SRB_LOCK+" : " SRB_LOCK-",
+            opt_ibpb_ctxt_switch                      ? " IBPB-ctxt" : "",
+            opt_l1d_flush                             ? " L1D_FLUSH" : "",
+-           opt_md_clear_pv || opt_md_clear_hvm ||
+-           opt_fb_clear_mmio                         ? " VERW"  : "",
++           opt_verw_pv || opt_verw_hvm ||
++           opt_verw_mmio                             ? " VERW"  : "",
+            opt_div_scrub                             ? " DIV" : "",
+            opt_branch_harden                         ? " BRANCH_HARDEN" : "");
+ 
+@@ -562,13 +563,13 @@ static void __init print_details(enum ind_thunk thunk)
+             boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
+             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
+             amd_virt_spec_ctrl ||
+-            opt_eager_fpu || opt_md_clear_hvm)       ? ""               : " None",
++            opt_eager_fpu || opt_verw_hvm)           ? ""               : " None",
+            boot_cpu_has(X86_FEATURE_SC_MSR_HVM)      ? " MSR_SPEC_CTRL" : "",
+            (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
+             amd_virt_spec_ctrl)                      ? " MSR_VIRT_SPEC_CTRL" : "",
+            boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           : "",
+            opt_eager_fpu                             ? " EAGER_FPU"     : "",
+-           opt_md_clear_hvm                          ? " MD_CLEAR"      : "",
++           opt_verw_hvm                              ? " VERW"          : "",
+            boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM)  ? " IBPB-entry"    : "");
+ 
+ #endif
+@@ -577,11 +578,11 @@ static void __init print_details(enum ind_thunk thunk)
+            (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+             boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
+             boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
+-            opt_eager_fpu || opt_md_clear_pv)        ? ""               : " None",
++            opt_eager_fpu || opt_verw_pv)            ? ""               : " None",
+            boot_cpu_has(X86_FEATURE_SC_MSR_PV)       ? " MSR_SPEC_CTRL" : "",
+            boot_cpu_has(X86_FEATURE_SC_RSB_PV)       ? " RSB"           : "",
+            opt_eager_fpu                             ? " EAGER_FPU"     : "",
+-           opt_md_clear_pv                           ? " MD_CLEAR"      : "",
++           opt_verw_pv                               ? " VERW"          : "",
+            boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV)   ? " IBPB-entry"    : "");
+ 
+     printk("  XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
+@@ -1514,8 +1515,8 @@ void spec_ctrl_init_domain(struct domain *d)
+ {
+     bool pv = is_pv_domain(d);
+ 
+-    bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) ||
+-                 (opt_fb_clear_mmio && is_iommu_enabled(d)));
++    bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) ||
++                 (opt_verw_mmio && is_iommu_enabled(d)));
+ 
+     bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) &&
+                  (d->domain_id != 0 || opt_ibpb_entry_dom0));
+@@ -1878,19 +1879,20 @@ void __init init_speculation_mitigations(void)
+      * the return-to-guest path.
+      */
+     if ( opt_unpriv_mmio )
+-        opt_fb_clear_mmio = cpu_has_fb_clear;
++        opt_verw_mmio = cpu_has_fb_clear;
+ 
+     /*
+      * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
+      * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
+      * but it is somewhat better than nothing.
+      */
+-    if ( opt_md_clear_pv == -1 )
+-        opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+-                           boot_cpu_has(X86_FEATURE_MD_CLEAR));
+-    if ( opt_md_clear_hvm == -1 )
+-        opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+-                            boot_cpu_has(X86_FEATURE_MD_CLEAR));
++    if ( opt_verw_pv == -1 )
++        opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
++                       cpu_has_md_clear);
++
++    if ( opt_verw_hvm == -1 )
++        opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
++                        cpu_has_md_clear);
+ 
+     /*
+      * Enable MDS/MMIO defences as applicable.  The Idle blocks need using if
+@@ -1903,12 +1905,12 @@ void __init init_speculation_mitigations(void)
+      * MDS mitigations.  L1D_FLUSH is not safe for MMIO mitigations.)
+      *
+      * After calculating the appropriate idle setting, simplify
+-     * opt_md_clear_hvm to mean just "should we VERW on the way into HVM
++     * opt_verw_hvm to mean just "should we VERW on the way into HVM
+      * guests", so spec_ctrl_init_domain() can calculate suitable settings.
+      */
+-    if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio )
++    if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
+         setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
+-    opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
++    opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
+ 
+     /*
+      * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
+-- 
+2.44.0
+
diff --git a/0044-x86-spec-ctrl-VERW-handling-adjustments.patch b/0044-x86-spec-ctrl-VERW-handling-adjustments.patch
new file mode 100644
index 0000000..e2458c9
--- /dev/null
+++ b/0044-x86-spec-ctrl-VERW-handling-adjustments.patch
@@ -0,0 +1,171 @@
+From 6663430b442fdf9698bd8e03f701a4547309ad71 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 5 Mar 2024 19:33:37 +0000
+Subject: [PATCH 44/67] x86/spec-ctrl: VERW-handling adjustments
+
+... before we add yet more complexity to this logic.  Mostly expanded
+comments, but with three minor changes.
+
+1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and
+   future ones.
+
+2) We only ever need SC_VERW_IDLE when SMT is active.  If SMT isn't active,
+   then there's no re-partition of pipeline resources based on thread-idleness
+   to worry about.
+
+3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as
+   it turns out, wrong.  SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush
+   is the relevant decision of whether to use L1D_FLUSH based on
+   susceptibility and user preference.
+
+   Rewrite the logic so it can be followed, and incorporate the fact that when
+   FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2)
+---
+ xen/arch/x86/spec_ctrl.c | 99 +++++++++++++++++++++++++++++-----------
+ 1 file changed, 73 insertions(+), 26 deletions(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index e12ec9930c..adb6bc74e8 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -1531,7 +1531,7 @@ void __init init_speculation_mitigations(void)
+ {
+     enum ind_thunk thunk = THUNK_DEFAULT;
+     bool has_spec_ctrl, ibrs = false, hw_smt_enabled;
+-    bool cpu_has_bug_taa, retpoline_safe;
++    bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe;
+ 
+     hw_smt_enabled = check_smt_enabled();
+ 
+@@ -1867,50 +1867,97 @@ void __init init_speculation_mitigations(void)
+             "enabled.  Please assess your configuration and choose an\n"
+             "explicit 'smt=<bool>' setting.  See XSA-273.\n");
+ 
++    /*
++     * A brief summary of VERW-related changes.
++     *
++     * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
++     * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
++     *
++     * Relevant ucodes:
++     *
++     * - May 2019, for MDS.  Introduces the MD_CLEAR CPUID bit and VERW side
++     *   effects to scrub Store/Load/Fill buffers as applicable.  MD_CLEAR
++     *   exists architecturally, even when the side effects have been removed.
++     *
++     *   Use VERW to scrub on return-to-guest.  Parts with L1D_FLUSH to
++     *   mitigate L1TF have the same side effect, so no need to do both.
++     *
++     *   Various Atoms suffer from Store-buffer sampling only.  Store buffers
++     *   are statically partitioned between non-idle threads, so scrubbing is
++     *   wanted when going idle too.
++     *
++     *   Load ports and Fill buffers are competitively shared between threads.
++     *   SMT must be disabled for VERW scrubbing to be fully effective.
++     *
++     * - November 2019, for TAA.  Extended VERW side effects to TSX-enabled
++     *   MDS_NO parts.
++     *
++     * - February 2022, for Client TSX de-feature.  Removed VERW side effects
++     *   from Client CPUs only.
++     *
++     * - May 2022, for MMIO Stale Data.  (Re)introduced Fill Buffer scrubbing
++     *   on all MMIO-affected parts which didn't already have it for MDS
++     *   reasons, enumerating FB_CLEAR on those parts only.
++     *
++     *   If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
++     *   side effects as VERW and cannot be used in its place.
++     */
+     mds_calculations();
+ 
+     /*
+-     * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have
+-     * reintroduced the VERW fill buffer flushing side effect because of a
+-     * susceptibility to FBSDP.
++     * Parts which enumerate FB_CLEAR are those with now-updated microcode
++     * which weren't susceptible to the original MFBDS (and therefore didn't
++     * have Fill Buffer scrubbing side effects to begin with, or were Client
++     * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had
++     * the scrubbing reintroduced because of a susceptibility to FBSDP.
+      *
+      * If unprivileged guests have (or will have) MMIO mappings, we can
+      * mitigate cross-domain leakage of fill buffer data by issuing VERW on
+-     * the return-to-guest path.
++     * the return-to-guest path.  This is only a token effort if SMT is
++     * active.
+      */
+     if ( opt_unpriv_mmio )
+         opt_verw_mmio = cpu_has_fb_clear;
+ 
+     /*
+-     * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
+-     * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
+-     * but it is somewhat better than nothing.
++     * MD_CLEAR is enumerated architecturally forevermore, even after the
++     * scrubbing side effects have been removed.  Create ourselves an version
++     * which expressed whether we think MD_CLEAR is having any useful side
++     * effect.
++     */
++    cpu_has_useful_md_clear = (cpu_has_md_clear &&
++                               (cpu_has_bug_mds || cpu_has_bug_msbds_only));
++
++    /*
++     * By default, use VERW scrubbing on applicable hardware, if we think it's
++     * going to have an effect.  This will only be a token effort for
++     * MLPDS/MFBDS when SMT is enabled.
+      */
+     if ( opt_verw_pv == -1 )
+-        opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+-                       cpu_has_md_clear);
++        opt_verw_pv = cpu_has_useful_md_clear;
+ 
+     if ( opt_verw_hvm == -1 )
+-        opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+-                        cpu_has_md_clear);
++        opt_verw_hvm = cpu_has_useful_md_clear;
+ 
+     /*
+-     * Enable MDS/MMIO defences as applicable.  The Idle blocks need using if
+-     * either the PV or HVM MDS defences are used, or if we may give MMIO
+-     * access to untrusted guests.
+-     *
+-     * HVM is more complicated.  The MD_CLEAR microcode extends L1D_FLUSH with
+-     * equivalent semantics to avoid needing to perform both flushes on the
+-     * HVM path.  Therefore, we don't need VERW in addition to L1D_FLUSH (for
+-     * MDS mitigations.  L1D_FLUSH is not safe for MMIO mitigations.)
+-     *
+-     * After calculating the appropriate idle setting, simplify
+-     * opt_verw_hvm to mean just "should we VERW on the way into HVM
+-     * guests", so spec_ctrl_init_domain() can calculate suitable settings.
++     * If SMT is active, and we're protecting against MDS or MMIO stale data,
++     * we need to scrub before going idle as well as on return to guest.
++     * Various pipeline resources are repartitioned amongst non-idle threads.
+      */
+-    if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
++    if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
++          opt_verw_mmio) && hw_smt_enabled )
+         setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
+-    opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
++
++    /*
++     * After calculating the appropriate idle setting, simplify opt_verw_hvm
++     * to mean just "should we VERW on the way into HVM guests", so
++     * spec_ctrl_init_domain() can calculate suitable settings.
++     *
++     * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
++     * only *_CLEAR we can see.
++     */
++    if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
++        opt_verw_hvm = false;
+ 
+     /*
+      * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
+-- 
+2.44.0
+
diff --git a/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch b/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch
new file mode 100644
index 0000000..4a10524
--- /dev/null
+++ b/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch
@@ -0,0 +1,320 @@
+From d85481135d87abbbf1feab18b749288fa08b65f2 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 22 Jun 2023 23:32:19 +0100
+Subject: [PATCH 45/67] x86/spec-ctrl: Mitigation Register File Data Sampling
+
+RFDS affects Atom cores, also branded E-cores, between the Goldmont and
+Gracemont microarchitectures.  This includes Alder Lake and Raptor Lake hybrid
+client systems which have a mix of Gracemont and other types of cores.
+
+Two new bits have been defined; RFDS_CLEAR to indicate VERW has more side
+effects, and RFDS_NO to indicate that the system is unaffected.  Plenty of
+unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we
+synthesise it.  Alder Lake and Raptor Lake Xeon-E's are unaffected due to
+their platform configuration, and we must use the Hybrid CPUID bit to
+distinguish them from their non-Xeon counterparts.
+
+Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so
+set it in the max policies and reflect the host setting in default.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31)
+---
+ tools/misc/xen-cpuid.c                      |   5 +-
+ xen/arch/x86/cpu-policy.c                   |   5 +
+ xen/arch/x86/include/asm/cpufeature.h       |   3 +
+ xen/arch/x86/include/asm/msr-index.h        |   2 +
+ xen/arch/x86/spec_ctrl.c                    | 100 +++++++++++++++++++-
+ xen/include/public/arch-x86/cpufeatureset.h |   3 +
+ 6 files changed, 111 insertions(+), 7 deletions(-)
+
+diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
+index aefc140d66..5ceea8be07 100644
+--- a/tools/misc/xen-cpuid.c
++++ b/tools/misc/xen-cpuid.c
+@@ -172,7 +172,7 @@ static const char *const str_7d0[32] =
+     [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl",
+     [10] = "md-clear",            [11] = "rtm-always-abort",
+     /* 12 */                [13] = "tsx-force-abort",
+-    [14] = "serialize",
++    [14] = "serialize",     [15] = "hybrid",
+     [16] = "tsxldtrk",
+     [18] = "pconfig",
+     [20] = "cet-ibt",
+@@ -237,7 +237,8 @@ static const char *const str_m10Al[32] =
+     [20] = "bhi-no",              [21] = "xapic-status",
+     /* 22 */                      [23] = "ovrclk-status",
+     [24] = "pbrsb-no",            [25] = "gds-ctrl",
+-    [26] = "gds-no",
++    [26] = "gds-no",              [27] = "rfds-no",
++    [28] = "rfds-clear",
+ };
+ 
+ static const char *const str_m10Ah[32] =
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 7b875a7221..96c2cee1a8 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -444,6 +444,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+          */
+         __set_bit(X86_FEATURE_MD_CLEAR, fs);
+         __set_bit(X86_FEATURE_FB_CLEAR, fs);
++        __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
+ 
+         /*
+          * The Gather Data Sampling microcode mitigation (August 2023) has an
+@@ -493,6 +494,10 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+         if ( cpu_has_fb_clear )
+             __set_bit(X86_FEATURE_FB_CLEAR, fs);
+ 
++        __clear_bit(X86_FEATURE_RFDS_CLEAR, fs);
++        if ( cpu_has_rfds_clear )
++            __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
++
+         /*
+          * The Gather Data Sampling microcode mitigation (August 2023) has an
+          * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
+diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
+index ec824e8954..a6b8af1296 100644
+--- a/xen/arch/x86/include/asm/cpufeature.h
++++ b/xen/arch/x86/include/asm/cpufeature.h
+@@ -140,6 +140,7 @@
+ #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)
+ #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
+ #define cpu_has_serialize       boot_cpu_has(X86_FEATURE_SERIALIZE)
++#define cpu_has_hybrid          boot_cpu_has(X86_FEATURE_HYBRID)
+ #define cpu_has_avx512_fp16     boot_cpu_has(X86_FEATURE_AVX512_FP16)
+ #define cpu_has_arch_caps       boot_cpu_has(X86_FEATURE_ARCH_CAPS)
+ 
+@@ -161,6 +162,8 @@
+ #define cpu_has_rrsba           boot_cpu_has(X86_FEATURE_RRSBA)
+ #define cpu_has_gds_ctrl        boot_cpu_has(X86_FEATURE_GDS_CTRL)
+ #define cpu_has_gds_no          boot_cpu_has(X86_FEATURE_GDS_NO)
++#define cpu_has_rfds_no         boot_cpu_has(X86_FEATURE_RFDS_NO)
++#define cpu_has_rfds_clear      boot_cpu_has(X86_FEATURE_RFDS_CLEAR)
+ 
+ /* Synthesized. */
+ #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
+diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
+index 6abf7bc34a..9b5f67711f 100644
+--- a/xen/arch/x86/include/asm/msr-index.h
++++ b/xen/arch/x86/include/asm/msr-index.h
+@@ -88,6 +88,8 @@
+ #define  ARCH_CAPS_PBRSB_NO                 (_AC(1, ULL) << 24)
+ #define  ARCH_CAPS_GDS_CTRL                 (_AC(1, ULL) << 25)
+ #define  ARCH_CAPS_GDS_NO                   (_AC(1, ULL) << 26)
++#define  ARCH_CAPS_RFDS_NO                  (_AC(1, ULL) << 27)
++#define  ARCH_CAPS_RFDS_CLEAR               (_AC(1, ULL) << 28)
+ 
+ #define MSR_FLUSH_CMD                       0x0000010b
+ #define  FLUSH_CMD_L1D                      (_AC(1, ULL) <<  0)
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index adb6bc74e8..1ee81e2dfe 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -24,6 +24,7 @@
+ 
+ #include <asm/amd.h>
+ #include <asm/hvm/svm/svm.h>
++#include <asm/intel-family.h>
+ #include <asm/microcode.h>
+ #include <asm/msr.h>
+ #include <asm/pv/domain.h>
+@@ -447,7 +448,7 @@ static void __init print_details(enum ind_thunk thunk)
+      * Hardware read-only information, stating immunity to certain issues, or
+      * suggestions of which mitigation to use.
+      */
+-    printk("  Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
++    printk("  Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+            (caps & ARCH_CAPS_RDCL_NO)                        ? " RDCL_NO"        : "",
+            (caps & ARCH_CAPS_EIBRS)                          ? " EIBRS"          : "",
+            (caps & ARCH_CAPS_RSBA)                           ? " RSBA"           : "",
+@@ -463,6 +464,7 @@ static void __init print_details(enum ind_thunk thunk)
+            (caps & ARCH_CAPS_FB_CLEAR)                       ? " FB_CLEAR"       : "",
+            (caps & ARCH_CAPS_PBRSB_NO)                       ? " PBRSB_NO"       : "",
+            (caps & ARCH_CAPS_GDS_NO)                         ? " GDS_NO"         : "",
++           (caps & ARCH_CAPS_RFDS_NO)                        ? " RFDS_NO"        : "",
+            (e8b  & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS))    ? " IBRS_ALWAYS"    : "",
+            (e8b  & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS))   ? " STIBP_ALWAYS"   : "",
+            (e8b  & cpufeat_mask(X86_FEATURE_IBRS_FAST))      ? " IBRS_FAST"      : "",
+@@ -473,7 +475,7 @@ static void __init print_details(enum ind_thunk thunk)
+            (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO))        ? " SRSO_NO"        : "");
+ 
+     /* Hardware features which need driving to mitigate issues. */
+-    printk("  Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
++    printk("  Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+            (e8b  & cpufeat_mask(X86_FEATURE_IBPB)) ||
+            (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB))          ? " IBPB"           : "",
+            (e8b  & cpufeat_mask(X86_FEATURE_IBRS)) ||
+@@ -491,6 +493,7 @@ static void __init print_details(enum ind_thunk thunk)
+            (caps & ARCH_CAPS_TSX_CTRL)                       ? " TSX_CTRL"       : "",
+            (caps & ARCH_CAPS_FB_CLEAR_CTRL)                  ? " FB_CLEAR_CTRL"  : "",
+            (caps & ARCH_CAPS_GDS_CTRL)                       ? " GDS_CTRL"       : "",
++           (caps & ARCH_CAPS_RFDS_CLEAR)                     ? " RFDS_CLEAR"     : "",
+            (e21a & cpufeat_mask(X86_FEATURE_SBPB))           ? " SBPB"           : "");
+ 
+     /* Compiled-in support which pertains to mitigations. */
+@@ -1359,6 +1362,83 @@ static __init void mds_calculations(void)
+     }
+ }
+ 
++/*
++ * Register File Data Sampling affects Atom cores from the Goldmont to
++ * Gracemont microarchitectures.  The March 2024 microcode adds RFDS_NO to
++ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still
++ * in support.
++ *
++ * Alder Lake and Raptor Lake client CPUs have a mix of P cores
++ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont,
++ * vulnerable), and both enumerate RFDS_CLEAR.
++ *
++ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by
++ * platform configuration, and enumerate RFDS_NO.
++ *
++ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when
++ * safe to do so.
++ *
++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
++ */
++static void __init rfds_calculations(void)
++{
++    /* RFDS is only known to affect Intel Family 6 processors at this time. */
++    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
++         boot_cpu_data.x86 != 6 )
++        return;
++
++    /*
++     * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable
++     * microcode, or an RFDS-aware hypervisor is levelling us in a pool.
++     */
++    if ( cpu_has_rfds_no || cpu_has_rfds_clear )
++        return;
++
++    /* If we're virtualised, don't attempt to synthesise RFDS_NO. */
++    if ( cpu_has_hypervisor )
++        return;
++
++    /*
++     * Not all CPUs are expected to get a microcode update enumerating one of
++     * RFDS_{NO,CLEAR}, or we might have out-of-date microcode.
++     */
++    switch ( boot_cpu_data.x86_model )
++    {
++    case INTEL_FAM6_ALDERLAKE:
++    case INTEL_FAM6_RAPTORLAKE:
++        /*
++         * Alder Lake and Raptor Lake might be a client SKU (with the
++         * Gracemont cores active, and therefore vulnerable) or might be a
++         * server SKU (with the Gracemont cores disabled, and therefore not
++         * vulnerable).
++         *
++         * See if the CPU identifies as hybrid to distinguish the two cases.
++         */
++        if ( !cpu_has_hybrid )
++            break;
++        fallthrough;
++    case INTEL_FAM6_ALDERLAKE_L:
++    case INTEL_FAM6_RAPTORLAKE_P:
++    case INTEL_FAM6_RAPTORLAKE_S:
++
++    case INTEL_FAM6_ATOM_GOLDMONT:      /* Apollo Lake */
++    case INTEL_FAM6_ATOM_GOLDMONT_D:    /* Denverton */
++    case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* Gemini Lake */
++    case INTEL_FAM6_ATOM_TREMONT_D:     /* Snow Ridge / Parker Ridge */
++    case INTEL_FAM6_ATOM_TREMONT:       /* Elkhart Lake */
++    case INTEL_FAM6_ATOM_TREMONT_L:     /* Jasper Lake */
++    case INTEL_FAM6_ATOM_GRACEMONT:     /* Alder Lake N */
++        return;
++    }
++
++    /*
++     * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO,
++     * perhaps because of its age or because of out-of-date microcode.
++     * Synthesise it.
++     */
++    setup_force_cpu_cap(X86_FEATURE_RFDS_NO);
++}
++
+ static bool __init cpu_has_gds(void)
+ {
+     /*
+@@ -1872,6 +1952,7 @@ void __init init_speculation_mitigations(void)
+      *
+      * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
+      * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
++     * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
+      *
+      * Relevant ucodes:
+      *
+@@ -1901,8 +1982,12 @@ void __init init_speculation_mitigations(void)
+      *
+      *   If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
+      *   side effects as VERW and cannot be used in its place.
++     *
++     * - March 2024, for RFDS.  Enumerate RFDS_CLEAR to mean that VERW now
++     *   scrubs non-architectural entries from certain register files.
+      */
+     mds_calculations();
++    rfds_calculations();
+ 
+     /*
+      * Parts which enumerate FB_CLEAR are those with now-updated microcode
+@@ -1934,15 +2019,19 @@ void __init init_speculation_mitigations(void)
+      * MLPDS/MFBDS when SMT is enabled.
+      */
+     if ( opt_verw_pv == -1 )
+-        opt_verw_pv = cpu_has_useful_md_clear;
++        opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear;
+ 
+     if ( opt_verw_hvm == -1 )
+-        opt_verw_hvm = cpu_has_useful_md_clear;
++        opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear;
+ 
+     /*
+      * If SMT is active, and we're protecting against MDS or MMIO stale data,
+      * we need to scrub before going idle as well as on return to guest.
+      * Various pipeline resources are repartitioned amongst non-idle threads.
++     *
++     * We don't need to scrub on idle for RFDS.  There are no affected cores
++     * which support SMT, despite there being affected cores in hybrid systems
++     * which have SMT elsewhere in the platform.
+      */
+     if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
+           opt_verw_mmio) && hw_smt_enabled )
+@@ -1956,7 +2045,8 @@ void __init init_speculation_mitigations(void)
+      * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
+      * only *_CLEAR we can see.
+      */
+-    if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
++    if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear &&
++         !cpu_has_rfds_clear )
+         opt_verw_hvm = false;
+ 
+     /*
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index aec1407613..113e6cadc1 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -264,6 +264,7 @@ XEN_CPUFEATURE(MD_CLEAR,      9*32+10) /*!A VERW clears microarchitectural buffe
+ XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */
+ XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
+ XEN_CPUFEATURE(SERIALIZE,     9*32+14) /*A  SERIALIZE insn */
++XEN_CPUFEATURE(HYBRID,        9*32+15) /*   Heterogeneous platform */
+ XEN_CPUFEATURE(TSXLDTRK,      9*32+16) /*a  TSX load tracking suspend/resume insns */
+ XEN_CPUFEATURE(CET_IBT,       9*32+20) /*   CET - Indirect Branch Tracking */
+ XEN_CPUFEATURE(AVX512_FP16,   9*32+23) /*   AVX512 FP16 instructions */
+@@ -330,6 +331,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS,      16*32+23) /*   MSR_OVERCLOCKING_STATUS */
+ XEN_CPUFEATURE(PBRSB_NO,           16*32+24) /*A  No Post-Barrier RSB predictions */
+ XEN_CPUFEATURE(GDS_CTRL,           16*32+25) /*   MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */
+ XEN_CPUFEATURE(GDS_NO,             16*32+26) /*A  No Gather Data Sampling */
++XEN_CPUFEATURE(RFDS_NO,            16*32+27) /*A  No Register File Data Sampling */
++XEN_CPUFEATURE(RFDS_CLEAR,         16*32+28) /*!A Register File(s) cleared by VERW */
+ 
+ /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */
+ 
+-- 
+2.44.0
+
diff --git a/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch b/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch
new file mode 100644
index 0000000..ce397a1
--- /dev/null
+++ b/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch
@@ -0,0 +1,161 @@
+From bf70ce8b3449c49eb828d5b1f4934a49b00fef35 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 20 Sep 2023 20:06:53 +0100
+Subject: [PATCH 46/67] x86/paging: Delete update_cr3()'s do_locking parameter
+
+Nicola reports that the XSA-438 fix introduced new MISRA violations because of
+some incidental tidying it tried to do.  The parameter is useless, so resolve
+the MISRA regression by removing it.
+
+hap_update_cr3() discards the parameter entirely, while sh_update_cr3() uses
+it to distinguish internal and external callers and therefore whether the
+paging lock should be taken.
+
+However, we have paging_lock_recursive() for this purpose, which also avoids
+the ability for the shadow internal callers to accidentally not hold the lock.
+
+Fixes: fb0ff49fe9f7 ("x86/shadow: defer releasing of PV's top-level shadow reference")
+Reported-by: Nicola Vetrini <nicola.vetrini@bugseng.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Release-acked-by: Henry Wang <Henry.Wang@arm.com>
+(cherry picked from commit e71157d1ac2a7fbf413130663cf0a93ff9fbcf7e)
+---
+ xen/arch/x86/include/asm/paging.h |  5 ++---
+ xen/arch/x86/mm/hap/hap.c         |  5 ++---
+ xen/arch/x86/mm/shadow/common.c   |  2 +-
+ xen/arch/x86/mm/shadow/multi.c    | 17 ++++++++---------
+ xen/arch/x86/mm/shadow/none.c     |  3 +--
+ 5 files changed, 14 insertions(+), 18 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/paging.h b/xen/arch/x86/include/asm/paging.h
+index 94c590f31a..809ff35d9a 100644
+--- a/xen/arch/x86/include/asm/paging.h
++++ b/xen/arch/x86/include/asm/paging.h
+@@ -138,8 +138,7 @@ struct paging_mode {
+                                             paddr_t ga, uint32_t *pfec,
+                                             unsigned int *page_order);
+ #endif
+-    pagetable_t   (*update_cr3            )(struct vcpu *v, bool do_locking,
+-                                            bool noflush);
++    pagetable_t   (*update_cr3            )(struct vcpu *v, bool noflush);
+     void          (*update_paging_modes   )(struct vcpu *v);
+     bool          (*flush_tlb             )(const unsigned long *vcpu_bitmap);
+ 
+@@ -312,7 +311,7 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
+  * as the value to load into the host CR3 to schedule this vcpu */
+ static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
+ {
+-    return paging_get_hostmode(v)->update_cr3(v, 1, noflush);
++    return paging_get_hostmode(v)->update_cr3(v, noflush);
+ }
+ 
+ /* Update all the things that are derived from the guest's CR0/CR3/CR4.
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index 57a19c3d59..3ad39a7dd7 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -739,8 +739,7 @@ static bool cf_check hap_invlpg(struct vcpu *v, unsigned long linear)
+     return 1;
+ }
+ 
+-static pagetable_t cf_check hap_update_cr3(
+-    struct vcpu *v, bool do_locking, bool noflush)
++static pagetable_t cf_check hap_update_cr3(struct vcpu *v, bool noflush)
+ {
+     v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
+     hvm_update_guest_cr3(v, noflush);
+@@ -826,7 +825,7 @@ static void cf_check hap_update_paging_modes(struct vcpu *v)
+     }
+ 
+     /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
+-    hap_update_cr3(v, 0, false);
++    hap_update_cr3(v, false);
+ 
+  unlock:
+     paging_unlock(d);
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index c0940f939e..18714dbd02 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2579,7 +2579,7 @@ static void sh_update_paging_modes(struct vcpu *v)
+     }
+ #endif /* OOS */
+ 
+-    v->arch.paging.mode->update_cr3(v, 0, false);
++    v->arch.paging.mode->update_cr3(v, false);
+ }
+ 
+ void cf_check shadow_update_paging_modes(struct vcpu *v)
+diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
+index c92b354a78..e54a507b54 100644
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -2506,7 +2506,7 @@ static int cf_check sh_page_fault(
+          * In any case, in the PAE case, the ASSERT is not true; it can
+          * happen because of actions the guest is taking. */
+ #if GUEST_PAGING_LEVELS == 3
+-        v->arch.paging.mode->update_cr3(v, 0, false);
++        v->arch.paging.mode->update_cr3(v, false);
+ #else
+         ASSERT(d->is_shutting_down);
+ #endif
+@@ -3224,17 +3224,13 @@ static void cf_check sh_detach_old_tables(struct vcpu *v)
+     }
+ }
+ 
+-static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
+-                                          bool noflush)
++static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool noflush)
+ /* Updates vcpu->arch.cr3 after the guest has changed CR3.
+  * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
+  * if appropriate).
+  * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works;
+  * this function will call hvm_update_guest_cr(v, 3) to tell them where the
+  * shadow tables are.
+- * If do_locking != 0, assume we are being called from outside the
+- * shadow code, and must take and release the paging lock; otherwise
+- * that is the caller's responsibility.
+  */
+ {
+     struct domain *d = v->domain;
+@@ -3252,7 +3248,11 @@ static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
+         return old_entry;
+     }
+ 
+-    if ( do_locking ) paging_lock(v->domain);
++    /*
++     * This is used externally (with the paging lock not taken) and internally
++     * by the shadow code (with the lock already taken).
++     */
++    paging_lock_recursive(v->domain);
+ 
+ #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+     /* Need to resync all the shadow entries on a TLB flush.  Resync
+@@ -3480,8 +3480,7 @@ static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking,
+     shadow_sync_other_vcpus(v);
+ #endif
+ 
+-    /* Release the lock, if we took it (otherwise it's the caller's problem) */
+-    if ( do_locking ) paging_unlock(v->domain);
++    paging_unlock(v->domain);
+ 
+     return old_entry;
+ }
+diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
+index 743c0ffb85..7e4e386cd0 100644
+--- a/xen/arch/x86/mm/shadow/none.c
++++ b/xen/arch/x86/mm/shadow/none.c
+@@ -52,8 +52,7 @@ static unsigned long cf_check _gva_to_gfn(
+ }
+ #endif
+ 
+-static pagetable_t cf_check _update_cr3(struct vcpu *v, bool do_locking,
+-                                        bool noflush)
++static pagetable_t cf_check _update_cr3(struct vcpu *v, bool noflush)
+ {
+     ASSERT_UNREACHABLE();
+     return pagetable_null();
+-- 
+2.44.0
+
diff --git a/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch b/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch
new file mode 100644
index 0000000..3e58906
--- /dev/null
+++ b/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch
@@ -0,0 +1,58 @@
+From 0a53565f1886201cc8a8afe9b2619ee297c20955 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 2 Feb 2024 00:39:42 +0000
+Subject: [PATCH 47/67] xen: Swap order of actions in the FREE*() macros
+
+Wherever possible, it is a good idea to NULL out the visible reference to an
+object prior to freeing it.  The FREE*() macros already collect together both
+parts, making it easy to adjust.
+
+This has a marginal code generation improvement, as some of the calls to the
+free() function can be tailcall optimised.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit c4f427ec879e7c0df6d44d02561e8bee838a293e)
+---
+ xen/include/xen/mm.h      | 3 ++-
+ xen/include/xen/xmalloc.h | 7 ++++---
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
+index 3dc61bcc3c..211685a5d2 100644
+--- a/xen/include/xen/mm.h
++++ b/xen/include/xen/mm.h
+@@ -80,8 +80,9 @@ bool scrub_free_pages(void);
+ 
+ /* Free an allocation, and zero the pointer to it. */
+ #define FREE_XENHEAP_PAGES(p, o) do { \
+-    free_xenheap_pages(p, o);         \
++    void *_ptr_ = (p);                \
+     (p) = NULL;                       \
++    free_xenheap_pages(_ptr_, o);     \
+ } while ( false )
+ #define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0)
+ 
+diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h
+index 16979a117c..d857298011 100644
+--- a/xen/include/xen/xmalloc.h
++++ b/xen/include/xen/xmalloc.h
+@@ -66,9 +66,10 @@
+ extern void xfree(void *);
+ 
+ /* Free an allocation, and zero the pointer to it. */
+-#define XFREE(p) do { \
+-    xfree(p);         \
+-    (p) = NULL;       \
++#define XFREE(p) do {                       \
++    void *_ptr_ = (p);                      \
++    (p) = NULL;                             \
++    xfree(_ptr_);                           \
+ } while ( false )
+ 
+ /* Underlying functions */
+-- 
+2.44.0
+
diff --git a/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch b/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch
new file mode 100644
index 0000000..ecf0830
--- /dev/null
+++ b/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch
@@ -0,0 +1,331 @@
+From 9d2f136328aab5537b7180a1b23e171893ebe455 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 13 Feb 2024 13:08:05 +0100
+Subject: [PATCH 48/67] x86/spinlock: introduce support for blocking
+ speculation into critical regions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce a new Kconfig option to block speculation into lock protected
+critical regions.  The Kconfig option is enabled by default, but the mitigation
+won't be engaged unless it's explicitly enabled in the command line using
+`spec-ctrl=lock-harden`.
+
+Convert the spinlock acquire macros into always-inline functions, and introduce
+a speculation barrier after the lock has been taken.  Note the speculation
+barrier is not placed inside the implementation of the spin lock functions, so
+as to prevent speculation from falling through the call to the lock functions,
+resulting in the barrier also being skipped.
+
+trylock variants are protected using a construct akin to the existing
+evaluate_nospec().
+
+This patch only implements the speculation barrier for x86.
+
+Note spin locks are the only locking primitive taken care of in this change,
+further locking primitives will be adjusted by separate changes.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7ef0084418e188d05f338c3e028fbbe8b6924afa)
+---
+ docs/misc/xen-command-line.pandoc      |  7 ++++-
+ xen/arch/x86/include/asm/cpufeatures.h |  2 +-
+ xen/arch/x86/include/asm/nospec.h      | 26 ++++++++++++++++++
+ xen/arch/x86/spec_ctrl.c               | 26 +++++++++++++++---
+ xen/common/Kconfig                     | 17 ++++++++++++
+ xen/include/xen/nospec.h               | 15 +++++++++++
+ xen/include/xen/spinlock.h             | 37 +++++++++++++++++++++-----
+ 7 files changed, 119 insertions(+), 11 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index d909ec94fe..e1d56407dd 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2327,7 +2327,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ >              {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
+ >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ >              eager-fpu,l1d-flush,branch-harden,srb-lock,
+->              unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
++>              unpriv-mmio,gds-mit,div-scrub,lock-harden}=<bool> ]`
+ 
+ Controls for speculative execution sidechannel mitigations.  By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -2454,6 +2454,11 @@ On all hardware, the `div-scrub=` option can be used to force or prevent Xen
+ from mitigating the DIV-leakage vulnerability.  By default, Xen will mitigate
+ DIV-leakage on hardware believed to be vulnerable.
+ 
++If Xen is compiled with `CONFIG_SPECULATIVE_HARDEN_LOCK`, the `lock-harden=`
++boolean can be used to force or prevent Xen from using speculation barriers to
++protect lock critical regions.  This mitigation won't be engaged by default,
++and needs to be explicitly enabled on the command line.
++
+ ### sync_console
+ > `= <boolean>`
+ 
+diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
+index c3aad21c3b..7e8221fd85 100644
+--- a/xen/arch/x86/include/asm/cpufeatures.h
++++ b/xen/arch/x86/include/asm/cpufeatures.h
+@@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF,        X86_SYNTH( 8)) /* APERFMPERF */
+ XEN_CPUFEATURE(MFENCE_RDTSC,      X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */
+ XEN_CPUFEATURE(XEN_SMEP,          X86_SYNTH(10)) /* SMEP gets used by Xen itself */
+ XEN_CPUFEATURE(XEN_SMAP,          X86_SYNTH(11)) /* SMAP gets used by Xen itself */
+-/* Bit 12 unused. */
++XEN_CPUFEATURE(SC_NO_LOCK_HARDEN, X86_SYNTH(12)) /* (Disable) Lock critical region hardening */
+ XEN_CPUFEATURE(IND_THUNK_LFENCE,  X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */
+ XEN_CPUFEATURE(IND_THUNK_JMP,     X86_SYNTH(14)) /* Use IND_THUNK_JMP */
+ XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */
+diff --git a/xen/arch/x86/include/asm/nospec.h b/xen/arch/x86/include/asm/nospec.h
+index 7150e76b87..0725839e19 100644
+--- a/xen/arch/x86/include/asm/nospec.h
++++ b/xen/arch/x86/include/asm/nospec.h
+@@ -38,6 +38,32 @@ static always_inline void block_speculation(void)
+     barrier_nospec_true();
+ }
+ 
++static always_inline void arch_block_lock_speculation(void)
++{
++    alternative("lfence", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
++}
++
++/* Allow to insert a read memory barrier into conditionals */
++static always_inline bool barrier_lock_true(void)
++{
++    alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
++    return true;
++}
++
++static always_inline bool barrier_lock_false(void)
++{
++    alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
++    return false;
++}
++
++static always_inline bool arch_lock_evaluate_nospec(bool condition)
++{
++    if ( condition )
++        return barrier_lock_true();
++    else
++        return barrier_lock_false();
++}
++
+ #endif /* _ASM_X86_NOSPEC_H */
+ 
+ /*
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 1ee81e2dfe..ac21af2c5c 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -65,6 +65,7 @@ int8_t __read_mostly opt_eager_fpu = -1;
+ int8_t __read_mostly opt_l1d_flush = -1;
+ static bool __initdata opt_branch_harden =
+     IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH);
++static bool __initdata opt_lock_harden;
+ 
+ bool __initdata bsp_delay_spec_ctrl;
+ uint8_t __read_mostly default_xen_spec_ctrl;
+@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+             opt_ssbd = false;
+             opt_l1d_flush = 0;
+             opt_branch_harden = false;
++            opt_lock_harden = false;
+             opt_srb_lock = 0;
+             opt_unpriv_mmio = false;
+             opt_gds_mit = 0;
+@@ -298,6 +300,16 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+                 rc = -EINVAL;
+             }
+         }
++        else if ( (val = parse_boolean("lock-harden", s, ss)) >= 0 )
++        {
++            if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) )
++                opt_lock_harden = val;
++            else
++            {
++                no_config_param("SPECULATIVE_HARDEN_LOCK", "spec-ctrl", s, ss);
++                rc = -EINVAL;
++            }
++        }
+         else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
+             opt_srb_lock = val;
+         else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 )
+@@ -500,7 +512,8 @@ static void __init print_details(enum ind_thunk thunk)
+     if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ||
+          IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) ||
+          IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ||
+-         IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) )
++         IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ||
++         IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) )
+         printk("  Compiled-in support:"
+ #ifdef CONFIG_INDIRECT_THUNK
+                " INDIRECT_THUNK"
+@@ -516,11 +529,14 @@ static void __init print_details(enum ind_thunk thunk)
+ #endif
+ #ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS
+                " HARDEN_GUEST_ACCESS"
++#endif
++#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
++               " HARDEN_LOCK"
+ #endif
+                "\n");
+ 
+     /* Settings for Xen's protection, irrespective of guests. */
+-    printk("  Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
++    printk("  Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s%s\n",
+            thunk != THUNK_NONE      ? "BTI-Thunk: " : "",
+            thunk == THUNK_NONE      ? "" :
+            thunk == THUNK_RETPOLINE ? "RETPOLINE, " :
+@@ -547,7 +563,8 @@ static void __init print_details(enum ind_thunk thunk)
+            opt_verw_pv || opt_verw_hvm ||
+            opt_verw_mmio                             ? " VERW"  : "",
+            opt_div_scrub                             ? " DIV" : "",
+-           opt_branch_harden                         ? " BRANCH_HARDEN" : "");
++           opt_branch_harden                         ? " BRANCH_HARDEN" : "",
++           opt_lock_harden                           ? " LOCK_HARDEN" : "");
+ 
+     /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
+     if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu )
+@@ -1930,6 +1947,9 @@ void __init init_speculation_mitigations(void)
+     if ( !opt_branch_harden )
+         setup_force_cpu_cap(X86_FEATURE_SC_NO_BRANCH_HARDEN);
+ 
++    if ( !opt_lock_harden )
++        setup_force_cpu_cap(X86_FEATURE_SC_NO_LOCK_HARDEN);
++
+     /*
+      * We do not disable HT by default on affected hardware.
+      *
+diff --git a/xen/common/Kconfig b/xen/common/Kconfig
+index e7794cb7f6..cd73851538 100644
+--- a/xen/common/Kconfig
++++ b/xen/common/Kconfig
+@@ -173,6 +173,23 @@ config SPECULATIVE_HARDEN_GUEST_ACCESS
+ 
+ 	  If unsure, say Y.
+ 
++config SPECULATIVE_HARDEN_LOCK
++	bool "Speculative lock context hardening"
++	default y
++	depends on X86
++	help
++	  Contemporary processors may use speculative execution as a
++	  performance optimisation, but this can potentially be abused by an
++	  attacker to leak data via speculative sidechannels.
++
++	  One source of data leakage is via speculative accesses to lock
++	  critical regions.
++
++	  This option is disabled by default at run time, and needs to be
++	  enabled on the command line.
++
++	  If unsure, say Y.
++
+ endmenu
+ 
+ config DIT_DEFAULT
+diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h
+index 76255bc46e..4552846403 100644
+--- a/xen/include/xen/nospec.h
++++ b/xen/include/xen/nospec.h
+@@ -70,6 +70,21 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
+ #define array_access_nospec(array, index)                               \
+     (array)[array_index_nospec(index, ARRAY_SIZE(array))]
+ 
++static always_inline void block_lock_speculation(void)
++{
++#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
++    arch_block_lock_speculation();
++#endif
++}
++
++static always_inline bool lock_evaluate_nospec(bool condition)
++{
++#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
++    return arch_lock_evaluate_nospec(condition);
++#endif
++    return condition;
++}
++
+ #endif /* XEN_NOSPEC_H */
+ 
+ /*
+diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
+index 961891bea4..daf48fdea7 100644
+--- a/xen/include/xen/spinlock.h
++++ b/xen/include/xen/spinlock.h
+@@ -1,6 +1,7 @@
+ #ifndef __SPINLOCK_H__
+ #define __SPINLOCK_H__
+ 
++#include <xen/nospec.h>
+ #include <xen/time.h>
+ #include <asm/system.h>
+ #include <asm/spinlock.h>
+@@ -189,13 +190,30 @@ int _spin_trylock_recursive(spinlock_t *lock);
+ void _spin_lock_recursive(spinlock_t *lock);
+ void _spin_unlock_recursive(spinlock_t *lock);
+ 
+-#define spin_lock(l)                  _spin_lock(l)
+-#define spin_lock_cb(l, c, d)         _spin_lock_cb(l, c, d)
+-#define spin_lock_irq(l)              _spin_lock_irq(l)
++static always_inline void spin_lock(spinlock_t *l)
++{
++    _spin_lock(l);
++    block_lock_speculation();
++}
++
++static always_inline void spin_lock_cb(spinlock_t *l, void (*c)(void *data),
++                                       void *d)
++{
++    _spin_lock_cb(l, c, d);
++    block_lock_speculation();
++}
++
++static always_inline void spin_lock_irq(spinlock_t *l)
++{
++    _spin_lock_irq(l);
++    block_lock_speculation();
++}
++
+ #define spin_lock_irqsave(l, f)                                 \
+     ({                                                          \
+         BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long));       \
+         ((f) = _spin_lock_irqsave(l));                          \
++        block_lock_speculation();                               \
+     })
+ 
+ #define spin_unlock(l)                _spin_unlock(l)
+@@ -203,7 +221,7 @@ void _spin_unlock_recursive(spinlock_t *lock);
+ #define spin_unlock_irqrestore(l, f)  _spin_unlock_irqrestore(l, f)
+ 
+ #define spin_is_locked(l)             _spin_is_locked(l)
+-#define spin_trylock(l)               _spin_trylock(l)
++#define spin_trylock(l)               lock_evaluate_nospec(_spin_trylock(l))
+ 
+ #define spin_trylock_irqsave(lock, flags)       \
+ ({                                              \
+@@ -224,8 +242,15 @@ void _spin_unlock_recursive(spinlock_t *lock);
+  * are any critical regions that cannot form part of such a set, they can use
+  * standard spin_[un]lock().
+  */
+-#define spin_trylock_recursive(l)     _spin_trylock_recursive(l)
+-#define spin_lock_recursive(l)        _spin_lock_recursive(l)
++#define spin_trylock_recursive(l) \
++    lock_evaluate_nospec(_spin_trylock_recursive(l))
++
++static always_inline void spin_lock_recursive(spinlock_t *l)
++{
++    _spin_lock_recursive(l);
++    block_lock_speculation();
++}
++
+ #define spin_unlock_recursive(l)      _spin_unlock_recursive(l)
+ 
+ #endif /* __SPINLOCK_H__ */
+-- 
+2.44.0
+
diff --git a/0049-rwlock-introduce-support-for-blocking-speculation-in.patch b/0049-rwlock-introduce-support-for-blocking-speculation-in.patch
new file mode 100644
index 0000000..593b588
--- /dev/null
+++ b/0049-rwlock-introduce-support-for-blocking-speculation-in.patch
@@ -0,0 +1,125 @@
+From 7454dad6ee15f9fa6d84fc285d366b86f3d47494 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 13 Feb 2024 16:08:52 +0100
+Subject: [PATCH 49/67] rwlock: introduce support for blocking speculation into
+ critical regions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce inline wrappers as required and add direct calls to
+block_lock_speculation() in order to prevent speculation into the rwlock
+protected critical regions.
+
+Note the rwlock primitives are adjusted to use the non speculation safe variants
+of the spinlock handlers, as a speculation barrier is added in the rwlock
+calling wrappers.
+
+trylock variants are protected by using lock_evaluate_nospec().
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit a1fb15f61692b1fa9945fc51f55471ace49cdd59)
+---
+ xen/common/rwlock.c      | 14 +++++++++++---
+ xen/include/xen/rwlock.h | 34 ++++++++++++++++++++++++++++------
+ 2 files changed, 39 insertions(+), 9 deletions(-)
+
+diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
+index aa15529bbe..cda06b9d6e 100644
+--- a/xen/common/rwlock.c
++++ b/xen/common/rwlock.c
+@@ -34,8 +34,11 @@ void queue_read_lock_slowpath(rwlock_t *lock)
+ 
+     /*
+      * Put the reader into the wait queue.
++     *
++     * Use the speculation unsafe helper, as it's the caller responsibility to
++     * issue a speculation barrier if required.
+      */
+-    spin_lock(&lock->lock);
++    _spin_lock(&lock->lock);
+ 
+     /*
+      * At the head of the wait queue now, wait until the writer state
+@@ -64,8 +67,13 @@ void queue_write_lock_slowpath(rwlock_t *lock)
+ {
+     u32 cnts;
+ 
+-    /* Put the writer into the wait queue. */
+-    spin_lock(&lock->lock);
++    /*
++     * Put the writer into the wait queue.
++     *
++     * Use the speculation unsafe helper, as it's the caller responsibility to
++     * issue a speculation barrier if required.
++     */
++    _spin_lock(&lock->lock);
+ 
+     /* Try to acquire the lock directly if no reader is present. */
+     if ( !atomic_read(&lock->cnts) &&
+diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
+index 0cc9167715..fd0458be94 100644
+--- a/xen/include/xen/rwlock.h
++++ b/xen/include/xen/rwlock.h
+@@ -247,27 +247,49 @@ static inline int _rw_is_write_locked(rwlock_t *lock)
+     return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED;
+ }
+ 
+-#define read_lock(l)                  _read_lock(l)
+-#define read_lock_irq(l)              _read_lock_irq(l)
++static always_inline void read_lock(rwlock_t *l)
++{
++    _read_lock(l);
++    block_lock_speculation();
++}
++
++static always_inline void read_lock_irq(rwlock_t *l)
++{
++    _read_lock_irq(l);
++    block_lock_speculation();
++}
++
+ #define read_lock_irqsave(l, f)                                 \
+     ({                                                          \
+         BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long));       \
+         ((f) = _read_lock_irqsave(l));                          \
++        block_lock_speculation();                               \
+     })
+ 
+ #define read_unlock(l)                _read_unlock(l)
+ #define read_unlock_irq(l)            _read_unlock_irq(l)
+ #define read_unlock_irqrestore(l, f)  _read_unlock_irqrestore(l, f)
+-#define read_trylock(l)               _read_trylock(l)
++#define read_trylock(l)               lock_evaluate_nospec(_read_trylock(l))
++
++static always_inline void write_lock(rwlock_t *l)
++{
++    _write_lock(l);
++    block_lock_speculation();
++}
++
++static always_inline void write_lock_irq(rwlock_t *l)
++{
++    _write_lock_irq(l);
++    block_lock_speculation();
++}
+ 
+-#define write_lock(l)                 _write_lock(l)
+-#define write_lock_irq(l)             _write_lock_irq(l)
+ #define write_lock_irqsave(l, f)                                \
+     ({                                                          \
+         BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long));       \
+         ((f) = _write_lock_irqsave(l));                         \
++        block_lock_speculation();                               \
+     })
+-#define write_trylock(l)              _write_trylock(l)
++#define write_trylock(l)              lock_evaluate_nospec(_write_trylock(l))
+ 
+ #define write_unlock(l)               _write_unlock(l)
+ #define write_unlock_irq(l)           _write_unlock_irq(l)
+-- 
+2.44.0
+
diff --git a/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch b/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch
new file mode 100644
index 0000000..1da2128
--- /dev/null
+++ b/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch
@@ -0,0 +1,87 @@
+From 468a368b2e5a38fc0be8e9e5f475820f7e4a6b4f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 13 Feb 2024 17:57:38 +0100
+Subject: [PATCH 50/67] percpu-rwlock: introduce support for blocking
+ speculation into critical regions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add direct calls to block_lock_speculation() where required in order to prevent
+speculation into the lock protected critical regions.  Also convert
+_percpu_read_lock() from inline to always_inline.
+
+Note that _percpu_write_lock() has been modified to use the non speculation
+safe variants of the locking primitives, as a speculation barrier is added
+unconditionally by the calling wrapper.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit f218daf6d3a3b847736d37c6a6b76031a0d08441)
+---
+ xen/common/rwlock.c      |  6 +++++-
+ xen/include/xen/rwlock.h | 14 ++++++++++----
+ 2 files changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
+index cda06b9d6e..4da0ed8fad 100644
+--- a/xen/common/rwlock.c
++++ b/xen/common/rwlock.c
+@@ -125,8 +125,12 @@ void _percpu_write_lock(percpu_rwlock_t **per_cpudata,
+     /*
+      * First take the write lock to protect against other writers or slow
+      * path readers.
++     *
++     * Note we use the speculation unsafe variant of write_lock(), as the
++     * calling wrapper already adds a speculation barrier after the lock has
++     * been taken.
+      */
+-    write_lock(&percpu_rwlock->rwlock);
++    _write_lock(&percpu_rwlock->rwlock);
+ 
+     /* Now set the global variable so that readers start using read_lock. */
+     percpu_rwlock->writer_activating = 1;
+diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
+index fd0458be94..abe0804bf7 100644
+--- a/xen/include/xen/rwlock.h
++++ b/xen/include/xen/rwlock.h
+@@ -326,8 +326,8 @@ static inline void _percpu_rwlock_owner_check(percpu_rwlock_t **per_cpudata,
+ #define percpu_rwlock_resource_init(l, owner) \
+     (*(l) = (percpu_rwlock_t)PERCPU_RW_LOCK_UNLOCKED(&get_per_cpu_var(owner)))
+ 
+-static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
+-                                         percpu_rwlock_t *percpu_rwlock)
++static always_inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
++                                            percpu_rwlock_t *percpu_rwlock)
+ {
+     /* Validate the correct per_cpudata variable has been provided. */
+     _percpu_rwlock_owner_check(per_cpudata, percpu_rwlock);
+@@ -362,6 +362,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
+     }
+     else
+     {
++        /* Other branch already has a speculation barrier in read_lock(). */
++        block_lock_speculation();
+         /* All other paths have implicit check_lock() calls via read_lock(). */
+         check_lock(&percpu_rwlock->rwlock.lock.debug, false);
+     }
+@@ -410,8 +412,12 @@ static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata,
+     _percpu_read_lock(&get_per_cpu_var(percpu), lock)
+ #define percpu_read_unlock(percpu, lock) \
+     _percpu_read_unlock(&get_per_cpu_var(percpu), lock)
+-#define percpu_write_lock(percpu, lock) \
+-    _percpu_write_lock(&get_per_cpu_var(percpu), lock)
++
++#define percpu_write_lock(percpu, lock)                 \
++({                                                      \
++    _percpu_write_lock(&get_per_cpu_var(percpu), lock); \
++    block_lock_speculation();                           \
++})
+ #define percpu_write_unlock(percpu, lock) \
+     _percpu_write_unlock(&get_per_cpu_var(percpu), lock)
+ 
+-- 
+2.44.0
+
diff --git a/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch b/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch
new file mode 100644
index 0000000..822836d
--- /dev/null
+++ b/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch
@@ -0,0 +1,405 @@
+From 2cc5e57be680a516aa5cdef4281856d09b9d0ea6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 4 Mar 2024 14:29:36 +0100
+Subject: [PATCH 51/67] locking: attempt to ensure lock wrappers are always
+ inline
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This is in order to prevent the locking speculation barriers from being inside
+of `call`ed functions that could be speculatively bypassed.
+
+While there, also add an extra locking barrier to _mm_write_lock() in the branch
+taken when the lock is already held.
+
+Note some functions are switched to use the unsafe variants (without speculation
+barrier) of the locking primitives, but a speculation barrier is always added
+to the exposed public lock wrapping helper.  That's the case with
+sched_spin_lock_double() or pcidevs_lock() for example.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762)
+---
+ xen/arch/x86/hvm/vpt.c         | 10 +++++++---
+ xen/arch/x86/include/asm/irq.h |  1 +
+ xen/arch/x86/mm/mm-locks.h     | 28 +++++++++++++++-------------
+ xen/arch/x86/mm/p2m-pod.c      |  2 +-
+ xen/common/event_channel.c     |  5 +++--
+ xen/common/grant_table.c       |  6 +++---
+ xen/common/sched/core.c        | 19 ++++++++++++-------
+ xen/common/sched/private.h     | 26 ++++++++++++++++++++++++--
+ xen/common/timer.c             |  8 +++++---
+ xen/drivers/passthrough/pci.c  |  5 +++--
+ xen/include/xen/event.h        |  4 ++--
+ xen/include/xen/pci.h          |  8 ++++++--
+ 12 files changed, 82 insertions(+), 40 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
+index cb1d81bf9e..66f1095245 100644
+--- a/xen/arch/x86/hvm/vpt.c
++++ b/xen/arch/x86/hvm/vpt.c
+@@ -161,7 +161,7 @@ static int pt_irq_masked(struct periodic_time *pt)
+  * pt->vcpu field, because another thread holding the pt_migrate lock
+  * may already be spinning waiting for your vcpu lock.
+  */
+-static void pt_vcpu_lock(struct vcpu *v)
++static always_inline void pt_vcpu_lock(struct vcpu *v)
+ {
+     spin_lock(&v->arch.hvm.tm_lock);
+ }
+@@ -180,9 +180,13 @@ static void pt_vcpu_unlock(struct vcpu *v)
+  * need to take an additional lock that protects against pt->vcpu
+  * changing.
+  */
+-static void pt_lock(struct periodic_time *pt)
++static always_inline void pt_lock(struct periodic_time *pt)
+ {
+-    read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
++    /*
++     * Use the speculation unsafe variant for the first lock, as the following
++     * lock taking helper already includes a speculation barrier.
++     */
++    _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
+     spin_lock(&pt->vcpu->arch.hvm.tm_lock);
+ }
+ 
+diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
+index f6a0207a80..823d627fd0 100644
+--- a/xen/arch/x86/include/asm/irq.h
++++ b/xen/arch/x86/include/asm/irq.h
+@@ -178,6 +178,7 @@ void cf_check irq_complete_move(struct irq_desc *);
+ 
+ extern struct irq_desc *irq_desc;
+ 
++/* Not speculation safe, only used for AP bringup. */
+ void lock_vector_lock(void);
+ void unlock_vector_lock(void);
+ 
+diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
+index c1523aeccf..265239c49f 100644
+--- a/xen/arch/x86/mm/mm-locks.h
++++ b/xen/arch/x86/mm/mm-locks.h
+@@ -86,8 +86,8 @@ static inline void _set_lock_level(int l)
+     this_cpu(mm_lock_level) = l;
+ }
+ 
+-static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
+-                            const char *func, int level, int rec)
++static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l,
++                                   const char *func, int level, int rec)
+ {
+     if ( !((mm_locked_by_me(l)) && rec) )
+         _check_lock_level(d, level);
+@@ -137,8 +137,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
+     return (l->locker == get_processor_id());
+ }
+ 
+-static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+-                                  const char *func, int level)
++static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
++                                         const char *func, int level)
+ {
+     if ( !mm_write_locked_by_me(l) )
+     {
+@@ -149,6 +149,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+         l->unlock_level = _get_lock_level();
+         _set_lock_level(_lock_level(d, level));
+     }
++    else
++        block_speculation();
+     l->recurse_count++;
+ }
+ 
+@@ -162,8 +164,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
+     percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
+ }
+ 
+-static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
+-                                 int level)
++static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
++                                        int level)
+ {
+     _check_lock_level(d, level);
+     percpu_read_lock(p2m_percpu_rwlock, &l->lock);
+@@ -178,15 +180,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
+ 
+ /* This wrapper uses the line number to express the locking order below */
+ #define declare_mm_lock(name)                                                 \
+-    static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l,   \
+-                                      const char *func, int rec)              \
++    static always_inline void mm_lock_##name(                                 \
++        const struct domain *d, mm_lock_t *l, const char *func, int rec)      \
+     { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
+ #define declare_mm_rwlock(name)                                               \
+-    static inline void mm_write_lock_##name(const struct domain *d,           \
+-                                            mm_rwlock_t *l, const char *func) \
++    static always_inline void mm_write_lock_##name(                           \
++        const struct domain *d, mm_rwlock_t *l, const char *func)             \
+     { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); }                     \
+-    static inline void mm_read_lock_##name(const struct domain *d,            \
+-                                           mm_rwlock_t *l)                    \
++    static always_inline void mm_read_lock_##name(const struct domain *d,     \
++                                                  mm_rwlock_t *l)             \
+     { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
+ /* These capture the name of the calling function */
+ #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
+@@ -321,7 +323,7 @@ declare_mm_lock(altp2mlist)
+ #define MM_LOCK_ORDER_altp2m                 40
+ declare_mm_rwlock(altp2m);
+ 
+-static inline void p2m_lock(struct p2m_domain *p)
++static always_inline void p2m_lock(struct p2m_domain *p)
+ {
+     if ( p2m_is_altp2m(p) )
+         mm_write_lock(altp2m, p->domain, &p->lock);
+diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
+index fc110506dc..99dbcb3101 100644
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -36,7 +36,7 @@
+ #define superpage_aligned(_x)  (((_x)&(SUPERPAGE_PAGES-1))==0)
+ 
+ /* Enforce lock ordering when grabbing the "external" page_alloc lock */
+-static inline void lock_page_alloc(struct p2m_domain *p2m)
++static always_inline void lock_page_alloc(struct p2m_domain *p2m)
+ {
+     page_alloc_mm_pre_lock(p2m->domain);
+     spin_lock(&(p2m->domain->page_alloc_lock));
+diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
+index f5e0b12d15..dada9f15f5 100644
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
+@@ -62,7 +62,7 @@
+  * just assume the event channel is free or unbound at the moment when the
+  * evtchn_read_trylock() returns false.
+  */
+-static inline void evtchn_write_lock(struct evtchn *evtchn)
++static always_inline void evtchn_write_lock(struct evtchn *evtchn)
+ {
+     write_lock(&evtchn->lock);
+ 
+@@ -364,7 +364,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port)
+     return rc;
+ }
+ 
+-static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
++static always_inline void double_evtchn_lock(struct evtchn *lchn,
++                                             struct evtchn *rchn)
+ {
+     ASSERT(lchn != rchn);
+ 
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index ee7cc496b8..62a8685cd5 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -410,7 +410,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)
+ 
+ static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
+ 
+-static inline void grant_read_lock(struct grant_table *gt)
++static always_inline void grant_read_lock(struct grant_table *gt)
+ {
+     percpu_read_lock(grant_rwlock, &gt->lock);
+ }
+@@ -420,7 +420,7 @@ static inline void grant_read_unlock(struct grant_table *gt)
+     percpu_read_unlock(grant_rwlock, &gt->lock);
+ }
+ 
+-static inline void grant_write_lock(struct grant_table *gt)
++static always_inline void grant_write_lock(struct grant_table *gt)
+ {
+     percpu_write_lock(grant_rwlock, &gt->lock);
+ }
+@@ -457,7 +457,7 @@ nr_active_grant_frames(struct grant_table *gt)
+     return num_act_frames_from_sha_frames(nr_grant_frames(gt));
+ }
+ 
+-static inline struct active_grant_entry *
++static always_inline struct active_grant_entry *
+ active_entry_acquire(struct grant_table *t, grant_ref_t e)
+ {
+     struct active_grant_entry *act;
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 078beb1adb..29bbab5ac6 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu)
+  * This avoids dead- or live-locks when this code is running on both
+  * cpus at the same time.
+  */
+-static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2,
+-                                   unsigned long *flags)
++static always_inline void sched_spin_lock_double(
++    spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags)
+ {
++    /*
++     * In order to avoid extra overhead, use the locking primitives without the
++     * speculation barrier, and introduce a single barrier here.
++     */
+     if ( lock1 == lock2 )
+     {
+-        spin_lock_irqsave(lock1, *flags);
++        *flags = _spin_lock_irqsave(lock1);
+     }
+     else if ( lock1 < lock2 )
+     {
+-        spin_lock_irqsave(lock1, *flags);
+-        spin_lock(lock2);
++        *flags = _spin_lock_irqsave(lock1);
++        _spin_lock(lock2);
+     }
+     else
+     {
+-        spin_lock_irqsave(lock2, *flags);
+-        spin_lock(lock1);
++        *flags = _spin_lock_irqsave(lock2);
++        _spin_lock(lock1);
+     }
++    block_lock_speculation();
+ }
+ 
+ static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index 0527a8c70d..24a93dd0c1 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
+ #define cpumask_scratch        (&this_cpu(cpumask_scratch))
+ #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
+ 
++/*
++ * Deal with _spin_lock_irqsave() returning the flags value instead of storing
++ * it in a passed parameter.
++ */
++#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
++#define _sched_spinlock1(lock, irq, arg) ({ \
++    BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
++    (arg) = _spin_lock##irq(lock); \
++})
++
++#define _sched_spinlock__(nr) _sched_spinlock ## nr
++#define _sched_spinlock_(nr)  _sched_spinlock__(nr)
++#define _sched_spinlock(lock, irq, args...) \
++    _sched_spinlock_(count_args(args))(lock, irq, ## args)
++
+ #define sched_lock(kind, param, cpu, irq, arg...) \
+-static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
++static always_inline spinlock_t \
++*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+ { \
+     for ( ; ; ) \
+     { \
+@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+          * \
+          * It may also be the case that v->processor may change but the \
+          * lock may be the same; this will succeed in that case. \
++         * \
++         * Use the speculation unsafe locking helper, there's a speculation \
++         * barrier before returning to the caller. \
+          */ \
+-        spin_lock##irq(lock, ## arg); \
++        _sched_spinlock(lock, irq, ## arg); \
+         if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
++        { \
++            block_lock_speculation(); \
+             return lock; \
++        } \
+         spin_unlock##irq(lock, ## arg); \
+     } \
+ }
+diff --git a/xen/common/timer.c b/xen/common/timer.c
+index 9b5016d5ed..459668d417 100644
+--- a/xen/common/timer.c
++++ b/xen/common/timer.c
+@@ -240,7 +240,7 @@ static inline void deactivate_timer(struct timer *timer)
+     list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
+ }
+ 
+-static inline bool_t timer_lock(struct timer *timer)
++static inline bool_t timer_lock_unsafe(struct timer *timer)
+ {
+     unsigned int cpu;
+ 
+@@ -254,7 +254,8 @@ static inline bool_t timer_lock(struct timer *timer)
+             rcu_read_unlock(&timer_cpu_read_lock);
+             return 0;
+         }
+-        spin_lock(&per_cpu(timers, cpu).lock);
++        /* Use the speculation unsafe variant, the wrapper has the barrier. */
++        _spin_lock(&per_cpu(timers, cpu).lock);
+         if ( likely(timer->cpu == cpu) )
+             break;
+         spin_unlock(&per_cpu(timers, cpu).lock);
+@@ -267,8 +268,9 @@ static inline bool_t timer_lock(struct timer *timer)
+ #define timer_lock_irqsave(t, flags) ({         \
+     bool_t __x;                                 \
+     local_irq_save(flags);                      \
+-    if ( !(__x = timer_lock(t)) )               \
++    if ( !(__x = timer_lock_unsafe(t)) )        \
+         local_irq_restore(flags);               \
++    block_lock_speculation();                   \
+     __x;                                        \
+ })
+ 
+diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
+index 8c62b14d19..1b3d285166 100644
+--- a/xen/drivers/passthrough/pci.c
++++ b/xen/drivers/passthrough/pci.c
+@@ -52,9 +52,10 @@ struct pci_seg {
+ 
+ static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
+ 
+-void pcidevs_lock(void)
++/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
++void pcidevs_lock_unsafe(void)
+ {
+-    spin_lock_recursive(&_pcidevs_lock);
++    _spin_lock_recursive(&_pcidevs_lock);
+ }
+ 
+ void pcidevs_unlock(void)
+diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
+index 8eae9984a9..dd96e84c69 100644
+--- a/xen/include/xen/event.h
++++ b/xen/include/xen/event.h
+@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
+ #define bucket_from_port(d, p) \
+     ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
+ 
+-static inline void evtchn_read_lock(struct evtchn *evtchn)
++static always_inline void evtchn_read_lock(struct evtchn *evtchn)
+ {
+     read_lock(&evtchn->lock);
+ }
+ 
+-static inline bool evtchn_read_trylock(struct evtchn *evtchn)
++static always_inline bool evtchn_read_trylock(struct evtchn *evtchn)
+ {
+     return read_trylock(&evtchn->lock);
+ }
+diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
+index 5975ca2f30..b373f139d1 100644
+--- a/xen/include/xen/pci.h
++++ b/xen/include/xen/pci.h
+@@ -155,8 +155,12 @@ struct pci_dev {
+  * devices, it also sync the access to the msi capability that is not
+  * interrupt handling related (the mask bit register).
+  */
+-
+-void pcidevs_lock(void);
++void pcidevs_lock_unsafe(void);
++static always_inline void pcidevs_lock(void)
++{
++    pcidevs_lock_unsafe();
++    block_lock_speculation();
++}
+ void pcidevs_unlock(void);
+ bool_t __must_check pcidevs_locked(void);
+ 
+-- 
+2.44.0
+
diff --git a/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch b/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch
new file mode 100644
index 0000000..9e20f78
--- /dev/null
+++ b/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch
@@ -0,0 +1,73 @@
+From 074b4c8987db235a0b86798810c045f68e4775b6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 4 Mar 2024 18:08:48 +0100
+Subject: [PATCH 52/67] x86/mm: add speculation barriers to open coded locks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add a speculation barrier to the clearly identified open-coded lock taking
+functions.
+
+Note that the memory sharing page_lock() replacement (_page_lock()) is left
+as-is, as the code is experimental and not security supported.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 42a572a38e22a97d86a4b648a22597628d5b42e4)
+---
+ xen/arch/x86/include/asm/mm.h | 4 +++-
+ xen/arch/x86/mm.c             | 6 ++++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
+index a5d7fdd32e..5845b729c3 100644
+--- a/xen/arch/x86/include/asm/mm.h
++++ b/xen/arch/x86/include/asm/mm.h
+@@ -393,7 +393,9 @@ const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
+  * The use of PGT_locked in mem_sharing does not collide, since mem_sharing is
+  * only supported for hvm guests, which do not have PV PTEs updated.
+  */
+-int page_lock(struct page_info *page);
++int page_lock_unsafe(struct page_info *page);
++#define page_lock(pg)   lock_evaluate_nospec(page_lock_unsafe(pg))
++
+ void page_unlock(struct page_info *page);
+ 
+ void put_page_type(struct page_info *page);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 330c4abcd1..8d19d719bd 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2033,7 +2033,7 @@ static inline bool current_locked_page_ne_check(struct page_info *page) {
+ #define current_locked_page_ne_check(x) true
+ #endif
+ 
+-int page_lock(struct page_info *page)
++int page_lock_unsafe(struct page_info *page)
+ {
+     unsigned long x, nx;
+ 
+@@ -2094,7 +2094,7 @@ void page_unlock(struct page_info *page)
+  * l3t_lock(), so to avoid deadlock we must avoid grabbing them in
+  * reverse order.
+  */
+-static void l3t_lock(struct page_info *page)
++static always_inline void l3t_lock(struct page_info *page)
+ {
+     unsigned long x, nx;
+ 
+@@ -2103,6 +2103,8 @@ static void l3t_lock(struct page_info *page)
+             cpu_relax();
+         nx = x | PGT_locked;
+     } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
++
++    block_lock_speculation();
+ }
+ 
+ static void l3t_unlock(struct page_info *page)
+-- 
+2.44.0
+
diff --git a/0053-x86-protect-conditional-lock-taking-from-speculative.patch b/0053-x86-protect-conditional-lock-taking-from-speculative.patch
new file mode 100644
index 0000000..f0caa24
--- /dev/null
+++ b/0053-x86-protect-conditional-lock-taking-from-speculative.patch
@@ -0,0 +1,216 @@
+From 0ebd2e49bcd0f566ba6b9158555942aab8e41332 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 4 Mar 2024 16:24:21 +0100
+Subject: [PATCH 53/67] x86: protect conditional lock taking from speculative
+ execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Conditionally taken locks that use the pattern:
+
+if ( lock )
+    spin_lock(...);
+
+Need an else branch in order to issue a speculation barrier in the else case,
+just like it's done in case the lock needs to be acquired.
+
+eval_nospec() could be used on the condition itself, but that would result in a
+double barrier on the branch where the lock is taken.
+
+Introduce a new pair of helpers, {gfn,spin}_lock_if() that can be used to
+conditionally take a lock in a speculation safe way.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 03cf7ca23e0e876075954c558485b267b7d02406)
+---
+ xen/arch/x86/mm.c          | 35 +++++++++++++----------------------
+ xen/arch/x86/mm/mm-locks.h |  9 +++++++++
+ xen/arch/x86/mm/p2m.c      |  5 ++---
+ xen/include/xen/spinlock.h |  8 ++++++++
+ 4 files changed, 32 insertions(+), 25 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 8d19d719bd..d31b8d56ff 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5023,8 +5023,7 @@ static l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
+         if ( !l3t )
+             return NULL;
+         UNMAP_DOMAIN_PAGE(l3t);
+-        if ( locking )
+-            spin_lock(&map_pgdir_lock);
++        spin_lock_if(locking, &map_pgdir_lock);
+         if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
+         {
+             l4_pgentry_t l4e = l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
+@@ -5061,8 +5060,7 @@ static l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
+             return NULL;
+         }
+         UNMAP_DOMAIN_PAGE(l2t);
+-        if ( locking )
+-            spin_lock(&map_pgdir_lock);
++        spin_lock_if(locking, &map_pgdir_lock);
+         if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+         {
+             l3e_write(pl3e, l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR));
+@@ -5100,8 +5098,7 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
+             return NULL;
+         }
+         UNMAP_DOMAIN_PAGE(l1t);
+-        if ( locking )
+-            spin_lock(&map_pgdir_lock);
++        spin_lock_if(locking, &map_pgdir_lock);
+         if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+         {
+             l2e_write(pl2e, l2e_from_mfn(l1mfn, __PAGE_HYPERVISOR));
+@@ -5132,6 +5129,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
+     do {                      \
+         if ( locking )        \
+             l3t_lock(page);   \
++        else                            \
++            block_lock_speculation();   \
+     } while ( false )
+ 
+ #define L3T_UNLOCK(page)                           \
+@@ -5347,8 +5346,7 @@ int map_pages_to_xen(
+             if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
+                 flush_flags |= FLUSH_TLB_GLOBAL;
+ 
+-            if ( locking )
+-                spin_lock(&map_pgdir_lock);
++            spin_lock_if(locking, &map_pgdir_lock);
+             if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
+                  (l3e_get_flags(*pl3e) & _PAGE_PSE) )
+             {
+@@ -5452,8 +5450,7 @@ int map_pages_to_xen(
+                 if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
+                     flush_flags |= FLUSH_TLB_GLOBAL;
+ 
+-                if ( locking )
+-                    spin_lock(&map_pgdir_lock);
++                spin_lock_if(locking, &map_pgdir_lock);
+                 if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
+                      (l2e_get_flags(*pl2e) & _PAGE_PSE) )
+                 {
+@@ -5494,8 +5491,7 @@ int map_pages_to_xen(
+                 unsigned long base_mfn;
+                 const l1_pgentry_t *l1t;
+ 
+-                if ( locking )
+-                    spin_lock(&map_pgdir_lock);
++                spin_lock_if(locking, &map_pgdir_lock);
+ 
+                 ol2e = *pl2e;
+                 /*
+@@ -5549,8 +5545,7 @@ int map_pages_to_xen(
+             unsigned long base_mfn;
+             const l2_pgentry_t *l2t;
+ 
+-            if ( locking )
+-                spin_lock(&map_pgdir_lock);
++            spin_lock_if(locking, &map_pgdir_lock);
+ 
+             ol3e = *pl3e;
+             /*
+@@ -5694,8 +5689,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+                                        l3e_get_flags(*pl3e)));
+             UNMAP_DOMAIN_PAGE(l2t);
+ 
+-            if ( locking )
+-                spin_lock(&map_pgdir_lock);
++            spin_lock_if(locking, &map_pgdir_lock);
+             if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
+                  (l3e_get_flags(*pl3e) & _PAGE_PSE) )
+             {
+@@ -5754,8 +5748,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+                                            l2e_get_flags(*pl2e) & ~_PAGE_PSE));
+                 UNMAP_DOMAIN_PAGE(l1t);
+ 
+-                if ( locking )
+-                    spin_lock(&map_pgdir_lock);
++                spin_lock_if(locking, &map_pgdir_lock);
+                 if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
+                      (l2e_get_flags(*pl2e) & _PAGE_PSE) )
+                 {
+@@ -5799,8 +5792,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+              */
+             if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) )
+                 continue;
+-            if ( locking )
+-                spin_lock(&map_pgdir_lock);
++            spin_lock_if(locking, &map_pgdir_lock);
+ 
+             /*
+              * L2E may be already cleared, or set to a superpage, by
+@@ -5847,8 +5839,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+         if ( (nf & _PAGE_PRESENT) ||
+              ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) )
+             continue;
+-        if ( locking )
+-            spin_lock(&map_pgdir_lock);
++        spin_lock_if(locking, &map_pgdir_lock);
+ 
+         /*
+          * L3E may be already cleared, or set to a superpage, by
+diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
+index 265239c49f..3ea2d8eb03 100644
+--- a/xen/arch/x86/mm/mm-locks.h
++++ b/xen/arch/x86/mm/mm-locks.h
+@@ -347,6 +347,15 @@ static inline void p2m_unlock(struct p2m_domain *p)
+ #define p2m_locked_by_me(p)   mm_write_locked_by_me(&(p)->lock)
+ #define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
+ 
++static always_inline void gfn_lock_if(bool condition, struct p2m_domain *p2m,
++                                      gfn_t gfn, unsigned int order)
++{
++    if ( condition )
++        gfn_lock(p2m, gfn, order);
++    else
++        block_lock_speculation();
++}
++
+ /* PoD lock (per-p2m-table)
+  *
+  * Protects private PoD data structs: entry and cache
+diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
+index b28c899b5e..1fa9e01012 100644
+--- a/xen/arch/x86/mm/p2m.c
++++ b/xen/arch/x86/mm/p2m.c
+@@ -292,9 +292,8 @@ mfn_t p2m_get_gfn_type_access(struct p2m_domain *p2m, gfn_t gfn,
+     if ( q & P2M_UNSHARE )
+         q |= P2M_ALLOC;
+ 
+-    if ( locked )
+-        /* Grab the lock here, don't release until put_gfn */
+-        gfn_lock(p2m, gfn, 0);
++    /* Grab the lock here, don't release until put_gfn */
++    gfn_lock_if(locked, p2m, gfn, 0);
+ 
+     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
+ 
+diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
+index daf48fdea7..7e75d0e2e7 100644
+--- a/xen/include/xen/spinlock.h
++++ b/xen/include/xen/spinlock.h
+@@ -216,6 +216,14 @@ static always_inline void spin_lock_irq(spinlock_t *l)
+         block_lock_speculation();                               \
+     })
+ 
++/* Conditionally take a spinlock in a speculation safe way. */
++static always_inline void spin_lock_if(bool condition, spinlock_t *l)
++{
++    if ( condition )
++        _spin_lock(l);
++    block_lock_speculation();
++}
++
+ #define spin_unlock(l)                _spin_unlock(l)
+ #define spin_unlock_irq(l)            _spin_unlock_irq(l)
+ #define spin_unlock_irqrestore(l, f)  _spin_unlock_irqrestore(l, f)
+-- 
+2.44.0
+
diff --git a/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch b/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch
new file mode 100644
index 0000000..90efaf8
--- /dev/null
+++ b/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch
@@ -0,0 +1,33 @@
+From a01c0b0f9691a8350e74938329892f949669119e Mon Sep 17 00:00:00 2001
+From: Olaf Hering <olaf@aepfle.de>
+Date: Wed, 27 Mar 2024 12:27:03 +0100
+Subject: [PATCH 54/67] tools: ipxe: update for fixing build with GCC12
+
+Use a snapshot which includes commit
+b0ded89e917b48b73097d3b8b88dfa3afb264ed0 ("[build] Disable dangling
+pointer checking for GCC"), which fixes build with gcc12.
+
+Signed-off-by: Olaf Hering <olaf@aepfle.de>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 18a36b4a9b088875486cfe33a2d4a8ae7eb4ab47
+master date: 2023-04-25 23:47:45 +0100
+---
+ tools/firmware/etherboot/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/firmware/etherboot/Makefile b/tools/firmware/etherboot/Makefile
+index 4bc3633ba3..7a56fe8014 100644
+--- a/tools/firmware/etherboot/Makefile
++++ b/tools/firmware/etherboot/Makefile
+@@ -11,7 +11,7 @@ IPXE_GIT_URL ?= git://git.ipxe.org/ipxe.git
+ endif
+ 
+ # put an updated tar.gz on xenbits after changes to this variable
+-IPXE_GIT_TAG := 3c040ad387099483102708bb1839110bc788cefb
++IPXE_GIT_TAG := 1d1cf74a5e58811822bee4b3da3cff7282fcdfca
+ 
+ IPXE_TARBALL_URL ?= $(XEN_EXTFILES_URL)/ipxe-git-$(IPXE_GIT_TAG).tar.gz
+ 
+-- 
+2.44.0
+
diff --git a/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch b/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch
new file mode 100644
index 0000000..719234c
--- /dev/null
+++ b/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch
@@ -0,0 +1,35 @@
+From a153b8b42e9027ba3057bc7c8bf55e4d71e86ec3 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Mar 2024 12:28:24 +0100
+Subject: [PATCH 55/67] x86/mm: use block_lock_speculation() in
+ _mm_write_lock()
+
+I can only guess that using block_speculation() there was a leftover from
+when, earlier on, SPECULATIVE_HARDEN_LOCK depended on
+SPECULATIVE_HARDEN_BRANCH.
+
+Fixes: 197ecd838a2a ("locking: attempt to ensure lock wrappers are always inline")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 62018f08708a5ff6ef8fc8ff2aaaac46e5a60430
+master date: 2024-03-18 13:53:37 +0100
+---
+ xen/arch/x86/mm/mm-locks.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
+index 3ea2d8eb03..7d6e4d2a7c 100644
+--- a/xen/arch/x86/mm/mm-locks.h
++++ b/xen/arch/x86/mm/mm-locks.h
+@@ -150,7 +150,7 @@ static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+         _set_lock_level(_lock_level(d, level));
+     }
+     else
+-        block_speculation();
++        block_lock_speculation();
+     l->recurse_count++;
+ }
+ 
+-- 
+2.44.0
+
diff --git a/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch b/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch
new file mode 100644
index 0000000..5d549c1
--- /dev/null
+++ b/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch
@@ -0,0 +1,120 @@
+From 471b53c6a092940f3629990d9ca946aa22bd8535 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 27 Mar 2024 12:29:11 +0100
+Subject: [PATCH 56/67] x86/boot: Fix setup_apic_nmi_watchdog() to fail more
+ cleanly
+
+Right now, if the user requests the watchdog on the command line,
+setup_apic_nmi_watchdog() will blindly assume that setting up the watchdog
+worked.  Reuse nmi_perfctr_msr to identify when the watchdog has been
+configured.
+
+Rearrange setup_p6_watchdog() to not set nmi_perfctr_msr until the sanity
+checks are complete.  Turn setup_p4_watchdog() into a void function, matching
+the others.
+
+If the watchdog isn't set up, inform the user and override to NMI_NONE, which
+will prevent check_nmi_watchdog() from claiming that all CPUs are stuck.
+
+e.g.:
+
+  (XEN) alt table ffff82d040697c38 -> ffff82d0406a97f0
+  (XEN) Failed to configure NMI watchdog
+  (XEN) Brought up 512 CPUs
+  (XEN) Scheduling granularity: cpu, 1 CPU per sched-resource
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: f658321374687c7339235e1ac643e0427acff717
+master date: 2024-03-19 18:29:37 +0000
+---
+ xen/arch/x86/nmi.c | 25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
+index 7656023748..7c9591b65e 100644
+--- a/xen/arch/x86/nmi.c
++++ b/xen/arch/x86/nmi.c
+@@ -323,8 +323,6 @@ static void setup_p6_watchdog(unsigned counter)
+ {
+     unsigned int evntsel;
+ 
+-    nmi_perfctr_msr = MSR_P6_PERFCTR(0);
+-
+     if ( !nmi_p6_event_width && current_cpu_data.cpuid_level >= 0xa )
+         nmi_p6_event_width = MASK_EXTR(cpuid_eax(0xa), P6_EVENT_WIDTH_MASK);
+     if ( !nmi_p6_event_width )
+@@ -334,6 +332,8 @@ static void setup_p6_watchdog(unsigned counter)
+          nmi_p6_event_width > BITS_PER_LONG )
+         return;
+ 
++    nmi_perfctr_msr = MSR_P6_PERFCTR(0);
++
+     clear_msr_range(MSR_P6_EVNTSEL(0), 2);
+     clear_msr_range(MSR_P6_PERFCTR(0), 2);
+ 
+@@ -349,13 +349,13 @@ static void setup_p6_watchdog(unsigned counter)
+     wrmsr(MSR_P6_EVNTSEL(0), evntsel, 0);
+ }
+ 
+-static int setup_p4_watchdog(void)
++static void setup_p4_watchdog(void)
+ {
+     uint64_t misc_enable;
+ 
+     rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+     if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL))
+-        return 0;
++        return;
+ 
+     nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0;
+     nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
+@@ -378,13 +378,12 @@ static int setup_p4_watchdog(void)
+     clear_msr_range(0x3E0, 2);
+     clear_msr_range(MSR_P4_BPU_CCCR0, 18);
+     clear_msr_range(MSR_P4_BPU_PERFCTR0, 18);
+-        
++
+     wrmsrl(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0);
+     wrmsrl(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE);
+     write_watchdog_counter("P4_IQ_COUNTER0");
+     apic_write(APIC_LVTPC, APIC_DM_NMI);
+     wrmsrl(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val);
+-    return 1;
+ }
+ 
+ void setup_apic_nmi_watchdog(void)
+@@ -399,8 +398,6 @@ void setup_apic_nmi_watchdog(void)
+         case 0xf ... 0x19:
+             setup_k7_watchdog();
+             break;
+-        default:
+-            return;
+         }
+         break;
+     case X86_VENDOR_INTEL:
+@@ -411,14 +408,16 @@ void setup_apic_nmi_watchdog(void)
+                               : CORE_EVENT_CPU_CLOCKS_NOT_HALTED);
+             break;
+         case 15:
+-            if (!setup_p4_watchdog())
+-                return;
++            setup_p4_watchdog();
+             break;
+-        default:
+-            return;
+         }
+         break;
+-    default:
++    }
++
++    if ( nmi_perfctr_msr == 0 )
++    {
++        printk(XENLOG_WARNING "Failed to configure NMI watchdog\n");
++        nmi_watchdog = NMI_NONE;
+         return;
+     }
+ 
+-- 
+2.44.0
+
diff --git a/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch b/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch
new file mode 100644
index 0000000..dedc1c2
--- /dev/null
+++ b/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch
@@ -0,0 +1,61 @@
+From bfb69205376d94ff91b09a337c47fb665ee12da3 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Mar 2024 12:29:33 +0100
+Subject: [PATCH 57/67] x86/PoD: tie together P2M update and increment of entry
+ count
+
+When not holding the PoD lock across the entire region covering P2M
+update and stats update, the entry count - if to be incorrect at all -
+should indicate too large a value in preference to a too small one, to
+avoid functions bailing early when they find the count is zero. However,
+instead of moving the increment ahead (and adjusting back upon failure),
+extend the PoD-locked region.
+
+Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@cloud.com>
+master commit: cc950c49ae6a6690f7fc3041a1f43122c250d250
+master date: 2024-03-21 09:48:10 +0100
+---
+ xen/arch/x86/mm/p2m-pod.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
+index 99dbcb3101..e903db9d93 100644
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -1370,19 +1370,28 @@ mark_populate_on_demand(struct domain *d, unsigned long gfn_l,
+         }
+     }
+ 
++    /*
++     * P2M update and stats increment need to collectively be under PoD lock,
++     * to prevent code elsewhere observing PoD entry count being zero despite
++     * there actually still being PoD entries (created by the p2m_set_entry()
++     * invocation below).
++     */
++    pod_lock(p2m);
++
+     /* Now, actually do the two-way mapping */
+     rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order,
+                        p2m_populate_on_demand, p2m->default_access);
+     if ( rc == 0 )
+     {
+-        pod_lock(p2m);
+         p2m->pod.entry_count += 1UL << order;
+         p2m->pod.entry_count -= pod_count;
+         BUG_ON(p2m->pod.entry_count < 0);
+-        pod_unlock(p2m);
++    }
++
++    pod_unlock(p2m);
+ 
++    if ( rc == 0 )
+         ioreq_request_mapcache_invalidate(d);
+-    }
+     else if ( order )
+     {
+         /*
+-- 
+2.44.0
+
diff --git a/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch b/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch
new file mode 100644
index 0000000..dfc7f5a
--- /dev/null
+++ b/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch
@@ -0,0 +1,143 @@
+From 7abd305607938b846da1a37dd1bda7bf7d47dba5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
+Date: Wed, 31 Jan 2024 10:52:55 +0000
+Subject: [PATCH 58/67] tools/oxenstored: Use Map instead of Hashtbl for quotas
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+On a stress test running 1000 VMs, flamegraphs have shown that
+`oxenstored` spends a large amount of time in `Hashtbl.copy` and the GC.
+
+Hashtable complexity:
+ * read/write: O(1) average
+ * copy: O(domains) -- copying the entire table
+
+Map complexity:
+ * read/write: O(log n) worst case
+ * copy: O(1) -- a word copy
+
+We always perform at least one 'copy' when processing each xenstore
+packet (regardless of whether it is a read-only operation or inside a
+transaction or not), so the actual complexity per packet is:
+  * Hashtbl: O(domains)
+  * Map: O(log domains)
+
+Maps are the clear winner, and a better fit for the immutable xenstore
+tree.
+
+Signed-off-by: Edwin Török <edwin.torok@cloud.com>
+Acked-by: Christian Lindig <christian.lindig@cloud.com>
+(cherry picked from commit b6cf604207fd0a04451a48f2ce6d05fb66c612ab)
+---
+ tools/ocaml/xenstored/quota.ml | 65 ++++++++++++++++++----------------
+ 1 file changed, 34 insertions(+), 31 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml
+index 6e3d6401ae..ee8dd22581 100644
+--- a/tools/ocaml/xenstored/quota.ml
++++ b/tools/ocaml/xenstored/quota.ml
+@@ -23,66 +23,69 @@ let activate = ref true
+ let maxent = ref (1000)
+ let maxsize = ref (2048)
+ 
++module Domid = struct
++	type t = Xenctrl.domid
++	let compare (a:t) (b:t) = compare a b
++end
++
++module DomidMap = Map.Make(Domid)
++
+ type t = {
+ 	maxent: int;               (* max entities per domU *)
+ 	maxsize: int;              (* max size of data store in one node *)
+-	cur: (Xenctrl.domid, int) Hashtbl.t; (* current domains quota *)
++	mutable cur: int DomidMap.t; (* current domains quota *)
+ }
+ 
+ let to_string quota domid =
+-	if Hashtbl.mem quota.cur domid
+-	then Printf.sprintf "dom%i quota: %i/%i" domid (Hashtbl.find quota.cur domid) quota.maxent
+-	else Printf.sprintf "dom%i quota: not set" domid
++	try
++		Printf.sprintf "dom%i quota: %i/%i" domid (DomidMap.find domid quota.cur) quota.maxent
++	with Not_found ->
++		Printf.sprintf "dom%i quota: not set" domid
+ 
+ let create () =
+-	{ maxent = !maxent; maxsize = !maxsize; cur = Hashtbl.create 100; }
++	{ maxent = !maxent; maxsize = !maxsize; cur = DomidMap.empty; }
+ 
+-let copy quota = { quota with cur = (Hashtbl.copy quota.cur) }
++let copy quota = { quota with cur = quota.cur }
+ 
+-let del quota id = Hashtbl.remove quota.cur id
++let del quota id = { quota with cur = DomidMap.remove id quota.cur }
+ 
+ let _check quota id size =
+ 	if size > quota.maxsize then (
+ 		warn "domain %u err create entry: data too big %d" id size;
+ 		raise Data_too_big
+ 	);
+-	if id > 0 && Hashtbl.mem quota.cur id then
+-		let entry = Hashtbl.find quota.cur id in
++	if id > 0 then
++	try
++		let entry = DomidMap.find id quota.cur in
+ 		if entry >= quota.maxent then (
+ 			warn "domain %u cannot create entry: quota reached" id;
+ 			raise Limit_reached
+ 		)
++	with Not_found -> ()
+ 
+ let check quota id size =
+ 	if !activate then
+ 		_check quota id size
+ 
+-let get_entry quota id = Hashtbl.find quota.cur id
++let find_or_zero quota_cur id =
++	try DomidMap.find id quota_cur with Not_found -> 0
+ 
+-let set_entry quota id nb =
+-	if nb = 0
+-	then Hashtbl.remove quota.cur id
+-	else begin
+-	if Hashtbl.mem quota.cur id then
+-		Hashtbl.replace quota.cur id nb
+-	else
+-		Hashtbl.add quota.cur id nb
+-	end
++let update_entry quota_cur id diff =
++	let nb = diff + find_or_zero quota_cur id in
++	if nb = 0 then DomidMap.remove id quota_cur
++	else DomidMap.add id nb quota_cur
+ 
+ let del_entry quota id =
+-	try
+-		let nb = get_entry quota id in
+-		set_entry quota id (nb - 1)
+-	with Not_found -> ()
++	quota.cur <- update_entry quota.cur id (-1)
+ 
+ let add_entry quota id =
+-	let nb = try get_entry quota id with Not_found -> 0 in
+-	set_entry quota id (nb + 1)
+-
+-let add quota diff =
+-	Hashtbl.iter (fun id nb -> set_entry quota id (get_entry quota id + nb)) diff.cur
++	quota.cur <- update_entry quota.cur id (+1)
+ 
+ let merge orig_quota mod_quota dest_quota =
+-	  Hashtbl.iter (fun id nb -> let diff = nb - (try get_entry orig_quota id with Not_found -> 0) in
+-				if diff <> 0 then
+-					set_entry dest_quota id ((try get_entry dest_quota id with Not_found -> 0) + diff)) mod_quota.cur
++	let fold_merge id nb dest =
++		match nb - find_or_zero orig_quota.cur id with
++		| 0 -> dest (* not modified *)
++		| diff -> update_entry dest id diff (* update with [x=x+diff] *)
++	in
++	dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur
++	(* dest_quota = dest_quota + (mod_quota - orig_quota) *)
+-- 
+2.44.0
+
diff --git a/0059-tools-oxenstored-Make-Quota.t-pure.patch b/0059-tools-oxenstored-Make-Quota.t-pure.patch
new file mode 100644
index 0000000..7616b90
--- /dev/null
+++ b/0059-tools-oxenstored-Make-Quota.t-pure.patch
@@ -0,0 +1,121 @@
+From f38a815a54000ca51ff5165b2863d60b6bbea49c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
+Date: Wed, 31 Jan 2024 10:52:56 +0000
+Subject: [PATCH 59/67] tools/oxenstored: Make Quota.t pure
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Now that we no longer have a hashtable inside we can make Quota.t pure, and
+push the mutable update to its callers.  Store.t already had a mutable Quota.t
+field.
+
+No functional change.
+
+Signed-off-by: Edwin Török <edwin.torok@cloud.com>
+Acked-by: Christian Lindig <christian.lindig@cloud.com>
+(cherry picked from commit 098d868e52ac0165b7f36e22b767ea70cef70054)
+---
+ tools/ocaml/xenstored/quota.ml |  8 ++++----
+ tools/ocaml/xenstored/store.ml | 17 ++++++++++-------
+ 2 files changed, 14 insertions(+), 11 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml
+index ee8dd22581..b3ab678c72 100644
+--- a/tools/ocaml/xenstored/quota.ml
++++ b/tools/ocaml/xenstored/quota.ml
+@@ -33,7 +33,7 @@ module DomidMap = Map.Make(Domid)
+ type t = {
+ 	maxent: int;               (* max entities per domU *)
+ 	maxsize: int;              (* max size of data store in one node *)
+-	mutable cur: int DomidMap.t; (* current domains quota *)
++	cur: int DomidMap.t; (* current domains quota *)
+ }
+ 
+ let to_string quota domid =
+@@ -76,10 +76,10 @@ let update_entry quota_cur id diff =
+ 	else DomidMap.add id nb quota_cur
+ 
+ let del_entry quota id =
+-	quota.cur <- update_entry quota.cur id (-1)
++	{quota with cur = update_entry quota.cur id (-1)}
+ 
+ let add_entry quota id =
+-	quota.cur <- update_entry quota.cur id (+1)
++	{quota with cur = update_entry quota.cur id (+1)}
+ 
+ let merge orig_quota mod_quota dest_quota =
+ 	let fold_merge id nb dest =
+@@ -87,5 +87,5 @@ let merge orig_quota mod_quota dest_quota =
+ 		| 0 -> dest (* not modified *)
+ 		| diff -> update_entry dest id diff (* update with [x=x+diff] *)
+ 	in
+-	dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur
++	{dest_quota with cur = DomidMap.fold fold_merge mod_quota.cur dest_quota.cur}
+ 	(* dest_quota = dest_quota + (mod_quota - orig_quota) *)
+diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
+index c94dbf3a62..5dd965db15 100644
+--- a/tools/ocaml/xenstored/store.ml
++++ b/tools/ocaml/xenstored/store.ml
+@@ -85,7 +85,9 @@ let check_owner node connection =
+ 		raise Define.Permission_denied;
+ 	end
+ 
+-let rec recurse fct node = fct node; SymbolMap.iter (fun _ -> recurse fct) node.children
++let rec recurse fct node acc =
++	let acc = fct node acc in
++	SymbolMap.fold (fun _ -> recurse fct) node.children acc
+ 
+ (** [recurse_filter_map f tree] applies [f] on each node in the tree recursively,
+     possibly removing some nodes.
+@@ -408,7 +410,7 @@ let dump_buffer store = dump_store_buf store.root
+ let set_node store path node orig_quota mod_quota =
+ 	let root = Path.set_node store.root path node in
+ 	store.root <- root;
+-	Quota.merge orig_quota mod_quota store.quota
++	store.quota <- Quota.merge orig_quota mod_quota store.quota
+ 
+ let write store perm path value =
+ 	let node, existing = get_deepest_existing_node store path in
+@@ -422,7 +424,7 @@ let write store perm path value =
+ 	let root, node_created = path_write store perm path value in
+ 	store.root <- root;
+ 	if node_created
+-	then Quota.add_entry store.quota owner
++	then store.quota <- Quota.add_entry store.quota owner
+ 
+ let mkdir store perm path =
+ 	let node, existing = get_deepest_existing_node store path in
+@@ -431,7 +433,7 @@ let mkdir store perm path =
+ 	if not (existing || (Perms.Connection.is_dom0 perm)) then Quota.check store.quota owner 0;
+ 	store.root <- path_mkdir store perm path;
+ 	if not existing then
+-	Quota.add_entry store.quota owner
++	store.quota <- Quota.add_entry store.quota owner
+ 
+ let rm store perm path =
+ 	let rmed_node = Path.get_node store.root path in
+@@ -439,7 +441,7 @@ let rm store perm path =
+ 	| None -> raise Define.Doesnt_exist
+ 	| Some rmed_node ->
+ 		store.root <- path_rm store perm path;
+-		Node.recurse (fun node -> Quota.del_entry store.quota (Node.get_owner node)) rmed_node
++		store.quota <- Node.recurse (fun node quota -> Quota.del_entry quota (Node.get_owner node)) rmed_node store.quota
+ 
+ let setperms store perm path nperms =
+ 	match Path.get_node store.root path with
+@@ -450,8 +452,9 @@ let setperms store perm path nperms =
+ 		if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) then
+ 			raise Define.Permission_denied;
+ 		store.root <- path_setperms store perm path nperms;
+-		Quota.del_entry store.quota old_owner;
+-		Quota.add_entry store.quota new_owner
++		store.quota <-
++			let quota = Quota.del_entry store.quota old_owner in
++			Quota.add_entry quota new_owner
+ 
+ let reset_permissions store domid =
+ 	Logging.info "store|node" "Cleaning up xenstore ACLs for domid %d" domid;
+-- 
+2.44.0
+
diff --git a/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch b/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch
new file mode 100644
index 0000000..ce2b89d
--- /dev/null
+++ b/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch
@@ -0,0 +1,90 @@
+From bb27e11c56963e170d1f6d2fbddbc956f7164121 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:17:25 +0200
+Subject: [PATCH 60/67] x86/cpu-policy: Hide x2APIC from PV guests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+PV guests can't write to MSR_APIC_BASE (in order to set EXTD), nor can they
+access any of the x2APIC MSR range.  Therefore they mustn't see the x2APIC
+CPUID bit saying that they can.
+
+Right now, the host x2APIC flag filters into PV guests, meaning that PV guests
+generally see x2APIC except on Zen1-and-older AMD systems.
+
+Linux works around this by explicitly hiding the bit itself, and filtering
+EXTD out of MSR_APIC_BASE reads.  NetBSD behaves more in the spirit of PV
+guests, and entirely ignores the APIC when built as a PV guest.
+
+Change the annotation from !A to !S.  This has a consequence of stripping it
+out of both PV featuremasks.  However, as existing guests may have seen the
+bit, set it back into the PV Max policy; a VM which saw the bit and is alive
+enough to migrate will have ignored it one way or another.
+
+Hiding x2APIC does change the contents of leaf 0xb, but as the information is
+nonsense to begin with, this is likely an improvement on the status quo.
+
+Xen's blind assumption that APIC_ID = vCPU_ID * 2 isn't interlinked with the
+host's topology structure, where a PV guest may see real host values, and the
+APIC_IDs are useless without an MADT to start with.  Dom0 is the only PV VM to
+get an MADT but it's the host one, meaning the two sets of APIC_IDs are from
+different address spaces.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 5420aa165dfa5fe95dd84bb71cb96c15459935b1
+master date: 2024-03-01 20:14:19 +0000
+---
+ xen/arch/x86/cpu-policy.c                   | 11 +++++++++--
+ xen/include/public/arch-x86/cpufeatureset.h |  2 +-
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 96c2cee1a8..ed64d56294 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -559,6 +559,14 @@ static void __init calculate_pv_max_policy(void)
+     for ( i = 0; i < ARRAY_SIZE(fs); ++i )
+         fs[i] &= pv_max_featuremask[i];
+ 
++    /*
++     * Xen at the time of writing (Feb 2024, 4.19 dev cycle) used to leak the
++     * host x2APIC capability into PV guests, but never supported the guest
++     * trying to turn x2APIC mode on.  Tolerate an incoming VM which saw the
++     * x2APIC CPUID bit and is alive enough to migrate.
++     */
++    __set_bit(X86_FEATURE_X2APIC, fs);
++
+     /*
+      * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional
+      * availability, or admin choice), hide the feature.
+@@ -837,11 +845,10 @@ void recalculate_cpuid_policy(struct domain *d)
+     }
+ 
+     /*
+-     * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY.  These bits
++     * Allow the toolstack to set HTT and CMP_LEGACY.  These bits
+      * affect how to interpret topology information in other cpuid leaves.
+      */
+     __set_bit(X86_FEATURE_HTT, max_fs);
+-    __set_bit(X86_FEATURE_X2APIC, max_fs);
+     __set_bit(X86_FEATURE_CMP_LEGACY, max_fs);
+ 
+     /*
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index 113e6cadc1..bc971f3c6f 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -123,7 +123,7 @@ XEN_CPUFEATURE(PCID,          1*32+17) /*H  Process Context ID */
+ XEN_CPUFEATURE(DCA,           1*32+18) /*   Direct Cache Access */
+ XEN_CPUFEATURE(SSE4_1,        1*32+19) /*A  Streaming SIMD Extensions 4.1 */
+ XEN_CPUFEATURE(SSE4_2,        1*32+20) /*A  Streaming SIMD Extensions 4.2 */
+-XEN_CPUFEATURE(X2APIC,        1*32+21) /*!A Extended xAPIC */
++XEN_CPUFEATURE(X2APIC,        1*32+21) /*!S Extended xAPIC */
+ XEN_CPUFEATURE(MOVBE,         1*32+22) /*A  movbe instruction */
+ XEN_CPUFEATURE(POPCNT,        1*32+23) /*A  POPCNT instruction */
+ XEN_CPUFEATURE(TSC_DEADLINE,  1*32+24) /*S  TSC Deadline Timer */
+-- 
+2.44.0
+
diff --git a/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch b/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch
new file mode 100644
index 0000000..d1b8786
--- /dev/null
+++ b/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch
@@ -0,0 +1,85 @@
+From 70ad9c5fdeac4814050080c87e06d44292ecf868 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:18:05 +0200
+Subject: [PATCH 61/67] x86/cpu-policy: Fix visibility of HTT/CMP_LEGACY in max
+ policies
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The block in recalculate_cpuid_policy() predates the proper split between
+default and max policies, and was a "slightly max for a toolstack which knows
+about it" capability.  It didn't get transformed properly in Xen 4.14.
+
+Because Xen will accept a VM with HTT/CMP_LEGACY seen, they should be visible
+in the max policies.  Keep the default policy matching host settings.
+
+This manifested as an incorrectly-rejected migration across XenServer's Xen
+4.13 -> 4.17 upgrade, as Xapi is slowly growing the logic to check a VM
+against the target max policy.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: e2d8a652251660c3252d92b442e1a9c5d6e6a1e9
+master date: 2024-03-01 20:14:19 +0000
+---
+ xen/arch/x86/cpu-policy.c | 29 ++++++++++++++++++++++-------
+ 1 file changed, 22 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index ed64d56294..24acd12ce2 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -458,6 +458,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+              raw_cpu_policy.feat.clwb )
+             __set_bit(X86_FEATURE_CLWB, fs);
+     }
++
++    /*
++     * Topology information inside the guest is entirely at the toolstack's
++     * discretion, and bears no relationship to the host we're running on.
++     *
++     * HTT identifies p->basic.lppp as valid
++     * CMP_LEGACY identifies p->extd.nc as valid
++     */
++    __set_bit(X86_FEATURE_HTT, fs);
++    __set_bit(X86_FEATURE_CMP_LEGACY, fs);
+ }
+ 
+ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+@@ -512,6 +522,18 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+             __clear_bit(X86_FEATURE_CLWB, fs);
+     }
+ 
++    /*
++     * Topology information is at the toolstack's discretion so these are
++     * unconditionally set in max, but pick a default which matches the host.
++     */
++    __clear_bit(X86_FEATURE_HTT, fs);
++    if ( cpu_has_htt )
++        __set_bit(X86_FEATURE_HTT, fs);
++
++    __clear_bit(X86_FEATURE_CMP_LEGACY, fs);
++    if ( cpu_has_cmp_legacy )
++        __set_bit(X86_FEATURE_CMP_LEGACY, fs);
++
+     /*
+      * On certain hardware, speculative or errata workarounds can result in
+      * TSX being placed in "force-abort" mode, where it doesn't actually
+@@ -844,13 +866,6 @@ void recalculate_cpuid_policy(struct domain *d)
+         }
+     }
+ 
+-    /*
+-     * Allow the toolstack to set HTT and CMP_LEGACY.  These bits
+-     * affect how to interpret topology information in other cpuid leaves.
+-     */
+-    __set_bit(X86_FEATURE_HTT, max_fs);
+-    __set_bit(X86_FEATURE_CMP_LEGACY, max_fs);
+-
+     /*
+      * 32bit PV domains can't use any Long Mode features, and cannot use
+      * SYSCALL on non-AMD hardware.
+-- 
+2.44.0
+
diff --git a/0062-xen-virtual-region-Rename-the-start-end-fields.patch b/0062-xen-virtual-region-Rename-the-start-end-fields.patch
new file mode 100644
index 0000000..9dbd5c9
--- /dev/null
+++ b/0062-xen-virtual-region-Rename-the-start-end-fields.patch
@@ -0,0 +1,140 @@
+From 2392e958ec6fd2e48e011781344cf94dee6d6142 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:18:51 +0200
+Subject: [PATCH 62/67] xen/virtual-region: Rename the start/end fields
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+... to text_{start,end}.  We're about to introduce another start/end pair.
+
+Despite its name, struct virtual_region has always been a module-ish
+description.  Call this out specifically.
+
+As minor cleanup, replace ROUNDUP(x, PAGE_SIZE) with the more concise
+PAGE_ALIGN() ahead of duplicating the example.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: 989556c6f8ca080f5f202417af97d1188b9ba52a
+master date: 2024-03-07 14:24:42 +0000
+---
+ xen/common/livepatch.c           |  9 +++++----
+ xen/common/virtual_region.c      | 19 ++++++++++---------
+ xen/include/xen/virtual_region.h | 11 +++++++++--
+ 3 files changed, 24 insertions(+), 15 deletions(-)
+
+diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
+index a5068a2217..29395f286f 100644
+--- a/xen/common/livepatch.c
++++ b/xen/common/livepatch.c
+@@ -785,8 +785,8 @@ static int prepare_payload(struct payload *payload,
+     region = &payload->region;
+ 
+     region->symbols_lookup = livepatch_symbols_lookup;
+-    region->start = payload->text_addr;
+-    region->end = payload->text_addr + payload->text_size;
++    region->text_start = payload->text_addr;
++    region->text_end = payload->text_addr + payload->text_size;
+ 
+     /* Optional sections. */
+     for ( i = 0; i < BUGFRAME_NR; i++ )
+@@ -823,8 +823,9 @@ static int prepare_payload(struct payload *payload,
+             const void *instr = ALT_ORIG_PTR(a);
+             const void *replacement = ALT_REPL_PTR(a);
+ 
+-            if ( (instr < region->start && instr >= region->end) ||
+-                 (replacement < region->start && replacement >= region->end) )
++            if ( (instr < region->text_start && instr >= region->text_end) ||
++                 (replacement < region->text_start &&
++                  replacement >= region->text_end) )
+             {
+                 printk(XENLOG_ERR LIVEPATCH "%s Alt patching outside payload: %p\n",
+                        elf->name, instr);
+diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
+index 9f12c30efe..b22ffb75c4 100644
+--- a/xen/common/virtual_region.c
++++ b/xen/common/virtual_region.c
+@@ -11,15 +11,15 @@
+ 
+ static struct virtual_region core = {
+     .list = LIST_HEAD_INIT(core.list),
+-    .start = _stext,
+-    .end = _etext,
++    .text_start = _stext,
++    .text_end = _etext,
+ };
+ 
+ /* Becomes irrelevant when __init sections are cleared. */
+ static struct virtual_region core_init __initdata = {
+     .list = LIST_HEAD_INIT(core_init.list),
+-    .start = _sinittext,
+-    .end = _einittext,
++    .text_start = _sinittext,
++    .text_end = _einittext,
+ };
+ 
+ /*
+@@ -39,7 +39,8 @@ const struct virtual_region *find_text_region(unsigned long addr)
+     rcu_read_lock(&rcu_virtual_region_lock);
+     list_for_each_entry_rcu( region, &virtual_region_list, list )
+     {
+-        if ( (void *)addr >= region->start && (void *)addr < region->end )
++        if ( (void *)addr >= region->text_start &&
++             (void *)addr <  region->text_end )
+         {
+             rcu_read_unlock(&rcu_virtual_region_lock);
+             return region;
+@@ -88,8 +89,8 @@ void relax_virtual_region_perms(void)
+ 
+     rcu_read_lock(&rcu_virtual_region_lock);
+     list_for_each_entry_rcu( region, &virtual_region_list, list )
+-        modify_xen_mappings_lite((unsigned long)region->start,
+-                                 ROUNDUP((unsigned long)region->end, PAGE_SIZE),
++        modify_xen_mappings_lite((unsigned long)region->text_start,
++                                 PAGE_ALIGN((unsigned long)region->text_end),
+                                  PAGE_HYPERVISOR_RWX);
+     rcu_read_unlock(&rcu_virtual_region_lock);
+ }
+@@ -100,8 +101,8 @@ void tighten_virtual_region_perms(void)
+ 
+     rcu_read_lock(&rcu_virtual_region_lock);
+     list_for_each_entry_rcu( region, &virtual_region_list, list )
+-        modify_xen_mappings_lite((unsigned long)region->start,
+-                                 ROUNDUP((unsigned long)region->end, PAGE_SIZE),
++        modify_xen_mappings_lite((unsigned long)region->text_start,
++                                 PAGE_ALIGN((unsigned long)region->text_end),
+                                  PAGE_HYPERVISOR_RX);
+     rcu_read_unlock(&rcu_virtual_region_lock);
+ }
+diff --git a/xen/include/xen/virtual_region.h b/xen/include/xen/virtual_region.h
+index d053620711..442a45bf1f 100644
+--- a/xen/include/xen/virtual_region.h
++++ b/xen/include/xen/virtual_region.h
+@@ -9,11 +9,18 @@
+ #include <xen/list.h>
+ #include <xen/symbols.h>
+ 
++/*
++ * Despite it's name, this is a module(ish) description.
++ *
++ * There's one region for the runtime .text/etc, one region for .init during
++ * boot only, and one region per livepatch.
++ */
+ struct virtual_region
+ {
+     struct list_head list;
+-    const void *start;                /* Virtual address start. */
+-    const void *end;                  /* Virtual address end. */
++
++    const void *text_start;                /* .text virtual address start. */
++    const void *text_end;                  /* .text virtual address end. */
+ 
+     /* If this is NULL the default lookup mechanism is used. */
+     symbols_lookup_t *symbols_lookup;
+-- 
+2.44.0
+
diff --git a/0063-xen-virtual-region-Include-rodata-pointers.patch b/0063-xen-virtual-region-Include-rodata-pointers.patch
new file mode 100644
index 0000000..9f51d4d
--- /dev/null
+++ b/0063-xen-virtual-region-Include-rodata-pointers.patch
@@ -0,0 +1,71 @@
+From 335cbb55567b20df8e8bd2d1b340609e272ddab6 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:19:11 +0200
+Subject: [PATCH 63/67] xen/virtual-region: Include rodata pointers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+These are optional.  .init doesn't distinguish types of data like this, and
+livepatches don't necessarily have any .rodata either.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: ef969144a425e39f5b214a875b5713d0ea8575fb
+master date: 2024-03-07 14:24:42 +0000
+---
+ xen/common/livepatch.c           | 6 ++++++
+ xen/common/virtual_region.c      | 2 ++
+ xen/include/xen/virtual_region.h | 3 +++
+ 3 files changed, 11 insertions(+)
+
+diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
+index 29395f286f..28c09ddf58 100644
+--- a/xen/common/livepatch.c
++++ b/xen/common/livepatch.c
+@@ -788,6 +788,12 @@ static int prepare_payload(struct payload *payload,
+     region->text_start = payload->text_addr;
+     region->text_end = payload->text_addr + payload->text_size;
+ 
++    if ( payload->ro_size )
++    {
++        region->rodata_start = payload->ro_addr;
++        region->rodata_end = payload->ro_addr + payload->ro_size;
++    }
++
+     /* Optional sections. */
+     for ( i = 0; i < BUGFRAME_NR; i++ )
+     {
+diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
+index b22ffb75c4..9c566f8ec9 100644
+--- a/xen/common/virtual_region.c
++++ b/xen/common/virtual_region.c
+@@ -13,6 +13,8 @@ static struct virtual_region core = {
+     .list = LIST_HEAD_INIT(core.list),
+     .text_start = _stext,
+     .text_end = _etext,
++    .rodata_start = _srodata,
++    .rodata_end = _erodata,
+ };
+ 
+ /* Becomes irrelevant when __init sections are cleared. */
+diff --git a/xen/include/xen/virtual_region.h b/xen/include/xen/virtual_region.h
+index 442a45bf1f..dcdc95ba49 100644
+--- a/xen/include/xen/virtual_region.h
++++ b/xen/include/xen/virtual_region.h
+@@ -22,6 +22,9 @@ struct virtual_region
+     const void *text_start;                /* .text virtual address start. */
+     const void *text_end;                  /* .text virtual address end. */
+ 
++    const void *rodata_start;              /* .rodata virtual address start (optional). */
++    const void *rodata_end;                /* .rodata virtual address end. */
++
+     /* If this is NULL the default lookup mechanism is used. */
+     symbols_lookup_t *symbols_lookup;
+ 
+-- 
+2.44.0
+
diff --git a/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch b/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch
new file mode 100644
index 0000000..bc80769
--- /dev/null
+++ b/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch
@@ -0,0 +1,85 @@
+From c3ff11b11c21777a9b1c616607705f3a7340b391 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:19:36 +0200
+Subject: [PATCH 64/67] x86/livepatch: Relax permissions on rodata too
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reinstates the capability to patch .rodata in load/unload hooks, which
+was lost when we stopped using CR0.WP=0 to patch.
+
+This turns out to be rather less of a large TODO than I thought at the time.
+
+Fixes: 8676092a0f16 ("x86/livepatch: Fix livepatch application when CET is active")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+master commit: b083b1c393dc8961acf0959b1d2e0ad459985ae3
+master date: 2024-03-07 14:24:42 +0000
+---
+ xen/arch/x86/livepatch.c    |  4 ++--
+ xen/common/virtual_region.c | 12 ++++++++++++
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/livepatch.c b/xen/arch/x86/livepatch.c
+index ee539f001b..4f76127e1f 100644
+--- a/xen/arch/x86/livepatch.c
++++ b/xen/arch/x86/livepatch.c
+@@ -62,7 +62,7 @@ int arch_livepatch_safety_check(void)
+ int noinline arch_livepatch_quiesce(void)
+ {
+     /*
+-     * Relax perms on .text to be RWX, so we can modify them.
++     * Relax perms on .text/.rodata, so we can modify them.
+      *
+      * This relaxes perms globally, but all other CPUs are waiting on us.
+      */
+@@ -75,7 +75,7 @@ int noinline arch_livepatch_quiesce(void)
+ void noinline arch_livepatch_revive(void)
+ {
+     /*
+-     * Reinstate perms on .text to be RX.  This also cleans out the dirty
++     * Reinstate perms on .text/.rodata.  This also cleans out the dirty
+      * bits, which matters when CET Shstk is active.
+      *
+      * The other CPUs waiting for us could in principle have re-walked while
+diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c
+index 9c566f8ec9..aefc08e75f 100644
+--- a/xen/common/virtual_region.c
++++ b/xen/common/virtual_region.c
+@@ -91,9 +91,15 @@ void relax_virtual_region_perms(void)
+ 
+     rcu_read_lock(&rcu_virtual_region_lock);
+     list_for_each_entry_rcu( region, &virtual_region_list, list )
++    {
+         modify_xen_mappings_lite((unsigned long)region->text_start,
+                                  PAGE_ALIGN((unsigned long)region->text_end),
+                                  PAGE_HYPERVISOR_RWX);
++        if ( region->rodata_start )
++            modify_xen_mappings_lite((unsigned long)region->rodata_start,
++                                     PAGE_ALIGN((unsigned long)region->rodata_end),
++                                     PAGE_HYPERVISOR_RW);
++    }
+     rcu_read_unlock(&rcu_virtual_region_lock);
+ }
+ 
+@@ -103,9 +109,15 @@ void tighten_virtual_region_perms(void)
+ 
+     rcu_read_lock(&rcu_virtual_region_lock);
+     list_for_each_entry_rcu( region, &virtual_region_list, list )
++    {
+         modify_xen_mappings_lite((unsigned long)region->text_start,
+                                  PAGE_ALIGN((unsigned long)region->text_end),
+                                  PAGE_HYPERVISOR_RX);
++        if ( region->rodata_start )
++            modify_xen_mappings_lite((unsigned long)region->rodata_start,
++                                     PAGE_ALIGN((unsigned long)region->rodata_end),
++                                     PAGE_HYPERVISOR_RO);
++    }
+     rcu_read_unlock(&rcu_virtual_region_lock);
+ }
+ #endif /* CONFIG_X86 */
+-- 
+2.44.0
+
diff --git a/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch b/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch
new file mode 100644
index 0000000..4a46326
--- /dev/null
+++ b/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch
@@ -0,0 +1,106 @@
+From 846fb984b506135917c2862d2e4607005d6afdeb Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:20:09 +0200
+Subject: [PATCH 65/67] x86/boot: Improve the boot watchdog determination of
+ stuck cpus
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Right now, check_nmi_watchdog() has two processing loops over all online CPUs
+using prev_nmi_count as storage.
+
+Use a cpumask_t instead (1/32nd as much initdata) and have wait_for_nmis()
+make the determination of whether it is stuck, rather than having both
+functions needing to agree on how many ticks mean stuck.
+
+More importantly though, it means we can use the standard cpumask
+infrastructure, including turning this:
+
+  (XEN) Brought up 512 CPUs
+  (XEN) Testing NMI watchdog on all CPUs: {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511} stuck
+
+into the rather more manageable:
+
+  (XEN) Brought up 512 CPUs
+  (XEN) Testing NMI watchdog on all CPUs: {0-511} stuck
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 9e18f339830c828798aef465556d4029d83476a0
+master date: 2024-03-19 18:29:37 +0000
+---
+ xen/arch/x86/nmi.c | 33 ++++++++++++++-------------------
+ 1 file changed, 14 insertions(+), 19 deletions(-)
+
+diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
+index 7c9591b65e..dd31034ac8 100644
+--- a/xen/arch/x86/nmi.c
++++ b/xen/arch/x86/nmi.c
+@@ -150,6 +150,8 @@ int nmi_active;
+ 
+ static void __init cf_check wait_for_nmis(void *p)
+ {
++    cpumask_t *stuck_cpus = p;
++    unsigned int cpu = smp_processor_id();
+     unsigned int start_count = this_cpu(nmi_count);
+     unsigned long ticks = 10 * 1000 * cpu_khz / nmi_hz;
+     unsigned long s, e;
+@@ -158,42 +160,35 @@ static void __init cf_check wait_for_nmis(void *p)
+     do {
+         cpu_relax();
+         if ( this_cpu(nmi_count) >= start_count + 2 )
+-            break;
++            return;
++
+         e = rdtsc();
+-    } while( e - s < ticks );
++    } while ( e - s < ticks );
++
++    /* Timeout.  Mark ourselves as stuck. */
++    cpumask_set_cpu(cpu, stuck_cpus);
+ }
+ 
+ void __init check_nmi_watchdog(void)
+ {
+-    static unsigned int __initdata prev_nmi_count[NR_CPUS];
+-    int cpu;
+-    bool ok = true;
++    static cpumask_t __initdata stuck_cpus;
+ 
+     if ( nmi_watchdog == NMI_NONE )
+         return;
+ 
+     printk("Testing NMI watchdog on all CPUs:");
+ 
+-    for_each_online_cpu ( cpu )
+-        prev_nmi_count[cpu] = per_cpu(nmi_count, cpu);
+-
+     /*
+      * Wait at most 10 ticks for 2 watchdog NMIs on each CPU.
+      * Busy-wait on all CPUs: the LAPIC counter that the NMI watchdog
+      * uses only runs while the core's not halted
+      */
+-    on_selected_cpus(&cpu_online_map, wait_for_nmis, NULL, 1);
+-
+-    for_each_online_cpu ( cpu )
+-    {
+-        if ( per_cpu(nmi_count, cpu) - prev_nmi_count[cpu] < 2 )
+-        {
+-            printk(" %d", cpu);
+-            ok = false;
+-        }
+-    }
++    on_selected_cpus(&cpu_online_map, wait_for_nmis, &stuck_cpus, 1);
+ 
+-    printk(" %s\n", ok ? "ok" : "stuck");
++    if ( cpumask_empty(&stuck_cpus) )
++        printk("ok\n");
++    else
++        printk("{%*pbl} stuck\n", CPUMASK_PR(&stuck_cpus));
+ 
+     /*
+      * Now that we know it works we can reduce NMI frequency to
+-- 
+2.44.0
+
diff --git a/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch b/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch
new file mode 100644
index 0000000..e501861
--- /dev/null
+++ b/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch
@@ -0,0 +1,48 @@
+From 2777b499f1f6d5cea68f9479f82d055542b822ad Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:20:30 +0200
+Subject: [PATCH 66/67] x86/boot: Support the watchdog on newer AMD systems
+
+The MSRs used by setup_k7_watchdog() are architectural in 64bit.  The Unit
+Select (0x76, cycles not in halt state) isn't, but it hasn't changed in 25
+years, making this a trend likely to continue.
+
+Drop the family check.  If the Unit Select does happen to change meaning in
+the future, check_nmi_watchdog() will still notice the watchdog not operating
+as expected.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 131892e0dcc1265b621c2b7d844cb9e7c3a4404f
+master date: 2024-03-19 18:29:37 +0000
+---
+ xen/arch/x86/nmi.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
+index dd31034ac8..c7c51614a6 100644
+--- a/xen/arch/x86/nmi.c
++++ b/xen/arch/x86/nmi.c
+@@ -386,15 +386,12 @@ void setup_apic_nmi_watchdog(void)
+     if ( nmi_watchdog == NMI_NONE )
+         return;
+ 
+-    switch (boot_cpu_data.x86_vendor) {
++    switch ( boot_cpu_data.x86_vendor )
++    {
+     case X86_VENDOR_AMD:
+-        switch (boot_cpu_data.x86) {
+-        case 6:
+-        case 0xf ... 0x19:
+-            setup_k7_watchdog();
+-            break;
+-        }
++        setup_k7_watchdog();
+         break;
++
+     case X86_VENDOR_INTEL:
+         switch (boot_cpu_data.x86) {
+         case 6:
+-- 
+2.44.0
+
diff --git a/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch b/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch
new file mode 100644
index 0000000..5ce4e17
--- /dev/null
+++ b/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch
@@ -0,0 +1,110 @@
+From 9bc40dbcf9eafccc1923b2555286bf6a2af03b7a Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 2 Apr 2024 16:24:07 +0200
+Subject: [PATCH 67/67] tests/resource: Fix HVM guest in !SHADOW builds
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Right now, test-resource always creates HVM Shadow guests.  But if Xen has
+SHADOW compiled out, running the test yields:
+
+  $./test-resource
+  XENMEM_acquire_resource tests
+  Test x86 PV
+    Created d1
+    Test grant table
+  Test x86 PVH
+    Skip: 95 - Operation not supported
+
+and doesn't really test HVM guests, but doesn't fail either.
+
+There's nothing paging-mode-specific about this test, so default to HAP if
+possible and provide a more specific message if neither HAP nor Shadow is
+available.
+
+As we've got physinfo to hand, also provide a more specific message about
+the absence of PV or HVM support.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 0263dc9069ddb66335c72a159e09050b1600e56a
+master date: 2024-03-01 20:14:19 +0000
+---
+ tools/tests/resource/test-resource.c | 39 ++++++++++++++++++++++++++++
+ 1 file changed, 39 insertions(+)
+
+diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c
+index 0a950072f9..e2c4ba3478 100644
+--- a/tools/tests/resource/test-resource.c
++++ b/tools/tests/resource/test-resource.c
+@@ -20,6 +20,8 @@ static xc_interface *xch;
+ static xenforeignmemory_handle *fh;
+ static xengnttab_handle *gh;
+ 
++static xc_physinfo_t physinfo;
++
+ static void test_gnttab(uint32_t domid, unsigned int nr_frames,
+                         unsigned long gfn)
+ {
+@@ -172,6 +174,37 @@ static void test_domain_configurations(void)
+ 
+         printf("Test %s\n", t->name);
+ 
++#if defined(__x86_64__) || defined(__i386__)
++        if ( t->create.flags & XEN_DOMCTL_CDF_hvm )
++        {
++            if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hvm) )
++            {
++                printf("  Skip: HVM not available\n");
++                continue;
++            }
++
++            /*
++             * On x86, use HAP guests if possible, but skip if neither HAP nor
++             * SHADOW is available.
++             */
++            if ( physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hap )
++                t->create.flags |= XEN_DOMCTL_CDF_hap;
++            else if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_shadow) )
++            {
++                printf("  Skip: Neither HAP or SHADOW available\n");
++                continue;
++            }
++        }
++        else
++        {
++            if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_pv) )
++            {
++                printf("  Skip: PV not available\n");
++                continue;
++            }
++        }
++#endif
++
+         rc = xc_domain_create(xch, &domid, &t->create);
+         if ( rc )
+         {
+@@ -214,6 +247,8 @@ static void test_domain_configurations(void)
+ 
+ int main(int argc, char **argv)
+ {
++    int rc;
++
+     printf("XENMEM_acquire_resource tests\n");
+ 
+     xch = xc_interface_open(NULL, NULL, 0);
+@@ -227,6 +262,10 @@ int main(int argc, char **argv)
+     if ( !gh )
+         err(1, "xengnttab_open");
+ 
++    rc = xc_physinfo(xch, &physinfo);
++    if ( rc )
++        err(1, "Failed to obtain physinfo");
++
+     test_domain_configurations();
+ 
+     return !!nr_failures;
+-- 
+2.44.0
+
diff --git a/info.txt b/info.txt
index 0a99509..fa9f510 100644
--- a/info.txt
+++ b/info.txt
@@ -1,6 +1,6 @@
-Xen upstream patchset #0 for 4.17.4-pre
+Xen upstream patchset #1 for 4.17.4-pre
 
 Containing patches from
 RELEASE-4.17.3 (07f413d7ffb06eab36045bd19f53555de1cacf62)
 to
-staging-4.17 (091466ba55d1e2e75738f751818ace2e3ed08ccf)
+staging-4.17 (9bc40dbcf9eafccc1923b2555286bf6a2af03b7a)
author	Tomáš Mózes <hydrapolic@gmail.com>	2024-04-05 08:59:40 +0200
committer	Tomáš Mózes <hydrapolic@gmail.com>	2024-04-05 08:59:40 +0200
commit	d0ce95087288b30e5e211bac8e9a0817f2effcf5 (patch)
tree	ce2e128cfdf8d491a494d6583979bc5330db21e2
parent	Xen 4.17.4-pre-patchset-0 (diff)
download	xen-upstream-patches-main.tar.gz xen-upstream-patches-main.tar.bz2 xen-upstream-patches-main.zip