summaryrefslogtreecommitdiff
path: root/4.8.10
diff options
context:
space:
mode:
Diffstat (limited to '4.8.10')
-rw-r--r-- 4.8.10/0000_README 52
-rw-r--r-- 4.8.10/1008_linux-4.8.9.patch 3120
-rw-r--r-- 4.8.10/1009_linux-4.8.10.patch 4759
-rw-r--r-- 4.8.10/4420_grsecurity-3.1-4.8.10-201611232213.patch 220815
-rw-r--r-- 4.8.10/4425_grsec_remove_EI_PAX.patch 19
-rw-r--r-- 4.8.10/4427_force_XATTR_PAX_tmpfs.patch 48
-rw-r--r-- 4.8.10/4430_grsec-remove-localversion-grsec.patch 9
-rw-r--r-- 4.8.10/4435_grsec-mute-warnings.patch 43
-rw-r--r-- 4.8.10/4440_grsec-remove-protected-paths.patch 20
-rw-r--r-- 4.8.10/4450_grsec-kconfig-default-gids.patch 111
-rw-r--r-- 4.8.10/4465_selinux-avc_audit-log-curr_ip.patch 73
-rw-r--r-- 4.8.10/4470_disable-compat_vdso.patch 58
-rw-r--r-- 4.8.10/4475_emutramp_default_on.patch 34
13 files changed, 229161 insertions, 0 deletions
diff --git a/4.8.10/0000_README b/4.8.10/0000_README
new file mode 100644
index 0000000..c32fdff
--- /dev/null
+++ b/4.8.10/0000_README
@@ -0,0 +1,52 @@
+README
+-----------------------------------------------------------------------------
+Individual Patch Descriptions:
+-----------------------------------------------------------------------------
+Patch: 1008_linux-4.8.9.patch
+From: http://www.kernel.org
+Desc: Linux 4.8.9
+
+Patch: 1009_linux-4.8.10.patch
+From: http://www.kernel.org
+Desc: Linux 4.8.10
+
+Patch: 4420_grsecurity-3.1-4.8.10-201611232213.patch
+From: http://www.grsecurity.net
+Desc: hardened-sources base patch from upstream grsecurity
+
+Patch: 4425_grsec_remove_EI_PAX.patch
+From: Anthony G. Basile <blueness@gentoo.org>
+Desc: Remove EI_PAX option and force off
+
+Patch: 4430_grsec-remove-localversion-grsec.patch
+From: Kerin Millar <kerframil@gmail.com>
+Desc: Removes grsecurity's localversion-grsec file
+
+Patch: 4435_grsec-mute-warnings.patch
+From: Alexander Gabert <gaberta@fh-trier.de>
+ Gordon Malm <gengor@gentoo.org>
+Desc: Removes verbose compile warning settings from grsecurity, restores
+ mainline Linux kernel behavior
+
+Patch: 4440_grsec-remove-protected-paths.patch
+From: Anthony G. Basile <blueness@gentoo.org>
+Desc: Removes chmod statements from grsecurity/Makefile
+
+Patch: 4450_grsec-kconfig-default-gids.patch
+From: Kerin Millar <kerframil@gmail.com>
+Desc: Sets sane(r) default GIDs on various grsecurity group-dependent
+ features
+
+Patch: 4465_selinux-avc_audit-log-curr_ip.patch
+From: Gordon Malm <gengor@gentoo.org>
+ Anthony G. Basile <blueness@gentoo.org>
+Desc: Configurable option to add src IP address to SELinux log messages
+
+Patch: 4470_disable-compat_vdso.patch
+From: Gordon Malm <gengor@gentoo.org>
+ Kerin Millar <kerframil@gmail.com>
+Desc: Disables COMPAT_VDSO operation completely
+
+Patch: 4475_emutramp_default_on.patch
+From: Anthony G. Basile <blueness@gentoo.org>
+Desc: Set PAX_EMUTRAMP default on for libffi, bugs #329499 and #457194
diff --git a/4.8.10/1008_linux-4.8.9.patch b/4.8.10/1008_linux-4.8.9.patch
new file mode 100644
index 0000000..2f909eb
--- /dev/null
+++ b/4.8.10/1008_linux-4.8.9.patch
@@ -0,0 +1,3120 @@
+diff --git a/Makefile b/Makefile
+index 8f18daa..c1519ab 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 8
+-SUBLEVEL = 8
++SUBLEVEL = 9
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
+index f927b8d..c10390d 100644
+--- a/arch/arc/kernel/time.c
++++ b/arch/arc/kernel/time.c
+@@ -152,14 +152,17 @@ static cycle_t arc_read_rtc(struct clocksource *cs)
+ cycle_t full;
+ } stamp;
+
+-
+- __asm__ __volatile(
+- "1: \n"
+- " lr %0, [AUX_RTC_LOW] \n"
+- " lr %1, [AUX_RTC_HIGH] \n"
+- " lr %2, [AUX_RTC_CTRL] \n"
+- " bbit0.nt %2, 31, 1b \n"
+- : "=r" (stamp.low), "=r" (stamp.high), "=r" (status));
++ /*
++ * hardware has an internal state machine which tracks readout of
++ * low/high and updates the CTRL.status if
++ * - interrupt/exception taken between the two reads
++ * - high increments after low has been read
++ */
++ do {
++ stamp.low = read_aux_reg(AUX_RTC_LOW);
++ stamp.high = read_aux_reg(AUX_RTC_HIGH);
++ status = read_aux_reg(AUX_RTC_CTRL);
++ } while (!(status & _BITUL(31)));
+
+ return stamp.full;
+ }
+diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
+index 20afc65..9288851 100644
+--- a/arch/arc/mm/dma.c
++++ b/arch/arc/mm/dma.c
+@@ -105,6 +105,31 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
+ __free_pages(page, get_order(size));
+ }
+
++static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
++ void *cpu_addr, dma_addr_t dma_addr, size_t size,
++ unsigned long attrs)
++{
++ unsigned long user_count = vma_pages(vma);
++ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
++ unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr));
++ unsigned long off = vma->vm_pgoff;
++ int ret = -ENXIO;
++
++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
++
++ if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
++ return ret;
++
++ if (off < count && user_count <= (count - off)) {
++ ret = remap_pfn_range(vma, vma->vm_start,
++ pfn + off,
++ user_count << PAGE_SHIFT,
++ vma->vm_page_prot);
++ }
++
++ return ret;
++}
++
+ /*
+ * streaming DMA Mapping API...
+ * CPU accesses page via normal paddr, thus needs to explicitly made
+@@ -193,6 +218,7 @@ static int arc_dma_supported(struct device *dev, u64 dma_mask)
+ struct dma_map_ops arc_dma_ops = {
+ .alloc = arc_dma_alloc,
+ .free = arc_dma_free,
++ .mmap = arc_dma_mmap,
+ .map_page = arc_dma_map_page,
+ .map_sg = arc_dma_map_sg,
+ .sync_single_for_device = arc_dma_sync_single_for_device,
+diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
+index 28f03ca..794bebb 100644
+--- a/arch/s390/hypfs/hypfs_diag.c
++++ b/arch/s390/hypfs/hypfs_diag.c
+@@ -363,11 +363,11 @@ static void *diag204_store(void)
+ static int diag224_get_name_table(void)
+ {
+ /* memory must be below 2GB */
+- diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
++ diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
+ if (!diag224_cpu_names)
+ return -ENOMEM;
+ if (diag224(diag224_cpu_names)) {
+- kfree(diag224_cpu_names);
++ free_page((unsigned long) diag224_cpu_names);
+ return -EOPNOTSUPP;
+ }
+ EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+@@ -376,7 +376,7 @@ static int diag224_get_name_table(void)
+
+ static void diag224_delete_name_table(void)
+ {
+- kfree(diag224_cpu_names);
++ free_page((unsigned long) diag224_cpu_names);
+ }
+
+ static int diag224_idx2name(int index, char *name)
+diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
+index 0332317..602af69 100644
+--- a/arch/s390/include/asm/processor.h
++++ b/arch/s390/include/asm/processor.h
+@@ -192,7 +192,7 @@ struct task_struct;
+ struct mm_struct;
+ struct seq_file;
+
+-typedef int (*dump_trace_func_t)(void *data, unsigned long address);
++typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
+ void dump_trace(dump_trace_func_t func, void *data,
+ struct task_struct *task, unsigned long sp);
+
+diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
+index 6693383..518f615 100644
+--- a/arch/s390/kernel/dumpstack.c
++++ b/arch/s390/kernel/dumpstack.c
+@@ -38,10 +38,10 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+ if (sp < low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
++ if (func(data, sf->gprs[8], 0))
++ return sp;
+ /* Follow the backchain. */
+ while (1) {
+- if (func(data, sf->gprs[8]))
+- return sp;
+ low = sp;
+ sp = sf->back_chain;
+ if (!sp)
+@@ -49,6 +49,8 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
++ if (func(data, sf->gprs[8], 1))
++ return sp;
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ sp = (unsigned long) (sf + 1);
+@@ -56,7 +58,7 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+ return sp;
+ regs = (struct pt_regs *) sp;
+ if (!user_mode(regs)) {
+- if (func(data, regs->psw.addr))
++ if (func(data, regs->psw.addr, 1))
+ return sp;
+ }
+ low = sp;
+@@ -90,7 +92,7 @@ struct return_address_data {
+ int depth;
+ };
+
+-static int __return_address(void *data, unsigned long address)
++static int __return_address(void *data, unsigned long address, int reliable)
+ {
+ struct return_address_data *rd = data;
+
+@@ -109,9 +111,12 @@ unsigned long return_address(int depth)
+ }
+ EXPORT_SYMBOL_GPL(return_address);
+
+-static int show_address(void *data, unsigned long address)
++static int show_address(void *data, unsigned long address, int reliable)
+ {
+- printk("([<%016lx>] %pSR)\n", address, (void *)address);
++ if (reliable)
++ printk(" [<%016lx>] %pSR \n", address, (void *)address);
++ else
++ printk("([<%016lx>] %pSR)\n", address, (void *)address);
+ return 0;
+ }
+
+diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
+index 17431f6..955a7b6 100644
+--- a/arch/s390/kernel/perf_event.c
++++ b/arch/s390/kernel/perf_event.c
+@@ -222,7 +222,7 @@ static int __init service_level_perf_register(void)
+ }
+ arch_initcall(service_level_perf_register);
+
+-static int __perf_callchain_kernel(void *data, unsigned long address)
++static int __perf_callchain_kernel(void *data, unsigned long address, int reliable)
+ {
+ struct perf_callchain_entry_ctx *entry = data;
+
+diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
+index 44f84b2..355db9d 100644
+--- a/arch/s390/kernel/stacktrace.c
++++ b/arch/s390/kernel/stacktrace.c
+@@ -27,12 +27,12 @@ static int __save_address(void *data, unsigned long address, int nosched)
+ return 1;
+ }
+
+-static int save_address(void *data, unsigned long address)
++static int save_address(void *data, unsigned long address, int reliable)
+ {
+ return __save_address(data, address, 0);
+ }
+
+-static int save_address_nosched(void *data, unsigned long address)
++static int save_address_nosched(void *data, unsigned long address, int reliable)
+ {
+ return __save_address(data, address, 1);
+ }
+diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
+index 16f4c39..9a4de45 100644
+--- a/arch/s390/oprofile/init.c
++++ b/arch/s390/oprofile/init.c
+@@ -13,7 +13,7 @@
+ #include <linux/init.h>
+ #include <asm/processor.h>
+
+-static int __s390_backtrace(void *data, unsigned long address)
++static int __s390_backtrace(void *data, unsigned long address, int reliable)
+ {
+ unsigned int *depth = data;
+
+diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
+index 77f28ce..9976fce 100644
+--- a/arch/x86/entry/Makefile
++++ b/arch/x86/entry/Makefile
+@@ -5,8 +5,8 @@
+ OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
+ OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
+
+-CFLAGS_syscall_64.o += -Wno-override-init
+-CFLAGS_syscall_32.o += -Wno-override-init
++CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
++CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,)
+ obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+ obj-y += common.o
+
+diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
+index fbd1944..d99ca57 100644
+--- a/arch/x86/kernel/acpi/boot.c
++++ b/arch/x86/kernel/acpi/boot.c
+@@ -453,6 +453,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
+ polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
+
+ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
++ acpi_penalize_sci_irq(bus_irq, trigger, polarity);
+
+ /*
+ * stash over-ride to indicate we've been here
+diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
+index 60746ef..caea575 100644
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -662,7 +662,7 @@ static int ghes_proc(struct ghes *ghes)
+ ghes_do_proc(ghes, ghes->estatus);
+ out:
+ ghes_clear_estatus(ghes);
+- return 0;
++ return rc;
+ }
+
+ static void ghes_add_timer(struct ghes *ghes)
+diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
+index c983bf7..bc3d914 100644
+--- a/drivers/acpi/pci_link.c
++++ b/drivers/acpi/pci_link.c
+@@ -87,6 +87,7 @@ struct acpi_pci_link {
+
+ static LIST_HEAD(acpi_link_list);
+ static DEFINE_MUTEX(acpi_link_lock);
++static int sci_irq = -1, sci_penalty;
+
+ /* --------------------------------------------------------------------------
+ PCI Link Device Management
+@@ -496,25 +497,13 @@ static int acpi_irq_get_penalty(int irq)
+ {
+ int penalty = 0;
+
+- /*
+- * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict
+- * with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be
+- * use for PCI IRQs.
+- */
+- if (irq == acpi_gbl_FADT.sci_interrupt) {
+- u32 type = irq_get_trigger_type(irq) & IRQ_TYPE_SENSE_MASK;
+-
+- if (type != IRQ_TYPE_LEVEL_LOW)
+- penalty += PIRQ_PENALTY_ISA_ALWAYS;
+- else
+- penalty += PIRQ_PENALTY_PCI_USING;
+- }
++ if (irq == sci_irq)
++ penalty += sci_penalty;
+
+ if (irq < ACPI_MAX_ISA_IRQS)
+ return penalty + acpi_isa_irq_penalty[irq];
+
+- penalty += acpi_irq_pci_sharing_penalty(irq);
+- return penalty;
++ return penalty + acpi_irq_pci_sharing_penalty(irq);
+ }
+
+ int __init acpi_irq_penalty_init(void)
+@@ -619,6 +608,10 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link)
+ acpi_device_bid(link->device));
+ return -ENODEV;
+ } else {
++ if (link->irq.active < ACPI_MAX_ISA_IRQS)
++ acpi_isa_irq_penalty[link->irq.active] +=
++ PIRQ_PENALTY_PCI_USING;
++
+ printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
+ acpi_device_name(link->device),
+ acpi_device_bid(link->device), link->irq.active);
+@@ -849,7 +842,7 @@ static int __init acpi_irq_penalty_update(char *str, int used)
+ continue;
+
+ if (used)
+- new_penalty = acpi_irq_get_penalty(irq) +
++ new_penalty = acpi_isa_irq_penalty[irq] +
+ PIRQ_PENALTY_ISA_USED;
+ else
+ new_penalty = 0;
+@@ -871,7 +864,7 @@ static int __init acpi_irq_penalty_update(char *str, int used)
+ void acpi_penalize_isa_irq(int irq, int active)
+ {
+ if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
+- acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
++ acpi_isa_irq_penalty[irq] +=
+ (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
+ }
+
+@@ -881,6 +874,17 @@ bool acpi_isa_irq_available(int irq)
+ acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
+ }
+
++void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
++{
++ sci_irq = irq;
++
++ if (trigger == ACPI_MADT_TRIGGER_LEVEL &&
++ polarity == ACPI_MADT_POLARITY_ACTIVE_LOW)
++ sci_penalty = PIRQ_PENALTY_PCI_USING;
++ else
++ sci_penalty = PIRQ_PENALTY_ISA_ALWAYS;
++}
++
+ /*
+ * Over-ride default table to reserve additional IRQs for use by ISA
+ * e.g. acpi_irq_isa=5
+diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
+index 100be55..8348272 100644
+--- a/drivers/block/drbd/drbd_main.c
++++ b/drivers/block/drbd/drbd_main.c
+@@ -1871,7 +1871,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
+ drbd_update_congested(connection);
+ }
+ do {
+- rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
++ rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
+ if (rv == -EAGAIN) {
+ if (we_should_drop_the_connection(connection, sock))
+ break;
+diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
+index 4431129..0f7d28a 100644
+--- a/drivers/char/agp/intel-gtt.c
++++ b/drivers/char/agp/intel-gtt.c
+@@ -845,6 +845,8 @@ void intel_gtt_insert_page(dma_addr_t addr,
+ unsigned int flags)
+ {
+ intel_private.driver->write_entry(addr, pg, flags);
++ if (intel_private.driver->chipset_flush)
++ intel_private.driver->chipset_flush();
+ }
+ EXPORT_SYMBOL(intel_gtt_insert_page);
+
+diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
+index 9203f2d..340f96e 100644
+--- a/drivers/char/hw_random/core.c
++++ b/drivers/char/hw_random/core.c
+@@ -84,14 +84,14 @@ static size_t rng_buffer_size(void)
+
+ static void add_early_randomness(struct hwrng *rng)
+ {
+- unsigned char bytes[16];
+ int bytes_read;
++ size_t size = min_t(size_t, 16, rng_buffer_size());
+
+ mutex_lock(&reading_mutex);
+- bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1);
++ bytes_read = rng_get_data(rng, rng_buffer, size, 1);
+ mutex_unlock(&reading_mutex);
+ if (bytes_read > 0)
+- add_device_randomness(bytes, bytes_read);
++ add_device_randomness(rng_buffer, bytes_read);
+ }
+
+ static inline void cleanup_rng(struct kref *kref)
+diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
+index 20b1055..80ae2a5 100644
+--- a/drivers/clk/clk-qoriq.c
++++ b/drivers/clk/clk-qoriq.c
+@@ -700,6 +700,7 @@ static struct clk * __init create_mux_common(struct clockgen *cg,
+ struct mux_hwclock *hwc,
+ const struct clk_ops *ops,
+ unsigned long min_rate,
++ unsigned long max_rate,
+ unsigned long pct80_rate,
+ const char *fmt, int idx)
+ {
+@@ -728,6 +729,8 @@ static struct clk * __init create_mux_common(struct clockgen *cg,
+ continue;
+ if (rate < min_rate)
+ continue;
++ if (rate > max_rate)
++ continue;
+
+ parent_names[j] = div->name;
+ hwc->parent_to_clksel[j] = i;
+@@ -759,7 +762,7 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
+ struct mux_hwclock *hwc;
+ const struct clockgen_pll_div *div;
+ unsigned long plat_rate, min_rate;
+- u64 pct80_rate;
++ u64 max_rate, pct80_rate;
+ u32 clksel;
+
+ hwc = kzalloc(sizeof(*hwc), GFP_KERNEL);
+@@ -787,8 +790,8 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
+ return NULL;
+ }
+
+- pct80_rate = clk_get_rate(div->clk);
+- pct80_rate *= 8;
++ max_rate = clk_get_rate(div->clk);
++ pct80_rate = max_rate * 8;
+ do_div(pct80_rate, 10);
+
+ plat_rate = clk_get_rate(cg->pll[PLATFORM_PLL].div[PLL_DIV1].clk);
+@@ -798,7 +801,7 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
+ else
+ min_rate = plat_rate / 2;
+
+- return create_mux_common(cg, hwc, &cmux_ops, min_rate,
++ return create_mux_common(cg, hwc, &cmux_ops, min_rate, max_rate,
+ pct80_rate, "cg-cmux%d", idx);
+ }
+
+@@ -813,7 +816,7 @@ static struct clk * __init create_one_hwaccel(struct clockgen *cg, int idx)
+ hwc->reg = cg->regs + 0x20 * idx + 0x10;
+ hwc->info = cg->info.hwaccel[idx];
+
+- return create_mux_common(cg, hwc, &hwaccel_ops, 0, 0,
++ return create_mux_common(cg, hwc, &hwaccel_ops, 0, ULONG_MAX, 0,
+ "cg-hwaccel%d", idx);
+ }
+
+diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c
+index bdf8b97..0fa91f3 100644
+--- a/drivers/clk/samsung/clk-exynos-audss.c
++++ b/drivers/clk/samsung/clk-exynos-audss.c
+@@ -82,6 +82,7 @@ static const struct of_device_id exynos_audss_clk_of_match[] = {
+ .data = (void *)TYPE_EXYNOS5420, },
+ {},
+ };
++MODULE_DEVICE_TABLE(of, exynos_audss_clk_of_match);
+
+ static void exynos_audss_clk_teardown(void)
+ {
+diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
+index c184eb8..4f87f3e 100644
+--- a/drivers/clocksource/timer-sun5i.c
++++ b/drivers/clocksource/timer-sun5i.c
+@@ -152,6 +152,13 @@ static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
+ return IRQ_HANDLED;
+ }
+
++static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
++{
++ struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc);
++
++ return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1));
++}
++
+ static int sun5i_rate_cb_clksrc(struct notifier_block *nb,
+ unsigned long event, void *data)
+ {
+@@ -210,8 +217,13 @@ static int __init sun5i_setup_clocksource(struct device_node *node,
+ writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+ base + TIMER_CTL_REG(1));
+
+- ret = clocksource_mmio_init(base + TIMER_CNTVAL_LO_REG(1), node->name,
+- rate, 340, 32, clocksource_mmio_readl_down);
++ cs->clksrc.name = node->name;
++ cs->clksrc.rating = 340;
++ cs->clksrc.read = sun5i_clksrc_read;
++ cs->clksrc.mask = CLOCKSOURCE_MASK(32);
++ cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
++
++ ret = clocksource_register_hz(&cs->clksrc, rate);
+ if (ret) {
+ pr_err("Couldn't register clock source.\n");
+ goto err_remove_notifier;
+diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
+index cd5dc27..1ed6132 100644
+--- a/drivers/gpio/gpio-mvebu.c
++++ b/drivers/gpio/gpio-mvebu.c
+@@ -293,10 +293,10 @@ static void mvebu_gpio_irq_ack(struct irq_data *d)
+ {
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct mvebu_gpio_chip *mvchip = gc->private;
+- u32 mask = ~(1 << (d->irq - gc->irq_base));
++ u32 mask = d->mask;
+
+ irq_gc_lock(gc);
+- writel_relaxed(mask, mvebu_gpioreg_edge_cause(mvchip));
++ writel_relaxed(~mask, mvebu_gpioreg_edge_cause(mvchip));
+ irq_gc_unlock(gc);
+ }
+
+@@ -305,7 +305,7 @@ static void mvebu_gpio_edge_irq_mask(struct irq_data *d)
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct mvebu_gpio_chip *mvchip = gc->private;
+ struct irq_chip_type *ct = irq_data_get_chip_type(d);
+- u32 mask = 1 << (d->irq - gc->irq_base);
++ u32 mask = d->mask;
+
+ irq_gc_lock(gc);
+ ct->mask_cache_priv &= ~mask;
+@@ -319,8 +319,7 @@ static void mvebu_gpio_edge_irq_unmask(struct irq_data *d)
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct mvebu_gpio_chip *mvchip = gc->private;
+ struct irq_chip_type *ct = irq_data_get_chip_type(d);
+-
+- u32 mask = 1 << (d->irq - gc->irq_base);
++ u32 mask = d->mask;
+
+ irq_gc_lock(gc);
+ ct->mask_cache_priv |= mask;
+@@ -333,8 +332,7 @@ static void mvebu_gpio_level_irq_mask(struct irq_data *d)
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct mvebu_gpio_chip *mvchip = gc->private;
+ struct irq_chip_type *ct = irq_data_get_chip_type(d);
+-
+- u32 mask = 1 << (d->irq - gc->irq_base);
++ u32 mask = d->mask;
+
+ irq_gc_lock(gc);
+ ct->mask_cache_priv &= ~mask;
+@@ -347,8 +345,7 @@ static void mvebu_gpio_level_irq_unmask(struct irq_data *d)
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct mvebu_gpio_chip *mvchip = gc->private;
+ struct irq_chip_type *ct = irq_data_get_chip_type(d);
+-
+- u32 mask = 1 << (d->irq - gc->irq_base);
++ u32 mask = d->mask;
+
+ irq_gc_lock(gc);
+ ct->mask_cache_priv |= mask;
+@@ -462,7 +459,7 @@ static void mvebu_gpio_irq_handler(struct irq_desc *desc)
+ for (i = 0; i < mvchip->chip.ngpio; i++) {
+ int irq;
+
+- irq = mvchip->irqbase + i;
++ irq = irq_find_mapping(mvchip->domain, i);
+
+ if (!(cause & (1 << i)))
+ continue;
+@@ -655,6 +652,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
+ struct irq_chip_type *ct;
+ struct clk *clk;
+ unsigned int ngpios;
++ bool have_irqs;
+ int soc_variant;
+ int i, cpu, id;
+ int err;
+@@ -665,6 +663,9 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
+ else
+ soc_variant = MVEBU_GPIO_SOC_VARIANT_ORION;
+
++ /* Some gpio controllers do not provide irq support */
++ have_irqs = of_irq_count(np) != 0;
++
+ mvchip = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_gpio_chip),
+ GFP_KERNEL);
+ if (!mvchip)
+@@ -697,7 +698,8 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
+ mvchip->chip.get = mvebu_gpio_get;
+ mvchip->chip.direction_output = mvebu_gpio_direction_output;
+ mvchip->chip.set = mvebu_gpio_set;
+- mvchip->chip.to_irq = mvebu_gpio_to_irq;
++ if (have_irqs)
++ mvchip->chip.to_irq = mvebu_gpio_to_irq;
+ mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK;
+ mvchip->chip.ngpio = ngpios;
+ mvchip->chip.can_sleep = false;
+@@ -758,34 +760,30 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
+ devm_gpiochip_add_data(&pdev->dev, &mvchip->chip, mvchip);
+
+ /* Some gpio controllers do not provide irq support */
+- if (!of_irq_count(np))
++ if (!have_irqs)
+ return 0;
+
+- /* Setup the interrupt handlers. Each chip can have up to 4
+- * interrupt handlers, with each handler dealing with 8 GPIO
+- * pins. */
+- for (i = 0; i < 4; i++) {
+- int irq = platform_get_irq(pdev, i);
+-
+- if (irq < 0)
+- continue;
+- irq_set_chained_handler_and_data(irq, mvebu_gpio_irq_handler,
+- mvchip);
+- }
+-
+- mvchip->irqbase = irq_alloc_descs(-1, 0, ngpios, -1);
+- if (mvchip->irqbase < 0) {
+- dev_err(&pdev->dev, "no irqs\n");
+- return mvchip->irqbase;
++ mvchip->domain =
++ irq_domain_add_linear(np, ngpios, &irq_generic_chip_ops, NULL);
++ if (!mvchip->domain) {
++ dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
++ mvchip->chip.label);
++ return -ENODEV;
+ }
+
+- gc = irq_alloc_generic_chip("mvebu_gpio_irq", 2, mvchip->irqbase,
+- mvchip->membase, handle_level_irq);
+- if (!gc) {
+- dev_err(&pdev->dev, "Cannot allocate generic irq_chip\n");
+- return -ENOMEM;
++ err = irq_alloc_domain_generic_chips(
++ mvchip->domain, ngpios, 2, np->name, handle_level_irq,
++ IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_LEVEL, 0, 0);
++ if (err) {
++ dev_err(&pdev->dev, "couldn't allocate irq chips %s (DT).\n",
++ mvchip->chip.label);
++ goto err_domain;
+ }
+
++ /* NOTE: The common accessors cannot be used because of the percpu
++ * access to the mask registers
++ */
++ gc = irq_get_domain_generic_chip(mvchip->domain, 0);
+ gc->private = mvchip;
+ ct = &gc->chip_types[0];
+ ct->type = IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW;
+@@ -803,27 +801,23 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
+ ct->handler = handle_edge_irq;
+ ct->chip.name = mvchip->chip.label;
+
+- irq_setup_generic_chip(gc, IRQ_MSK(ngpios), 0,
+- IRQ_NOREQUEST, IRQ_LEVEL | IRQ_NOPROBE);
++ /* Setup the interrupt handlers. Each chip can have up to 4
++ * interrupt handlers, with each handler dealing with 8 GPIO
++ * pins.
++ */
++ for (i = 0; i < 4; i++) {
++ int irq = platform_get_irq(pdev, i);
+
+- /* Setup irq domain on top of the generic chip. */
+- mvchip->domain = irq_domain_add_simple(np, mvchip->chip.ngpio,
+- mvchip->irqbase,
+- &irq_domain_simple_ops,
+- mvchip);
+- if (!mvchip->domain) {
+- dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
+- mvchip->chip.label);
+- err = -ENODEV;
+- goto err_generic_chip;
++ if (irq < 0)
++ continue;
++ irq_set_chained_handler_and_data(irq, mvebu_gpio_irq_handler,
++ mvchip);
+ }
+
+ return 0;
+
+-err_generic_chip:
+- irq_remove_generic_chip(gc, IRQ_MSK(ngpios), IRQ_NOREQUEST,
+- IRQ_LEVEL | IRQ_NOPROBE);
+- kfree(gc);
++err_domain:
++ irq_domain_remove(mvchip->domain);
+
+ return err;
+ }
+diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
+index a28feb3..e3fc901 100644
+--- a/drivers/gpio/gpiolib-of.c
++++ b/drivers/gpio/gpiolib-of.c
+@@ -26,14 +26,18 @@
+
+ #include "gpiolib.h"
+
+-static int of_gpiochip_match_node(struct gpio_chip *chip, void *data)
++static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data)
+ {
+- return chip->gpiodev->dev.of_node == data;
++ struct of_phandle_args *gpiospec = data;
++
++ return chip->gpiodev->dev.of_node == gpiospec->np &&
++ chip->of_xlate(chip, gpiospec, NULL) >= 0;
+ }
+
+-static struct gpio_chip *of_find_gpiochip_by_node(struct device_node *np)
++static struct gpio_chip *of_find_gpiochip_by_xlate(
++ struct of_phandle_args *gpiospec)
+ {
+- return gpiochip_find(np, of_gpiochip_match_node);
++ return gpiochip_find(gpiospec, of_gpiochip_match_node_and_xlate);
+ }
+
+ static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip,
+@@ -79,7 +83,7 @@ struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
+ return ERR_PTR(ret);
+ }
+
+- chip = of_find_gpiochip_by_node(gpiospec.np);
++ chip = of_find_gpiochip_by_xlate(&gpiospec);
+ if (!chip) {
+ desc = ERR_PTR(-EPROBE_DEFER);
+ goto out;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+index 892d60f..2057683 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+@@ -395,9 +395,12 @@ static int acp_hw_fini(void *handle)
+ {
+ int i, ret;
+ struct device *dev;
+-
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ /* return early if no ACP */
++ if (!adev->acp.acp_genpd)
++ return 0;
++
+ for (i = 0; i < ACP_DEVS ; i++) {
+ dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+ ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index 9aa533c..414a160 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -605,6 +605,7 @@ static int __init amdgpu_init(void)
+ {
+ amdgpu_sync_init();
+ amdgpu_fence_slab_init();
++ amd_sched_fence_slab_init();
+ if (vgacon_text_force()) {
+ DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
+ return -EINVAL;
+@@ -624,6 +625,7 @@ static void __exit amdgpu_exit(void)
+ drm_pci_exit(driver, pdriver);
+ amdgpu_unregister_atpx_handler();
+ amdgpu_sync_fini();
++ amd_sched_fence_slab_fini();
+ amdgpu_fence_slab_fini();
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index 0b109ae..c82b95b8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -68,6 +68,7 @@ int amdgpu_fence_slab_init(void)
+
+ void amdgpu_fence_slab_fini(void)
+ {
++ rcu_barrier();
+ kmem_cache_destroy(amdgpu_fence_slab);
+ }
+ /*
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index e24a8af..1ed64ae 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -99,6 +99,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
+
+ if ((amdgpu_runtime_pm != 0) &&
+ amdgpu_has_atpx() &&
++ (amdgpu_is_atpx_hybrid() ||
++ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ ((flags & AMD_IS_APU) == 0))
+ flags |= AMD_IS_PX;
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+index 80120fa..e86ca39 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -1654,5 +1654,6 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
+ fence_put(adev->vm_manager.ids[i].first);
+ amdgpu_sync_free(&adev->vm_manager.ids[i].active);
+ fence_put(id->flushed_updates);
++ fence_put(id->last_flush);
+ }
+ }
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+index 963a24d..ffe1f85 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -34,9 +34,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
+ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
+
+-struct kmem_cache *sched_fence_slab;
+-atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
+-
+ /* Initialize a given run queue struct */
+ static void amd_sched_rq_init(struct amd_sched_rq *rq)
+ {
+@@ -618,13 +615,6 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
+ INIT_LIST_HEAD(&sched->ring_mirror_list);
+ spin_lock_init(&sched->job_list_lock);
+ atomic_set(&sched->hw_rq_count, 0);
+- if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
+- sched_fence_slab = kmem_cache_create(
+- "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
+- SLAB_HWCACHE_ALIGN, NULL);
+- if (!sched_fence_slab)
+- return -ENOMEM;
+- }
+
+ /* Each scheduler will run on a seperate kernel thread */
+ sched->thread = kthread_run(amd_sched_main, sched, sched->name);
+@@ -645,6 +635,4 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched)
+ {
+ if (sched->thread)
+ kthread_stop(sched->thread);
+- if (atomic_dec_and_test(&sched_fence_slab_ref))
+- kmem_cache_destroy(sched_fence_slab);
+ }
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+index 7cbbbfb..51068e6 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+@@ -30,9 +30,6 @@
+ struct amd_gpu_scheduler;
+ struct amd_sched_rq;
+
+-extern struct kmem_cache *sched_fence_slab;
+-extern atomic_t sched_fence_slab_ref;
+-
+ /**
+ * A scheduler entity is a wrapper around a job queue or a group
+ * of other entities. Entities take turns emitting jobs from their
+@@ -145,6 +142,9 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
+ struct amd_sched_entity *entity);
+ void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
+
++int amd_sched_fence_slab_init(void);
++void amd_sched_fence_slab_fini(void);
++
+ struct amd_sched_fence *amd_sched_fence_create(
+ struct amd_sched_entity *s_entity, void *owner);
+ void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
+diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
+index 6b63bea..93ad2e1 100644
+--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
++++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
+@@ -27,6 +27,25 @@
+ #include <drm/drmP.h>
+ #include "gpu_scheduler.h"
+
++static struct kmem_cache *sched_fence_slab;
++
++int amd_sched_fence_slab_init(void)
++{
++ sched_fence_slab = kmem_cache_create(
++ "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
++ SLAB_HWCACHE_ALIGN, NULL);
++ if (!sched_fence_slab)
++ return -ENOMEM;
++
++ return 0;
++}
++
++void amd_sched_fence_slab_fini(void)
++{
++ rcu_barrier();
++ kmem_cache_destroy(sched_fence_slab);
++}
++
+ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
+ void *owner)
+ {
+diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
+index 5de36d8..d46fa22 100644
+--- a/drivers/gpu/drm/i915/i915_drv.c
++++ b/drivers/gpu/drm/i915/i915_drv.c
+@@ -1490,8 +1490,6 @@ static int i915_drm_suspend(struct drm_device *dev)
+
+ dev_priv->suspend_count++;
+
+- intel_display_set_init_power(dev_priv, false);
+-
+ intel_csr_ucode_suspend(dev_priv);
+
+ out:
+@@ -1508,6 +1506,8 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation)
+
+ disable_rpm_wakeref_asserts(dev_priv);
+
++ intel_display_set_init_power(dev_priv, false);
++
+ fw_csr = !IS_BROXTON(dev_priv) &&
+ suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
+ /*
+diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
+index 63462f2..e26f889 100644
+--- a/drivers/gpu/drm/i915/intel_display.c
++++ b/drivers/gpu/drm/i915/intel_display.c
+@@ -9737,6 +9737,29 @@ static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state)
+ bxt_set_cdclk(to_i915(dev), req_cdclk);
+ }
+
++static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state,
++ int pixel_rate)
++{
++ struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
++
++ /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
++ if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
++ pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);
++
++ /* BSpec says "Do not use DisplayPort with CDCLK less than
++ * 432 MHz, audio enabled, port width x4, and link rate
++ * HBR2 (5.4 GHz), or else there may be audio corruption or
++ * screen corruption."
++ */
++ if (intel_crtc_has_dp_encoder(crtc_state) &&
++ crtc_state->has_audio &&
++ crtc_state->port_clock >= 540000 &&
++ crtc_state->lane_count == 4)
++ pixel_rate = max(432000, pixel_rate);
++
++ return pixel_rate;
++}
++
+ /* compute the max rate for new configuration */
+ static int ilk_max_pixel_rate(struct drm_atomic_state *state)
+ {
+@@ -9762,9 +9785,9 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state)
+
+ pixel_rate = ilk_pipe_pixel_rate(crtc_state);
+
+- /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
+- if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
+- pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);
++ if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv))
++ pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state,
++ pixel_rate);
+
+ intel_state->min_pixclk[i] = pixel_rate;
+ }
+diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
+index c3aa9e6..1421270 100644
+--- a/drivers/gpu/drm/i915/intel_hdmi.c
++++ b/drivers/gpu/drm/i915/intel_hdmi.c
+@@ -1759,6 +1759,50 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c
+ intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
+ }
+
++static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
++ enum port port)
++{
++ const struct ddi_vbt_port_info *info =
++ &dev_priv->vbt.ddi_port_info[port];
++ u8 ddc_pin;
++
++ if (info->alternate_ddc_pin) {
++ DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (VBT)\n",
++ info->alternate_ddc_pin, port_name(port));
++ return info->alternate_ddc_pin;
++ }
++
++ switch (port) {
++ case PORT_B:
++ if (IS_BROXTON(dev_priv))
++ ddc_pin = GMBUS_PIN_1_BXT;
++ else
++ ddc_pin = GMBUS_PIN_DPB;
++ break;
++ case PORT_C:
++ if (IS_BROXTON(dev_priv))
++ ddc_pin = GMBUS_PIN_2_BXT;
++ else
++ ddc_pin = GMBUS_PIN_DPC;
++ break;
++ case PORT_D:
++ if (IS_CHERRYVIEW(dev_priv))
++ ddc_pin = GMBUS_PIN_DPD_CHV;
++ else
++ ddc_pin = GMBUS_PIN_DPD;
++ break;
++ default:
++ MISSING_CASE(port);
++ ddc_pin = GMBUS_PIN_DPB;
++ break;
++ }
++
++ DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (platform default)\n",
++ ddc_pin, port_name(port));
++
++ return ddc_pin;
++}
++
+ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
+ struct intel_connector *intel_connector)
+ {
+@@ -1768,7 +1812,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
+ struct drm_device *dev = intel_encoder->base.dev;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ enum port port = intel_dig_port->port;
+- uint8_t alternate_ddc_pin;
+
+ DRM_DEBUG_KMS("Adding HDMI connector on port %c\n",
+ port_name(port));
+@@ -1786,12 +1829,10 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
+ connector->doublescan_allowed = 0;
+ connector->stereo_allowed = 1;
+
++ intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port);
++
+ switch (port) {
+ case PORT_B:
+- if (IS_BROXTON(dev_priv))
+- intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT;
+- else
+- intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
+ /*
+ * On BXT A0/A1, sw needs to activate DDIA HPD logic and
+ * interrupts to check the external panel connection.
+@@ -1802,46 +1843,17 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
+ intel_encoder->hpd_pin = HPD_PORT_B;
+ break;
+ case PORT_C:
+- if (IS_BROXTON(dev_priv))
+- intel_hdmi->ddc_bus = GMBUS_PIN_2_BXT;
+- else
+- intel_hdmi->ddc_bus = GMBUS_PIN_DPC;
+ intel_encoder->hpd_pin = HPD_PORT_C;
+ break;
+ case PORT_D:
+- if (WARN_ON(IS_BROXTON(dev_priv)))
+- intel_hdmi->ddc_bus = GMBUS_PIN_DISABLED;
+- else if (IS_CHERRYVIEW(dev_priv))
+- intel_hdmi->ddc_bus = GMBUS_PIN_DPD_CHV;
+- else
+- intel_hdmi->ddc_bus = GMBUS_PIN_DPD;
+ intel_encoder->hpd_pin = HPD_PORT_D;
+ break;
+ case PORT_E:
+- /* On SKL PORT E doesn't have seperate GMBUS pin
+- * We rely on VBT to set a proper alternate GMBUS pin. */
+- alternate_ddc_pin =
+- dev_priv->vbt.ddi_port_info[PORT_E].alternate_ddc_pin;
+- switch (alternate_ddc_pin) {
+- case DDC_PIN_B:
+- intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
+- break;
+- case DDC_PIN_C:
+- intel_hdmi->ddc_bus = GMBUS_PIN_DPC;
+- break;
+- case DDC_PIN_D:
+- intel_hdmi->ddc_bus = GMBUS_PIN_DPD;
+- break;
+- default:
+- MISSING_CASE(alternate_ddc_pin);
+- }
+ intel_encoder->hpd_pin = HPD_PORT_E;
+ break;
+- case PORT_A:
+- intel_encoder->hpd_pin = HPD_PORT_A;
+- /* Internal port only for eDP. */
+ default:
+- BUG();
++ MISSING_CASE(port);
++ return;
+ }
+
+ if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
+diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
+index 554ca71..edd2d03 100644
+--- a/drivers/gpu/drm/radeon/radeon_device.c
++++ b/drivers/gpu/drm/radeon/radeon_device.c
+@@ -104,6 +104,14 @@ static const char radeon_family_name[][16] = {
+ "LAST",
+ };
+
++#if defined(CONFIG_VGA_SWITCHEROO)
++bool radeon_has_atpx_dgpu_power_cntl(void);
++bool radeon_is_atpx_hybrid(void);
++#else
++static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
++static inline bool radeon_is_atpx_hybrid(void) { return false; }
++#endif
++
+ #define RADEON_PX_QUIRK_DISABLE_PX (1 << 0)
+ #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1)
+
+@@ -160,6 +168,11 @@ static void radeon_device_handle_px_quirks(struct radeon_device *rdev)
+
+ if (rdev->px_quirk_flags & RADEON_PX_QUIRK_DISABLE_PX)
+ rdev->flags &= ~RADEON_IS_PX;
++
++ /* disable PX is the system doesn't support dGPU power control or hybrid gfx */
++ if (!radeon_is_atpx_hybrid() &&
++ !radeon_has_atpx_dgpu_power_cntl())
++ rdev->flags &= ~RADEON_IS_PX;
+ }
+
+ /**
+diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
+index da3fb06..ce69048 100644
+--- a/drivers/iio/accel/st_accel_core.c
++++ b/drivers/iio/accel/st_accel_core.c
+@@ -743,8 +743,8 @@ static int st_accel_read_raw(struct iio_dev *indio_dev,
+
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_SCALE:
+- *val = 0;
+- *val2 = adata->current_fullscale->gain;
++ *val = adata->current_fullscale->gain / 1000000;
++ *val2 = adata->current_fullscale->gain % 1000000;
+ return IIO_VAL_INT_PLUS_MICRO;
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ *val = adata->odr;
+@@ -763,9 +763,13 @@ static int st_accel_write_raw(struct iio_dev *indio_dev,
+ int err;
+
+ switch (mask) {
+- case IIO_CHAN_INFO_SCALE:
+- err = st_sensors_set_fullscale_by_gain(indio_dev, val2);
++ case IIO_CHAN_INFO_SCALE: {
++ int gain;
++
++ gain = val * 1000000 + val2;
++ err = st_sensors_set_fullscale_by_gain(indio_dev, gain);
+ break;
++ }
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ if (val2)
+ return -EINVAL;
+diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+index dc33c1d..b5beea53 100644
+--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
++++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+@@ -30,26 +30,26 @@ static struct {
+ u32 usage_id;
+ int unit; /* 0 for default others from HID sensor spec */
+ int scale_val0; /* scale, whole number */
+- int scale_val1; /* scale, fraction in micros */
++ int scale_val1; /* scale, fraction in nanos */
+ } unit_conversion[] = {
+- {HID_USAGE_SENSOR_ACCEL_3D, 0, 9, 806650},
++ {HID_USAGE_SENSOR_ACCEL_3D, 0, 9, 806650000},
+ {HID_USAGE_SENSOR_ACCEL_3D,
+ HID_USAGE_SENSOR_UNITS_METERS_PER_SEC_SQRD, 1, 0},
+ {HID_USAGE_SENSOR_ACCEL_3D,
+- HID_USAGE_SENSOR_UNITS_G, 9, 806650},
++ HID_USAGE_SENSOR_UNITS_G, 9, 806650000},
+
+- {HID_USAGE_SENSOR_GYRO_3D, 0, 0, 17453},
++ {HID_USAGE_SENSOR_GYRO_3D, 0, 0, 17453293},
+ {HID_USAGE_SENSOR_GYRO_3D,
+ HID_USAGE_SENSOR_UNITS_RADIANS_PER_SECOND, 1, 0},
+ {HID_USAGE_SENSOR_GYRO_3D,
+- HID_USAGE_SENSOR_UNITS_DEGREES_PER_SECOND, 0, 17453},
++ HID_USAGE_SENSOR_UNITS_DEGREES_PER_SECOND, 0, 17453293},
+
+- {HID_USAGE_SENSOR_COMPASS_3D, 0, 0, 1000},
++ {HID_USAGE_SENSOR_COMPASS_3D, 0, 0, 1000000},
+ {HID_USAGE_SENSOR_COMPASS_3D, HID_USAGE_SENSOR_UNITS_GAUSS, 1, 0},
+
+- {HID_USAGE_SENSOR_INCLINOMETER_3D, 0, 0, 17453},
++ {HID_USAGE_SENSOR_INCLINOMETER_3D, 0, 0, 17453293},
+ {HID_USAGE_SENSOR_INCLINOMETER_3D,
+- HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453},
++ HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453293},
+ {HID_USAGE_SENSOR_INCLINOMETER_3D,
+ HID_USAGE_SENSOR_UNITS_RADIANS, 1, 0},
+
+@@ -57,7 +57,7 @@ static struct {
+ {HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0},
+
+ {HID_USAGE_SENSOR_PRESSURE, 0, 100, 0},
+- {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000},
++ {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000000},
+ };
+
+ static int pow_10(unsigned power)
+@@ -266,15 +266,15 @@ EXPORT_SYMBOL(hid_sensor_write_raw_hyst_value);
+ /*
+ * This fuction applies the unit exponent to the scale.
+ * For example:
+- * 9.806650 ->exp:2-> val0[980]val1[665000]
+- * 9.000806 ->exp:2-> val0[900]val1[80600]
+- * 0.174535 ->exp:2-> val0[17]val1[453500]
+- * 1.001745 ->exp:0-> val0[1]val1[1745]
+- * 1.001745 ->exp:2-> val0[100]val1[174500]
+- * 1.001745 ->exp:4-> val0[10017]val1[450000]
+- * 9.806650 ->exp:-2-> val0[0]val1[98066]
++ * 9.806650000 ->exp:2-> val0[980]val1[665000000]
++ * 9.000806000 ->exp:2-> val0[900]val1[80600000]
++ * 0.174535293 ->exp:2-> val0[17]val1[453529300]
++ * 1.001745329 ->exp:0-> val0[1]val1[1745329]
++ * 1.001745329 ->exp:2-> val0[100]val1[174532900]
++ * 1.001745329 ->exp:4-> val0[10017]val1[453290000]
++ * 9.806650000 ->exp:-2-> val0[0]val1[98066500]
+ */
+-static void adjust_exponent_micro(int *val0, int *val1, int scale0,
++static void adjust_exponent_nano(int *val0, int *val1, int scale0,
+ int scale1, int exp)
+ {
+ int i;
+@@ -285,32 +285,32 @@ static void adjust_exponent_micro(int *val0, int *val1, int scale0,
+ if (exp > 0) {
+ *val0 = scale0 * pow_10(exp);
+ res = 0;
+- if (exp > 6) {
++ if (exp > 9) {
+ *val1 = 0;
+ return;
+ }
+ for (i = 0; i < exp; ++i) {
+- x = scale1 / pow_10(5 - i);
++ x = scale1 / pow_10(8 - i);
+ res += (pow_10(exp - 1 - i) * x);
+- scale1 = scale1 % pow_10(5 - i);
++ scale1 = scale1 % pow_10(8 - i);
+ }
+ *val0 += res;
+ *val1 = scale1 * pow_10(exp);
+ } else if (exp < 0) {
+ exp = abs(exp);
+- if (exp > 6) {
++ if (exp > 9) {
+ *val0 = *val1 = 0;
+ return;
+ }
+ *val0 = scale0 / pow_10(exp);
+ rem = scale0 % pow_10(exp);
+ res = 0;
+- for (i = 0; i < (6 - exp); ++i) {
+- x = scale1 / pow_10(5 - i);
+- res += (pow_10(5 - exp - i) * x);
+- scale1 = scale1 % pow_10(5 - i);
++ for (i = 0; i < (9 - exp); ++i) {
++ x = scale1 / pow_10(8 - i);
++ res += (pow_10(8 - exp - i) * x);
++ scale1 = scale1 % pow_10(8 - i);
+ }
+- *val1 = rem * pow_10(6 - exp) + res;
++ *val1 = rem * pow_10(9 - exp) + res;
+ } else {
+ *val0 = scale0;
+ *val1 = scale1;
+@@ -332,14 +332,14 @@ int hid_sensor_format_scale(u32 usage_id,
+ unit_conversion[i].unit == attr_info->units) {
+ exp = hid_sensor_convert_exponent(
+ attr_info->unit_expo);
+- adjust_exponent_micro(val0, val1,
++ adjust_exponent_nano(val0, val1,
+ unit_conversion[i].scale_val0,
+ unit_conversion[i].scale_val1, exp);
+ break;
+ }
+ }
+
+- return IIO_VAL_INT_PLUS_MICRO;
++ return IIO_VAL_INT_PLUS_NANO;
+ }
+ EXPORT_SYMBOL(hid_sensor_format_scale);
+
+diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
+index 2d5282e..32a5946 100644
+--- a/drivers/iio/common/st_sensors/st_sensors_core.c
++++ b/drivers/iio/common/st_sensors/st_sensors_core.c
+@@ -619,7 +619,7 @@ EXPORT_SYMBOL(st_sensors_sysfs_sampling_frequency_avail);
+ ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
+- int i, len = 0;
++ int i, len = 0, q, r;
+ struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+@@ -628,8 +628,10 @@ ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
+ if (sdata->sensor_settings->fs.fs_avl[i].num == 0)
+ break;
+
+- len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
+- sdata->sensor_settings->fs.fs_avl[i].gain);
++ q = sdata->sensor_settings->fs.fs_avl[i].gain / 1000000;
++ r = sdata->sensor_settings->fs.fs_avl[i].gain % 1000000;
++
++ len += scnprintf(buf + len, PAGE_SIZE - len, "%u.%06u ", q, r);
+ }
+ mutex_unlock(&indio_dev->mlock);
+ buf[len - 1] = '\n';
+diff --git a/drivers/iio/orientation/hid-sensor-rotation.c b/drivers/iio/orientation/hid-sensor-rotation.c
+index b98b9d9..a97e802c 100644
+--- a/drivers/iio/orientation/hid-sensor-rotation.c
++++ b/drivers/iio/orientation/hid-sensor-rotation.c
+@@ -335,6 +335,7 @@ static struct platform_driver hid_dev_rot_platform_driver = {
+ .id_table = hid_dev_rot_ids,
+ .driver = {
+ .name = KBUILD_MODNAME,
++ .pm = &hid_sensor_pm_ops,
+ },
+ .probe = hid_dev_rot_probe,
+ .remove = hid_dev_rot_remove,
+diff --git a/drivers/input/rmi4/rmi_i2c.c b/drivers/input/rmi4/rmi_i2c.c
+index 6f2e0e4..1ebc2c1 100644
+--- a/drivers/input/rmi4/rmi_i2c.c
++++ b/drivers/input/rmi4/rmi_i2c.c
+@@ -221,6 +221,21 @@ static const struct of_device_id rmi_i2c_of_match[] = {
+ MODULE_DEVICE_TABLE(of, rmi_i2c_of_match);
+ #endif
+
++static void rmi_i2c_regulator_bulk_disable(void *data)
++{
++ struct rmi_i2c_xport *rmi_i2c = data;
++
++ regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies),
++ rmi_i2c->supplies);
++}
++
++static void rmi_i2c_unregister_transport(void *data)
++{
++ struct rmi_i2c_xport *rmi_i2c = data;
++
++ rmi_unregister_transport_device(&rmi_i2c->xport);
++}
++
+ static int rmi_i2c_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+ {
+@@ -264,6 +279,12 @@ static int rmi_i2c_probe(struct i2c_client *client,
+ if (retval < 0)
+ return retval;
+
++ retval = devm_add_action_or_reset(&client->dev,
++ rmi_i2c_regulator_bulk_disable,
++ rmi_i2c);
++ if (retval)
++ return retval;
++
+ of_property_read_u32(client->dev.of_node, "syna,startup-delay-ms",
+ &rmi_i2c->startup_delay);
+
+@@ -294,6 +315,11 @@ static int rmi_i2c_probe(struct i2c_client *client,
+ client->addr);
+ return retval;
+ }
++ retval = devm_add_action_or_reset(&client->dev,
++ rmi_i2c_unregister_transport,
++ rmi_i2c);
++ if (retval)
++ return retval;
+
+ retval = rmi_i2c_init_irq(client);
+ if (retval < 0)
+@@ -304,17 +330,6 @@ static int rmi_i2c_probe(struct i2c_client *client,
+ return 0;
+ }
+
+-static int rmi_i2c_remove(struct i2c_client *client)
+-{
+- struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client);
+-
+- rmi_unregister_transport_device(&rmi_i2c->xport);
+- regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies),
+- rmi_i2c->supplies);
+-
+- return 0;
+-}
+-
+ #ifdef CONFIG_PM_SLEEP
+ static int rmi_i2c_suspend(struct device *dev)
+ {
+@@ -431,7 +446,6 @@ static struct i2c_driver rmi_i2c_driver = {
+ },
+ .id_table = rmi_id,
+ .probe = rmi_i2c_probe,
+- .remove = rmi_i2c_remove,
+ };
+
+ module_i2c_driver(rmi_i2c_driver);
+diff --git a/drivers/input/rmi4/rmi_spi.c b/drivers/input/rmi4/rmi_spi.c
+index 55bd1b3..4ebef60 100644
+--- a/drivers/input/rmi4/rmi_spi.c
++++ b/drivers/input/rmi4/rmi_spi.c
+@@ -396,6 +396,13 @@ static inline int rmi_spi_of_probe(struct spi_device *spi,
+ }
+ #endif
+
++static void rmi_spi_unregister_transport(void *data)
++{
++ struct rmi_spi_xport *rmi_spi = data;
++
++ rmi_unregister_transport_device(&rmi_spi->xport);
++}
++
+ static int rmi_spi_probe(struct spi_device *spi)
+ {
+ struct rmi_spi_xport *rmi_spi;
+@@ -464,6 +471,11 @@ static int rmi_spi_probe(struct spi_device *spi)
+ dev_err(&spi->dev, "failed to register transport.\n");
+ return retval;
+ }
++ retval = devm_add_action_or_reset(&spi->dev,
++ rmi_spi_unregister_transport,
++ rmi_spi);
++ if (retval)
++ return retval;
+
+ retval = rmi_spi_init_irq(spi);
+ if (retval < 0)
+@@ -473,15 +485,6 @@ static int rmi_spi_probe(struct spi_device *spi)
+ return 0;
+ }
+
+-static int rmi_spi_remove(struct spi_device *spi)
+-{
+- struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi);
+-
+- rmi_unregister_transport_device(&rmi_spi->xport);
+-
+- return 0;
+-}
+-
+ #ifdef CONFIG_PM_SLEEP
+ static int rmi_spi_suspend(struct device *dev)
+ {
+@@ -577,7 +580,6 @@ static struct spi_driver rmi_spi_driver = {
+ },
+ .id_table = rmi_id,
+ .probe = rmi_spi_probe,
+- .remove = rmi_spi_remove,
+ };
+
+ module_spi_driver(rmi_spi_driver);
+diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
+index 96de97a..822fc4a 100644
+--- a/drivers/iommu/amd_iommu.c
++++ b/drivers/iommu/amd_iommu.c
+@@ -1654,6 +1654,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
+
+ free_pagetable(&dom->domain);
+
++ if (dom->domain.id)
++ domain_id_free(dom->domain.id);
++
+ kfree(dom);
+ }
+
+diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
+index ebb5bf3..1257b0b 100644
+--- a/drivers/iommu/intel-iommu.c
++++ b/drivers/iommu/intel-iommu.c
+@@ -1711,6 +1711,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
+ if (!iommu->domains || !iommu->domain_ids)
+ return;
+
++again:
+ spin_lock_irqsave(&device_domain_lock, flags);
+ list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
+ struct dmar_domain *domain;
+@@ -1723,10 +1724,19 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
+
+ domain = info->domain;
+
+- dmar_remove_one_dev_info(domain, info->dev);
++ __dmar_remove_one_dev_info(info);
+
+- if (!domain_type_is_vm_or_si(domain))
++ if (!domain_type_is_vm_or_si(domain)) {
++ /*
++ * The domain_exit() function can't be called under
++ * device_domain_lock, as it takes this lock itself.
++ * So release the lock here and re-run the loop
++ * afterwards.
++ */
++ spin_unlock_irqrestore(&device_domain_lock, flags);
+ domain_exit(domain);
++ goto again;
++ }
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
+index def8ca1..f50e51c 100644
+--- a/drivers/iommu/io-pgtable-arm-v7s.c
++++ b/drivers/iommu/io-pgtable-arm-v7s.c
+@@ -633,6 +633,10 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
+ {
+ struct arm_v7s_io_pgtable *data;
+
++#ifdef PHYS_OFFSET
++ if (upper_32_bits(PHYS_OFFSET))
++ return NULL;
++#endif
+ if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+ return NULL;
+
+diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c
+index bf890c3..f73e108 100644
+--- a/drivers/media/usb/dvb-usb/dib0700_core.c
++++ b/drivers/media/usb/dvb-usb/dib0700_core.c
+@@ -677,7 +677,7 @@ static void dib0700_rc_urb_completion(struct urb *purb)
+ struct dvb_usb_device *d = purb->context;
+ struct dib0700_rc_response *poll_reply;
+ enum rc_type protocol;
+- u32 uninitialized_var(keycode);
++ u32 keycode;
+ u8 toggle;
+
+ deb_info("%s()\n", __func__);
+@@ -719,7 +719,8 @@ static void dib0700_rc_urb_completion(struct urb *purb)
+ poll_reply->nec.data == 0x00 &&
+ poll_reply->nec.not_data == 0xff) {
+ poll_reply->data_state = 2;
+- break;
++ rc_repeat(d->rc_dev);
++ goto resubmit;
+ }
+
+ if ((poll_reply->nec.data ^ poll_reply->nec.not_data) != 0xff) {
+diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
+index e9e6ea3..75b9d4a 100644
+--- a/drivers/misc/mei/bus-fixup.c
++++ b/drivers/misc/mei/bus-fixup.c
+@@ -178,7 +178,7 @@ static int mei_nfc_if_version(struct mei_cl *cl,
+
+ ret = 0;
+ bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length);
+- if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) {
++ if (bytes_recv < if_version_length) {
+ dev_err(bus->dev, "Could not read IF version\n");
+ ret = -EIO;
+ goto err;
+diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
+index c57eb32..6ef1e3c 100644
+--- a/drivers/mmc/core/mmc.c
++++ b/drivers/mmc/core/mmc.c
+@@ -26,6 +26,8 @@
+ #include "mmc_ops.h"
+ #include "sd_ops.h"
+
++#define DEFAULT_CMD6_TIMEOUT_MS 500
++
+ static const unsigned int tran_exp[] = {
+ 10000, 100000, 1000000, 10000000,
+ 0, 0, 0, 0
+@@ -571,6 +573,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
+ card->erased_byte = 0x0;
+
+ /* eMMC v4.5 or later */
++ card->ext_csd.generic_cmd6_time = DEFAULT_CMD6_TIMEOUT_MS;
+ if (card->ext_csd.rev >= 6) {
+ card->ext_csd.feature_support |= MMC_DISCARD_FEATURE;
+
+diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
+index d839147..44ecebd 100644
+--- a/drivers/mmc/host/mxs-mmc.c
++++ b/drivers/mmc/host/mxs-mmc.c
+@@ -661,13 +661,13 @@ static int mxs_mmc_probe(struct platform_device *pdev)
+
+ platform_set_drvdata(pdev, mmc);
+
++ spin_lock_init(&host->lock);
++
+ ret = devm_request_irq(&pdev->dev, irq_err, mxs_mmc_irq_handler, 0,
+ dev_name(&pdev->dev), host);
+ if (ret)
+ goto out_free_dma;
+
+- spin_lock_init(&host->lock);
+-
+ ret = mmc_add_host(mmc);
+ if (ret)
+ goto out_free_dma;
+diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
+index 8ef44a2a..90ed2e1 100644
+--- a/drivers/mmc/host/sdhci-msm.c
++++ b/drivers/mmc/host/sdhci-msm.c
+@@ -647,6 +647,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
+ if (msm_host->pwr_irq < 0) {
+ dev_err(&pdev->dev, "Get pwr_irq failed (%d)\n",
+ msm_host->pwr_irq);
++ ret = msm_host->pwr_irq;
+ goto clk_disable;
+ }
+
+diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
+index a8a022a..6eb8f07 100644
+--- a/drivers/mmc/host/sdhci.c
++++ b/drivers/mmc/host/sdhci.c
+@@ -2269,10 +2269,8 @@ static bool sdhci_request_done(struct sdhci_host *host)
+
+ for (i = 0; i < SDHCI_MAX_MRQS; i++) {
+ mrq = host->mrqs_done[i];
+- if (mrq) {
+- host->mrqs_done[i] = NULL;
++ if (mrq)
+ break;
+- }
+ }
+
+ if (!mrq) {
+@@ -2303,6 +2301,17 @@ static bool sdhci_request_done(struct sdhci_host *host)
+ * upon error conditions.
+ */
+ if (sdhci_needs_reset(host, mrq)) {
++ /*
++ * Do not finish until command and data lines are available for
++ * reset. Note there can only be one other mrq, so it cannot
++ * also be in mrqs_done, otherwise host->cmd and host->data_cmd
++ * would both be null.
++ */
++ if (host->cmd || host->data_cmd) {
++ spin_unlock_irqrestore(&host->lock, flags);
++ return true;
++ }
++
+ /* Some controllers need this kick or reset won't work here */
+ if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)
+ /* This is to force an update */
+@@ -2310,10 +2319,8 @@ static bool sdhci_request_done(struct sdhci_host *host)
+
+ /* Spec says we should do both at the same time, but Ricoh
+ controllers do not like that. */
+- if (!host->cmd)
+- sdhci_do_reset(host, SDHCI_RESET_CMD);
+- if (!host->data_cmd)
+- sdhci_do_reset(host, SDHCI_RESET_DATA);
++ sdhci_do_reset(host, SDHCI_RESET_CMD);
++ sdhci_do_reset(host, SDHCI_RESET_DATA);
+
+ host->pending_reset = false;
+ }
+@@ -2321,6 +2328,8 @@ static bool sdhci_request_done(struct sdhci_host *host)
+ if (!sdhci_has_requests(host))
+ sdhci_led_deactivate(host);
+
++ host->mrqs_done[i] = NULL;
++
+ mmiowb();
+ spin_unlock_irqrestore(&host->lock, flags);
+
+@@ -2500,9 +2509,6 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
+ if (!host->data) {
+ struct mmc_command *data_cmd = host->data_cmd;
+
+- if (data_cmd)
+- host->data_cmd = NULL;
+-
+ /*
+ * The "data complete" interrupt is also used to
+ * indicate that a busy state has ended. See comment
+@@ -2510,11 +2516,13 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
+ */
+ if (data_cmd && (data_cmd->flags & MMC_RSP_BUSY)) {
+ if (intmask & SDHCI_INT_DATA_TIMEOUT) {
++ host->data_cmd = NULL;
+ data_cmd->error = -ETIMEDOUT;
+ sdhci_finish_mrq(host, data_cmd->mrq);
+ return;
+ }
+ if (intmask & SDHCI_INT_DATA_END) {
++ host->data_cmd = NULL;
+ /*
+ * Some cards handle busy-end interrupt
+ * before the command completed, so make
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index c74d164..6b46a37 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -9001,7 +9001,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ return 0;
+
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode,
+- nlflags, 0, 0, filter_mask, NULL);
++ 0, 0, nlflags, filter_mask, NULL);
+ }
+
+ /* Hardware supports L4 tunnel length of 128B (=2^7) which includes
+diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c
+index 83deda4..6f9563a 100644
+--- a/drivers/nfc/mei_phy.c
++++ b/drivers/nfc/mei_phy.c
+@@ -133,7 +133,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy)
+ return -ENOMEM;
+
+ bytes_recv = mei_cldev_recv(phy->cldev, (u8 *)reply, if_version_length);
+- if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) {
++ if (bytes_recv < 0 || bytes_recv < if_version_length) {
+ pr_err("Could not read IF version\n");
+ r = -EIO;
+ goto err;
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 60f7eab..da134a0 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -1531,9 +1531,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
+ return 0;
+ }
+
+-static void nvme_disable_io_queues(struct nvme_dev *dev)
++static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
+ {
+- int pass, queues = dev->online_queues - 1;
++ int pass;
+ unsigned long timeout;
+ u8 opcode = nvme_admin_delete_sq;
+
+@@ -1678,7 +1678,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
+
+ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
+ {
+- int i;
++ int i, queues;
+ u32 csts = -1;
+
+ del_timer_sync(&dev->watchdog_timer);
+@@ -1689,6 +1689,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
+ csts = readl(dev->bar + NVME_REG_CSTS);
+ }
+
++ queues = dev->online_queues - 1;
+ for (i = dev->queue_count - 1; i > 0; i--)
+ nvme_suspend_queue(dev->queues[i]);
+
+@@ -1700,7 +1701,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
+ if (dev->queue_count)
+ nvme_suspend_queue(dev->queues[0]);
+ } else {
+- nvme_disable_io_queues(dev);
++ nvme_disable_io_queues(dev, queues);
+ nvme_disable_admin_queue(dev, shutdown);
+ }
+ nvme_pci_disable(dev);
+diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
+index 66c4d8f..9526e34 100644
+--- a/drivers/pci/setup-res.c
++++ b/drivers/pci/setup-res.c
+@@ -121,6 +121,14 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
+ return -EINVAL;
+ }
+
++ /*
++ * If we have a shadow copy in RAM, the PCI device doesn't respond
++ * to the shadow range, so we don't need to claim it, and upstream
++ * bridges don't need to route the range to the device.
++ */
++ if (res->flags & IORESOURCE_ROM_SHADOW)
++ return 0;
++
+ root = pci_find_parent_resource(dev, res);
+ if (!root) {
+ dev_info(&dev->dev, "can't claim BAR %d %pR: no compatible bridge window\n",
+diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
+index 7f77007..5d1e505c3 100644
+--- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
++++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
+@@ -844,6 +844,6 @@ static struct platform_driver iproc_gpio_driver = {
+
+ static int __init iproc_gpio_init(void)
+ {
+- return platform_driver_probe(&iproc_gpio_driver, iproc_gpio_probe);
++ return platform_driver_register(&iproc_gpio_driver);
+ }
+ arch_initcall_sync(iproc_gpio_init);
+diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
+index 35783db..c8deb8b 100644
+--- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
++++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
+@@ -741,6 +741,6 @@ static struct platform_driver nsp_gpio_driver = {
+
+ static int __init nsp_gpio_init(void)
+ {
+- return platform_driver_probe(&nsp_gpio_driver, nsp_gpio_probe);
++ return platform_driver_register(&nsp_gpio_driver);
+ }
+ arch_initcall_sync(nsp_gpio_init);
+diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
+index 0fe8fad..bc31504 100644
+--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
++++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
+@@ -1634,12 +1634,15 @@ static int chv_pinctrl_remove(struct platform_device *pdev)
+ }
+
+ #ifdef CONFIG_PM_SLEEP
+-static int chv_pinctrl_suspend(struct device *dev)
++static int chv_pinctrl_suspend_noirq(struct device *dev)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ struct chv_pinctrl *pctrl = platform_get_drvdata(pdev);
++ unsigned long flags;
+ int i;
+
++ raw_spin_lock_irqsave(&chv_lock, flags);
++
+ pctrl->saved_intmask = readl(pctrl->regs + CHV_INTMASK);
+
+ for (i = 0; i < pctrl->community->npins; i++) {
+@@ -1660,15 +1663,20 @@ static int chv_pinctrl_suspend(struct device *dev)
+ ctx->padctrl1 = readl(reg);
+ }
+
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
++
+ return 0;
+ }
+
+-static int chv_pinctrl_resume(struct device *dev)
++static int chv_pinctrl_resume_noirq(struct device *dev)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ struct chv_pinctrl *pctrl = platform_get_drvdata(pdev);
++ unsigned long flags;
+ int i;
+
++ raw_spin_lock_irqsave(&chv_lock, flags);
++
+ /*
+ * Mask all interrupts before restoring per-pin configuration
+ * registers because we don't know in which state BIOS left them
+@@ -1713,12 +1721,15 @@ static int chv_pinctrl_resume(struct device *dev)
+ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
+ chv_writel(pctrl->saved_intmask, pctrl->regs + CHV_INTMASK);
+
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
++
+ return 0;
+ }
+ #endif
+
+ static const struct dev_pm_ops chv_pinctrl_pm_ops = {
+- SET_LATE_SYSTEM_SLEEP_PM_OPS(chv_pinctrl_suspend, chv_pinctrl_resume)
++ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(chv_pinctrl_suspend_noirq,
++ chv_pinctrl_resume_noirq)
+ };
+
+ static const struct acpi_device_id chv_pinctrl_acpi_match[] = {
+diff --git a/drivers/platform/x86/toshiba-wmi.c b/drivers/platform/x86/toshiba-wmi.c
+index feac457..2df07ee 100644
+--- a/drivers/platform/x86/toshiba-wmi.c
++++ b/drivers/platform/x86/toshiba-wmi.c
+@@ -24,14 +24,15 @@
+ #include <linux/acpi.h>
+ #include <linux/input.h>
+ #include <linux/input/sparse-keymap.h>
++#include <linux/dmi.h>
+
+ MODULE_AUTHOR("Azael Avalos");
+ MODULE_DESCRIPTION("Toshiba WMI Hotkey Driver");
+ MODULE_LICENSE("GPL");
+
+-#define TOSHIBA_WMI_EVENT_GUID "59142400-C6A3-40FA-BADB-8A2652834100"
++#define WMI_EVENT_GUID "59142400-C6A3-40FA-BADB-8A2652834100"
+
+-MODULE_ALIAS("wmi:"TOSHIBA_WMI_EVENT_GUID);
++MODULE_ALIAS("wmi:"WMI_EVENT_GUID);
+
+ static struct input_dev *toshiba_wmi_input_dev;
+
+@@ -63,6 +64,16 @@ static void toshiba_wmi_notify(u32 value, void *context)
+ kfree(response.pointer);
+ }
+
++static struct dmi_system_id toshiba_wmi_dmi_table[] __initdata = {
++ {
++ .ident = "Toshiba laptop",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
++ },
++ },
++ {}
++};
++
+ static int __init toshiba_wmi_input_setup(void)
+ {
+ acpi_status status;
+@@ -81,7 +92,7 @@ static int __init toshiba_wmi_input_setup(void)
+ if (err)
+ goto err_free_dev;
+
+- status = wmi_install_notify_handler(TOSHIBA_WMI_EVENT_GUID,
++ status = wmi_install_notify_handler(WMI_EVENT_GUID,
+ toshiba_wmi_notify, NULL);
+ if (ACPI_FAILURE(status)) {
+ err = -EIO;
+@@ -95,7 +106,7 @@ static int __init toshiba_wmi_input_setup(void)
+ return 0;
+
+ err_remove_notifier:
+- wmi_remove_notify_handler(TOSHIBA_WMI_EVENT_GUID);
++ wmi_remove_notify_handler(WMI_EVENT_GUID);
+ err_free_keymap:
+ sparse_keymap_free(toshiba_wmi_input_dev);
+ err_free_dev:
+@@ -105,7 +116,7 @@ static int __init toshiba_wmi_input_setup(void)
+
+ static void toshiba_wmi_input_destroy(void)
+ {
+- wmi_remove_notify_handler(TOSHIBA_WMI_EVENT_GUID);
++ wmi_remove_notify_handler(WMI_EVENT_GUID);
+ sparse_keymap_free(toshiba_wmi_input_dev);
+ input_unregister_device(toshiba_wmi_input_dev);
+ }
+@@ -114,7 +125,8 @@ static int __init toshiba_wmi_init(void)
+ {
+ int ret;
+
+- if (!wmi_has_guid(TOSHIBA_WMI_EVENT_GUID))
++ if (!wmi_has_guid(WMI_EVENT_GUID) ||
++ !dmi_check_system(toshiba_wmi_dmi_table))
+ return -ENODEV;
+
+ ret = toshiba_wmi_input_setup();
+@@ -130,7 +142,7 @@ static int __init toshiba_wmi_init(void)
+
+ static void __exit toshiba_wmi_exit(void)
+ {
+- if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID))
++ if (wmi_has_guid(WMI_EVENT_GUID))
+ toshiba_wmi_input_destroy();
+ }
+
+diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
+index b4478cc..8895f77 100644
+--- a/drivers/rtc/rtc-pcf2123.c
++++ b/drivers/rtc/rtc-pcf2123.c
+@@ -182,7 +182,8 @@ static ssize_t pcf2123_show(struct device *dev, struct device_attribute *attr,
+ }
+
+ static ssize_t pcf2123_store(struct device *dev, struct device_attribute *attr,
+- const char *buffer, size_t count) {
++ const char *buffer, size_t count)
++{
+ struct pcf2123_sysfs_reg *r;
+ unsigned long reg;
+ unsigned long val;
+@@ -199,7 +200,7 @@ static ssize_t pcf2123_store(struct device *dev, struct device_attribute *attr,
+ if (ret)
+ return ret;
+
+- pcf2123_write_reg(dev, reg, val);
++ ret = pcf2123_write_reg(dev, reg, val);
+ if (ret < 0)
+ return -EIO;
+ return count;
+diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
+index 752b5c9..920c421 100644
+--- a/drivers/scsi/device_handler/scsi_dh_alua.c
++++ b/drivers/scsi/device_handler/scsi_dh_alua.c
+@@ -792,6 +792,7 @@ static void alua_rtpg_work(struct work_struct *work)
+ WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
+ WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
+ spin_unlock_irqrestore(&pg->lock, flags);
++ kref_put(&pg->kref, release_port_group);
+ return;
+ }
+ if (pg->flags & ALUA_SYNC_STPG)
+@@ -889,6 +890,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
+ /* Do not queue if the worker is already running */
+ if (!(pg->flags & ALUA_PG_RUNNING)) {
+ kref_get(&pg->kref);
++ sdev = NULL;
+ start_queue = 1;
+ }
+ }
+@@ -900,7 +902,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
+ if (start_queue &&
+ !queue_delayed_work(alua_wq, &pg->rtpg_work,
+ msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
+- scsi_device_put(sdev);
++ if (sdev)
++ scsi_device_put(sdev);
+ kref_put(&pg->kref, release_port_group);
+ }
+ }
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+index 4cb7990..46c0f5e 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+@@ -1273,9 +1273,9 @@ scsih_target_alloc(struct scsi_target *starget)
+ sas_target_priv_data->handle = raid_device->handle;
+ sas_target_priv_data->sas_address = raid_device->wwid;
+ sas_target_priv_data->flags |= MPT_TARGET_FLAGS_VOLUME;
+- sas_target_priv_data->raid_device = raid_device;
+ if (ioc->is_warpdrive)
+- raid_device->starget = starget;
++ sas_target_priv_data->raid_device = raid_device;
++ raid_device->starget = starget;
+ }
+ spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+ return 0;
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index 2674f4c..e46e2c5 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -2341,6 +2341,8 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
+ {
+ scsi_qla_host_t *vha = shost_priv(shost);
+
++ if (test_bit(UNLOADING, &vha->dpc_flags))
++ return 1;
+ if (!vha->host)
+ return 1;
+ if (time > vha->hw->loop_reset_delay * HZ)
+diff --git a/drivers/staging/comedi/drivers/ni_tio.c b/drivers/staging/comedi/drivers/ni_tio.c
+index 7043eb0..5ab49a7 100644
+--- a/drivers/staging/comedi/drivers/ni_tio.c
++++ b/drivers/staging/comedi/drivers/ni_tio.c
+@@ -207,7 +207,8 @@ static int ni_tio_clock_period_ps(const struct ni_gpct *counter,
+ * clock period is specified by user with prescaling
+ * already taken into account.
+ */
+- return counter->clock_period_ps;
++ *period_ps = counter->clock_period_ps;
++ return 0;
+ }
+
+ switch (generic_clock_source & NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK) {
+diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
+index 24c348d..98d9473 100644
+--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
++++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
+@@ -655,6 +655,7 @@ static void ad5933_work(struct work_struct *work)
+ __be16 buf[2];
+ int val[2];
+ unsigned char status;
++ int ret;
+
+ mutex_lock(&indio_dev->mlock);
+ if (st->state == AD5933_CTRL_INIT_START_FREQ) {
+@@ -662,19 +663,22 @@ static void ad5933_work(struct work_struct *work)
+ ad5933_cmd(st, AD5933_CTRL_START_SWEEP);
+ st->state = AD5933_CTRL_START_SWEEP;
+ schedule_delayed_work(&st->work, st->poll_time_jiffies);
+- mutex_unlock(&indio_dev->mlock);
+- return;
++ goto out;
+ }
+
+- ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status);
++ ret = ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status);
++ if (ret)
++ goto out;
+
+ if (status & AD5933_STAT_DATA_VALID) {
+ int scan_count = bitmap_weight(indio_dev->active_scan_mask,
+ indio_dev->masklength);
+- ad5933_i2c_read(st->client,
++ ret = ad5933_i2c_read(st->client,
+ test_bit(1, indio_dev->active_scan_mask) ?
+ AD5933_REG_REAL_DATA : AD5933_REG_IMAG_DATA,
+ scan_count * 2, (u8 *)buf);
++ if (ret)
++ goto out;
+
+ if (scan_count == 2) {
+ val[0] = be16_to_cpu(buf[0]);
+@@ -686,8 +690,7 @@ static void ad5933_work(struct work_struct *work)
+ } else {
+ /* no data available - try again later */
+ schedule_delayed_work(&st->work, st->poll_time_jiffies);
+- mutex_unlock(&indio_dev->mlock);
+- return;
++ goto out;
+ }
+
+ if (status & AD5933_STAT_SWEEP_DONE) {
+@@ -700,7 +703,7 @@ static void ad5933_work(struct work_struct *work)
+ ad5933_cmd(st, AD5933_CTRL_INC_FREQ);
+ schedule_delayed_work(&st->work, st->poll_time_jiffies);
+ }
+-
++out:
+ mutex_unlock(&indio_dev->mlock);
+ }
+
+diff --git a/drivers/staging/nvec/nvec_ps2.c b/drivers/staging/nvec/nvec_ps2.c
+index a324322..499952c 100644
+--- a/drivers/staging/nvec/nvec_ps2.c
++++ b/drivers/staging/nvec/nvec_ps2.c
+@@ -106,13 +106,12 @@ static int nvec_mouse_probe(struct platform_device *pdev)
+ {
+ struct nvec_chip *nvec = dev_get_drvdata(pdev->dev.parent);
+ struct serio *ser_dev;
+- char mouse_reset[] = { NVEC_PS2, SEND_COMMAND, PSMOUSE_RST, 3 };
+
+- ser_dev = devm_kzalloc(&pdev->dev, sizeof(struct serio), GFP_KERNEL);
++ ser_dev = kzalloc(sizeof(struct serio), GFP_KERNEL);
+ if (!ser_dev)
+ return -ENOMEM;
+
+- ser_dev->id.type = SERIO_PS_PSTHRU;
++ ser_dev->id.type = SERIO_8042;
+ ser_dev->write = ps2_sendcommand;
+ ser_dev->start = ps2_startstreaming;
+ ser_dev->stop = ps2_stopstreaming;
+@@ -127,9 +126,6 @@ static int nvec_mouse_probe(struct platform_device *pdev)
+
+ serio_register_port(ser_dev);
+
+- /* mouse reset */
+- nvec_write_async(nvec, mouse_reset, sizeof(mouse_reset));
+-
+ return 0;
+ }
+
+diff --git a/drivers/staging/sm750fb/ddk750_reg.h b/drivers/staging/sm750fb/ddk750_reg.h
+index 9552479..4ed6d8d 100644
+--- a/drivers/staging/sm750fb/ddk750_reg.h
++++ b/drivers/staging/sm750fb/ddk750_reg.h
+@@ -601,13 +601,13 @@
+
+ #define PANEL_PLANE_TL 0x08001C
+ #define PANEL_PLANE_TL_TOP_SHIFT 16
+-#define PANEL_PLANE_TL_TOP_MASK (0xeff << 16)
+-#define PANEL_PLANE_TL_LEFT_MASK 0xeff
++#define PANEL_PLANE_TL_TOP_MASK (0x7ff << 16)
++#define PANEL_PLANE_TL_LEFT_MASK 0x7ff
+
+ #define PANEL_PLANE_BR 0x080020
+ #define PANEL_PLANE_BR_BOTTOM_SHIFT 16
+-#define PANEL_PLANE_BR_BOTTOM_MASK (0xeff << 16)
+-#define PANEL_PLANE_BR_RIGHT_MASK 0xeff
++#define PANEL_PLANE_BR_BOTTOM_MASK (0x7ff << 16)
++#define PANEL_PLANE_BR_RIGHT_MASK 0x7ff
+
+ #define PANEL_HORIZONTAL_TOTAL 0x080024
+ #define PANEL_HORIZONTAL_TOTAL_TOTAL_SHIFT 16
+diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
+index 8bbde52..21aeac5 100644
+--- a/drivers/tty/serial/atmel_serial.c
++++ b/drivers/tty/serial/atmel_serial.c
+@@ -2026,6 +2026,7 @@ static void atmel_serial_pm(struct uart_port *port, unsigned int state,
+ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
+ struct ktermios *old)
+ {
++ struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+ unsigned long flags;
+ unsigned int old_mode, mode, imr, quot, baud;
+
+@@ -2129,11 +2130,29 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
+ mode |= ATMEL_US_USMODE_RS485;
+ } else if (termios->c_cflag & CRTSCTS) {
+ /* RS232 with hardware handshake (RTS/CTS) */
+- if (atmel_use_dma_rx(port) && !atmel_use_fifo(port)) {
+- dev_info(port->dev, "not enabling hardware flow control because DMA is used");
+- termios->c_cflag &= ~CRTSCTS;
+- } else {
++ if (atmel_use_fifo(port) &&
++ !mctrl_gpio_to_gpiod(atmel_port->gpios, UART_GPIO_CTS)) {
++ /*
++ * with ATMEL_US_USMODE_HWHS set, the controller will
++ * be able to drive the RTS pin high/low when the RX
++ * FIFO is above RXFTHRES/below RXFTHRES2.
++ * It will also disable the transmitter when the CTS
++ * pin is high.
++ * This mode is not activated if CTS pin is a GPIO
++ * because in this case, the transmitter is always
++ * disabled (there must be an internal pull-up
++ * responsible for this behaviour).
++ * If the RTS pin is a GPIO, the controller won't be
++ * able to drive it according to the FIFO thresholds,
++ * but it will be handled by the driver.
++ */
+ mode |= ATMEL_US_USMODE_HWHS;
++ } else {
++ /*
++ * For platforms without FIFO, the flow control is
++ * handled by the driver.
++ */
++ mode |= ATMEL_US_USMODE_NORMAL;
+ }
+ } else {
+ /* RS232 without hadware handshake */
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
+index 0f3f62e..3ca9fdb 100644
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -946,8 +946,6 @@ static int wait_serial_change(struct acm *acm, unsigned long arg)
+ DECLARE_WAITQUEUE(wait, current);
+ struct async_icount old, new;
+
+- if (arg & (TIOCM_DSR | TIOCM_RI | TIOCM_CD))
+- return -EINVAL;
+ do {
+ spin_lock_irq(&acm->read_lock);
+ old = acm->oldcount;
+@@ -1175,6 +1173,8 @@ static int acm_probe(struct usb_interface *intf,
+ if (quirks == IGNORE_DEVICE)
+ return -ENODEV;
+
++ memset(&h, 0x00, sizeof(struct usb_cdc_parsed_header));
++
+ num_rx_buf = (quirks == SINGLE_RX_URB) ? 1 : ACM_NR;
+
+ /* handle quirks deadly to normal probing*/
+diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
+index 35d0924..2d47010 100644
+--- a/drivers/usb/dwc3/core.c
++++ b/drivers/usb/dwc3/core.c
+@@ -669,15 +669,14 @@ static int dwc3_core_init(struct dwc3 *dwc)
+ return 0;
+
+ err4:
+- phy_power_off(dwc->usb2_generic_phy);
++ phy_power_off(dwc->usb3_generic_phy);
+
+ err3:
+- phy_power_off(dwc->usb3_generic_phy);
++ phy_power_off(dwc->usb2_generic_phy);
+
+ err2:
+ usb_phy_set_suspend(dwc->usb2_phy, 1);
+ usb_phy_set_suspend(dwc->usb3_phy, 1);
+- dwc3_core_exit(dwc);
+
+ err1:
+ usb_phy_shutdown(dwc->usb2_phy);
+diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
+index 9b9e71f..f590ada 100644
+--- a/drivers/usb/gadget/function/u_ether.c
++++ b/drivers/usb/gadget/function/u_ether.c
+@@ -585,14 +585,6 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
+
+ req->length = length;
+
+- /* throttle high/super speed IRQ rate back slightly */
+- if (gadget_is_dualspeed(dev->gadget))
+- req->no_interrupt = (((dev->gadget->speed == USB_SPEED_HIGH ||
+- dev->gadget->speed == USB_SPEED_SUPER)) &&
+- !list_empty(&dev->tx_reqs))
+- ? ((atomic_read(&dev->tx_qlen) % dev->qmult) != 0)
+- : 0;
+-
+ retval = usb_ep_queue(in, req, GFP_ATOMIC);
+ switch (retval) {
+ default:
+diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
+index 6abb83c..74265b2 100644
+--- a/drivers/watchdog/watchdog_core.c
++++ b/drivers/watchdog/watchdog_core.c
+@@ -349,7 +349,7 @@ int devm_watchdog_register_device(struct device *dev,
+ struct watchdog_device **rcwdd;
+ int ret;
+
+- rcwdd = devres_alloc(devm_watchdog_unregister_device, sizeof(*wdd),
++ rcwdd = devres_alloc(devm_watchdog_unregister_device, sizeof(*rcwdd),
+ GFP_KERNEL);
+ if (!rcwdd)
+ return -ENOMEM;
+diff --git a/fs/coredump.c b/fs/coredump.c
+index 281b768..eb9c92c 100644
+--- a/fs/coredump.c
++++ b/fs/coredump.c
+@@ -1,6 +1,7 @@
+ #include <linux/slab.h>
+ #include <linux/file.h>
+ #include <linux/fdtable.h>
++#include <linux/freezer.h>
+ #include <linux/mm.h>
+ #include <linux/stat.h>
+ #include <linux/fcntl.h>
+@@ -423,7 +424,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
+ if (core_waiters > 0) {
+ struct core_thread *ptr;
+
++ freezer_do_not_count();
+ wait_for_completion(&core_state->startup);
++ freezer_count();
+ /*
+ * Wait for all the threads to become inactive, so that
+ * all the thread context (extended register state, like
+diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
+index b629730..150c5a1 100644
+--- a/fs/nfs/nfs4session.c
++++ b/fs/nfs/nfs4session.c
+@@ -178,12 +178,14 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid,
+ __must_hold(&tbl->slot_tbl_lock)
+ {
+ struct nfs4_slot *slot;
++ int ret;
+
+ slot = nfs4_lookup_slot(tbl, slotid);
+- if (IS_ERR(slot))
+- return PTR_ERR(slot);
+- *seq_nr = slot->seq_nr;
+- return 0;
++ ret = PTR_ERR_OR_ZERO(slot);
++ if (!ret)
++ *seq_nr = slot->seq_nr;
++
++ return ret;
+ }
+
+ /*
+diff --git a/include/linux/acpi.h b/include/linux/acpi.h
+index c5eaf2f..67d1d3e 100644
+--- a/include/linux/acpi.h
++++ b/include/linux/acpi.h
+@@ -318,6 +318,7 @@ struct pci_dev;
+ int acpi_pci_irq_enable (struct pci_dev *dev);
+ void acpi_penalize_isa_irq(int irq, int active);
+ bool acpi_isa_irq_available(int irq);
++void acpi_penalize_sci_irq(int irq, int trigger, int polarity);
+ void acpi_pci_irq_disable (struct pci_dev *dev);
+
+ extern int ec_read(u8 addr, u8 *val);
+diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
+index c46d2aa..1d18af0 100644
+--- a/include/linux/frontswap.h
++++ b/include/linux/frontswap.h
+@@ -106,8 +106,9 @@ static inline void frontswap_invalidate_area(unsigned type)
+
+ static inline void frontswap_init(unsigned type, unsigned long *map)
+ {
+- if (frontswap_enabled())
+- __frontswap_init(type, map);
++#ifdef CONFIG_FRONTSWAP
++ __frontswap_init(type, map);
++#endif
+ }
+
+ #endif /* _LINUX_FRONTSWAP_H */
+diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
+index d6917b8..3584bc8 100644
+--- a/include/linux/sunrpc/svc_rdma.h
++++ b/include/linux/sunrpc/svc_rdma.h
+@@ -86,6 +86,7 @@ struct svc_rdma_op_ctxt {
+ unsigned long flags;
+ enum dma_data_direction direction;
+ int count;
++ unsigned int mapped_sges;
+ struct ib_sge sge[RPCSVC_MAXPAGES];
+ struct page *pages[RPCSVC_MAXPAGES];
+ };
+@@ -193,6 +194,14 @@ struct svcxprt_rdma {
+
+ #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
+
++/* Track DMA maps for this transport and context */
++static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
++ struct svc_rdma_op_ctxt *ctxt)
++{
++ ctxt->mapped_sges++;
++ atomic_inc(&rdma->sc_dma_used);
++}
++
+ /* svc_rdma_backchannel.c */
+ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
+ struct rpcrdma_msg *rmsgp,
+diff --git a/lib/genalloc.c b/lib/genalloc.c
+index 0a11396..144fe6b 100644
+--- a/lib/genalloc.c
++++ b/lib/genalloc.c
+@@ -292,7 +292,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
+ struct gen_pool_chunk *chunk;
+ unsigned long addr = 0;
+ int order = pool->min_alloc_order;
+- int nbits, start_bit = 0, end_bit, remain;
++ int nbits, start_bit, end_bit, remain;
+
+ #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ BUG_ON(in_nmi());
+@@ -307,6 +307,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
+ if (size > atomic_read(&chunk->avail))
+ continue;
+
++ start_bit = 0;
+ end_bit = chunk_size(chunk) >> order;
+ retry:
+ start_bit = algo(chunk->bits, end_bit, start_bit,
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 770d83e..0ddce6a 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
+ * is not the case is if a reserve map was changed between calls. It
+ * is the responsibility of the caller to notice the difference and
+ * take appropriate action.
++ *
++ * vma_add_reservation is used in error paths where a reservation must
++ * be restored when a newly allocated huge page must be freed. It is
++ * to be called after calling vma_needs_reservation to determine if a
++ * reservation exists.
+ */
+ enum vma_resv_mode {
+ VMA_NEEDS_RESV,
+ VMA_COMMIT_RESV,
+ VMA_END_RESV,
++ VMA_ADD_RESV,
+ };
+ static long __vma_reservation_common(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr,
+@@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
+ region_abort(resv, idx, idx + 1);
+ ret = 0;
+ break;
++ case VMA_ADD_RESV:
++ if (vma->vm_flags & VM_MAYSHARE)
++ ret = region_add(resv, idx, idx + 1);
++ else {
++ region_abort(resv, idx, idx + 1);
++ ret = region_del(resv, idx, idx + 1);
++ }
++ break;
+ default:
+ BUG();
+ }
+@@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
+ (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
+ }
+
++static long vma_add_reservation(struct hstate *h,
++ struct vm_area_struct *vma, unsigned long addr)
++{
++ return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
++}
++
++/*
++ * This routine is called to restore a reservation on error paths. In the
++ * specific error paths, a huge page was allocated (via alloc_huge_page)
++ * and is about to be freed. If a reservation for the page existed,
++ * alloc_huge_page would have consumed the reservation and set PagePrivate
++ * in the newly allocated page. When the page is freed via free_huge_page,
++ * the global reservation count will be incremented if PagePrivate is set.
++ * However, free_huge_page can not adjust the reserve map. Adjust the
++ * reserve map here to be consistent with global reserve count adjustments
++ * to be made by free_huge_page.
++ */
++static void restore_reserve_on_error(struct hstate *h,
++ struct vm_area_struct *vma, unsigned long address,
++ struct page *page)
++{
++ if (unlikely(PagePrivate(page))) {
++ long rc = vma_needs_reservation(h, vma, address);
++
++ if (unlikely(rc < 0)) {
++ /*
++ * Rare out of memory condition in reserve map
++ * manipulation. Clear PagePrivate so that
++ * global reserve count will not be incremented
++ * by free_huge_page. This will make it appear
++ * as though the reservation for this page was
++ * consumed. This may prevent the task from
++ * faulting in the page at a later time. This
++ * is better than inconsistent global huge page
++ * accounting of reserve counts.
++ */
++ ClearPagePrivate(page);
++ } else if (rc) {
++ rc = vma_add_reservation(h, vma, address);
++ if (unlikely(rc < 0))
++ /*
++ * See above comment about rare out of
++ * memory condition.
++ */
++ ClearPagePrivate(page);
++ } else
++ vma_end_reservation(h, vma, address);
++ }
++}
++
+ struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr, int avoid_reserve)
+ {
+@@ -3498,6 +3562,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
+ spin_unlock(ptl);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ out_release_all:
++ restore_reserve_on_error(h, vma, address, new_page);
+ put_page(new_page);
+ out_release_old:
+ put_page(old_page);
+@@ -3680,6 +3745,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ spin_unlock(ptl);
+ backout_unlocked:
+ unlock_page(page);
++ restore_reserve_on_error(h, vma, address, page);
+ put_page(page);
+ goto out;
+ }
+diff --git a/mm/memory-failure.c b/mm/memory-failure.c
+index de88f33..19e796d 100644
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1112,10 +1112,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
+ }
+
+ if (!PageHuge(p) && PageTransHuge(hpage)) {
+- lock_page(hpage);
+- if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) {
+- unlock_page(hpage);
+- if (!PageAnon(hpage))
++ lock_page(p);
++ if (!PageAnon(p) || unlikely(split_huge_page(p))) {
++ unlock_page(p);
++ if (!PageAnon(p))
+ pr_err("Memory failure: %#lx: non anonymous thp\n",
+ pfn);
+ else
+@@ -1126,9 +1126,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
+ put_hwpoison_page(p);
+ return -EBUSY;
+ }
+- unlock_page(hpage);
+- get_hwpoison_page(p);
+- put_hwpoison_page(hpage);
++ unlock_page(p);
+ VM_BUG_ON_PAGE(!page_count(p), p);
+ hpage = compound_head(p);
+ }
+diff --git a/mm/shmem.c b/mm/shmem.c
+index 971fc83..38aa5e0 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1483,6 +1483,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+ copy_highpage(newpage, oldpage);
+ flush_dcache_page(newpage);
+
++ __SetPageLocked(newpage);
++ __SetPageSwapBacked(newpage);
+ SetPageUptodate(newpage);
+ set_page_private(newpage, swap_index);
+ SetPageSwapCache(newpage);
+diff --git a/mm/slab_common.c b/mm/slab_common.c
+index 71f0b28..329b038 100644
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -533,8 +533,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
+
+ s = create_cache(cache_name, root_cache->object_size,
+ root_cache->size, root_cache->align,
+- root_cache->flags, root_cache->ctor,
+- memcg, root_cache);
++ root_cache->flags & CACHE_CREATE_MASK,
++ root_cache->ctor, memcg, root_cache);
+ /*
+ * If we could not create a memcg cache, do not complain, because
+ * that's not critical at all as we can always proceed with the root
+diff --git a/mm/swapfile.c b/mm/swapfile.c
+index 2657acc..bf262e4 100644
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -2218,6 +2218,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
+ swab32s(&swap_header->info.version);
+ swab32s(&swap_header->info.last_page);
+ swab32s(&swap_header->info.nr_badpages);
++ if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
++ return 0;
+ for (i = 0; i < swap_header->info.nr_badpages; i++)
+ swab32s(&swap_header->info.badpages[i]);
+ }
+diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
+index 3940b5d..3e9667e 100644
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -537,7 +537,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
+ if (bat_priv->algo_ops->neigh.hardif_init)
+ bat_priv->algo_ops->neigh.hardif_init(hardif_neigh);
+
+- hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
++ hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list);
+
+ out:
+ spin_unlock_bh(&hard_iface->neigh_list_lock);
+diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
+index 7d54e94..dcbe67f 100644
+--- a/net/ceph/ceph_fs.c
++++ b/net/ceph/ceph_fs.c
+@@ -34,7 +34,8 @@ void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
+ fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
+ fl->object_size = le32_to_cpu(legacy->fl_object_size);
+ fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
+- if (fl->pool_id == 0)
++ if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
++ fl->stripe_count == 0 && fl->object_size == 0)
+ fl->pool_id = -1;
+ }
+ EXPORT_SYMBOL(ceph_file_layout_from_legacy);
+diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
+index aa5847a..1df2c8d 100644
+--- a/net/netfilter/nf_log.c
++++ b/net/netfilter/nf_log.c
+@@ -420,7 +420,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
+ char buf[NFLOGGER_NAME_LEN];
+ int r = 0;
+ int tindex = (unsigned long)table->extra1;
+- struct net *net = current->nsproxy->net_ns;
++ struct net *net = table->extra2;
+
+ if (write) {
+ struct ctl_table tmp = *table;
+@@ -474,7 +474,6 @@ static int netfilter_log_sysctl_init(struct net *net)
+ 3, "%d", i);
+ nf_log_sysctl_table[i].procname =
+ nf_log_sysctl_fnames[i];
+- nf_log_sysctl_table[i].data = NULL;
+ nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN;
+ nf_log_sysctl_table[i].mode = 0644;
+ nf_log_sysctl_table[i].proc_handler =
+@@ -484,6 +483,9 @@ static int netfilter_log_sysctl_init(struct net *net)
+ }
+ }
+
++ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
++ table[i].extra2 = net;
++
+ net->nf.nf_log_dir_header = register_net_sysctl(net,
+ "net/netfilter/nf_log",
+ table);
+diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
+index 892b5e1..2761377 100644
+--- a/net/sunrpc/xprtrdma/frwr_ops.c
++++ b/net/sunrpc/xprtrdma/frwr_ops.c
+@@ -44,18 +44,20 @@
+ * being done.
+ *
+ * When the underlying transport disconnects, MRs are left in one of
+- * three states:
++ * four states:
+ *
+ * INVALID: The MR was not in use before the QP entered ERROR state.
+- * (Or, the LOCAL_INV WR has not completed or flushed yet).
+- *
+- * STALE: The MR was being registered or unregistered when the QP
+- * entered ERROR state, and the pending WR was flushed.
+ *
+ * VALID: The MR was registered before the QP entered ERROR state.
+ *
+- * When frwr_op_map encounters STALE and VALID MRs, they are recovered
+- * with ib_dereg_mr and then are re-initialized. Beause MR recovery
++ * FLUSHED_FR: The MR was being registered when the QP entered ERROR
++ * state, and the pending WR was flushed.
++ *
++ * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
++ * state, and the pending WR was flushed.
++ *
++ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
++ * with ib_dereg_mr and then are re-initialized. Because MR recovery
+ * allocates fresh resources, it is deferred to a workqueue, and the
+ * recovered MRs are placed back on the rb_mws list when recovery is
+ * complete. frwr_op_map allocates another MR for the current RPC while
+@@ -175,12 +177,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+ static void
+ frwr_op_recover_mr(struct rpcrdma_mw *mw)
+ {
++ enum rpcrdma_frmr_state state = mw->frmr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ int rc;
+
+ rc = __frwr_reset_mr(ia, mw);
+- ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
++ if (state != FRMR_FLUSHED_LI)
++ ib_dma_unmap_sg(ia->ri_device,
++ mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ if (rc)
+ goto out_release;
+
+@@ -261,10 +266,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+ }
+
+ static void
+-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
+- const char *wr)
++__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
+ {
+- frmr->fr_state = FRMR_IS_STALE;
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
+ wr, ib_wc_status_msg(wc->status),
+@@ -287,7 +290,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
+ if (wc->status != IB_WC_SUCCESS) {
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- __frwr_sendcompletion_flush(wc, frmr, "fastreg");
++ frmr->fr_state = FRMR_FLUSHED_FR;
++ __frwr_sendcompletion_flush(wc, "fastreg");
+ }
+ }
+
+@@ -307,7 +311,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
+ if (wc->status != IB_WC_SUCCESS) {
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- __frwr_sendcompletion_flush(wc, frmr, "localinv");
++ frmr->fr_state = FRMR_FLUSHED_LI;
++ __frwr_sendcompletion_flush(wc, "localinv");
+ }
+ }
+
+@@ -327,9 +332,11 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ cqe = wc->wr_cqe;
+ frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+- if (wc->status != IB_WC_SUCCESS)
+- __frwr_sendcompletion_flush(wc, frmr, "localinv");
+- complete_all(&frmr->fr_linv_done);
++ if (wc->status != IB_WC_SUCCESS) {
++ frmr->fr_state = FRMR_FLUSHED_LI;
++ __frwr_sendcompletion_flush(wc, "localinv");
++ }
++ complete(&frmr->fr_linv_done);
+ }
+
+ /* Post a REG_MR Work Request to register a memory region
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+index a2a7519..cd0c558 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+@@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+ ret = -EIO;
+ goto out_unmap;
+ }
+- atomic_inc(&rdma->sc_dma_used);
++ svc_rdma_count_mappings(rdma, ctxt);
+
+ memset(&send_wr, 0, sizeof(send_wr));
+ ctxt->cqe.done = svc_rdma_wc_send;
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+index 2c25606..ad1df97 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+@@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+ ctxt->sge[pno].addr);
+ if (ret)
+ goto err;
+- atomic_inc(&xprt->sc_dma_used);
++ svc_rdma_count_mappings(xprt, ctxt);
+
+ ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
+ ctxt->sge[pno].length = len;
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+index 54d53330..3b95b19 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+@@ -280,7 +280,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ sge[sge_no].addr))
+ goto err;
+- atomic_inc(&xprt->sc_dma_used);
++ svc_rdma_count_mappings(xprt, ctxt);
+ sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
+ ctxt->count++;
+ sge_off = 0;
+@@ -489,7 +489,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
+ ctxt->sge[0].length, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+ goto err;
+- atomic_inc(&rdma->sc_dma_used);
++ svc_rdma_count_mappings(rdma, ctxt);
+
+ ctxt->direction = DMA_TO_DEVICE;
+
+@@ -505,7 +505,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+ ctxt->sge[sge_no].addr))
+ goto err;
+- atomic_inc(&rdma->sc_dma_used);
++ svc_rdma_count_mappings(rdma, ctxt);
+ ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
+ ctxt->sge[sge_no].length = sge_bytes;
+ }
+@@ -523,23 +523,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
+ ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
+ ctxt->count++;
+ rqstp->rq_respages[page_no] = NULL;
+- /*
+- * If there are more pages than SGE, terminate SGE
+- * list so that svc_rdma_unmap_dma doesn't attempt to
+- * unmap garbage.
+- */
+- if (page_no+1 >= sge_no)
+- ctxt->sge[page_no+1].length = 0;
+ }
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+- /* The loop above bumps sc_dma_used for each sge. The
+- * xdr_buf.tail gets a separate sge, but resides in the
+- * same page as xdr_buf.head. Don't count it twice.
+- */
+- if (sge_no > ctxt->count)
+- atomic_dec(&rdma->sc_dma_used);
+-
+ if (sge_no > rdma->sc_max_sge) {
+ pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+ goto err;
+@@ -635,7 +621,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
+ ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
+ inline_bytes);
+ if (ret < 0)
+- goto err1;
++ goto err0;
+
+ svc_rdma_put_req_map(rdma, vec);
+ dprintk("svcrdma: send_reply returns %d\n", ret);
+@@ -692,7 +678,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
+ svc_rdma_put_context(ctxt, 1);
+ return;
+ }
+- atomic_inc(&xprt->sc_dma_used);
++ svc_rdma_count_mappings(xprt, ctxt);
+
+ /* Prepare SEND WR */
+ memset(&err_wr, 0, sizeof(err_wr));
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+index dd94401..924271c 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+@@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+
+ out:
+ ctxt->count = 0;
++ ctxt->mapped_sges = 0;
+ ctxt->frmr = NULL;
+ return ctxt;
+
+@@ -221,22 +222,27 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+ {
+ struct svcxprt_rdma *xprt = ctxt->xprt;
+- int i;
+- for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
++ struct ib_device *device = xprt->sc_cm_id->device;
++ u32 lkey = xprt->sc_pd->local_dma_lkey;
++ unsigned int i, count;
++
++ for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
+ /*
+ * Unmap the DMA addr in the SGE if the lkey matches
+ * the local_dma_lkey, otherwise, ignore it since it is
+ * an FRMR lkey and will be unmapped later when the
+ * last WR that uses it completes.
+ */
+- if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
+- atomic_dec(&xprt->sc_dma_used);
+- ib_dma_unmap_page(xprt->sc_cm_id->device,
++ if (ctxt->sge[i].lkey == lkey) {
++ count++;
++ ib_dma_unmap_page(device,
+ ctxt->sge[i].addr,
+ ctxt->sge[i].length,
+ ctxt->direction);
+ }
+ }
++ ctxt->mapped_sges = 0;
++ atomic_sub(count, &xprt->sc_dma_used);
+ }
+
+ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
+@@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
+ goto err_put_ctxt;
+- atomic_inc(&xprt->sc_dma_used);
++ svc_rdma_count_mappings(xprt, ctxt);
+ ctxt->sge[sge_no].addr = pa;
+ ctxt->sge[sge_no].length = PAGE_SIZE;
+ ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
+diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
+index a71b0f5..edc0344 100644
+--- a/net/sunrpc/xprtrdma/xprt_rdma.h
++++ b/net/sunrpc/xprtrdma/xprt_rdma.h
+@@ -207,7 +207,8 @@ struct rpcrdma_rep {
+ enum rpcrdma_frmr_state {
+ FRMR_IS_INVALID, /* ready to be used */
+ FRMR_IS_VALID, /* in use */
+- FRMR_IS_STALE, /* failed completion */
++ FRMR_FLUSHED_FR, /* flushed FASTREG WR */
++ FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
+ };
+
+ struct rpcrdma_frmr {
+diff --git a/sound/core/info.c b/sound/core/info.c
+index 895362a..8ab72e0 100644
+--- a/sound/core/info.c
++++ b/sound/core/info.c
+@@ -325,10 +325,15 @@ static ssize_t snd_info_text_entry_write(struct file *file,
+ size_t next;
+ int err = 0;
+
++ if (!entry->c.text.write)
++ return -EIO;
+ pos = *offset;
+ if (!valid_pos(pos, count))
+ return -EIO;
+ next = pos + count;
++ /* don't handle too large text inputs */
++ if (next > 16 * 1024)
++ return -EIO;
+ mutex_lock(&entry->access);
+ buf = data->wbuffer;
+ if (!buf) {
+@@ -366,7 +371,9 @@ static int snd_info_seq_show(struct seq_file *seq, void *p)
+ struct snd_info_private_data *data = seq->private;
+ struct snd_info_entry *entry = data->entry;
+
+- if (entry->c.text.read) {
++ if (!entry->c.text.read) {
++ return -EIO;
++ } else {
+ data->rbuffer->buffer = (char *)seq; /* XXX hack! */
+ entry->c.text.read(entry, data->rbuffer);
+ }
+diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
+index e07807d..3670086b 100644
+--- a/sound/soc/codecs/cs4270.c
++++ b/sound/soc/codecs/cs4270.c
+@@ -148,11 +148,11 @@ SND_SOC_DAPM_OUTPUT("AOUTR"),
+ };
+
+ static const struct snd_soc_dapm_route cs4270_dapm_routes[] = {
+- { "Capture", NULL, "AINA" },
+- { "Capture", NULL, "AINB" },
++ { "Capture", NULL, "AINL" },
++ { "Capture", NULL, "AINR" },
+
+- { "AOUTA", NULL, "Playback" },
+- { "AOUTB", NULL, "Playback" },
++ { "AOUTL", NULL, "Playback" },
++ { "AOUTR", NULL, "Playback" },
+ };
+
+ /**
+diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
+index e3e7641..7b7a380 100644
+--- a/sound/soc/intel/skylake/skl.c
++++ b/sound/soc/intel/skylake/skl.c
+@@ -785,8 +785,7 @@ static void skl_remove(struct pci_dev *pci)
+
+ release_firmware(skl->tplg);
+
+- if (pci_dev_run_wake(pci))
+- pm_runtime_get_noresume(&pci->dev);
++ pm_runtime_get_noresume(&pci->dev);
+
+ /* codec removal, invoke bus_device_remove */
+ snd_hdac_ext_bus_device_remove(ebus);
+diff --git a/sound/soc/sunxi/sun4i-codec.c b/sound/soc/sunxi/sun4i-codec.c
+index 44f170c..03c18db 100644
+--- a/sound/soc/sunxi/sun4i-codec.c
++++ b/sound/soc/sunxi/sun4i-codec.c
+@@ -738,11 +738,11 @@ static struct snd_soc_card *sun4i_codec_create_card(struct device *dev)
+
+ card = devm_kzalloc(dev, sizeof(*card), GFP_KERNEL);
+ if (!card)
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+
+ card->dai_link = sun4i_codec_create_link(dev, &card->num_links);
+ if (!card->dai_link)
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+
+ card->dev = dev;
+ card->name = "sun4i-codec";
+@@ -842,7 +842,8 @@ static int sun4i_codec_probe(struct platform_device *pdev)
+ }
+
+ card = sun4i_codec_create_card(&pdev->dev);
+- if (!card) {
++ if (IS_ERR(card)) {
++ ret = PTR_ERR(card);
+ dev_err(&pdev->dev, "Failed to create our card\n");
+ goto err_unregister_codec;
+ }
+diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
+index 7aee954..4ad1eac 100644
+--- a/tools/perf/ui/browsers/hists.c
++++ b/tools/perf/ui/browsers/hists.c
+@@ -595,7 +595,8 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
+ u64 nr_entries;
+ hbt->timer(hbt->arg);
+
+- if (hist_browser__has_filter(browser))
++ if (hist_browser__has_filter(browser) ||
++ symbol_conf.report_hierarchy)
+ hist_browser__update_nr_entries(browser);
+
+ nr_entries = hist_browser__nr_entries(browser);
+diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
+index b4bf769..1eef0ae 100644
+--- a/tools/power/cpupower/utils/cpufreq-set.c
++++ b/tools/power/cpupower/utils/cpufreq-set.c
+@@ -296,7 +296,7 @@ int cmd_freq_set(int argc, char **argv)
+ struct cpufreq_affected_cpus *cpus;
+
+ if (!bitmask_isbitset(cpus_chosen, cpu) ||
+- cpupower_is_cpu_online(cpu))
++ cpupower_is_cpu_online(cpu) != 1)
+ continue;
+
+ cpus = cpufreq_get_related_cpus(cpu);
+@@ -316,10 +316,7 @@ int cmd_freq_set(int argc, char **argv)
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+ if (!bitmask_isbitset(cpus_chosen, cpu) ||
+- cpupower_is_cpu_online(cpu))
+- continue;
+-
+- if (cpupower_is_cpu_online(cpu) != 1)
++ cpupower_is_cpu_online(cpu) != 1)
+ continue;
+
+ printf(_("Setting cpu: %d\n"), cpu);
+diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
+index 3bad3c5..d1b080c 100644
+--- a/virt/kvm/arm/vgic/vgic-mmio.c
++++ b/virt/kvm/arm/vgic/vgic-mmio.c
+@@ -453,17 +453,33 @@ struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
+ return container_of(dev, struct vgic_io_device, dev);
+ }
+
+-static bool check_region(const struct vgic_register_region *region,
++static bool check_region(const struct kvm *kvm,
++ const struct vgic_register_region *region,
+ gpa_t addr, int len)
+ {
+- if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
+- return true;
+- if ((region->access_flags & VGIC_ACCESS_32bit) &&
+- len == sizeof(u32) && !(addr & 3))
+- return true;
+- if ((region->access_flags & VGIC_ACCESS_64bit) &&
+- len == sizeof(u64) && !(addr & 7))
+- return true;
++ int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
++
++ switch (len) {
++ case sizeof(u8):
++ flags = VGIC_ACCESS_8bit;
++ break;
++ case sizeof(u32):
++ flags = VGIC_ACCESS_32bit;
++ break;
++ case sizeof(u64):
++ flags = VGIC_ACCESS_64bit;
++ break;
++ default:
++ return false;
++ }
++
++ if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
++ if (!region->bits_per_irq)
++ return true;
++
++ /* Do we access a non-allocated IRQ? */
++ return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
++ }
+
+ return false;
+ }
+@@ -477,7 +493,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+
+ region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+ addr - iodev->base_addr);
+- if (!region || !check_region(region, addr, len)) {
++ if (!region || !check_region(vcpu->kvm, region, addr, len)) {
+ memset(val, 0, len);
+ return 0;
+ }
+@@ -510,10 +526,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+
+ region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+ addr - iodev->base_addr);
+- if (!region)
+- return 0;
+-
+- if (!check_region(region, addr, len))
++ if (!region || !check_region(vcpu->kvm, region, addr, len))
+ return 0;
+
+ switch (iodev->iodev_type) {
+diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
+index 0b3ecf9..ba63d91 100644
+--- a/virt/kvm/arm/vgic/vgic-mmio.h
++++ b/virt/kvm/arm/vgic/vgic-mmio.h
+@@ -50,15 +50,15 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
+ #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
+
+ /*
+- * (addr & mask) gives us the byte offset for the INT ID, so we want to
+- * divide this with 'bytes per irq' to get the INT ID, which is given
+- * by '(bits) / 8'. But we do this with fixed-point-arithmetic and
+- * take advantage of the fact that division by a fraction equals
+- * multiplication with the inverted fraction, and scale up both the
+- * numerator and denominator with 8 to support at most 64 bits per IRQ:
++ * (addr & mask) gives us the _byte_ offset for the INT ID.
++ * We multiply this by 8 to get the _bit_ offset, then divide this by
++ * the number of bits to learn the actual INT ID.
++ * But instead of a division (which requires a "long long div" implementation),
++ * we shift by the binary logarithm of <bits>.
++ * This assumes that <bits> is a power of two.
+ */
+ #define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
+- 64 / (bits) / 8)
++ 8 >> ilog2(bits))
+
+ /*
+ * Some VGIC registers store per-IRQ information, with a different number
diff --git a/4.8.10/1009_linux-4.8.10.patch b/4.8.10/1009_linux-4.8.10.patch
new file mode 100644
index 0000000..1e751e5
--- /dev/null
+++ b/4.8.10/1009_linux-4.8.10.patch
@@ -0,0 +1,4759 @@
+diff --git a/Makefile b/Makefile
+index c1519ab..7cf2b49 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 8
+-SUBLEVEL = 9
++SUBLEVEL = 10
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
+index 37a315d..a6847fc 100644
+--- a/arch/sparc/include/asm/uaccess_64.h
++++ b/arch/sparc/include/asm/uaccess_64.h
+@@ -98,7 +98,6 @@ struct exception_table_entry {
+ unsigned int insn, fixup;
+ };
+
+-void __ret_efault(void);
+ void __retl_efault(void);
+
+ /* Uh, these should become the main single-value transfer routines..
+@@ -205,55 +204,34 @@ int __get_user_bad(void);
+ unsigned long __must_check ___copy_from_user(void *to,
+ const void __user *from,
+ unsigned long size);
+-unsigned long copy_from_user_fixup(void *to, const void __user *from,
+- unsigned long size);
+ static inline unsigned long __must_check
+ copy_from_user(void *to, const void __user *from, unsigned long size)
+ {
+- unsigned long ret;
+-
+ check_object_size(to, size, false);
+
+- ret = ___copy_from_user(to, from, size);
+- if (unlikely(ret))
+- ret = copy_from_user_fixup(to, from, size);
+-
+- return ret;
++ return ___copy_from_user(to, from, size);
+ }
+ #define __copy_from_user copy_from_user
+
+ unsigned long __must_check ___copy_to_user(void __user *to,
+ const void *from,
+ unsigned long size);
+-unsigned long copy_to_user_fixup(void __user *to, const void *from,
+- unsigned long size);
+ static inline unsigned long __must_check
+ copy_to_user(void __user *to, const void *from, unsigned long size)
+ {
+- unsigned long ret;
+-
+ check_object_size(from, size, true);
+
+- ret = ___copy_to_user(to, from, size);
+- if (unlikely(ret))
+- ret = copy_to_user_fixup(to, from, size);
+- return ret;
++ return ___copy_to_user(to, from, size);
+ }
+ #define __copy_to_user copy_to_user
+
+ unsigned long __must_check ___copy_in_user(void __user *to,
+ const void __user *from,
+ unsigned long size);
+-unsigned long copy_in_user_fixup(void __user *to, void __user *from,
+- unsigned long size);
+ static inline unsigned long __must_check
+ copy_in_user(void __user *to, void __user *from, unsigned long size)
+ {
+- unsigned long ret = ___copy_in_user(to, from, size);
+-
+- if (unlikely(ret))
+- ret = copy_in_user_fixup(to, from, size);
+- return ret;
++ return ___copy_in_user(to, from, size);
+ }
+ #define __copy_in_user copy_in_user
+
+diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
+index a076b42..5f1f3ae 100644
+--- a/arch/sparc/kernel/head_64.S
++++ b/arch/sparc/kernel/head_64.S
+@@ -922,47 +922,11 @@ prom_tba: .xword 0
+ tlb_type: .word 0 /* Must NOT end up in BSS */
+ .section ".fixup",#alloc,#execinstr
+
+- .globl __ret_efault, __retl_efault, __ret_one, __retl_one
+-ENTRY(__ret_efault)
+- ret
+- restore %g0, -EFAULT, %o0
+-ENDPROC(__ret_efault)
+-
+ ENTRY(__retl_efault)
+ retl
+ mov -EFAULT, %o0
+ ENDPROC(__retl_efault)
+
+-ENTRY(__retl_one)
+- retl
+- mov 1, %o0
+-ENDPROC(__retl_one)
+-
+-ENTRY(__retl_one_fp)
+- VISExitHalf
+- retl
+- mov 1, %o0
+-ENDPROC(__retl_one_fp)
+-
+-ENTRY(__ret_one_asi)
+- wr %g0, ASI_AIUS, %asi
+- ret
+- restore %g0, 1, %o0
+-ENDPROC(__ret_one_asi)
+-
+-ENTRY(__retl_one_asi)
+- wr %g0, ASI_AIUS, %asi
+- retl
+- mov 1, %o0
+-ENDPROC(__retl_one_asi)
+-
+-ENTRY(__retl_one_asi_fp)
+- wr %g0, ASI_AIUS, %asi
+- VISExitHalf
+- retl
+- mov 1, %o0
+-ENDPROC(__retl_one_asi_fp)
+-
+ ENTRY(__retl_o1)
+ retl
+ mov %o1, %o0
+diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
+index 59bbeff..07933b9 100644
+--- a/arch/sparc/kernel/jump_label.c
++++ b/arch/sparc/kernel/jump_label.c
+@@ -13,19 +13,30 @@
+ void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+ {
+- u32 val;
+ u32 *insn = (u32 *) (unsigned long) entry->code;
++ u32 val;
+
+ if (type == JUMP_LABEL_JMP) {
+ s32 off = (s32)entry->target - (s32)entry->code;
++ bool use_v9_branch = false;
++
++ BUG_ON(off & 3);
+
+ #ifdef CONFIG_SPARC64
+- /* ba,pt %xcc, . + (off << 2) */
+- val = 0x10680000 | ((u32) off >> 2);
+-#else
+- /* ba . + (off << 2) */
+- val = 0x10800000 | ((u32) off >> 2);
++ if (off <= 0xfffff && off >= -0x100000)
++ use_v9_branch = true;
+ #endif
++ if (use_v9_branch) {
++ /* WDISP19 - target is . + immed << 2 */
++ /* ba,pt %xcc, . + off */
++ val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
++ } else {
++ /* WDISP22 - target is . + immed << 2 */
++ BUG_ON(off > 0x7fffff);
++ BUG_ON(off < -0x800000);
++ /* ba . + off */
++ val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
++ }
+ } else {
+ val = 0x01000000;
+ }
+diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
+index 9e034f2..20ffb05 100644
+--- a/arch/sparc/kernel/sparc_ksyms_64.c
++++ b/arch/sparc/kernel/sparc_ksyms_64.c
+@@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user);
+ EXPORT_SYMBOL_GPL(real_hard_smp_processor_id);
+
+ /* from head_64.S */
+-EXPORT_SYMBOL(__ret_efault);
+ EXPORT_SYMBOL(tlb_type);
+ EXPORT_SYMBOL(sun4v_chip_type);
+ EXPORT_SYMBOL(prom_root_node);
+diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
+index b7d0bd6..69a439f 100644
+--- a/arch/sparc/lib/GENcopy_from_user.S
++++ b/arch/sparc/lib/GENcopy_from_user.S
+@@ -3,11 +3,11 @@
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_LD(x) \
++#define EX_LD(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one; \
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
+index 780550e..9947427 100644
+--- a/arch/sparc/lib/GENcopy_to_user.S
++++ b/arch/sparc/lib/GENcopy_to_user.S
+@@ -3,11 +3,11 @@
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_ST(x) \
++#define EX_ST(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one; \
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
+index 89358ee..059ea24 100644
+--- a/arch/sparc/lib/GENmemcpy.S
++++ b/arch/sparc/lib/GENmemcpy.S
+@@ -4,21 +4,18 @@
+ */
+
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #define GLOBAL_SPARE %g7
+ #else
+ #define GLOBAL_SPARE %g5
+ #endif
+
+ #ifndef EX_LD
+-#define EX_LD(x) x
++#define EX_LD(x,y) x
+ #endif
+
+ #ifndef EX_ST
+-#define EX_ST(x) x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x) x
++#define EX_ST(x,y) x
+ #endif
+
+ #ifndef LOAD
+@@ -45,6 +42,29 @@
+ .register %g3,#scratch
+
+ .text
++
++#ifndef EX_RETVAL
++#define EX_RETVAL(x) x
++ENTRY(GEN_retl_o4_1)
++ add %o4, %o2, %o4
++ retl
++ add %o4, 1, %o0
++ENDPROC(GEN_retl_o4_1)
++ENTRY(GEN_retl_g1_8)
++ add %g1, %o2, %g1
++ retl
++ add %g1, 8, %o0
++ENDPROC(GEN_retl_g1_8)
++ENTRY(GEN_retl_o2_4)
++ retl
++ add %o2, 4, %o0
++ENDPROC(GEN_retl_o2_4)
++ENTRY(GEN_retl_o2_1)
++ retl
++ add %o2, 1, %o0
++ENDPROC(GEN_retl_o2_1)
++#endif
++
+ .align 64
+
+ .globl FUNC_NAME
+@@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ sub %g0, %o4, %o4
+ sub %o2, %o4, %o2
+ 1: subcc %o4, 1, %o4
+- EX_LD(LOAD(ldub, %o1, %g1))
+- EX_ST(STORE(stb, %g1, %o0))
++ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
++ EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+@@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ andn %o2, 0x7, %g1
+ sub %o2, %g1, %o2
+ 1: subcc %g1, 0x8, %g1
+- EX_LD(LOAD(ldx, %o1, %g2))
+- EX_ST(STORE(stx, %g2, %o0))
++ EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
++ EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
+ add %o1, 0x8, %o1
+ bne,pt %XCC, 1b
+ add %o0, 0x8, %o0
+@@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 1:
+ subcc %o2, 4, %o2
+- EX_LD(LOAD(lduw, %o1, %g1))
+- EX_ST(STORE(stw, %g1, %o1 + %o3))
++ EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
++ EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+@@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ .align 32
+ 90:
+ subcc %o2, 1, %o2
+- EX_LD(LOAD(ldub, %o1, %g1))
+- EX_ST(STORE(stb, %g1, %o1 + %o3))
++ EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
++ EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
+index 3269b02..4f2384a 100644
+--- a/arch/sparc/lib/Makefile
++++ b/arch/sparc/lib/Makefile
+@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
+ lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
+ lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
+
+-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
++lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
+ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
+
+ obj-$(CONFIG_SPARC64) += iomap.o
+diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
+index d5242b8..b79a699 100644
+--- a/arch/sparc/lib/NG2copy_from_user.S
++++ b/arch/sparc/lib/NG2copy_from_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_LD(x) \
++#define EX_LD(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_LD_FP(x) \
++#define EX_LD_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi_fp;\
++ .word 98b, y##_fp; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
+index 4e962d9..dcec55f 100644
+--- a/arch/sparc/lib/NG2copy_to_user.S
++++ b/arch/sparc/lib/NG2copy_to_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_ST(x) \
++#define EX_ST(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_ST_FP(x) \
++#define EX_ST_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi_fp;\
++ .word 98b, y##_fp; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
+index d5f585d..c629dbd 100644
+--- a/arch/sparc/lib/NG2memcpy.S
++++ b/arch/sparc/lib/NG2memcpy.S
+@@ -4,6 +4,7 @@
+ */
+
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE %g7
+@@ -32,21 +33,17 @@
+ #endif
+
+ #ifndef EX_LD
+-#define EX_LD(x) x
++#define EX_LD(x,y) x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x) x
++#define EX_LD_FP(x,y) x
+ #endif
+
+ #ifndef EX_ST
+-#define EX_ST(x) x
++#define EX_ST(x,y) x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x) x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x) x
++#define EX_ST_FP(x,y) x
+ #endif
+
+ #ifndef LOAD
+@@ -140,45 +137,110 @@
+ fsrc2 %x6, %f12; \
+ fsrc2 %x7, %f14;
+ #define FREG_LOAD_1(base, x0) \
+- EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
+ #define FREG_LOAD_2(base, x0, x1) \
+- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+- EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_3(base, x0, x1, x2) \
+- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+- EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_4(base, x0, x1, x2, x3) \
+- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+- EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
+- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+- EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
+- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+- EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
+ #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
+- EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+- EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+- EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+- EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+- EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+- EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
+- EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
++ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
++ EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x) x
++__restore_fp:
++ VISExitHalf
++__restore_asi:
++ retl
++ wr %g0, ASI_AIUS, %asi
++ENTRY(NG2_retl_o2)
++ ba,pt %xcc, __restore_asi
++ mov %o2, %o0
++ENDPROC(NG2_retl_o2)
++ENTRY(NG2_retl_o2_plus_1)
++ ba,pt %xcc, __restore_asi
++ add %o2, 1, %o0
++ENDPROC(NG2_retl_o2_plus_1)
++ENTRY(NG2_retl_o2_plus_4)
++ ba,pt %xcc, __restore_asi
++ add %o2, 4, %o0
++ENDPROC(NG2_retl_o2_plus_4)
++ENTRY(NG2_retl_o2_plus_8)
++ ba,pt %xcc, __restore_asi
++ add %o2, 8, %o0
++ENDPROC(NG2_retl_o2_plus_8)
++ENTRY(NG2_retl_o2_plus_o4_plus_1)
++ add %o4, 1, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_plus_o4_plus_1)
++ENTRY(NG2_retl_o2_plus_o4_plus_8)
++ add %o4, 8, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_plus_o4_plus_8)
++ENTRY(NG2_retl_o2_plus_o4_plus_16)
++ add %o4, 16, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_plus_o4_plus_16)
++ENTRY(NG2_retl_o2_plus_g1_fp)
++ ba,pt %xcc, __restore_fp
++ add %o2, %g1, %o0
++ENDPROC(NG2_retl_o2_plus_g1_fp)
++ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
++ add %g1, 64, %g1
++ ba,pt %xcc, __restore_fp
++ add %o2, %g1, %o0
++ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
++ENTRY(NG2_retl_o2_plus_g1_plus_1)
++ add %g1, 1, %g1
++ ba,pt %xcc, __restore_asi
++ add %o2, %g1, %o0
++ENDPROC(NG2_retl_o2_plus_g1_plus_1)
++ENTRY(NG2_retl_o2_and_7_plus_o4)
++ and %o2, 7, %o2
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_and_7_plus_o4)
++ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
++ and %o2, 7, %o2
++ add %o4, 8, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
++#endif
++
+ .align 64
+
+ .globl FUNC_NAME
+@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ sub %g0, %o4, %o4 ! bytes to align dst
+ sub %o2, %o4, %o2
+ 1: subcc %o4, 1, %o4
+- EX_LD(LOAD(ldub, %o1, %g1))
+- EX_ST(STORE(stb, %g1, %o0))
++ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
++ EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ nop
+ /* fall through for 0 < low bits < 8 */
+ 110: sub %o4, 64, %g2
+- EX_LD_FP(LOAD_BLK(%g2, %f0))
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++ EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 120: sub %o4, 56, %g2
+ FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 130: sub %o4, 48, %g2
+ FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 140: sub %o4, 40, %g2
+ FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_5(f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 150: sub %o4, 32, %g2
+ FREG_LOAD_4(%g2, f0, f2, f4, f6)
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_4(f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 160: sub %o4, 24, %g2
+ FREG_LOAD_3(%g2, f0, f2, f4)
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_3(f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 170: sub %o4, 16, %g2
+ FREG_LOAD_2(%g2, f0, f2)
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_2(f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 180: sub %o4, 8, %g2
+ FREG_LOAD_1(%g2, f0)
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+- EX_LD_FP(LOAD_BLK(%o4, %f16))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
++ EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+ FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ FREG_MOVE_1(f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ nop
+
+ 190:
+-1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
++1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+ subcc %g1, 64, %g1
+- EX_LD_FP(LOAD_BLK(%o4, %f0))
+- EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
++ EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
++ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ andn %o2, 0xf, %o4
+ and %o2, 0xf, %o2
+ 1: subcc %o4, 0x10, %o4
+- EX_LD(LOAD(ldx, %o1, %o5))
++ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
+ add %o1, 0x08, %o1
+- EX_LD(LOAD(ldx, %o1, %g1))
++ EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
+ sub %o1, 0x08, %o1
+- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
++ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
+ add %o1, 0x8, %o1
+- EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
++ EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
+ bgu,pt %XCC, 1b
+ add %o1, 0x8, %o1
+ 73: andcc %o2, 0x8, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x8, %o2
+- EX_LD(LOAD(ldx, %o1, %o5))
+- EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
++ EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
++ EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
+ add %o1, 0x8, %o1
+ 1: andcc %o2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x4, %o2
+- EX_LD(LOAD(lduw, %o1, %o5))
+- EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
++ EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
++ EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
+ add %o1, 0x4, %o1
+ 1: cmp %o2, 0
+ be,pt %XCC, 85f
+@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ sub %o2, %g1, %o2
+
+ 1: subcc %g1, 1, %g1
+- EX_LD(LOAD(ldub, %o1, %o5))
+- EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
++ EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
++ EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
+ bgu,pt %icc, 1b
+ add %o1, 1, %o1
+
+@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 8: mov 64, GLOBAL_SPARE
+ andn %o1, 0x7, %o1
+- EX_LD(LOAD(ldx, %o1, %g2))
++ EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
+ sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
+ andn %o2, 0x7, %o4
+ sllx %g2, %g1, %g2
+ 1: add %o1, 0x8, %o1
+- EX_LD(LOAD(ldx, %o1, %g3))
++ EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
+ subcc %o4, 0x8, %o4
+ srlx %g3, GLOBAL_SPARE, %o5
+ or %o5, %g2, %o5
+- EX_ST(STORE(stx, %o5, %o0))
++ EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
+ add %o0, 0x8, %o0
+ bgu,pt %icc, 1b
+ sllx %g3, %g1, %g2
+@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 1:
+ subcc %o2, 4, %o2
+- EX_LD(LOAD(lduw, %o1, %g1))
+- EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
++ EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
++ EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ .align 32
+ 90:
+ subcc %o2, 1, %o2
+- EX_LD(LOAD(ldub, %o1, %g1))
+- EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
++ EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
++ EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
+index 2e8ee7a..16a286c 100644
+--- a/arch/sparc/lib/NG4copy_from_user.S
++++ b/arch/sparc/lib/NG4copy_from_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_LD(x) \
++#define EX_LD(x, y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_LD_FP(x) \
++#define EX_LD_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi_fp;\
++ .word 98b, y##_fp; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
+index be0bf45..6b0276f 100644
+--- a/arch/sparc/lib/NG4copy_to_user.S
++++ b/arch/sparc/lib/NG4copy_to_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_ST(x) \
++#define EX_ST(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_ST_FP(x) \
++#define EX_ST_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_asi_fp;\
++ .word 98b, y##_fp; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
+index 8e13ee1..75bb93b 100644
+--- a/arch/sparc/lib/NG4memcpy.S
++++ b/arch/sparc/lib/NG4memcpy.S
+@@ -4,6 +4,7 @@
+ */
+
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE %g7
+@@ -46,22 +47,19 @@
+ #endif
+
+ #ifndef EX_LD
+-#define EX_LD(x) x
++#define EX_LD(x,y) x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x) x
++#define EX_LD_FP(x,y) x
+ #endif
+
+ #ifndef EX_ST
+-#define EX_ST(x) x
++#define EX_ST(x,y) x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x) x
++#define EX_ST_FP(x,y) x
+ #endif
+
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x) x
+-#endif
+
+ #ifndef LOAD
+ #define LOAD(type,addr,dest) type [addr], dest
+@@ -94,6 +92,158 @@
+ .register %g3,#scratch
+
+ .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x) x
++__restore_asi_fp: /* tail used by the *_fp fixup handlers; falls through into __restore_asi */
++ VISExitHalf /* NOTE(review): macro from <asm/visasm.h>; presumably leaves VIS/FPU state - confirm */
++__restore_asi: /* common tail of every NG4 fixup handler: leaf return with %asi reset */
++ retl
++ wr %g0, ASI_AIUS, %asi /* delay slot of retl: %asi = ASI_AIUS */
++
++ENTRY(NG4_retl_o2)
++ ba,pt %xcc, __restore_asi
++ mov %o2, %o0
++ENDPROC(NG4_retl_o2)
++ENTRY(NG4_retl_o2_plus_1)
++ ba,pt %xcc, __restore_asi
++ add %o2, 1, %o0
++ENDPROC(NG4_retl_o2_plus_1)
++ENTRY(NG4_retl_o2_plus_4)
++ ba,pt %xcc, __restore_asi
++ add %o2, 4, %o0
++ENDPROC(NG4_retl_o2_plus_4)
++ENTRY(NG4_retl_o2_plus_o5)
++ ba,pt %xcc, __restore_asi
++ add %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5)
++ENTRY(NG4_retl_o2_plus_o5_plus_4)
++ add %o5, 4, %o5
++ ba,pt %xcc, __restore_asi
++ add %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_4)
++ENTRY(NG4_retl_o2_plus_o5_plus_8)
++ add %o5, 8, %o5
++ ba,pt %xcc, __restore_asi
++ add %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_8)
++ENTRY(NG4_retl_o2_plus_o5_plus_16)
++ add %o5, 16, %o5
++ ba,pt %xcc, __restore_asi
++ add %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_16)
++ENTRY(NG4_retl_o2_plus_o5_plus_24)
++ add %o5, 24, %o5
++ ba,pt %xcc, __restore_asi
++ add %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_24)
++ENTRY(NG4_retl_o2_plus_o5_plus_32)
++ add %o5, 32, %o5
++ ba,pt %xcc, __restore_asi
++ add %o2, %o5, %o0
++ENDPROC(NG4_retl_o2_plus_o5_plus_32)
++ENTRY(NG4_retl_o2_plus_g1)
++ ba,pt %xcc, __restore_asi
++ add %o2, %g1, %o0
++ENDPROC(NG4_retl_o2_plus_g1)
++ENTRY(NG4_retl_o2_plus_g1_plus_1)
++ add %g1, 1, %g1
++ ba,pt %xcc, __restore_asi
++ add %o2, %g1, %o0
++ENDPROC(NG4_retl_o2_plus_g1_plus_1)
++ENTRY(NG4_retl_o2_plus_g1_plus_8)
++ add %g1, 8, %g1
++ ba,pt %xcc, __restore_asi
++ add %o2, %g1, %o0
++ENDPROC(NG4_retl_o2_plus_g1_plus_8)
++ENTRY(NG4_retl_o2_plus_o4)
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4)
++ENTRY(NG4_retl_o2_plus_o4_plus_8)
++ add %o4, 8, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_8)
++ENTRY(NG4_retl_o2_plus_o4_plus_16)
++ add %o4, 16, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_16)
++ENTRY(NG4_retl_o2_plus_o4_plus_24)
++ add %o4, 24, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_24)
++ENTRY(NG4_retl_o2_plus_o4_plus_32)
++ add %o4, 32, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_32)
++ENTRY(NG4_retl_o2_plus_o4_plus_40)
++ add %o4, 40, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_40)
++ENTRY(NG4_retl_o2_plus_o4_plus_48)
++ add %o4, 48, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_48)
++ENTRY(NG4_retl_o2_plus_o4_plus_56)
++ add %o4, 56, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_56)
++ENTRY(NG4_retl_o2_plus_o4_plus_64)
++ add %o4, 64, %o4
++ ba,pt %xcc, __restore_asi
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_64)
++ENTRY(NG4_retl_o2_plus_o4_fp)
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
++ add %o4, 8, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
++ add %o4, 16, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
++ add %o4, 24, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
++ add %o4, 32, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
++ add %o4, 40, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
++ add %o4, 48, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
++ add %o4, 56, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
++ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
++ add %o4, 64, %o4
++ ba,pt %xcc, __restore_asi_fp
++ add %o2, %o4, %o0
++ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
++#endif
+ .align 64
+
+ .globl FUNC_NAME
+@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ brz,pt %g1, 51f
+ sub %o2, %g1, %o2
+
+-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
++
++1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
+ add %o1, 1, %o1
+ subcc %g1, 1, %g1
+ add %o0, 1, %o0
+ bne,pt %icc, 1b
+- EX_ST(STORE(stb, %g2, %o0 - 0x01))
++ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
+
+ 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
+ LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
+@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ brz,pt %g1, .Llarge_aligned
+ sub %o2, %g1, %o2
+
+-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
+ add %o1, 8, %o1
+ subcc %g1, 8, %g1
+ add %o0, 8, %o0
+ bne,pt %icc, 1b
+- EX_ST(STORE(stx, %g2, %o0 - 0x08))
++ EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
+
+ .Llarge_aligned:
+ /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
+ andn %o2, 0x3f, %o4
+ sub %o2, %o4, %o2
+
+-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
+ add %o1, 0x40, %o1
+- EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
++ EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
+ subcc %o4, 0x40, %o4
+- EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
+- EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
+- EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
+- EX_ST(STORE_INIT(%g1, %o0))
++ EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
++ EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
++ EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
++ EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
+ add %o0, 0x08, %o0
+- EX_ST(STORE_INIT(%g2, %o0))
++ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
+ add %o0, 0x08, %o0
+- EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
+- EX_ST(STORE_INIT(%g3, %o0))
++ EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
++ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
+ add %o0, 0x08, %o0
+- EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
+- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
++ EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
++ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
+ add %o0, 0x08, %o0
+- EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
+- EX_ST(STORE_INIT(%o5, %o0))
++ EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
++ EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
+ add %o0, 0x08, %o0
+- EX_ST(STORE_INIT(%g2, %o0))
++ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
+ add %o0, 0x08, %o0
+- EX_ST(STORE_INIT(%g3, %o0))
++ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
+ add %o0, 0x08, %o0
+- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
++ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
+ add %o0, 0x08, %o0
+ bne,pt %icc, 1b
+ LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
+@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ sub %o2, %o4, %o2
+ alignaddr %o1, %g0, %g1
+ add %o1, %o4, %o1
+- EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
+-1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
++ EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
++1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
+ subcc %o4, 0x40, %o4
+- EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
+- EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
+- EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
+- EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
+- EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
+- EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
++ EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
++ EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
++ EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
++ EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
++ EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
++ EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
+ faligndata %f0, %f2, %f16
+- EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
++ EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
+ faligndata %f2, %f4, %f18
+ add %g1, 0x40, %g1
+ faligndata %f4, %f6, %f20
+@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ faligndata %f10, %f12, %f26
+ faligndata %f12, %f14, %f28
+ faligndata %f14, %f0, %f30
+- EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
+- EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
+- EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
+- EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
+- EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
+- EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
+- EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
+- EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
++ EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
++ EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
++ EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
++ EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
++ EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
++ EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
++ EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
++ EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
+ add %o0, 0x40, %o0
+ bne,pt %icc, 1b
+ LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
+@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ andncc %o2, 0x20 - 1, %o5
+ be,pn %icc, 2f
+ sub %o2, %o5, %o2
+-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+- EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
+- EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
+- EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) /* loads run before %o5 is decremented: nothing of this chunk is copied yet */
++ EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
++ EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
++ EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
+ add %o1, 0x20, %o1
+ subcc %o5, 0x20, %o5
+- EX_ST(STORE(stx, %g1, %o0 + 0x00))
+- EX_ST(STORE(stx, %g2, %o0 + 0x08))
+- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
+- EX_ST(STORE(stx, %o4, %o0 + 0x18))
++ EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) /* %o5 already reduced by 0x20: add back the unstored part of this chunk */
++ EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
++ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_16) /* 16 bytes stored, 16 left (was _plus_24: over-reported by 8) */
++ EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
+ bne,pt %icc, 1b
+ add %o0, 0x20, %o0
+ 2: andcc %o2, 0x18, %o5
+ be,pt %icc, 3f
+ sub %o2, %o5, %o2
+-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
++
++1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+ add %o1, 0x08, %o1
+ add %o0, 0x08, %o0
+ subcc %o5, 0x08, %o5
+ bne,pt %icc, 1b
+- EX_ST(STORE(stx, %g1, %o0 - 0x08))
++ EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
+ 3: brz,pt %o2, .Lexit
+ cmp %o2, 0x04
+ bl,pn %icc, .Ltiny
+ nop
+- EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
++ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
+ add %o1, 0x04, %o1
+ add %o0, 0x04, %o0
+ subcc %o2, 0x04, %o2
+ bne,pn %icc, .Ltiny
+- EX_ST(STORE(stw, %g1, %o0 - 0x04))
++ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
+ ba,a,pt %icc, .Lexit
+ .Lmedium_unaligned:
+ /* First get dest 8 byte aligned. */
+@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ brz,pt %g1, 2f
+ sub %o2, %g1, %o2
+
+-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
++1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
+ add %o1, 1, %o1
+ subcc %g1, 1, %g1
+ add %o0, 1, %o0
+ bne,pt %icc, 1b
+- EX_ST(STORE(stb, %g2, %o0 - 0x01))
++ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
+ 2:
+ and %o1, 0x7, %g1
+ brz,pn %g1, .Lmedium_noprefetch
+@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ mov 64, %g2
+ sub %g2, %g1, %g2
+ andn %o1, 0x7, %o1
+- EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
++ EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
+ sllx %o4, %g1, %o4
+ andn %o2, 0x08 - 1, %o5
+ sub %o2, %o5, %o2
+-1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
++1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
+ add %o1, 0x08, %o1
+ subcc %o5, 0x08, %o5
+ srlx %g3, %g2, GLOBAL_SPARE
+ or GLOBAL_SPARE, %o4, GLOBAL_SPARE
+- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
++ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
+ add %o0, 0x08, %o0
+ bne,pt %icc, 1b
+ sllx %g3, %g1, %o4
+@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ ba,pt %icc, .Lsmall_unaligned
+
+ .Ltiny:
+- EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
++ EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) /* load precedes the subcc: %o2 still counts this byte */
+ subcc %o2, 1, %o2
+ be,pn %icc, .Lexit
+- EX_ST(STORE(stb, %g1, %o0 + 0x00))
+- EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
++ EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) /* delay slot: runs after the subcc, so add the byte back */
++ EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
+ subcc %o2, 1, %o2
+ be,pn %icc, .Lexit
+- EX_ST(STORE(stb, %g1, %o0 + 0x01))
+- EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
++ EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) /* delay slot: runs after the subcc, so add the byte back */
++ EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
+ ba,pt %icc, .Lexit
+- EX_ST(STORE(stb, %g1, %o0 + 0x02))
++ EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) /* no subcc precedes the third byte, so %o2 is already the uncopied count */
+
+ .Lsmall:
+ andcc %g2, 0x3, %g0
+@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ andn %o2, 0x4 - 1, %o5
+ sub %o2, %o5, %o2
+ 1:
+- EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
++ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+ add %o1, 0x04, %o1
+ subcc %o5, 0x04, %o5
+ add %o0, 0x04, %o0
+ bne,pt %icc, 1b
+- EX_ST(STORE(stw, %g1, %o0 - 0x04))
++ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
+ brz,pt %o2, .Lexit
+ nop
+ ba,a,pt %icc, .Ltiny
+
+ .Lsmall_unaligned:
+-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
++1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
+ add %o1, 1, %o1
+ add %o0, 1, %o0
+ subcc %o2, 1, %o2
+ bne,pt %icc, 1b
+- EX_ST(STORE(stb, %g1, %o0 - 0x01))
++ EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
+ ba,a,pt %icc, .Lexit
+ .size FUNC_NAME, .-FUNC_NAME
+diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
+index 5d1e4d1..9cd42fc 100644
+--- a/arch/sparc/lib/NGcopy_from_user.S
++++ b/arch/sparc/lib/NGcopy_from_user.S
+@@ -3,11 +3,11 @@
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_LD(x) \
++#define EX_LD(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __ret_one_asi;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
+index ff630dc..5c358af 100644
+--- a/arch/sparc/lib/NGcopy_to_user.S
++++ b/arch/sparc/lib/NGcopy_to_user.S
+@@ -3,11 +3,11 @@
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+-#define EX_ST(x) \
++#define EX_ST(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __ret_one_asi;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
+index 96a14ca..d88c4ed 100644
+--- a/arch/sparc/lib/NGmemcpy.S
++++ b/arch/sparc/lib/NGmemcpy.S
+@@ -4,6 +4,7 @@
+ */
+
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/asi.h>
+ #include <asm/thread_info.h>
+ #define GLOBAL_SPARE %g7
+@@ -27,15 +28,11 @@
+ #endif
+
+ #ifndef EX_LD
+-#define EX_LD(x) x
++#define EX_LD(x,y) x
+ #endif
+
+ #ifndef EX_ST
+-#define EX_ST(x) x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x) x
++#define EX_ST(x,y) x
+ #endif
+
+ #ifndef LOAD
+@@ -79,6 +76,92 @@
+ .register %g3,#scratch
+
+ .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x) x
++__restore_asi: /* common tail of every NG fixup handler: reset %asi, then return through the register window */
++ wr %g0, ASI_AIUS, %asi /* must come before ret: the single delay slot is needed for restore */
++ ret
++ restore /* delay slot of ret: pops the window so the handler's %i0 becomes the caller's %o0 (was placed after the slot and never executed) */
++ENTRY(NG_ret_i2_plus_i4_plus_1) /* fixup result: %i2 + %i4 + 1 bytes not copied */
++ ba,pt %xcc, NG_ret_i2_plus_i4 /* chain to the handler that returns %i2 + %i4 */
++ add %i4, 1, %i4 /* delay slot: add back the byte the loop's early subcc removed (was: add %i2, %i5, %i0 with %i5 unset) */
++ENDPROC(NG_ret_i2_plus_i4_plus_1)
++ENTRY(NG_ret_i2_plus_g1)
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1)
++ENTRY(NG_ret_i2_plus_g1_minus_8)
++ sub %g1, 8, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_8)
++ENTRY(NG_ret_i2_plus_g1_minus_16)
++ sub %g1, 16, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_16)
++ENTRY(NG_ret_i2_plus_g1_minus_24)
++ sub %g1, 24, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_24)
++ENTRY(NG_ret_i2_plus_g1_minus_32)
++ sub %g1, 32, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_32)
++ENTRY(NG_ret_i2_plus_g1_minus_40)
++ sub %g1, 40, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_40)
++ENTRY(NG_ret_i2_plus_g1_minus_48)
++ sub %g1, 48, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_48)
++ENTRY(NG_ret_i2_plus_g1_minus_56)
++ sub %g1, 56, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_minus_56)
++ENTRY(NG_ret_i2_plus_i4)
++ ba,pt %xcc, __restore_asi
++ add %i2, %i4, %i0
++ENDPROC(NG_ret_i2_plus_i4)
++ENTRY(NG_ret_i2_plus_i4_minus_8)
++ sub %i4, 8, %i4
++ ba,pt %xcc, __restore_asi
++ add %i2, %i4, %i0
++ENDPROC(NG_ret_i2_plus_i4_minus_8)
++ENTRY(NG_ret_i2_plus_8)
++ ba,pt %xcc, __restore_asi
++ add %i2, 8, %i0
++ENDPROC(NG_ret_i2_plus_8)
++ENTRY(NG_ret_i2_plus_4)
++ ba,pt %xcc, __restore_asi
++ add %i2, 4, %i0
++ENDPROC(NG_ret_i2_plus_4)
++ENTRY(NG_ret_i2_plus_1)
++ ba,pt %xcc, __restore_asi
++ add %i2, 1, %i0
++ENDPROC(NG_ret_i2_plus_1)
++ENTRY(NG_ret_i2_plus_g1_plus_1)
++ add %g1, 1, %g1
++ ba,pt %xcc, __restore_asi
++ add %i2, %g1, %i0
++ENDPROC(NG_ret_i2_plus_g1_plus_1)
++ENTRY(NG_ret_i2)
++ ba,pt %xcc, __restore_asi
++ mov %i2, %i0
++ENDPROC(NG_ret_i2)
++ENTRY(NG_ret_i2_and_7_plus_i4)
++ and %i2, 7, %i2
++ ba,pt %xcc, __restore_asi
++ add %i2, %i4, %i0
++ENDPROC(NG_ret_i2_and_7_plus_i4)
++#endif
++
+ .align 64
+
+ .globl FUNC_NAME
+@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ sub %g0, %i4, %i4 ! bytes to align dst
+ sub %i2, %i4, %i2
+ 1: subcc %i4, 1, %i4
+- EX_LD(LOAD(ldub, %i1, %g1))
+- EX_ST(STORE(stb, %g1, %o0))
++ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
++ EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
+ add %i1, 1, %i1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ and %i4, 0x7, GLOBAL_SPARE
+ sll GLOBAL_SPARE, 3, GLOBAL_SPARE
+ mov 64, %i5
+- EX_LD(LOAD_TWIN(%i1, %g2, %g3))
++ EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
+ sub %i5, GLOBAL_SPARE, %i5
+ mov 16, %o4
+ mov 32, %o5
+@@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ srlx WORD3, PRE_SHIFT, TMP; \
+ or WORD2, TMP, WORD2;
+
+-8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
++8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
+ MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+ LOAD(prefetch, %i1 + %i3, #one_read)
+
+- EX_ST(STORE_INIT(%g2, %o0 + 0x00))
+- EX_ST(STORE_INIT(%g3, %o0 + 0x08))
++ EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
++ EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+
+- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
++ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
+ MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+
+- EX_ST(STORE_INIT(%o2, %o0 + 0x10))
+- EX_ST(STORE_INIT(%o3, %o0 + 0x18))
++ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+
+- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+ MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+
+- EX_ST(STORE_INIT(%g2, %o0 + 0x20))
+- EX_ST(STORE_INIT(%g3, %o0 + 0x28))
++ EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++ EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+
+- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
++ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
+ add %i1, 64, %i1
+ MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+
+- EX_ST(STORE_INIT(%o2, %o0 + 0x30))
+- EX_ST(STORE_INIT(%o3, %o0 + 0x38))
++ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 8b
+@@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ ba,pt %XCC, 60f
+ add %i1, %i4, %i1
+
+-9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
++9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
+ MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+ LOAD(prefetch, %i1 + %i3, #one_read)
+
+- EX_ST(STORE_INIT(%g3, %o0 + 0x00))
+- EX_ST(STORE_INIT(%o2, %o0 + 0x08))
++ EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
++ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+
+- EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
++ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
+ MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+
+- EX_ST(STORE_INIT(%o3, %o0 + 0x10))
+- EX_ST(STORE_INIT(%g2, %o0 + 0x18))
++ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++ EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+
+- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+ MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+
+- EX_ST(STORE_INIT(%g3, %o0 + 0x20))
+- EX_ST(STORE_INIT(%o2, %o0 + 0x28))
++ EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+
+- EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
++ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
+ add %i1, 64, %i1
+ MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+
+- EX_ST(STORE_INIT(%o3, %o0 + 0x30))
+- EX_ST(STORE_INIT(%g2, %o0 + 0x38))
++ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++ EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 9b
+@@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ * one twin load ahead, then add 8 back into source when
+ * we finish the loop.
+ */
+- EX_LD(LOAD_TWIN(%i1, %o4, %o5))
++ EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
+ mov 16, %o7
+ mov 32, %g2
+ mov 48, %g3
+ mov 64, %o1
+-1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
+ LOAD(prefetch, %i1 + %o1, #one_read)
+- EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
+- EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
+- EX_ST(STORE_INIT(%o3, %o0 + 0x10))
+- EX_ST(STORE_INIT(%o4, %o0 + 0x18))
+- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
+- EX_ST(STORE_INIT(%o5, %o0 + 0x20))
+- EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+- EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
++ EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
++ EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
++ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
++ EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++ EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
++ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
++ EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++ EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
++ EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
+ add %i1, 64, %i1
+- EX_ST(STORE_INIT(%o3, %o0 + 0x30))
+- EX_ST(STORE_INIT(%o4, %o0 + 0x38))
++ EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++ EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+@@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ mov 32, %g2
+ mov 48, %g3
+ mov 64, %o1
+-1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
+- EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
++1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
++ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
+ LOAD(prefetch, %i1 + %o1, #one_read)
+- EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
+- EX_ST(STORE_INIT(%o5, %o0 + 0x08))
+- EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
+- EX_ST(STORE_INIT(%o2, %o0 + 0x10))
+- EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+- EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
++ EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
++ EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
++ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
++ EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
++ EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
++ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+ add %i1, 64, %i1
+- EX_ST(STORE_INIT(%o4, %o0 + 0x20))
+- EX_ST(STORE_INIT(%o5, %o0 + 0x28))
+- EX_ST(STORE_INIT(%o2, %o0 + 0x30))
+- EX_ST(STORE_INIT(%o3, %o0 + 0x38))
++ EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
++ EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
++ EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
++ EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+@@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ andn %i2, 0xf, %i4
+ and %i2, 0xf, %i2
+-1: subcc %i4, 0x10, %i4
+- EX_LD(LOAD(ldx, %i1, %o4))
++1: EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) /* %i4 still counts this 16-byte pair */
+ add %i1, 0x08, %i1
+- EX_LD(LOAD(ldx, %i1, %g1))
++ EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
+ sub %i1, 0x08, %i1
+- EX_ST(STORE(stx, %o4, %i1 + %i3))
++ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
+ add %i1, 0x8, %i1
+- EX_ST(STORE(stx, %g1, %i1 + %i3))
++ EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) /* first 8 bytes of the pair already stored */
++ subcc %i4, 0x10, %i4 /* moved below the accesses: the fixups above need the undecremented %i4; only bgu consumes the flags */
+ bgu,pt %XCC, 1b
+ add %i1, 0x8, %i1
+ 73: andcc %i2, 0x8, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %i2, 0x8, %i2
+- EX_LD(LOAD(ldx, %i1, %o4))
+- EX_ST(STORE(stx, %o4, %i1 + %i3))
++ EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
++ EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
+ add %i1, 0x8, %i1
+ 1: andcc %i2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %i2, 0x4, %i2
+- EX_LD(LOAD(lduw, %i1, %i5))
+- EX_ST(STORE(stw, %i5, %i1 + %i3))
++ EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
++ EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
+ add %i1, 0x4, %i1
+ 1: cmp %i2, 0
+ be,pt %XCC, 85f
+@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ sub %i2, %g1, %i2
+
+ 1: subcc %g1, 1, %g1
+- EX_LD(LOAD(ldub, %i1, %i5))
+- EX_ST(STORE(stb, %i5, %i1 + %i3))
++ EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
++ EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
+ bgu,pt %icc, 1b
+ add %i1, 1, %i1
+
+@@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+
+ 8: mov 64, %i3
+ andn %i1, 0x7, %i1
+- EX_LD(LOAD(ldx, %i1, %g2))
++ EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
+ sub %i3, %g1, %i3
+ andn %i2, 0x7, %i4
+ sllx %g2, %g1, %g2
+ 1: add %i1, 0x8, %i1
+- EX_LD(LOAD(ldx, %i1, %g3))
++ EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
+ subcc %i4, 0x8, %i4
+ srlx %g3, %i3, %i5
+ or %i5, %g2, %i5
+- EX_ST(STORE(stx, %i5, %o0))
++ EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
+ add %o0, 0x8, %o0
+ bgu,pt %icc, 1b
+ sllx %g3, %g1, %g2
+@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+
+ 1:
+ subcc %i2, 4, %i2
+- EX_LD(LOAD(lduw, %i1, %g1))
+- EX_ST(STORE(stw, %g1, %i1 + %i3))
++ EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
++ EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
+ bgu,pt %XCC, 1b
+ add %i1, 4, %i1
+
+@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ .align 32
+ 90:
+ subcc %i2, 1, %i2
+- EX_LD(LOAD(ldub, %i1, %g1))
+- EX_ST(STORE(stb, %g1, %i1 + %i3))
++ EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
++ EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
+ bgu,pt %XCC, 90b
+ add %i1, 1, %i1
+ ret
+diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
+index ecc5692..bb6ff73 100644
+--- a/arch/sparc/lib/U1copy_from_user.S
++++ b/arch/sparc/lib/U1copy_from_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+-#define EX_LD(x) \
++#define EX_LD(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one; \
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_LD_FP(x) \
++#define EX_LD_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_fp;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
+index 9eea392..ed92ce73 100644
+--- a/arch/sparc/lib/U1copy_to_user.S
++++ b/arch/sparc/lib/U1copy_to_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+-#define EX_ST(x) \
++#define EX_ST(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one; \
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_ST_FP(x) \
++#define EX_ST_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_fp;\
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
+index 3e6209e..f30d2ab 100644
+--- a/arch/sparc/lib/U1memcpy.S
++++ b/arch/sparc/lib/U1memcpy.S
+@@ -5,6 +5,7 @@
+ */
+
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE g7
+@@ -23,21 +24,17 @@
+ #endif
+
+ #ifndef EX_LD
+-#define EX_LD(x) x
++#define EX_LD(x,y) x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x) x
++#define EX_LD_FP(x,y) x
+ #endif
+
+ #ifndef EX_ST
+-#define EX_ST(x) x
++#define EX_ST(x,y) x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x) x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x) x
++#define EX_ST_FP(x,y) x
+ #endif
+
+ #ifndef LOAD
+@@ -78,53 +75,169 @@
+ faligndata %f7, %f8, %f60; \
+ faligndata %f8, %f9, %f62;
+
+-#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
+- EX_LD_FP(LOAD_BLK(%src, %fdest)); \
+- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
+- add %src, 0x40, %src; \
+- subcc %len, 0x40, %len; \
+- be,pn %xcc, jmptgt; \
+- add %dest, 0x40, %dest; \
+-
+-#define LOOP_CHUNK1(src, dest, len, branch_dest) \
+- MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
+-#define LOOP_CHUNK2(src, dest, len, branch_dest) \
+- MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
+-#define LOOP_CHUNK3(src, dest, len, branch_dest) \
+- MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
++#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
++ EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
++ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
++ add %src, 0x40, %src; \
++ subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
++ be,pn %xcc, jmptgt; \
++ add %dest, 0x40, %dest; \
++
++#define LOOP_CHUNK1(src, dest, branch_dest) \
++ MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
++#define LOOP_CHUNK2(src, dest, branch_dest) \
++ MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
++#define LOOP_CHUNK3(src, dest, branch_dest) \
++ MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
+
+ #define DO_SYNC membar #Sync;
+ #define STORE_SYNC(dest, fsrc) \
+- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
++ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
+ add %dest, 0x40, %dest; \
+ DO_SYNC
+
+ #define STORE_JUMP(dest, fsrc, target) \
+- EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
++ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
+ add %dest, 0x40, %dest; \
+ ba,pt %xcc, target; \
+ nop;
+
+-#define FINISH_VISCHUNK(dest, f0, f1, left) \
+- subcc %left, 8, %left;\
+- bl,pn %xcc, 95f; \
+- faligndata %f0, %f1, %f48; \
+- EX_ST_FP(STORE(std, %f48, %dest)); \
++#define FINISH_VISCHUNK(dest, f0, f1) \
++ subcc %g3, 8, %g3; \
++ bl,pn %xcc, 95f; \
++ faligndata %f0, %f1, %f48; \
++ EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
+ add %dest, 8, %dest;
+
+-#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
+- subcc %left, 8, %left; \
+- bl,pn %xcc, 95f; \
++#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
++ subcc %g3, 8, %g3; \
++ bl,pn %xcc, 95f; \
+ fsrc2 %f0, %f1;
+
+-#define UNEVEN_VISCHUNK(dest, f0, f1, left) \
+- UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
++#define UNEVEN_VISCHUNK(dest, f0, f1) \
++ UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
+ ba,a,pt %xcc, 93f;
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x) x
++ENTRY(U1_g1_1_fp)
++ VISExitHalf
++ add %g1, 1, %g1
++ add %g1, %g2, %g1
++ retl
++ add %g1, %o2, %o0
++ENDPROC(U1_g1_1_fp)
++ENTRY(U1_g2_0_fp)
++ VISExitHalf
++ retl
++ add %g2, %o2, %o0
++ENDPROC(U1_g2_0_fp)
++ENTRY(U1_g2_8_fp)
++ VISExitHalf
++ add %g2, 8, %g2
++ retl
++ add %g2, %o2, %o0
++ENDPROC(U1_g2_8_fp)
++ENTRY(U1_gs_0_fp)
++ VISExitHalf
++ add %GLOBAL_SPARE, %g3, %o0
++ retl
++ add %o0, %o2, %o0
++ENDPROC(U1_gs_0_fp)
++ENTRY(U1_gs_80_fp)
++ VISExitHalf
++ add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
++ add %GLOBAL_SPARE, %g3, %o0
++ retl
++ add %o0, %o2, %o0
++ENDPROC(U1_gs_80_fp)
++ENTRY(U1_gs_40_fp)
++ VISExitHalf
++ add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
++ add %GLOBAL_SPARE, %g3, %o0
++ retl
++ add %o0, %o2, %o0
++ENDPROC(U1_gs_40_fp)
++ENTRY(U1_g3_0_fp)
++ VISExitHalf
++ retl
++ add %g3, %o2, %o0
++ENDPROC(U1_g3_0_fp)
++ENTRY(U1_g3_8_fp)
++ VISExitHalf
++ add %g3, 8, %g3
++ retl
++ add %g3, %o2, %o0
++ENDPROC(U1_g3_8_fp)
++ENTRY(U1_o2_0_fp)
++ VISExitHalf
++ retl
++ mov %o2, %o0
++ENDPROC(U1_o2_0_fp)
++ENTRY(U1_o2_1_fp)
++ VISExitHalf
++ retl
++ add %o2, 1, %o0
++ENDPROC(U1_o2_1_fp)
++ENTRY(U1_gs_0)
++ VISExitHalf
++ retl
++ add %GLOBAL_SPARE, %o2, %o0
++ENDPROC(U1_gs_0)
++ENTRY(U1_gs_8)
++ VISExitHalf
++ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
++ retl
++ add %GLOBAL_SPARE, 0x8, %o0
++ENDPROC(U1_gs_8)
++ENTRY(U1_gs_10)
++ VISExitHalf
++ add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
++ retl
++ add %GLOBAL_SPARE, 0x10, %o0
++ENDPROC(U1_gs_10)
++ENTRY(U1_o2_0)
++ retl
++ mov %o2, %o0
++ENDPROC(U1_o2_0)
++ENTRY(U1_o2_8)
++ retl
++ add %o2, 8, %o0
++ENDPROC(U1_o2_8)
++ENTRY(U1_o2_4)
++ retl
++ add %o2, 4, %o0
++ENDPROC(U1_o2_4)
++ENTRY(U1_o2_1)
++ retl
++ add %o2, 1, %o0
++ENDPROC(U1_o2_1)
++ENTRY(U1_g1_0)
++ retl
++ add %g1, %o2, %o0
++ENDPROC(U1_g1_0)
++ENTRY(U1_g1_1)
++ add %g1, 1, %g1
++ retl
++ add %g1, %o2, %o0
++ENDPROC(U1_g1_1)
++ENTRY(U1_gs_0_o2_adj)
++ and %o2, 7, %o2
++ retl
++ add %GLOBAL_SPARE, %o2, %o0
++ENDPROC(U1_gs_0_o2_adj)
++ENTRY(U1_gs_8_o2_adj)
++ and %o2, 7, %o2
++ add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
++ retl
++ add %GLOBAL_SPARE, %o2, %o0
++ENDPROC(U1_gs_8_o2_adj)
++#endif
++
+ .align 64
+
+ .globl FUNC_NAME
+@@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ and %g2, 0x38, %g2
+
+ 1: subcc %g1, 0x1, %g1
+- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+- EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
++ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
++ EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
+ bgu,pt %XCC, 1b
+ add %o1, 0x1, %o1
+
+@@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ be,pt %icc, 3f
+ alignaddr %o1, %g0, %o1
+
+- EX_LD_FP(LOAD(ldd, %o1, %f4))
+-1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
++ EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
++1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
+ add %o1, 0x8, %o1
+ subcc %g2, 0x8, %g2
+ faligndata %f4, %f6, %f0
+- EX_ST_FP(STORE(std, %f0, %o0))
++ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
+ be,pn %icc, 3f
+ add %o0, 0x8, %o0
+
+- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
+ add %o1, 0x8, %o1
+ subcc %g2, 0x8, %g2
+ faligndata %f6, %f4, %f0
+- EX_ST_FP(STORE(std, %f0, %o0))
++ EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
+ bne,pt %icc, 1b
+ add %o0, 0x8, %o0
+
+@@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ add %g1, %GLOBAL_SPARE, %g1
+ subcc %o2, %g3, %o2
+
+- EX_LD_FP(LOAD_BLK(%o1, %f0))
++ EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
+ add %o1, 0x40, %o1
+ add %g1, %g3, %g1
+- EX_LD_FP(LOAD_BLK(%o1, %f16))
++ EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
+ add %o1, 0x40, %o1
+ sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
+- EX_LD_FP(LOAD_BLK(%o1, %f32))
++ EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
+ add %o1, 0x40, %o1
+
+ /* There are 8 instances of the unrolled loop,
+@@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ .align 64
+ 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f0, %f2, %f48
+ 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
+@@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ STORE_JUMP(o0, f48, 56f)
+
+ 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f2, %f4, %f48
+ 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
+@@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ STORE_JUMP(o0, f48, 57f)
+
+ 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f4, %f6, %f48
+ 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
+@@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ STORE_JUMP(o0, f48, 58f)
+
+ 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f6, %f8, %f48
+ 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
+@@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ STORE_JUMP(o0, f48, 59f)
+
+ 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f8, %f10, %f48
+ 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
+@@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ STORE_JUMP(o0, f48, 60f)
+
+ 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f10, %f12, %f48
+ 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
+@@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ STORE_JUMP(o0, f48, 61f)
+
+ 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f12, %f14, %f48
+ 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
+@@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ STORE_JUMP(o0, f48, 62f)
+
+ 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
+- LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
++ LOOP_CHUNK1(o1, o0, 1f)
+ FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+- LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
++ LOOP_CHUNK2(o1, o0, 2f)
+ FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
+- LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
++ LOOP_CHUNK3(o1, o0, 3f)
+ ba,pt %xcc, 1b+4
+ faligndata %f14, %f16, %f48
+ 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+@@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+ STORE_JUMP(o0, f48, 63f)
+
+-40: FINISH_VISCHUNK(o0, f0, f2, g3)
+-41: FINISH_VISCHUNK(o0, f2, f4, g3)
+-42: FINISH_VISCHUNK(o0, f4, f6, g3)
+-43: FINISH_VISCHUNK(o0, f6, f8, g3)
+-44: FINISH_VISCHUNK(o0, f8, f10, g3)
+-45: FINISH_VISCHUNK(o0, f10, f12, g3)
+-46: FINISH_VISCHUNK(o0, f12, f14, g3)
+-47: UNEVEN_VISCHUNK(o0, f14, f0, g3)
+-48: FINISH_VISCHUNK(o0, f16, f18, g3)
+-49: FINISH_VISCHUNK(o0, f18, f20, g3)
+-50: FINISH_VISCHUNK(o0, f20, f22, g3)
+-51: FINISH_VISCHUNK(o0, f22, f24, g3)
+-52: FINISH_VISCHUNK(o0, f24, f26, g3)
+-53: FINISH_VISCHUNK(o0, f26, f28, g3)
+-54: FINISH_VISCHUNK(o0, f28, f30, g3)
+-55: UNEVEN_VISCHUNK(o0, f30, f0, g3)
+-56: FINISH_VISCHUNK(o0, f32, f34, g3)
+-57: FINISH_VISCHUNK(o0, f34, f36, g3)
+-58: FINISH_VISCHUNK(o0, f36, f38, g3)
+-59: FINISH_VISCHUNK(o0, f38, f40, g3)
+-60: FINISH_VISCHUNK(o0, f40, f42, g3)
+-61: FINISH_VISCHUNK(o0, f42, f44, g3)
+-62: FINISH_VISCHUNK(o0, f44, f46, g3)
+-63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
+-
+-93: EX_LD_FP(LOAD(ldd, %o1, %f2))
++40: FINISH_VISCHUNK(o0, f0, f2)
++41: FINISH_VISCHUNK(o0, f2, f4)
++42: FINISH_VISCHUNK(o0, f4, f6)
++43: FINISH_VISCHUNK(o0, f6, f8)
++44: FINISH_VISCHUNK(o0, f8, f10)
++45: FINISH_VISCHUNK(o0, f10, f12)
++46: FINISH_VISCHUNK(o0, f12, f14)
++47: UNEVEN_VISCHUNK(o0, f14, f0)
++48: FINISH_VISCHUNK(o0, f16, f18)
++49: FINISH_VISCHUNK(o0, f18, f20)
++50: FINISH_VISCHUNK(o0, f20, f22)
++51: FINISH_VISCHUNK(o0, f22, f24)
++52: FINISH_VISCHUNK(o0, f24, f26)
++53: FINISH_VISCHUNK(o0, f26, f28)
++54: FINISH_VISCHUNK(o0, f28, f30)
++55: UNEVEN_VISCHUNK(o0, f30, f0)
++56: FINISH_VISCHUNK(o0, f32, f34)
++57: FINISH_VISCHUNK(o0, f34, f36)
++58: FINISH_VISCHUNK(o0, f36, f38)
++59: FINISH_VISCHUNK(o0, f38, f40)
++60: FINISH_VISCHUNK(o0, f40, f42)
++61: FINISH_VISCHUNK(o0, f42, f44)
++62: FINISH_VISCHUNK(o0, f44, f46)
++63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
++
++93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
+ add %o1, 8, %o1
+ subcc %g3, 8, %g3
+ faligndata %f0, %f2, %f8
+- EX_ST_FP(STORE(std, %f8, %o0))
++ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
+ bl,pn %xcc, 95f
+ add %o0, 8, %o0
+- EX_LD_FP(LOAD(ldd, %o1, %f0))
++ EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
+ add %o1, 8, %o1
+ subcc %g3, 8, %g3
+ faligndata %f2, %f0, %f8
+- EX_ST_FP(STORE(std, %f8, %o0))
++ EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
+ bge,pt %xcc, 93b
+ add %o0, 8, %o0
+
+ 95: brz,pt %o2, 2f
+ mov %g1, %o1
+
+-1: EX_LD_FP(LOAD(ldub, %o1, %o3))
++1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
+ add %o1, 1, %o1
+ subcc %o2, 1, %o2
+- EX_ST_FP(STORE(stb, %o3, %o0))
++ EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
+ bne,pt %xcc, 1b
+ add %o0, 1, %o0
+
+@@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 72: andn %o2, 0xf, %GLOBAL_SPARE
+ and %o2, 0xf, %o2
+-1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
+- EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
++1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
++ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
+ subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
+- EX_ST(STORE(stx, %o5, %o1 + %o3))
++ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
+ add %o1, 0x8, %o1
+- EX_ST(STORE(stx, %g1, %o1 + %o3))
++ EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
+ bgu,pt %XCC, 1b
+ add %o1, 0x8, %o1
+ 73: andcc %o2, 0x8, %g0
+ be,pt %XCC, 1f
+ nop
+- EX_LD(LOAD(ldx, %o1, %o5))
++ EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
+ sub %o2, 0x8, %o2
+- EX_ST(STORE(stx, %o5, %o1 + %o3))
++ EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
+ add %o1, 0x8, %o1
+ 1: andcc %o2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+- EX_LD(LOAD(lduw, %o1, %o5))
++ EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
+ sub %o2, 0x4, %o2
+- EX_ST(STORE(stw, %o5, %o1 + %o3))
++ EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
+ add %o1, 0x4, %o1
+ 1: cmp %o2, 0
+ be,pt %XCC, 85f
+@@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ sub %g0, %g1, %g1
+ sub %o2, %g1, %o2
+
+-1: EX_LD(LOAD(ldub, %o1, %o5))
++1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
+ subcc %g1, 1, %g1
+- EX_ST(STORE(stb, %o5, %o1 + %o3))
++ EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
+ bgu,pt %icc, 1b
+ add %o1, 1, %o1
+
+@@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 8: mov 64, %o3
+ andn %o1, 0x7, %o1
+- EX_LD(LOAD(ldx, %o1, %g2))
++ EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
+ sub %o3, %g1, %o3
+ andn %o2, 0x7, %GLOBAL_SPARE
+ sllx %g2, %g1, %g2
+-1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
++1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
+ subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
+ add %o1, 0x8, %o1
+ srlx %g3, %o3, %o5
+ or %o5, %g2, %o5
+- EX_ST(STORE(stx, %o5, %o0))
++ EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
+ add %o0, 0x8, %o0
+ bgu,pt %icc, 1b
+ sllx %g3, %g1, %g2
+@@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ bne,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+-1: EX_LD(LOAD(lduw, %o1, %g1))
++1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
+ subcc %o2, 4, %o2
+- EX_ST(STORE(stw, %g1, %o1 + %o3))
++ EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+@@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ mov EX_RETVAL(%o4), %o0
+
+ .align 32
+-90: EX_LD(LOAD(ldub, %o1, %g1))
++90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
+ subcc %o2, 1, %o2
+- EX_ST(STORE(stb, %g1, %o1 + %o3))
++ EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
+index 88ad73d..db73010 100644
+--- a/arch/sparc/lib/U3copy_from_user.S
++++ b/arch/sparc/lib/U3copy_from_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+-#define EX_LD(x) \
++#define EX_LD(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one; \
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_LD_FP(x) \
++#define EX_LD_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_fp;\
++ .word 98b, y##_fp; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
+index 845139d..c4ee858 100644
+--- a/arch/sparc/lib/U3copy_to_user.S
++++ b/arch/sparc/lib/U3copy_to_user.S
+@@ -3,19 +3,19 @@
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+-#define EX_ST(x) \
++#define EX_ST(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one; \
++ .word 98b, y; \
+ .text; \
+ .align 4;
+
+-#define EX_ST_FP(x) \
++#define EX_ST_FP(x,y) \
+ 98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one_fp;\
++ .word 98b, y##_fp; \
+ .text; \
+ .align 4;
+
+diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
+index 491ee69..54f9870 100644
+--- a/arch/sparc/lib/U3memcpy.S
++++ b/arch/sparc/lib/U3memcpy.S
+@@ -4,6 +4,7 @@
+ */
+
+ #ifdef __KERNEL__
++#include <linux/linkage.h>
+ #include <asm/visasm.h>
+ #include <asm/asi.h>
+ #define GLOBAL_SPARE %g7
+@@ -22,21 +23,17 @@
+ #endif
+
+ #ifndef EX_LD
+-#define EX_LD(x) x
++#define EX_LD(x,y) x
+ #endif
+ #ifndef EX_LD_FP
+-#define EX_LD_FP(x) x
++#define EX_LD_FP(x,y) x
+ #endif
+
+ #ifndef EX_ST
+-#define EX_ST(x) x
++#define EX_ST(x,y) x
+ #endif
+ #ifndef EX_ST_FP
+-#define EX_ST_FP(x) x
+-#endif
+-
+-#ifndef EX_RETVAL
+-#define EX_RETVAL(x) x
++#define EX_ST_FP(x,y) x
+ #endif
+
+ #ifndef LOAD
+@@ -77,6 +74,87 @@
+ */
+
+ .text
++#ifndef EX_RETVAL
++#define EX_RETVAL(x) x
++__restore_fp:
++ VISExitHalf
++ retl
++ nop
++ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
++ add %g1, 1, %g1
++ add %g2, %g1, %g2
++ ba,pt %xcc, __restore_fp
++ add %o2, %g2, %o0
++ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
++ENTRY(U3_retl_o2_plus_g2_fp)
++ ba,pt %xcc, __restore_fp
++ add %o2, %g2, %o0
++ENDPROC(U3_retl_o2_plus_g2_fp)
++ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
++ add %g2, 8, %g2
++ ba,pt %xcc, __restore_fp
++ add %o2, %g2, %o0
++ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
++ENTRY(U3_retl_o2)
++ retl
++ mov %o2, %o0
++ENDPROC(U3_retl_o2)
++ENTRY(U3_retl_o2_plus_1)
++ retl
++ add %o2, 1, %o0
++ENDPROC(U3_retl_o2_plus_1)
++ENTRY(U3_retl_o2_plus_4)
++ retl
++ add %o2, 4, %o0
++ENDPROC(U3_retl_o2_plus_4)
++ENTRY(U3_retl_o2_plus_8)
++ retl
++ add %o2, 8, %o0
++ENDPROC(U3_retl_o2_plus_8)
++ENTRY(U3_retl_o2_plus_g1_plus_1)
++ add %g1, 1, %g1
++ retl
++ add %o2, %g1, %o0
++ENDPROC(U3_retl_o2_plus_g1_plus_1)
++ENTRY(U3_retl_o2_fp)
++ ba,pt %xcc, __restore_fp
++ mov %o2, %o0
++ENDPROC(U3_retl_o2_fp)
++ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
++ sll %o3, 6, %o3
++ add %o3, 0x80, %o3
++ ba,pt %xcc, __restore_fp
++ add %o2, %o3, %o0
++ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
++ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
++ sll %o3, 6, %o3
++ add %o3, 0x40, %o3
++ ba,pt %xcc, __restore_fp
++ add %o2, %o3, %o0
++ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
++ENTRY(U3_retl_o2_plus_GS_plus_0x10)
++ add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
++ retl
++ add %o2, GLOBAL_SPARE, %o0
++ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
++ENTRY(U3_retl_o2_plus_GS_plus_0x08)
++ add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
++ retl
++ add %o2, GLOBAL_SPARE, %o0
++ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
++ENTRY(U3_retl_o2_and_7_plus_GS)
++ and %o2, 7, %o2
++ retl
++ add %o2, GLOBAL_SPARE, %o0 /* (%o2 & 7) + GLOBAL_SPARE must be returned in %o0, not left in %o2 */
++ENDPROC(U3_retl_o2_and_7_plus_GS)
++ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
++ add GLOBAL_SPARE, 8, GLOBAL_SPARE
++ and %o2, 7, %o2
++ retl
++ add %o2, GLOBAL_SPARE, %o0 /* (%o2 & 7) + GLOBAL_SPARE + 8 must be returned in %o0, not left in %o2 */
++ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
++#endif
++
+ .align 64
+
+ /* The cheetah's flexible spine, oversized liver, enlarged heart,
+@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ and %g2, 0x38, %g2
+
+ 1: subcc %g1, 0x1, %g1
+- EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+- EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
++ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
++ EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
+ bgu,pt %XCC, 1b
+ add %o1, 0x1, %o1
+
+@@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ be,pt %icc, 3f
+ alignaddr %o1, %g0, %o1
+
+- EX_LD_FP(LOAD(ldd, %o1, %f4))
+-1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
++ EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
++1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
+ add %o1, 0x8, %o1
+ subcc %g2, 0x8, %g2
+ faligndata %f4, %f6, %f0
+- EX_ST_FP(STORE(std, %f0, %o0))
++ EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
+ be,pn %icc, 3f
+ add %o0, 0x8, %o0
+
+- EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
+ add %o1, 0x8, %o1
+ subcc %g2, 0x8, %g2
+ faligndata %f6, %f4, %f2
+- EX_ST_FP(STORE(std, %f2, %o0))
++ EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
+ bne,pt %icc, 1b
+ add %o0, 0x8, %o0
+
+@@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ LOAD(prefetch, %o1 + 0x080, #one_read)
+ LOAD(prefetch, %o1 + 0x0c0, #one_read)
+ LOAD(prefetch, %o1 + 0x100, #one_read)
+- EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
+ LOAD(prefetch, %o1 + 0x140, #one_read)
+- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
+ LOAD(prefetch, %o1 + 0x180, #one_read)
+- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
+ LOAD(prefetch, %o1 + 0x1c0, #one_read)
+ faligndata %f0, %f2, %f16
+- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
+ faligndata %f2, %f4, %f18
+- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
+ faligndata %f4, %f6, %f20
+- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
+ faligndata %f6, %f8, %f22
+
+- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
+ faligndata %f8, %f10, %f24
+- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
+ faligndata %f10, %f12, %f26
+- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
+
+ subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
+ add %o1, 0x40, %o1
+@@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ .align 64
+ 1:
+- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ faligndata %f12, %f14, %f28
+- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ faligndata %f14, %f0, %f30
+- EX_ST_FP(STORE_BLK(%f16, %o0))
+- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
++ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
++ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f0, %f2, %f16
+ add %o0, 0x40, %o0
+
+- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f2, %f4, %f18
+- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f4, %f6, %f20
+- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ subcc %o3, 0x01, %o3
+ faligndata %f6, %f8, %f22
+- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+
+ faligndata %f8, %f10, %f24
+- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ LOAD(prefetch, %o1 + 0x1c0, #one_read)
+ faligndata %f10, %f12, %f26
+ bg,pt %XCC, 1b
+@@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ /* Finally we copy the last full 64-byte block. */
+ 2:
+- EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ faligndata %f12, %f14, %f28
+- EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+ faligndata %f14, %f0, %f30
+- EX_ST_FP(STORE_BLK(%f16, %o0))
+- EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
++ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
++ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f0, %f2, %f16
+- EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f2, %f4, %f18
+- EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f4, %f6, %f20
+- EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f6, %f8, %f22
+- EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ faligndata %f8, %f10, %f24
+ cmp %g1, 0
+ be,pt %XCC, 1f
+ add %o0, 0x40, %o0
+- EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ 1: faligndata %f10, %f12, %f26
+ faligndata %f12, %f14, %f28
+ faligndata %f14, %f0, %f30
+- EX_ST_FP(STORE_BLK(%f16, %o0))
++ EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+ add %o0, 0x40, %o0
+ add %o1, 0x40, %o1
+ membar #Sync
+@@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ sub %o2, %g2, %o2
+ be,a,pt %XCC, 1f
+- EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
+
+-1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
++1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
+ add %o1, 0x8, %o1
+ subcc %g2, 0x8, %g2
+ faligndata %f0, %f2, %f8
+- EX_ST_FP(STORE(std, %f8, %o0))
++ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
+ be,pn %XCC, 2f
+ add %o0, 0x8, %o0
+- EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
++ EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
+ add %o1, 0x8, %o1
+ subcc %g2, 0x8, %g2
+ faligndata %f2, %f0, %f8
+- EX_ST_FP(STORE(std, %f8, %o0))
++ EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
+ bne,pn %XCC, 1b
+ add %o0, 0x8, %o0
+
+@@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ andcc %o2, 0x8, %g0
+ be,pt %icc, 1f
+ nop
+- EX_LD(LOAD(ldx, %o1, %o5))
+- EX_ST(STORE(stx, %o5, %o1 + %o3))
++ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
++ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
+ add %o1, 0x8, %o1
++ sub %o2, 8, %o2
+
+ 1: andcc %o2, 0x4, %g0
+ be,pt %icc, 1f
+ nop
+- EX_LD(LOAD(lduw, %o1, %o5))
+- EX_ST(STORE(stw, %o5, %o1 + %o3))
++ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
++ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
+ add %o1, 0x4, %o1
++ sub %o2, 4, %o2
+
+ 1: andcc %o2, 0x2, %g0
+ be,pt %icc, 1f
+ nop
+- EX_LD(LOAD(lduh, %o1, %o5))
+- EX_ST(STORE(sth, %o5, %o1 + %o3))
++ EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
++ EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
+ add %o1, 0x2, %o1
++ sub %o2, 2, %o2
+
+ 1: andcc %o2, 0x1, %g0
+ be,pt %icc, 85f
+ nop
+- EX_LD(LOAD(ldub, %o1, %o5))
++ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
+ ba,pt %xcc, 85f
+- EX_ST(STORE(stb, %o5, %o1 + %o3))
++ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
+
+ .align 64
+ 70: /* 16 < len <= 64 */
+@@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ andn %o2, 0xf, GLOBAL_SPARE
+ and %o2, 0xf, %o2
+ 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
+- EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
+- EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
+- EX_ST(STORE(stx, %o5, %o1 + %o3))
++ EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
++ EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
++ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
+ add %o1, 0x8, %o1
+- EX_ST(STORE(stx, %g1, %o1 + %o3))
++ EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
+ bgu,pt %XCC, 1b
+ add %o1, 0x8, %o1
+ 73: andcc %o2, 0x8, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x8, %o2
+- EX_LD(LOAD(ldx, %o1, %o5))
+- EX_ST(STORE(stx, %o5, %o1 + %o3))
++ EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
++ EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
+ add %o1, 0x8, %o1
+ 1: andcc %o2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x4, %o2
+- EX_LD(LOAD(lduw, %o1, %o5))
+- EX_ST(STORE(stw, %o5, %o1 + %o3))
++ EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
++ EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
+ add %o1, 0x4, %o1
+ 1: cmp %o2, 0
+ be,pt %XCC, 85f
+@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ sub %o2, %g1, %o2
+
+ 1: subcc %g1, 1, %g1
+- EX_LD(LOAD(ldub, %o1, %o5))
+- EX_ST(STORE(stb, %o5, %o1 + %o3))
++ EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
++ EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
+ bgu,pt %icc, 1b
+ add %o1, 1, %o1
+
+@@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 8: mov 64, %o3
+ andn %o1, 0x7, %o1
+- EX_LD(LOAD(ldx, %o1, %g2))
++ EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
+ sub %o3, %g1, %o3
+ andn %o2, 0x7, GLOBAL_SPARE
+ sllx %g2, %g1, %g2
+-1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
++1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
+ subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
+ add %o1, 0x8, %o1
+ srlx %g3, %o3, %o5
+ or %o5, %g2, %o5
+- EX_ST(STORE(stx, %o5, %o0))
++ EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
+ add %o0, 0x8, %o0
+ bgu,pt %icc, 1b
+ sllx %g3, %g1, %g2
+@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+
+ 1:
+ subcc %o2, 4, %o2
+- EX_LD(LOAD(lduw, %o1, %g1))
+- EX_ST(STORE(stw, %g1, %o1 + %o3))
++ EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
++ EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ .align 32
+ 90:
+ subcc %o2, 1, %o2
+- EX_LD(LOAD(ldub, %o1, %g1))
+- EX_ST(STORE(stb, %g1, %o1 + %o3))
++ EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
++ EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
+index 302c0e6..4c89b48 100644
+--- a/arch/sparc/lib/copy_in_user.S
++++ b/arch/sparc/lib/copy_in_user.S
+@@ -8,18 +8,33 @@
+
+ #define XCC xcc
+
+-#define EX(x,y) \
++#define EX(x,y,z) \
+ 98: x,y; \
+ .section __ex_table,"a";\
+ .align 4; \
+- .word 98b, __retl_one; \
++ .word 98b, z; \
+ .text; \
+ .align 4;
+
++#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
++#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
++#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
++
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
++__retl_o4_plus_8:
++ add %o4, %o2, %o4
++ retl
++ add %o4, 8, %o0
++__retl_o2_plus_4:
++ retl
++ add %o2, 4, %o0
++__retl_o2_plus_1:
++ retl
++ add %o2, 1, %o0
++
+ .align 32
+
+ /* Don't try to get too fancy here, just nice and
+@@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
+ andn %o2, 0x7, %o4
+ and %o2, 0x7, %o2
+ 1: subcc %o4, 0x8, %o4
+- EX(ldxa [%o1] %asi, %o5)
+- EX(stxa %o5, [%o0] %asi)
++ EX_O4(ldxa [%o1] %asi, %o5)
++ EX_O4(stxa %o5, [%o0] %asi)
+ add %o1, 0x8, %o1
+ bgu,pt %XCC, 1b
+ add %o0, 0x8, %o0
+@@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x4, %o2
+- EX(lduwa [%o1] %asi, %o5)
+- EX(stwa %o5, [%o0] %asi)
++ EX_O2_4(lduwa [%o1] %asi, %o5)
++ EX_O2_4(stwa %o5, [%o0] %asi)
+ add %o1, 0x4, %o1
+ add %o0, 0x4, %o0
+ 1: cmp %o2, 0
+@@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
+
+ 82:
+ subcc %o2, 4, %o2
+- EX(lduwa [%o1] %asi, %g1)
+- EX(stwa %g1, [%o0] %asi)
++ EX_O2_4(lduwa [%o1] %asi, %g1)
++ EX_O2_4(stwa %g1, [%o0] %asi)
+ add %o1, 4, %o1
+ bgu,pt %XCC, 82b
+ add %o0, 4, %o0
+@@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
+ .align 32
+ 90:
+ subcc %o2, 1, %o2
+- EX(lduba [%o1] %asi, %g1)
+- EX(stba %g1, [%o0] %asi)
++ EX_O2_1(lduba [%o1] %asi, %g1)
++ EX_O2_1(stba %g1, [%o0] %asi)
+ add %o1, 1, %o1
+ bgu,pt %XCC, 90b
+ add %o0, 1, %o0
+diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
+deleted file mode 100644
+index ac96ae2..0000000
+--- a/arch/sparc/lib/user_fixup.c
++++ /dev/null
+@@ -1,71 +0,0 @@
+-/* user_fixup.c: Fix up user copy faults.
+- *
+- * Copyright (C) 2004 David S. Miller <davem@redhat.com>
+- */
+-
+-#include <linux/compiler.h>
+-#include <linux/kernel.h>
+-#include <linux/string.h>
+-#include <linux/errno.h>
+-#include <linux/module.h>
+-
+-#include <asm/uaccess.h>
+-
+-/* Calculating the exact fault address when using
+- * block loads and stores can be very complicated.
+- *
+- * Instead of trying to be clever and handling all
+- * of the cases, just fix things up simply here.
+- */
+-
+-static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
+-{
+- unsigned long fault_addr = current_thread_info()->fault_address;
+- unsigned long end = start + size;
+-
+- if (fault_addr < start || fault_addr >= end) {
+- *offset = 0;
+- } else {
+- *offset = fault_addr - start;
+- size = end - fault_addr;
+- }
+- return size;
+-}
+-
+-unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
+-{
+- unsigned long offset;
+-
+- size = compute_size((unsigned long) from, size, &offset);
+- if (likely(size))
+- memset(to + offset, 0, size);
+-
+- return size;
+-}
+-EXPORT_SYMBOL(copy_from_user_fixup);
+-
+-unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
+-{
+- unsigned long offset;
+-
+- return compute_size((unsigned long) to, size, &offset);
+-}
+-EXPORT_SYMBOL(copy_to_user_fixup);
+-
+-unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
+-{
+- unsigned long fault_addr = current_thread_info()->fault_address;
+- unsigned long start = (unsigned long) to;
+- unsigned long end = start + size;
+-
+- if (fault_addr >= start && fault_addr < end)
+- return end - fault_addr;
+-
+- start = (unsigned long) from;
+- end = start + size;
+- if (fault_addr >= start && fault_addr < end)
+- return end - fault_addr;
+-
+- return size;
+-}
+-EXPORT_SYMBOL(copy_in_user_fixup);
+diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
+index f2b7711..e20fbba 100644
+--- a/arch/sparc/mm/tsb.c
++++ b/arch/sparc/mm/tsb.c
+@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
+ return (tag == (vaddr >> 22));
+ }
+
++static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
++{
++ unsigned long idx;
++
++ for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
++ struct tsb *ent = &swapper_tsb[idx];
++ unsigned long match = idx << 13;
++
++ match |= (ent->tag << 22);
++ if (match >= start && match < end)
++ ent->tag = (1UL << TSB_TAG_INVALID_BIT);
++ }
++}
++
+ /* TSB flushes need only occur on the processor initiating the address
+ * space modification, not on each cpu the address space has run on.
+ * Only the TLB flush needs that treatment.
+@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
+ {
+ unsigned long v;
+
++ if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
++ return flush_tsb_kernel_range_scan(start, end);
++
+ for (v = start; v < end; v += PAGE_SIZE) {
+ unsigned long hash = tsb_hash(v, PAGE_SHIFT,
+ KERNEL_TSB_NENTRIES);
+diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
+index b4f4733..5d2fd6c 100644
+--- a/arch/sparc/mm/ultra.S
++++ b/arch/sparc/mm/ultra.S
+@@ -30,7 +30,7 @@
+ .text
+ .align 32
+ .globl __flush_tlb_mm
+-__flush_tlb_mm: /* 18 insns */
++__flush_tlb_mm: /* 19 insns */
+ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
+ ldxa [%o1] ASI_DMMU, %g2
+ cmp %g2, %o0
+@@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
+
+ .align 32
+ .globl __flush_tlb_pending
+-__flush_tlb_pending: /* 26 insns */
++__flush_tlb_pending: /* 27 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ rdpr %pstate, %g7
+ sllx %o1, 3, %o1
+@@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
+
+ .align 32
+ .globl __flush_tlb_kernel_range
+-__flush_tlb_kernel_range: /* 16 insns */
++__flush_tlb_kernel_range: /* 31 insns */
+ /* %o0=start, %o1=end */
+ cmp %o0, %o1
+ be,pn %xcc, 2f
++ sub %o1, %o0, %o3
++ srlx %o3, 18, %o4
++ brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
+ sethi %hi(PAGE_SIZE), %o4
+- sub %o1, %o0, %o3
+ sub %o3, %o4, %o3
+ or %o0, 0x20, %o0 ! Nucleus
+ 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
+@@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
+ retl
+ nop
+ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++
++__spitfire_flush_tlb_kernel_range_slow:
++ mov 63 * 8, %o4
++1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
++ andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
++ bne,pn %xcc, 2f
++ mov TLB_TAG_ACCESS, %o3
++ stxa %g0, [%o3] ASI_IMMU
++ stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
++ membar #Sync
++2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
++ andcc %o3, 0x40, %g0
++ bne,pn %xcc, 2f
++ mov TLB_TAG_ACCESS, %o3
++ stxa %g0, [%o3] ASI_DMMU
++ stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
++ membar #Sync
++2: sub %o4, 8, %o4
++ brgez,pt %o4, 1b
++ nop
++ retl
++ nop
+
+ __spitfire_flush_tlb_mm_slow:
+ rdpr %pstate, %g1
+@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
+ retl
+ wrpr %g7, 0x0, %pstate
+
++__cheetah_flush_tlb_kernel_range: /* 31 insns */
++ /* %o0=start, %o1=end */
++ cmp %o0, %o1
++ be,pn %xcc, 2f
++ sub %o1, %o0, %o3
++ srlx %o3, 18, %o4
++ brnz,pn %o4, 3f
++ sethi %hi(PAGE_SIZE), %o4
++ sub %o3, %o4, %o3
++ or %o0, 0x20, %o0 ! Nucleus
++1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
++ stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
++ membar #Sync
++ brnz,pt %o3, 1b
++ sub %o3, %o4, %o3
++2: sethi %hi(KERNBASE), %o3
++ flush %o3
++ retl
++ nop
++3: mov 0x80, %o4
++ stxa %g0, [%o4] ASI_DMMU_DEMAP
++ membar #Sync
++ stxa %g0, [%o4] ASI_IMMU_DEMAP
++ membar #Sync
++ retl
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++
+ #ifdef DCACHE_ALIASING_POSSIBLE
+ __cheetah_flush_dcache_page: /* 11 insns */
+ sethi %hi(PAGE_OFFSET), %g1
+@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
+ ret
+ restore
+
+-__hypervisor_flush_tlb_mm: /* 10 insns */
++__hypervisor_flush_tlb_mm: /* 19 insns */
+ mov %o0, %o2 /* ARG2: mmu context */
+ mov 0, %o0 /* ARG0: CPU lists unimplemented */
+ mov 0, %o1 /* ARG1: CPU lists unimplemented */
+ mov HV_MMU_ALL, %o3 /* ARG3: flags */
+ mov HV_FAST_MMU_DEMAP_CTX, %o5
+ ta HV_FAST_TRAP
+- brnz,pn %o0, __hypervisor_tlb_tl0_error
++ brnz,pn %o0, 1f
+ mov HV_FAST_MMU_DEMAP_CTX, %o1
+ retl
+ nop
++1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
++ jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
+
+-__hypervisor_flush_tlb_page: /* 11 insns */
++__hypervisor_flush_tlb_page: /* 22 insns */
+ /* %o0 = context, %o1 = vaddr */
+ mov %o0, %g2
+ mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
+@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
+ srlx %o0, PAGE_SHIFT, %o0
+ sllx %o0, PAGE_SHIFT, %o0
+ ta HV_MMU_UNMAP_ADDR_TRAP
+- brnz,pn %o0, __hypervisor_tlb_tl0_error
++ brnz,pn %o0, 1f
+ mov HV_MMU_UNMAP_ADDR_TRAP, %o1
+ retl
+ nop
++1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
++ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
+
+-__hypervisor_flush_tlb_pending: /* 16 insns */
++__hypervisor_flush_tlb_pending: /* 27 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ sllx %o1, 3, %g1
+ mov %o2, %g2
+@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
+ srlx %o0, PAGE_SHIFT, %o0
+ sllx %o0, PAGE_SHIFT, %o0
+ ta HV_MMU_UNMAP_ADDR_TRAP
+- brnz,pn %o0, __hypervisor_tlb_tl0_error
++ brnz,pn %o0, 1f
+ mov HV_MMU_UNMAP_ADDR_TRAP, %o1
+ brnz,pt %g1, 1b
+ nop
+ retl
+ nop
++1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
++ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
+
+-__hypervisor_flush_tlb_kernel_range: /* 16 insns */
++__hypervisor_flush_tlb_kernel_range: /* 31 insns */
+ /* %o0=start, %o1=end */
+ cmp %o0, %o1
+ be,pn %xcc, 2f
+- sethi %hi(PAGE_SIZE), %g3
+- mov %o0, %g1
+- sub %o1, %g1, %g2
++ sub %o1, %o0, %g2
++ srlx %g2, 18, %g3
++ brnz,pn %g3, 4f
++ mov %o0, %g1
++ sethi %hi(PAGE_SIZE), %g3
+ sub %g2, %g3, %g2
+ 1: add %g1, %g2, %o0 /* ARG0: virtual address */
+ mov 0, %o1 /* ARG1: mmu context */
+ mov HV_MMU_ALL, %o2 /* ARG2: flags */
+ ta HV_MMU_UNMAP_ADDR_TRAP
+- brnz,pn %o0, __hypervisor_tlb_tl0_error
++ brnz,pn %o0, 3f
+ mov HV_MMU_UNMAP_ADDR_TRAP, %o1
+ brnz,pt %g2, 1b
+ sub %g2, %g3, %g2
+ 2: retl
+ nop
++3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
++ jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
++ nop
++4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
++ mov 0, %o1 /* ARG1: CPU lists unimplemented */
++ mov 0, %o2 /* ARG2: mmu context == nucleus */
++ mov HV_MMU_ALL, %o3 /* ARG3: flags */
++ mov HV_FAST_MMU_DEMAP_CTX, %o5
++ ta HV_FAST_TRAP
++ brnz,pn %o0, 3b
++ mov HV_FAST_MMU_DEMAP_CTX, %o1
++ retl
++ nop
+
+ #ifdef DCACHE_ALIASING_POSSIBLE
+ /* XXX Niagara and friends have an 8K cache, so no aliasing is
+@@ -394,43 +511,6 @@ tlb_patch_one:
+ retl
+ nop
+
+- .globl cheetah_patch_cachetlbops
+-cheetah_patch_cachetlbops:
+- save %sp, -128, %sp
+-
+- sethi %hi(__flush_tlb_mm), %o0
+- or %o0, %lo(__flush_tlb_mm), %o0
+- sethi %hi(__cheetah_flush_tlb_mm), %o1
+- or %o1, %lo(__cheetah_flush_tlb_mm), %o1
+- call tlb_patch_one
+- mov 19, %o2
+-
+- sethi %hi(__flush_tlb_page), %o0
+- or %o0, %lo(__flush_tlb_page), %o0
+- sethi %hi(__cheetah_flush_tlb_page), %o1
+- or %o1, %lo(__cheetah_flush_tlb_page), %o1
+- call tlb_patch_one
+- mov 22, %o2
+-
+- sethi %hi(__flush_tlb_pending), %o0
+- or %o0, %lo(__flush_tlb_pending), %o0
+- sethi %hi(__cheetah_flush_tlb_pending), %o1
+- or %o1, %lo(__cheetah_flush_tlb_pending), %o1
+- call tlb_patch_one
+- mov 27, %o2
+-
+-#ifdef DCACHE_ALIASING_POSSIBLE
+- sethi %hi(__flush_dcache_page), %o0
+- or %o0, %lo(__flush_dcache_page), %o0
+- sethi %hi(__cheetah_flush_dcache_page), %o1
+- or %o1, %lo(__cheetah_flush_dcache_page), %o1
+- call tlb_patch_one
+- mov 11, %o2
+-#endif /* DCACHE_ALIASING_POSSIBLE */
+-
+- ret
+- restore
+-
+ #ifdef CONFIG_SMP
+ /* These are all called by the slaves of a cross call, at
+ * trap level 1, with interrupts fully disabled.
+@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
+ */
+ .align 32
+ .globl xcall_flush_tlb_mm
+-xcall_flush_tlb_mm: /* 21 insns */
++xcall_flush_tlb_mm: /* 24 insns */
+ mov PRIMARY_CONTEXT, %g2
+ ldxa [%g2] ASI_DMMU, %g3
+ srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
+@@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
+ nop
+ nop
+ nop
++ nop
++ nop
++ nop
+
+ .globl xcall_flush_tlb_page
+-xcall_flush_tlb_page: /* 17 insns */
++xcall_flush_tlb_page: /* 20 insns */
+ /* %g5=context, %g1=vaddr */
+ mov PRIMARY_CONTEXT, %g4
+ ldxa [%g4] ASI_DMMU, %g2
+@@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */
+ retry
+ nop
+ nop
++ nop
++ nop
++ nop
+
+ .globl xcall_flush_tlb_kernel_range
+-xcall_flush_tlb_kernel_range: /* 25 insns */
++xcall_flush_tlb_kernel_range: /* 44 insns */
+ sethi %hi(PAGE_SIZE - 1), %g2
+ or %g2, %lo(PAGE_SIZE - 1), %g2
+ andn %g1, %g2, %g1
+ andn %g7, %g2, %g7
+ sub %g7, %g1, %g3
+- add %g2, 1, %g2
++ srlx %g3, 18, %g2
++ brnz,pn %g2, 2f
++ add %g2, 1, %g2
+ sub %g3, %g2, %g3
+ or %g1, 0x20, %g1 ! Nucleus
+ 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
+@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
+ brnz,pt %g3, 1b
+ sub %g3, %g2, %g3
+ retry
+- nop
+- nop
++2: mov 63 * 8, %g1
++1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
++ andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
++ bne,pn %xcc, 2f
++ mov TLB_TAG_ACCESS, %g2
++ stxa %g0, [%g2] ASI_IMMU
++ stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
++ membar #Sync
++2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
++ andcc %g2, 0x40, %g0
++ bne,pn %xcc, 2f
++ mov TLB_TAG_ACCESS, %g2
++ stxa %g0, [%g2] ASI_DMMU
++ stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
++ membar #Sync
++2: sub %g1, 8, %g1
++ brgez,pt %g1, 1b
++ nop
++ retry
+ nop
+ nop
+ nop
+@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
+
+ retry
+
++__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
++ sethi %hi(PAGE_SIZE - 1), %g2
++ or %g2, %lo(PAGE_SIZE - 1), %g2
++ andn %g1, %g2, %g1
++ andn %g7, %g2, %g7
++ sub %g7, %g1, %g3
++ srlx %g3, 18, %g2
++ brnz,pn %g2, 2f
++ add %g2, 1, %g2
++ sub %g3, %g2, %g3
++ or %g1, 0x20, %g1 ! Nucleus
++1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
++ stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
++ membar #Sync
++ brnz,pt %g3, 1b
++ sub %g3, %g2, %g3
++ retry
++2: mov 0x80, %g2
++ stxa %g0, [%g2] ASI_DMMU_DEMAP
++ membar #Sync
++ stxa %g0, [%g2] ASI_IMMU_DEMAP
++ membar #Sync
++ retry
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++ nop
++
+ #ifdef DCACHE_ALIASING_POSSIBLE
+ .align 32
+ .globl xcall_flush_dcache_page_cheetah
+@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
+ ba,a,pt %xcc, rtrap
+
+ .globl __hypervisor_xcall_flush_tlb_mm
+-__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
++__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
+ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
+ mov %o0, %g2
+ mov %o1, %g3
+@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+ mov HV_FAST_MMU_DEMAP_CTX, %o5
+ ta HV_FAST_TRAP
+ mov HV_FAST_MMU_DEMAP_CTX, %g6
+- brnz,pn %o0, __hypervisor_tlb_xcall_error
++ brnz,pn %o0, 1f
+ mov %o0, %g5
+ mov %g2, %o0
+ mov %g3, %o1
+@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+ mov %g7, %o5
+ membar #Sync
+ retry
++1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
++ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
++ nop
+
+ .globl __hypervisor_xcall_flush_tlb_page
+-__hypervisor_xcall_flush_tlb_page: /* 17 insns */
++__hypervisor_xcall_flush_tlb_page: /* 20 insns */
+ /* %g5=ctx, %g1=vaddr */
+ mov %o0, %g2
+ mov %o1, %g3
+@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
+ sllx %o0, PAGE_SHIFT, %o0
+ ta HV_MMU_UNMAP_ADDR_TRAP
+ mov HV_MMU_UNMAP_ADDR_TRAP, %g6
+- brnz,a,pn %o0, __hypervisor_tlb_xcall_error
++ brnz,a,pn %o0, 1f
+ mov %o0, %g5
+ mov %g2, %o0
+ mov %g3, %o1
+ mov %g4, %o2
+ membar #Sync
+ retry
++1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
++ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
++ nop
+
+ .globl __hypervisor_xcall_flush_tlb_kernel_range
+-__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
++__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
+ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
+ sethi %hi(PAGE_SIZE - 1), %g2
+ or %g2, %lo(PAGE_SIZE - 1), %g2
+ andn %g1, %g2, %g1
+ andn %g7, %g2, %g7
+ sub %g7, %g1, %g3
++ srlx %g3, 18, %g7
+ add %g2, 1, %g2
+ sub %g3, %g2, %g3
+ mov %o0, %g2
+ mov %o1, %g4
+- mov %o2, %g7
++ brnz,pn %g7, 2f
++ mov %o2, %g7
+ 1: add %g1, %g3, %o0 /* ARG0: virtual address */
+ mov 0, %o1 /* ARG1: mmu context */
+ mov HV_MMU_ALL, %o2 /* ARG2: flags */
+ ta HV_MMU_UNMAP_ADDR_TRAP
+ mov HV_MMU_UNMAP_ADDR_TRAP, %g6
+- brnz,pn %o0, __hypervisor_tlb_xcall_error
++ brnz,pn %o0, 1f
+ mov %o0, %g5
+ sethi %hi(PAGE_SIZE), %o2
+ brnz,pt %g3, 1b
+ sub %g3, %o2, %g3
+- mov %g2, %o0
++5: mov %g2, %o0
+ mov %g4, %o1
+ mov %g7, %o2
+ membar #Sync
+ retry
++1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
++ jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
++ nop
++2: mov %o3, %g1
++ mov %o5, %g3
++ mov 0, %o0 /* ARG0: CPU lists unimplemented */
++ mov 0, %o1 /* ARG1: CPU lists unimplemented */
++ mov 0, %o2 /* ARG2: mmu context == nucleus */
++ mov HV_MMU_ALL, %o3 /* ARG3: flags */
++ mov HV_FAST_MMU_DEMAP_CTX, %o5
++ ta HV_FAST_TRAP
++ mov %g1, %o3
++ brz,pt %o0, 5b
++ mov %g3, %o5
++ mov HV_FAST_MMU_DEMAP_CTX, %g6
++ ba,pt %xcc, 1b
++ clr %g5
+
+ /* These just get rescheduled to PIL vectors. */
+ .globl xcall_call_function
+@@ -809,6 +985,58 @@ xcall_kgdb_capture:
+
+ #endif /* CONFIG_SMP */
+
++ .globl cheetah_patch_cachetlbops
++cheetah_patch_cachetlbops:
++ save %sp, -128, %sp
++
++ sethi %hi(__flush_tlb_mm), %o0
++ or %o0, %lo(__flush_tlb_mm), %o0
++ sethi %hi(__cheetah_flush_tlb_mm), %o1
++ or %o1, %lo(__cheetah_flush_tlb_mm), %o1
++ call tlb_patch_one
++ mov 19, %o2
++
++ sethi %hi(__flush_tlb_page), %o0
++ or %o0, %lo(__flush_tlb_page), %o0
++ sethi %hi(__cheetah_flush_tlb_page), %o1
++ or %o1, %lo(__cheetah_flush_tlb_page), %o1
++ call tlb_patch_one
++ mov 22, %o2
++
++ sethi %hi(__flush_tlb_pending), %o0
++ or %o0, %lo(__flush_tlb_pending), %o0
++ sethi %hi(__cheetah_flush_tlb_pending), %o1
++ or %o1, %lo(__cheetah_flush_tlb_pending), %o1
++ call tlb_patch_one
++ mov 27, %o2
++
++ sethi %hi(__flush_tlb_kernel_range), %o0
++ or %o0, %lo(__flush_tlb_kernel_range), %o0
++ sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
++ or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
++ call tlb_patch_one
++ mov 31, %o2
++
++#ifdef DCACHE_ALIASING_POSSIBLE
++ sethi %hi(__flush_dcache_page), %o0
++ or %o0, %lo(__flush_dcache_page), %o0
++ sethi %hi(__cheetah_flush_dcache_page), %o1
++ or %o1, %lo(__cheetah_flush_dcache_page), %o1
++ call tlb_patch_one
++ mov 11, %o2
++#endif /* DCACHE_ALIASING_POSSIBLE */
++
++#ifdef CONFIG_SMP
++ sethi %hi(xcall_flush_tlb_kernel_range), %o0
++ or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
++ sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
++ or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
++ call tlb_patch_one
++ mov 44, %o2
++#endif /* CONFIG_SMP */
++
++ ret
++ restore
+
+ .globl hypervisor_patch_cachetlbops
+ hypervisor_patch_cachetlbops:
+@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
+ sethi %hi(__hypervisor_flush_tlb_mm), %o1
+ or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
+ call tlb_patch_one
+- mov 10, %o2
++ mov 19, %o2
+
+ sethi %hi(__flush_tlb_page), %o0
+ or %o0, %lo(__flush_tlb_page), %o0
+ sethi %hi(__hypervisor_flush_tlb_page), %o1
+ or %o1, %lo(__hypervisor_flush_tlb_page), %o1
+ call tlb_patch_one
+- mov 11, %o2
++ mov 22, %o2
+
+ sethi %hi(__flush_tlb_pending), %o0
+ or %o0, %lo(__flush_tlb_pending), %o0
+ sethi %hi(__hypervisor_flush_tlb_pending), %o1
+ or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
+ call tlb_patch_one
+- mov 16, %o2
++ mov 27, %o2
+
+ sethi %hi(__flush_tlb_kernel_range), %o0
+ or %o0, %lo(__flush_tlb_kernel_range), %o0
+ sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
+ or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
+ call tlb_patch_one
+- mov 16, %o2
++ mov 31, %o2
+
+ #ifdef DCACHE_ALIASING_POSSIBLE
+ sethi %hi(__flush_dcache_page), %o0
+@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
+ sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
+ or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
+ call tlb_patch_one
+- mov 21, %o2
++ mov 24, %o2
+
+ sethi %hi(xcall_flush_tlb_page), %o0
+ or %o0, %lo(xcall_flush_tlb_page), %o0
+ sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
+ or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
+ call tlb_patch_one
+- mov 17, %o2
++ mov 20, %o2
+
+ sethi %hi(xcall_flush_tlb_kernel_range), %o0
+ or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
+ sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+ or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+ call tlb_patch_one
+- mov 25, %o2
++ mov 44, %o2
+ #endif /* CONFIG_SMP */
+
+ ret
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
+index c4751ec..45e87c9 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
+ u32 ctl;
+
+ ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
++
++ /* preserve ONLY bits 16-17 from current hardware value */
++ ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
++
+ if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
+ ctl &= ~BGMAC_DMA_RX_BL_MASK;
+ ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
+@@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
+ ctl &= ~BGMAC_DMA_RX_PT_MASK;
+ ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
+ }
+- ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
+ ctl |= BGMAC_DMA_RX_ENABLE;
+ ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
+ ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
+diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
+index 505ceaf..2c850a9 100644
+--- a/drivers/net/ethernet/broadcom/bnx2.c
++++ b/drivers/net/ethernet/broadcom/bnx2.c
+@@ -49,6 +49,7 @@
+ #include <linux/firmware.h>
+ #include <linux/log2.h>
+ #include <linux/aer.h>
++#include <linux/crash_dump.h>
+
+ #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+ #define BCM_CNIC 1
+@@ -4759,15 +4760,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp)
+ BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR);
+ }
+
+-static int
+-bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
++static void
++bnx2_wait_dma_complete(struct bnx2 *bp)
+ {
+ u32 val;
+- int i, rc = 0;
+- u8 old_port;
++ int i;
+
+- /* Wait for the current PCI transaction to complete before
+- * issuing a reset. */
++ /*
++ * Wait for the current PCI transaction to complete before
++ * issuing a reset.
++ */
+ if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) ||
+ (BNX2_CHIP(bp) == BNX2_CHIP_5708)) {
+ BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS,
+@@ -4791,6 +4793,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
+ }
+ }
+
++ return;
++}
++
++
++static int
++bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
++{
++ u32 val;
++ int i, rc = 0;
++ u8 old_port;
++
++ /* Wait for the current PCI transaction to complete before
++ * issuing a reset. */
++ bnx2_wait_dma_complete(bp);
++
+ /* Wait for the firmware to tell us it is ok to issue a reset. */
+ bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1);
+
+@@ -6356,6 +6373,10 @@ bnx2_open(struct net_device *dev)
+ struct bnx2 *bp = netdev_priv(dev);
+ int rc;
+
++ rc = bnx2_request_firmware(bp);
++ if (rc < 0)
++ goto out;
++
+ netif_carrier_off(dev);
+
+ bnx2_disable_int(bp);
+@@ -6424,6 +6445,7 @@ bnx2_open(struct net_device *dev)
+ bnx2_free_irq(bp);
+ bnx2_free_mem(bp);
+ bnx2_del_napi(bp);
++ bnx2_release_firmware(bp);
+ goto out;
+ }
+
+@@ -8570,12 +8592,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ pci_set_drvdata(pdev, dev);
+
+- rc = bnx2_request_firmware(bp);
+- if (rc < 0)
+- goto error;
+-
++ /*
++ * In-flight DMA from 1st kernel could continue going in kdump kernel.
++ * New io-page table has been created before bnx2 does reset at open stage.
++ * We have to wait for the in-flight DMA to complete to avoid it look up
++ * into the newly created io-page table.
++ */
++ if (is_kdump_kernel())
++ bnx2_wait_dma_complete(bp);
+
+- bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
+ memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
+
+ dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
+@@ -8608,7 +8633,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ return 0;
+
+ error:
+- bnx2_release_firmware(bp);
+ pci_iounmap(pdev, bp->regview);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+index d48873b..5cdc96b 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+
+ span_entry->used = true;
+ span_entry->id = index;
+- span_entry->ref_count = 0;
++ span_entry->ref_count = 1;
+ span_entry->local_port = local_port;
+ return span_entry;
+ }
+@@ -268,6 +268,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+
+ span_entry = mlxsw_sp_span_entry_find(port);
+ if (span_entry) {
++ /* Already exists, just take a reference */
+ span_entry->ref_count++;
+ return span_entry;
+ }
+@@ -278,6 +279,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+ {
++ WARN_ON(!span_entry->ref_count);
+ if (--span_entry->ref_count == 0)
+ mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+ return 0;
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+index 3f5c51d..62514b9 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -777,6 +777,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
+ }
+ }
+
++static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
++{
++ u8 num_rec, last_rec_index, num_entries;
++
++ num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
++ last_rec_index = num_rec - 1;
++
++ if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
++ return false;
++ if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
++ MLXSW_REG_RAUHTD_TYPE_IPV6)
++ return true;
++
++ num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
++ last_rec_index);
++ if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
++ return true;
++ return false;
++}
++
+ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+ {
+ char *rauhtd_pl;
+@@ -803,7 +823,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+ for (i = 0; i < num_rec; i++)
+ mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
+ i);
+- } while (num_rec);
++ } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
+ rtnl_unlock();
+
+ kfree(rauhtd_pl);
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 4c8c60a..fe9e7b1 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -871,6 +871,13 @@ static int stmmac_init_phy(struct net_device *dev)
+ return -ENODEV;
+ }
+
++ /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
++ * subsequent PHY polling, make sure we force a link transition if
++ * we have a UP/DOWN/UP transition
++ */
++ if (phydev->is_pseudo_fixed_link)
++ phydev->irq = PHY_POLL;
++
+ pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)"
+ " Link = %d\n", dev->name, phydev->phy_id, phydev->link);
+
+diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
+index 5c8429f..3a5530d 100644
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -133,8 +133,60 @@ struct ffs_epfile {
+ /*
+ * Buffer for holding data from partial reads which may happen since
+ * we’re rounding user read requests to a multiple of a max packet size.
++ *
++ * The pointer is initialised with NULL value and may be set by
++ * __ffs_epfile_read_data function to point to a temporary buffer.
++ *
++ * In normal operation, calls to __ffs_epfile_read_buffered will consume
++ * data from said buffer and eventually free it. Importantly, while the
++ * function is using the buffer, it sets the pointer to NULL. This is
++ * all right since __ffs_epfile_read_data and __ffs_epfile_read_buffered
++ * can never run concurrently (they are synchronised by epfile->mutex)
++ * so the latter will not assign a new value to the pointer.
++ *
++ * Meanwhile ffs_func_eps_disable frees the buffer (if the pointer is
++ * valid) and sets the pointer to READ_BUFFER_DROP value. This special
++ * value is the crux of the synchronisation between
++ * ffs_func_eps_disable and __ffs_epfile_read_data.
++ *
++ * Once __ffs_epfile_read_data is about to finish it will try to set the
++ * pointer back to its old value (as described above), but seeing as the
++ * pointer is non-NULL (namely READ_BUFFER_DROP) it will instead free
++ * the buffer.
++ *
++ * == State transitions ==
++ *
++ * • ptr == NULL: (initial state)
++ * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP
++ * ◦ __ffs_epfile_read_buffered: nop
++ * ◦ __ffs_epfile_read_data allocates temp buffer: go to ptr == buf
++ * ◦ reading finishes: n/a, not in ‘and reading’ state
++ * • ptr == DROP:
++ * ◦ __ffs_epfile_read_buffer_free: nop
++ * ◦ __ffs_epfile_read_buffered: go to ptr == NULL
++ * ◦ __ffs_epfile_read_data allocates temp buffer: free buf, nop
++ * ◦ reading finishes: n/a, not in ‘and reading’ state
++ * • ptr == buf:
++ * ◦ __ffs_epfile_read_buffer_free: free buf, go to ptr == DROP
++ * ◦ __ffs_epfile_read_buffered: go to ptr == NULL and reading
++ * ◦ __ffs_epfile_read_data: n/a, __ffs_epfile_read_buffered
++ * is always called first
++ * ◦ reading finishes: n/a, not in ‘and reading’ state
++ * • ptr == NULL and reading:
++ * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP and reading
++ * ◦ __ffs_epfile_read_buffered: n/a, mutex is held
++ * ◦ __ffs_epfile_read_data: n/a, mutex is held
++ * ◦ reading finishes and …
++ * … all data read: free buf, go to ptr == NULL
++ * … otherwise: go to ptr == buf and reading
++ * • ptr == DROP and reading:
++ * ◦ __ffs_epfile_read_buffer_free: nop
++ * ◦ __ffs_epfile_read_buffered: n/a, mutex is held
++ * ◦ __ffs_epfile_read_data: n/a, mutex is held
++ * ◦ reading finishes: free buf, go to ptr == DROP
+ */
+- struct ffs_buffer *read_buffer; /* P: epfile->mutex */
++ struct ffs_buffer *read_buffer;
++#define READ_BUFFER_DROP ((struct ffs_buffer *)ERR_PTR(-ESHUTDOWN))
+
+ char name[5];
+
+@@ -733,25 +785,47 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep,
+ schedule_work(&io_data->work);
+ }
+
++static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile)
++{
++ /*
++ * See comment in struct ffs_epfile for full read_buffer pointer
++ * synchronisation story.
++ */
++ struct ffs_buffer *buf = xchg(&epfile->read_buffer, READ_BUFFER_DROP);
++ if (buf && buf != READ_BUFFER_DROP)
++ kfree(buf);
++}
++
+ /* Assumes epfile->mutex is held. */
+ static ssize_t __ffs_epfile_read_buffered(struct ffs_epfile *epfile,
+ struct iov_iter *iter)
+ {
+- struct ffs_buffer *buf = epfile->read_buffer;
++ /*
++ * Null out epfile->read_buffer so ffs_func_eps_disable does not free
++ * the buffer while we are using it. See comment in struct ffs_epfile
++ * for full read_buffer pointer synchronisation story.
++ */
++ struct ffs_buffer *buf = xchg(&epfile->read_buffer, NULL);
+ ssize_t ret;
+- if (!buf)
++ if (!buf || buf == READ_BUFFER_DROP)
+ return 0;
+
+ ret = copy_to_iter(buf->data, buf->length, iter);
+ if (buf->length == ret) {
+ kfree(buf);
+- epfile->read_buffer = NULL;
+- } else if (unlikely(iov_iter_count(iter))) {
++ return ret;
++ }
++
++ if (unlikely(iov_iter_count(iter))) {
+ ret = -EFAULT;
+ } else {
+ buf->length -= ret;
+ buf->data += ret;
+ }
++
++ if (cmpxchg(&epfile->read_buffer, NULL, buf))
++ kfree(buf);
++
+ return ret;
+ }
+
+@@ -780,7 +854,15 @@ static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile,
+ buf->length = data_len;
+ buf->data = buf->storage;
+ memcpy(buf->storage, data + ret, data_len);
+- epfile->read_buffer = buf;
++
++ /*
++ * At this point read_buffer is NULL or READ_BUFFER_DROP (if
++ * ffs_func_eps_disable has been called in the meanwhile). See comment
++ * in struct ffs_epfile for full read_buffer pointer synchronisation
++ * story.
++ */
++ if (unlikely(cmpxchg(&epfile->read_buffer, NULL, buf)))
++ kfree(buf);
+
+ return ret;
+ }
+@@ -1094,8 +1176,7 @@ ffs_epfile_release(struct inode *inode, struct file *file)
+
+ ENTER();
+
+- kfree(epfile->read_buffer);
+- epfile->read_buffer = NULL;
++ __ffs_epfile_read_buffer_free(epfile);
+ ffs_data_closed(epfile->ffs);
+
+ return 0;
+@@ -1721,24 +1802,20 @@ static void ffs_func_eps_disable(struct ffs_function *func)
+ unsigned count = func->ffs->eps_count;
+ unsigned long flags;
+
++ spin_lock_irqsave(&func->ffs->eps_lock, flags);
+ do {
+- if (epfile)
+- mutex_lock(&epfile->mutex);
+- spin_lock_irqsave(&func->ffs->eps_lock, flags);
+ /* pending requests get nuked */
+ if (likely(ep->ep))
+ usb_ep_disable(ep->ep);
+ ++ep;
+- spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+
+ if (epfile) {
+ epfile->ep = NULL;
+- kfree(epfile->read_buffer);
+- epfile->read_buffer = NULL;
+- mutex_unlock(&epfile->mutex);
++ __ffs_epfile_read_buffer_free(epfile);
+ ++epfile;
+ }
+ } while (--count);
++ spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+ }
+
+ static int ffs_func_eps_enable(struct ffs_function *func)
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 156b0c1..0ccf6da 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -47,7 +47,6 @@ struct inet_skb_parm {
+ #define IPSKB_REROUTED BIT(4)
+ #define IPSKB_DOREDIRECT BIT(5)
+ #define IPSKB_FRAG_PMTU BIT(6)
+-#define IPSKB_FRAG_SEGS BIT(7)
+
+ u16 frag_max_size;
+ };
+diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
+index 43a5a0e..b01d5d1 100644
+--- a/include/net/ip6_tunnel.h
++++ b/include/net/ip6_tunnel.h
+@@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
+ {
+ int pkt_len, err;
+
++ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ pkt_len = skb->len - skb_inner_network_offset(skb);
+ err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
+ if (unlikely(net_xmit_eval(err)))
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 8741988..c26eab9 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock *sk)
+ void sock_gen_put(struct sock *sk);
+
+ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
+- unsigned int trim_cap);
++ unsigned int trim_cap, bool refcounted);
+ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+ const int nested)
+ {
+- return __sk_receive_skb(sk, skb, nested, 1);
++ return __sk_receive_skb(sk, skb, nested, 1, true);
+ }
+
+ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 7717302..0de6989 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1164,6 +1164,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
+ }
+
+ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
++int tcp_filter(struct sock *sk, struct sk_buff *skb);
+
+ #undef STATE_TRACE
+
+diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h
+index 5cd4d4d..9c9c6ad 100644
+--- a/include/uapi/linux/atm_zatm.h
++++ b/include/uapi/linux/atm_zatm.h
+@@ -14,7 +14,6 @@
+
+ #include <linux/atmapi.h>
+ #include <linux/atmioc.h>
+-#include <linux/time.h>
+
+ #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
+ /* get pool statistics */
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index 570eeca..ad1bc67 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab)
+
+ hlist_for_each_entry_safe(l, n, head, hash_node) {
+ hlist_del_rcu(&l->hash_node);
+- htab_elem_free(htab, l);
++ if (l->state != HTAB_EXTRA_ELEM_USED)
++ htab_elem_free(htab, l);
+ }
+ }
+ }
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 44b3ba4..9ce9d72 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb)
+ goto out;
+ }
+
+- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
++ *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
+ out_set_summed:
+ skb->ip_summed = CHECKSUM_NONE;
+ out:
+diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
+index 52742a0..5550a86 100644
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
+ struct flow_dissector_key_tags *key_tags;
+ struct flow_dissector_key_keyid *key_keyid;
+ u8 ip_proto = 0;
+- bool ret = false;
++ bool ret;
+
+ if (!data) {
+ data = skb->data;
+@@ -481,12 +481,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
+ out_good:
+ ret = true;
+
+-out_bad:
++ key_control->thoff = (u16)nhoff;
++out:
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+- key_control->thoff = (u16)nhoff;
+
+ return ret;
++
++out_bad:
++ ret = false;
++ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
++ goto out;
+ }
+ EXPORT_SYMBOL(__skb_flow_dissect);
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index fd7b41e..10acacc 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ EXPORT_SYMBOL(sock_queue_rcv_skb);
+
+ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+- const int nested, unsigned int trim_cap)
++ const int nested, unsigned int trim_cap, bool refcounted)
+ {
+ int rc = NET_RX_SUCCESS;
+
+@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+
+ bh_unlock_sock(sk);
+ out:
+- sock_put(sk);
++ if (refcounted)
++ sock_put(sk);
+ return rc;
+ discard_and_relse:
+ kfree_skb(skb);
+@@ -1563,6 +1564,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
+ RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+
+ newsk->sk_err = 0;
++ newsk->sk_err_soft = 0;
+ newsk->sk_priority = 0;
+ newsk->sk_incoming_cpu = raw_smp_processor_id();
+ atomic64_set(&newsk->sk_cookie, 0);
+diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
+index 345a3ae..b567c87 100644
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
+ {
+ const struct iphdr *iph = (struct iphdr *)skb->data;
+ const u8 offset = iph->ihl << 2;
+- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
++ const struct dccp_hdr *dh;
+ struct dccp_sock *dp;
+ struct inet_sock *inet;
+ const int type = icmp_hdr(skb)->type;
+@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
+ int err;
+ struct net *net = dev_net(skb->dev);
+
+- if (skb->len < offset + sizeof(*dh) ||
+- skb->len < offset + __dccp_basic_hdr_len(dh)) {
+- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+- return;
+- }
++ /* Only need dccph_dport & dccph_sport which are the first
++ * 4 bytes in dccp header.
++ * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
++ */
++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++ dh = (struct dccp_hdr *)(skb->data + offset);
+
+ sk = __inet_lookup_established(net, &dccp_hashinfo,
+ iph->daddr, dh->dccph_dport,
+@@ -868,7 +870,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
+ goto discard_and_relse;
+ nf_reset(skb);
+
+- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
++ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
+
+ no_dccp_socket:
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
+index 3828f94..715e5d1 100644
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+ {
+ const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
++ const struct dccp_hdr *dh;
+ struct dccp_sock *dp;
+ struct ipv6_pinfo *np;
+ struct sock *sk;
+@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ __u64 seq;
+ struct net *net = dev_net(skb->dev);
+
+- if (skb->len < offset + sizeof(*dh) ||
+- skb->len < offset + __dccp_basic_hdr_len(dh)) {
+- __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+- ICMP6_MIB_INERRORS);
+- return;
+- }
++ /* Only need dccph_dport & dccph_sport which are the first
++ * 4 bytes in dccp header.
++ * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
++ */
++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++ dh = (struct dccp_hdr *)(skb->data + offset);
+
+ sk = __inet6_lookup_established(net, &dccp_hashinfo,
+ &hdr->daddr, dh->dccph_dport,
+@@ -738,7 +739,8 @@ static int dccp_v6_rcv(struct sk_buff *skb)
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto discard_and_relse;
+
+- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
++ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
++ refcounted) ? -1 : 0;
+
+ no_dccp_socket:
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+@@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
+ .getsockopt = ipv6_getsockopt,
+ .addr2sockaddr = inet6_csk_addr2sockaddr,
+ .sockaddr_len = sizeof(struct sockaddr_in6),
++ .bind_conflict = inet6_csk_bind_conflict,
+ #ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_ipv6_setsockopt,
+ .compat_getsockopt = compat_ipv6_getsockopt,
+diff --git a/net/dccp/proto.c b/net/dccp/proto.c
+index 41e6580..9fe25bf 100644
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
+ __kfree_skb(skb);
+ }
+
++ /* If socket has been already reset kill it. */
++ if (sk->sk_state == DCCP_CLOSED)
++ goto adjudge_to_death;
++
+ if (data_was_unread) {
+ /* Unread data was tossed, send an appropriate Reset Code */
+ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index e2ffc2a..7ef7031 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
+ struct key_vector *l, **tp = &iter->tnode;
+ t_key key;
+
+- /* use cache location of next-to-find key */
++ /* use cached location of previously found key */
+ if (iter->pos > 0 && pos >= iter->pos) {
+- pos -= iter->pos;
+ key = iter->key;
+ } else {
+- iter->pos = 0;
++ iter->pos = 1;
+ key = 0;
+ }
+
+- while ((l = leaf_walk_rcu(tp, key)) != NULL) {
++ pos -= iter->pos;
++
++ while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
+ key = l->key + 1;
+ iter->pos++;
+-
+- if (--pos <= 0)
+- break;
+-
+ l = NULL;
+
+ /* handle unlikely case of a key wrap */
+@@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
+ }
+
+ if (l)
+- iter->key = key; /* remember it */
++ iter->key = l->key; /* remember it */
+ else
+ iter->pos = 0; /* forget it */
+
+@@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
+ return fib_route_get_idx(iter, *pos);
+
+ iter->pos = 0;
+- iter->key = 0;
++ iter->key = KEY_MAX;
+
+ return SEQ_START_TOKEN;
+ }
+@@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ struct fib_route_iter *iter = seq->private;
+ struct key_vector *l = NULL;
+- t_key key = iter->key;
++ t_key key = iter->key + 1;
+
+ ++*pos;
+
+@@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ l = leaf_walk_rcu(&iter->tnode, key);
+
+ if (l) {
+- iter->key = l->key + 1;
++ iter->key = l->key;
+ iter->pos++;
+ } else {
+ iter->pos = 0;
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 38abe70..48734ee 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+ fl4->flowi4_proto = IPPROTO_ICMP;
+ fl4->fl4_icmp_type = type;
+ fl4->fl4_icmp_code = code;
+- fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
++ fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
+
+ security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
+ rt = __ip_route_output_key_hash(net, fl4,
+@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+ if (err)
+ goto relookup_failed;
+
+- if (inet_addr_type_dev_table(net, skb_in->dev,
++ if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
+ fl4_dec.saddr) == RTN_LOCAL) {
+ rt2 = __ip_route_output_key(net, &fl4_dec);
+ if (IS_ERR(rt2))
+diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
+index 8b4ffd2..9f0a7b9 100644
+--- a/net/ipv4/ip_forward.c
++++ b/net/ipv4/ip_forward.c
+@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
+ if (opt->is_strictroute && rt->rt_uses_gateway)
+ goto sr_failed;
+
+- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
++ IPCB(skb)->flags |= IPSKB_FORWARDED;
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+ if (ip_exceeds_mtu(skb, mtu)) {
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index dde37fb..307daed 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
+ struct sk_buff *segs;
+ int ret = 0;
+
+- /* common case: fragmentation of segments is not allowed,
+- * or seglen is <= mtu
++ /* common case: seglen is <= mtu
+ */
+- if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
+- skb_gso_validate_mtu(skb, mtu))
++ if (skb_gso_validate_mtu(skb, mtu))
+ return ip_finish_output2(net, sk, skb);
+
+ /* Slowpath - GSO segment length is exceeding the dst MTU.
+diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
+index 0f227db..afd6b59 100644
+--- a/net/ipv4/ip_tunnel_core.c
++++ b/net/ipv4/ip_tunnel_core.c
+@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
+ int pkt_len = skb->len - skb_inner_network_offset(skb);
+ struct net *net = dev_net(rt->dst.dev);
+ struct net_device *dev = skb->dev;
+- int skb_iif = skb->skb_iif;
+ struct iphdr *iph;
+ int err;
+
+@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
+ skb_dst_set(skb, &rt->dst);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+- if (skb_iif && !(df & htons(IP_DF))) {
+- /* Arrived from an ingress interface, got encapsulated, with
+- * fragmentation of encapulating frames allowed.
+- * If skb is gso, the resulting encapsulated network segments
+- * may exceed dst mtu.
+- * Allow IP Fragmentation of segments.
+- */
+- IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
+- }
+-
+ /* Push down and install the IP header. */
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
+index 5f006e1..27089f5 100644
+--- a/net/ipv4/ipmr.c
++++ b/net/ipv4/ipmr.c
+@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
+ vif->dev->stats.tx_bytes += skb->len;
+ }
+
+- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
++ IPCB(skb)->flags |= IPSKB_FORWARDED;
+
+ /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+ * not only before forwarding, but after forwarding on all output
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 62c3ed0..2f23ef1 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
+ goto reject_redirect;
+ }
+
+- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
++ n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
++ if (!n)
++ n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
+ if (!IS_ERR(n)) {
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index ffbb218..c876f5d 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1145,7 +1145,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+
+ err = -EPIPE;
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+- goto out_err;
++ goto do_error;
+
+ sg = !!(sk->sk_route_caps & NETIF_F_SG);
+
+@@ -1219,7 +1219,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+
+ if (!skb_can_coalesce(skb, i, pfrag->page,
+ pfrag->offset)) {
+- if (i == sysctl_max_skb_frags || !sg) {
++ if (i >= sysctl_max_skb_frags || !sg) {
+ tcp_mark_push(tp, skb);
+ goto new_segment;
+ }
+diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
+index 10d728b..ab37c67 100644
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -56,6 +56,7 @@ struct dctcp {
+ u32 next_seq;
+ u32 ce_state;
+ u32 delayed_ack_reserved;
++ u32 loss_cwnd;
+ };
+
+ static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
+@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
+ ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
+
+ ca->delayed_ack_reserved = 0;
++ ca->loss_cwnd = 0;
+ ca->ce_state = 0;
+
+ dctcp_reset(tp, ca);
+@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
+
+ static u32 dctcp_ssthresh(struct sock *sk)
+ {
+- const struct dctcp *ca = inet_csk_ca(sk);
++ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
++ ca->loss_cwnd = tp->snd_cwnd;
+ return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+ }
+
+@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
+ return 0;
+ }
+
++static u32 dctcp_cwnd_undo(struct sock *sk)
++{
++ const struct dctcp *ca = inet_csk_ca(sk);
++
++ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
++}
++
+ static struct tcp_congestion_ops dctcp __read_mostly = {
+ .init = dctcp_init,
+ .in_ack_event = dctcp_update_alpha,
+ .cwnd_event = dctcp_cwnd_event,
+ .ssthresh = dctcp_ssthresh,
+ .cong_avoid = tcp_reno_cong_avoid,
++ .undo_cwnd = dctcp_cwnd_undo,
+ .set_state = dctcp_state,
+ .get_info = dctcp_get_info,
+ .flags = TCP_CONG_NEEDS_ECN,
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 7158d4f..7b235fa 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1537,6 +1537,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL(tcp_prequeue);
+
++int tcp_filter(struct sock *sk, struct sk_buff *skb)
++{
++ struct tcphdr *th = (struct tcphdr *)skb->data;
++ unsigned int eaten = skb->len;
++ int err;
++
++ err = sk_filter_trim_cap(sk, skb, th->doff * 4);
++ if (!err) {
++ eaten -= skb->len;
++ TCP_SKB_CB(skb)->end_seq -= eaten;
++ }
++ return err;
++}
++EXPORT_SYMBOL(tcp_filter);
++
+ /*
+ * From tcp_input.c
+ */
+@@ -1648,8 +1663,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
+
+ nf_reset(skb);
+
+- if (sk_filter(sk, skb))
++ if (tcp_filter(sk, skb))
+ goto discard_and_relse;
++ th = (const struct tcphdr *)skb->data;
++ iph = ip_hdr(skb);
+
+ skb->dev = NULL;
+
+diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
+index bd59c34..7370ad2 100644
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ if (__ipv6_addr_needs_scope_id(addr_type))
+ iif = skb->dev->ifindex;
+ else
+- iif = l3mdev_master_ifindex(skb->dev);
++ iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
+
+ /*
+ * Must not send error if the source does not uniquely
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index fc67822..af6a09e 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1228,7 +1228,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+ if (skb->protocol == htons(ETH_P_IP))
+ return tcp_v4_do_rcv(sk, skb);
+
+- if (sk_filter(sk, skb))
++ if (tcp_filter(sk, skb))
+ goto discard;
+
+ /*
+@@ -1455,8 +1455,10 @@ static int tcp_v6_rcv(struct sk_buff *skb)
+ if (tcp_v6_inbound_md5_hash(sk, skb))
+ goto discard_and_relse;
+
+- if (sk_filter(sk, skb))
++ if (tcp_filter(sk, skb))
+ goto discard_and_relse;
++ th = (const struct tcphdr *)skb->data;
++ hdr = ipv6_hdr(skb);
+
+ skb->dev = NULL;
+
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index baccbf3..7b0e059 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
+
+ timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
+
+- err = sctp_wait_for_connect(asoc, &timeo);
+- if ((err == 0 || err == -EINPROGRESS) && assoc_id)
++ if (assoc_id)
+ *assoc_id = asoc->assoc_id;
++ err = sctp_wait_for_connect(asoc, &timeo);
++ /* Note: the asoc may be freed after the return of
++ * sctp_wait_for_connect.
++ */
+
+ /* Don't free association on exit. */
+ asoc = NULL;
+@@ -4278,19 +4281,18 @@ static void sctp_shutdown(struct sock *sk, int how)
+ {
+ struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep;
+- struct sctp_association *asoc;
+
+ if (!sctp_style(sk, TCP))
+ return;
+
+- if (how & SEND_SHUTDOWN) {
++ ep = sctp_sk(sk)->ep;
++ if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
++ struct sctp_association *asoc;
++
+ sk->sk_state = SCTP_SS_CLOSING;
+- ep = sctp_sk(sk)->ep;
+- if (!list_empty(&ep->asocs)) {
+- asoc = list_entry(ep->asocs.next,
+- struct sctp_association, asocs);
+- sctp_primitive_SHUTDOWN(net, asoc, NULL);
+- }
++ asoc = list_entry(ep->asocs.next,
++ struct sctp_association, asocs);
++ sctp_primitive_SHUTDOWN(net, asoc, NULL);
+ }
+ }
+
+diff --git a/net/socket.c b/net/socket.c
+index a1bd161..03bc2c2 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
+ if (err)
+ break;
+ ++datagrams;
++ if (msg_data_left(&msg_sys))
++ break;
+ cond_resched();
+ }
+
+diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
+index f3825b6..f046b77 100644
+--- a/tools/spi/spidev_test.c
++++ b/tools/spi/spidev_test.c
+@@ -19,6 +19,7 @@
+ #include <getopt.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
++#include <linux/ioctl.h>
+ #include <sys/stat.h>
+ #include <linux/types.h>
+ #include <linux/spi/spidev.h>
diff --git a/4.8.10/4420_grsecurity-3.1-4.8.10-201611232213.patch b/4.8.10/4420_grsecurity-3.1-4.8.10-201611232213.patch
new file mode 100644
index 0000000..0149d93
--- /dev/null
+++ b/4.8.10/4420_grsecurity-3.1-4.8.10-201611232213.patch
@@ -0,0 +1,220815 @@
+diff --git a/Documentation/dontdiff b/Documentation/dontdiff
+index 5385cba..607c6a0 100644
+--- a/Documentation/dontdiff
++++ b/Documentation/dontdiff
+@@ -7,6 +7,7 @@
+ *.cis
+ *.cpio
+ *.csp
++*.dbg
+ *.dsp
+ *.dvi
+ *.elf
+@@ -16,6 +17,7 @@
+ *.gcov
+ *.gen.S
+ *.gif
++*.gmo
+ *.grep
+ *.grp
+ *.gz
+@@ -52,14 +54,17 @@
+ *.tab.h
+ *.tex
+ *.ver
++*.vim
+ *.xml
+ *.xz
+ *_MODULES
++*_reg_safe.h
+ *_vga16.c
+ *~
+ \#*#
+ *.9
+-.*
++.[^g]*
++.gen*
+ .*.d
+ .mm
+ 53c700_d.h
+@@ -73,9 +78,11 @@ Image
+ Module.markers
+ Module.symvers
+ PENDING
++PERF*
+ SCCS
+ System.map*
+ TAGS
++TRACEEVENT-CFLAGS
+ aconf
+ af_names.h
+ aic7*reg.h*
+@@ -84,6 +91,7 @@ aic7*seq.h*
+ aicasm
+ aicdb.h*
+ altivec*.c
++ashldi3.S
+ asm-offsets.h
+ asm_offsets.h
+ autoconf.h*
+@@ -96,11 +104,14 @@ bounds.h
+ bsetup
+ btfixupprep
+ build
++builtin-policy.h
+ bvmlinux
+ bzImage*
+ capability_names.h
+ capflags.c
+ classlist.h*
++clut_vga16.c
++common-cmds.h
+ comp*.log
+ compile.h*
+ conf
+@@ -109,19 +120,23 @@ config-*
+ config_data.h*
+ config.mak
+ config.mak.autogen
++config.tmp
+ conmakehash
+ consolemap_deftbl.c*
+ cpustr.h
+ crc32table.h*
+ cscope.*
+ defkeymap.c
++devicetable-offsets.h
+ devlist.h*
+ dnotify_test
+ docproc
+ dslm
++dtc-lexer.lex.c
+ elf2ecoff
+ elfconfig.h*
+ evergreen_reg_safe.h
++exception_policy.conf
+ fixdep
+ flask.h
+ fore200e_mkfirm
+@@ -129,12 +144,15 @@ fore200e_pca_fw.c*
+ gconf
+ gconf.glade.h
+ gen-devlist
++gen-kdb_cmds.c
+ gen_crc32table
+ gen_init_cpio
+ generated
+ genheaders
+ genksyms
+ *_gray256.c
++hash
++hid-example
+ hpet_example
+ hugepage-mmap
+ hugepage-shm
+@@ -149,14 +167,14 @@ int32.c
+ int4.c
+ int8.c
+ kallsyms
+-kconfig
++kern_constants.h
+ keywords.c
+ ksym.c*
+ ksym.h*
+ kxgettext
+ lex.c
+ lex.*.c
+-linux
++lib1funcs.S
+ logo_*.c
+ logo_*_clut224.c
+ logo_*_mono.c
+@@ -167,12 +185,14 @@ machtypes.h
+ map
+ map_hugetlb
+ mconf
++mdp
+ miboot*
+ mk_elfconfig
+ mkboot
+ mkbugboot
+ mkcpustr
+ mkdep
++mkpiggy
+ mkprep
+ mkregtable
+ mktables
+@@ -188,6 +208,8 @@ oui.c*
+ page-types
+ parse.c
+ parse.h
++parse-events*
++pasyms.h
+ patches*
+ pca200e.bin
+ pca200e_ecd.bin2
+@@ -197,6 +219,7 @@ perf-archive
+ piggyback
+ piggy.gzip
+ piggy.S
++pmu-*
+ pnmtologo
+ ppc_defs.h*
+ pss_boot.h
+@@ -206,7 +229,12 @@ r200_reg_safe.h
+ r300_reg_safe.h
+ r420_reg_safe.h
+ r600_reg_safe.h
++randomize_layout_hash.h
++randomize_layout_seed.h
++realmode.lds
++realmode.relocs
+ recordmcount
++regdb.c
+ relocs
+ rlim_names.h
+ rn50_reg_safe.h
+@@ -216,8 +244,17 @@ series
+ setup
+ setup.bin
+ setup.elf
++signing_key*
++aux.h
++disable.h
++e_fields.h
++e_fns.h
++e_fptrs.h
++e_vars.h
+ sImage
++slabinfo
+ sm_tbl*
++sortextable
+ split-include
+ syscalltab.h
+ tables.c
+@@ -227,6 +264,7 @@ tftpboot.img
+ timeconst.h
+ times.h*
+ trix_boot.h
++user_constants.h
+ utsrelease.h*
+ vdso-syms.lds
+ vdso.lds
+@@ -238,13 +276,17 @@ vdso32.lds
+ vdso32.so.dbg
+ vdso64.lds
+ vdso64.so.dbg
++vdsox32.lds
++vdsox32-syms.lds
+ version.h*
+ vmImage
+ vmlinux
+ vmlinux-*
+ vmlinux.aout
+ vmlinux.bin.all
++vmlinux.bin.bz2
+ vmlinux.lds
++vmlinux.relocs
+ vmlinuz
+ voffset.h
+ vsyscall.lds
+@@ -252,9 +294,12 @@ vsyscall_32.lds
+ wanxlfw.inc
+ uImage
+ unifdef
++utsrelease.h
+ wakeup.bin
+ wakeup.elf
+ wakeup.lds
++x509*
+ zImage*
+ zconf.hash.c
++zconf.lex.c
+ zoffset.h
+diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
+index 385a5ef..51d7fba 100644
+--- a/Documentation/kbuild/makefiles.txt
++++ b/Documentation/kbuild/makefiles.txt
+@@ -23,10 +23,11 @@ This document describes the Linux kernel Makefiles.
+ === 4 Host Program support
+ --- 4.1 Simple Host Program
+ --- 4.2 Composite Host Programs
+- --- 4.3 Using C++ for host programs
+- --- 4.4 Controlling compiler options for host programs
+- --- 4.5 When host programs are actually built
+- --- 4.6 Using hostprogs-$(CONFIG_FOO)
++ --- 4.3 Defining shared libraries
++ --- 4.4 Using C++ for host programs
++ --- 4.5 Controlling compiler options for host programs
++ --- 4.6 When host programs are actually built
++ --- 4.7 Using hostprogs-$(CONFIG_FOO)
+
+ === 5 Kbuild clean infrastructure
+
+@@ -644,7 +645,29 @@ Both possibilities are described in the following.
+ Finally, the two .o files are linked to the executable, lxdialog.
+ Note: The syntax <executable>-y is not permitted for host-programs.
+
+---- 4.3 Using C++ for host programs
++--- 4.3 Defining shared libraries
++
++ Objects with extension .so are considered shared libraries, and
++ will be compiled as position independent objects.
++ Kbuild provides support for shared libraries, but the usage
++ shall be restricted.
++ In the following example the libkconfig.so shared library is used
++ to link the executable conf.
++
++ Example:
++ #scripts/kconfig/Makefile
++ hostprogs-y := conf
++ conf-objs := conf.o libkconfig.so
++ libkconfig-objs := expr.o type.o
++
++ Shared libraries always require a corresponding -objs line, and
++ in the example above the shared library libkconfig is composed of
++ the two objects expr.o and type.o.
++ expr.o and type.o will be built as position independent code and
++ linked as a shared library libkconfig.so. C++ is not supported for
++ shared libraries.
++
++--- 4.4 Using C++ for host programs
+
+ kbuild offers support for host programs written in C++. This was
+ introduced solely to support kconfig, and is not recommended
+@@ -667,7 +690,7 @@ Both possibilities are described in the following.
+ qconf-cxxobjs := qconf.o
+ qconf-objs := check.o
+
+---- 4.4 Controlling compiler options for host programs
++--- 4.5 Controlling compiler options for host programs
+
+ When compiling host programs, it is possible to set specific flags.
+ The programs will always be compiled utilising $(HOSTCC) passed
+@@ -695,7 +718,7 @@ Both possibilities are described in the following.
+ When linking qconf, it will be passed the extra option
+ "-L$(QTDIR)/lib".
+
+---- 4.5 When host programs are actually built
++--- 4.6 When host programs are actually built
+
+ Kbuild will only build host-programs when they are referenced
+ as a prerequisite.
+@@ -726,7 +749,7 @@ Both possibilities are described in the following.
+ This will tell kbuild to build lxdialog even if not referenced in
+ any rule.
+
+---- 4.6 Using hostprogs-$(CONFIG_FOO)
++--- 4.7 Using hostprogs-$(CONFIG_FOO)
+
+ A typical pattern in a Kbuild file looks like this:
+
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index 46726d4..36138ff 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -1368,6 +1368,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ [KNL] Should the hard-lockup detector generate
+ backtraces on all cpus.
+ Format: <integer>
++ grsec_proc_gid= [GRKERNSEC_PROC_USERGROUP] Chooses the GID that is
++ exempt from grsecurity's /proc restrictions
++
++ grsec_sysfs_restrict= Format: 0 | 1
++ Default: 1
++ Disables GRKERNSEC_SYSFS_RESTRICT if enabled in config
+
+ hashdist= [KNL,NUMA] Large hashes allocated during boot
+ are distributed across NUMA nodes. Defaults on
+@@ -2591,6 +2597,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ noexec=on: enable non-executable mappings (default)
+ noexec=off: disable non-executable mappings
+
++ nopcid [X86-64]
++ Disable PCID (Process-Context IDentifier) even if it
++ is supported by the processor.
++
+ nosmap [X86]
+ Disable SMAP (Supervisor Mode Access Prevention)
+ even if it is supported by processor.
+@@ -2895,6 +2905,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ the specified number of seconds. This is to be used if
+ your oopses keep scrolling off the screen.
+
++ pax_nouderef [X86] disables UDEREF. Most likely needed under certain
++ virtualization environments that don't cope well with the
++ expand down segment used by UDEREF on X86-32 or the frequent
++ page table updates on X86-64.
++
++ pax_sanitize_slab=
++ Format: { 0 | 1 | off | fast | full }
++ Options '0' and '1' are only provided for backward
++ compatibility, 'off' or 'fast' should be used instead.
++ 0|off : disable slab object sanitization
++ 1|fast: enable slab object sanitization excluding
++ whitelisted slabs (default)
++ full : sanitize all slabs, even the whitelisted ones
++
++ pax_softmode= 0/1 to disable/enable PaX softmode at boot time.
++
++ pax_extra_latent_entropy
++ Enable a very simple form of latent entropy extraction
++ from the first 4GB of memory as the bootmem allocator
++ passes the memory pages to the buddy allocator.
++
++ pax_size_overflow_report_only
++ Enables rate-limited logging of size_overflow plugin
++ violations while disabling killing of the violating
++ task.
++
++ pax_weakuderef [X86-64] enables the weaker but faster form of UDEREF
++ when the processor supports PCID.
++
+ pcbit= [HW,ISDN]
+
+ pcd. [PARIDE]
+diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
+index ffab8b5..b8fcd61 100644
+--- a/Documentation/sysctl/kernel.txt
++++ b/Documentation/sysctl/kernel.txt
+@@ -42,6 +42,7 @@ show up in /proc/sys/kernel:
+ - kptr_restrict
+ - kstack_depth_to_print [ X86 only ]
+ - l2cr [ PPC only ]
++- modify_ldt [ X86 only ]
+ - modprobe ==> Documentation/debugging-modules.txt
+ - modules_disabled
+ - msg_next_id [ sysv ipc ]
+@@ -409,6 +410,20 @@ This flag controls the L2 cache of G3 processor boards. If
+
+ ==============================================================
+
++modify_ldt: (X86 only)
++
++Enables (1) or disables (0) the modify_ldt syscall. Modifying the LDT
++(Local Descriptor Table) may be needed to run 16-bit or segmented code
++such as Dosemu or Wine. This is done via a system call which is not needed
++to run portable applications, and which can sometimes be abused to exploit
++some weaknesses of the architecture, opening new vulnerabilities.
++
++This sysctl allows one to increase the system's security by disabling the
++system call, or to restore compatibility with specific applications when it
++was already disabled.
++
++==============================================================
++
+ modules_disabled:
+
+ A toggle value indicating if modules are allowed to be loaded
+diff --git a/Makefile b/Makefile
+index 7cf2b49..3e3071c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -302,7 +302,9 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
+ HOSTCC = gcc
+ HOSTCXX = g++
+ HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89
+-HOSTCXXFLAGS = -O2
++HOSTCFLAGS = -W -Wno-unused-parameter -Wno-missing-field-initializers -fno-delete-null-pointer-checks
++HOSTCFLAGS += $(call cc-option, -Wno-empty-body)
++HOSTCXXFLAGS = -O2 -Wall -W -Wno-array-bounds
+
+ ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1)
+ HOSTCFLAGS += -Wno-unused-value -Wno-unused-parameter \
+@@ -621,6 +623,8 @@ include arch/$(SRCARCH)/Makefile
+
+ KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,)
+ KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,)
++KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
++KBUILD_AFLAGS += $(call cc-option,-fno-PIE)
+
+ ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+ KBUILD_CFLAGS += -Os
+@@ -715,7 +719,7 @@ KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
+ else
+ KBUILD_CFLAGS += -g
+ endif
+-KBUILD_AFLAGS += -Wa,-gdwarf-2
++KBUILD_AFLAGS += -Wa,--gdwarf-2
+ endif
+ ifdef CONFIG_DEBUG_INFO_DWARF4
+ KBUILD_CFLAGS += $(call cc-option, -gdwarf-4,)
+@@ -890,7 +894,7 @@ export mod_sign_cmd
+
+
+ ifeq ($(KBUILD_EXTMOD),)
+-core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
++core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/
+
+ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
+ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
+@@ -1256,7 +1260,10 @@ MRPROPER_FILES += .config .config.old .version .old_version \
+ Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
+ signing_key.pem signing_key.priv signing_key.x509 \
+ x509.genkey extra_certificates signing_key.x509.keyid \
+- signing_key.x509.signer vmlinux-gdb.py
++ signing_key.x509.signer vmlinux-gdb.py \
++ scripts/gcc-plugins/size_overflow_plugin/e_*.h \
++ scripts/gcc-plugins/size_overflow_plugin/disable.h \
++ scripts/gcc-plugins/randomize_layout_seed.h
+
+ # clean - Delete most, but leave enough to build external modules
+ #
+@@ -1295,7 +1302,7 @@ distclean: mrproper
+ @find $(srctree) $(RCS_FIND_IGNORE) \
+ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \
+ -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
+- -o -name '.*.rej' -o -name '*%' -o -name 'core' \) \
++ -o -name '.*.rej' -o -name '*.so' -o -name '*%' -o -name 'core' \) \
+ -type f -print | xargs rm -f
+
+
+diff --git a/arch/Kconfig b/arch/Kconfig
+index fd6e971..35d7bbf 100644
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -355,7 +355,7 @@ config HAVE_GCC_PLUGINS
+ menuconfig GCC_PLUGINS
+ bool "GCC plugins"
+ depends on HAVE_GCC_PLUGINS
+- depends on !COMPILE_TEST
++ default y
+ help
+ GCC plugins are loadable modules that provide extra features to the
+ compiler. They are useful for runtime instrumentation and static analysis.
+diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
+index 498933a..78d2b22 100644
+--- a/arch/alpha/include/asm/atomic.h
++++ b/arch/alpha/include/asm/atomic.h
+@@ -308,4 +308,14 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
+ #define atomic_dec(v) atomic_sub(1,(v))
+ #define atomic64_dec(v) atomic64_sub(1,(v))
+
++#define atomic64_read_unchecked(v) atomic64_read(v)
++#define atomic64_set_unchecked(v, i) atomic64_set((v), (i))
++#define atomic64_add_unchecked(a, v) atomic64_add((a), (v))
++#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v))
++#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v))
++#define atomic64_inc_unchecked(v) atomic64_inc(v)
++#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v)
++#define atomic64_dec_unchecked(v) atomic64_dec(v)
++#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n))
++
+ #endif /* _ALPHA_ATOMIC_H */
+diff --git a/arch/alpha/include/asm/cache.h b/arch/alpha/include/asm/cache.h
+index ad368a9..fbe0f25 100644
+--- a/arch/alpha/include/asm/cache.h
++++ b/arch/alpha/include/asm/cache.h
+@@ -4,19 +4,19 @@
+ #ifndef __ARCH_ALPHA_CACHE_H
+ #define __ARCH_ALPHA_CACHE_H
+
++#include <linux/const.h>
+
+ /* Bytes per L1 (data) cache line. */
+ #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EV6)
+-# define L1_CACHE_BYTES 64
+ # define L1_CACHE_SHIFT 6
+ #else
+ /* Both EV4 and EV5 are write-through, read-allocate,
+ direct-mapped, physical.
+ */
+-# define L1_CACHE_BYTES 32
+ # define L1_CACHE_SHIFT 5
+ #endif
+
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+ #define SMP_CACHE_BYTES L1_CACHE_BYTES
+
+ #endif
+diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h
+index 968d999..d36b2df 100644
+--- a/arch/alpha/include/asm/elf.h
++++ b/arch/alpha/include/asm/elf.h
+@@ -91,6 +91,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE (current->personality & ADDR_LIMIT_32BIT ? 0x10000 : 0x120000000UL)
++
++#define PAX_DELTA_MMAP_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 28)
++#define PAX_DELTA_STACK_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 19)
++#endif
++
+ /* $0 is set by ld.so to a pointer to a function which might be
+ registered using atexit. This provides a mean for the dynamic
+ linker to call DT_FINI functions for shared libraries that have
+diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
+index c2ebb6f..93a0613 100644
+--- a/arch/alpha/include/asm/pgalloc.h
++++ b/arch/alpha/include/asm/pgalloc.h
+@@ -29,6 +29,12 @@ pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+ pgd_set(pgd, pmd);
+ }
+
++static inline void
++pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
++{
++ pgd_populate(mm, pgd, pmd); /* kernel variant simply forwards to pgd_populate() */
++}
++
+ extern pgd_t *pgd_alloc(struct mm_struct *mm);
+
+ static inline void
+diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
+index a9a1195..e9b8417 100644
+--- a/arch/alpha/include/asm/pgtable.h
++++ b/arch/alpha/include/asm/pgtable.h
+@@ -101,6 +101,17 @@ struct vm_area_struct;
+ #define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS)
+ #define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW)
+ #define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW)
++
++#ifdef CONFIG_PAX_PAGEEXEC
++# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOE)
++# define PAGE_COPY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE)
++# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE)
++#else
++# define PAGE_SHARED_NOEXEC PAGE_SHARED
++# define PAGE_COPY_NOEXEC PAGE_COPY
++# define PAGE_READONLY_NOEXEC PAGE_READONLY
++#endif
++
+ #define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE)
+
+ #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x))
+diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c
+index 936bc8f..bb1859f 100644
+--- a/arch/alpha/kernel/module.c
++++ b/arch/alpha/kernel/module.c
+@@ -160,7 +160,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab,
+
+ /* The small sections were sorted to the end of the segment.
+ The following should definitely cover them. */
+- gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000;
++ gp = (u64)me->core_layout.base_rw + me->core_layout.size_rw - 0x8000;
+ got = sechdrs[me->arch.gotsecindex].sh_addr;
+
+ for (i = 0; i < n; i++) {
+diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
+index ffb93f49..ced8233 100644
+--- a/arch/alpha/kernel/osf_sys.c
++++ b/arch/alpha/kernel/osf_sys.c
+@@ -1300,10 +1300,11 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
+ generic version except that we know how to honor ADDR_LIMIT_32BIT. */
+
+ static unsigned long
+-arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
+- unsigned long limit)
++arch_get_unmapped_area_1(struct file *filp, unsigned long addr, unsigned long len,
++ unsigned long limit, unsigned long flags)
+ {
+ struct vm_unmapped_area_info info;
++ unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags);
+
+ info.flags = 0;
+ info.length = len;
+@@ -1311,6 +1312,7 @@ arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
+ info.high_limit = limit;
+ info.align_mask = 0;
+ info.align_offset = 0;
++ info.threadstack_offset = offset;
+ return vm_unmapped_area(&info);
+ }
+
+@@ -1343,20 +1345,24 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ merely specific addresses, but regions of memory -- perhaps
+ this feature should be incorporated into all ports? */
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(current->mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (addr) {
+- addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit);
++ addr = arch_get_unmapped_area_1 (filp, PAGE_ALIGN(addr), len, limit, flags);
+ if (addr != (unsigned long) -ENOMEM)
+ return addr;
+ }
+
+ /* Next, try allocating at TASK_UNMAPPED_BASE. */
+- addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE),
+- len, limit);
++ addr = arch_get_unmapped_area_1 (filp, PAGE_ALIGN(current->mm->mmap_base), len, limit, flags);
++
+ if (addr != (unsigned long) -ENOMEM)
+ return addr;
+
+ /* Finally, try allocating in low memory. */
+- addr = arch_get_unmapped_area_1 (PAGE_SIZE, len, limit);
++ addr = arch_get_unmapped_area_1 (filp, PAGE_SIZE, len, limit, flags);
+
+ return addr;
+ }
+diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
+index 83e9eee..db02682 100644
+--- a/arch/alpha/mm/fault.c
++++ b/arch/alpha/mm/fault.c
+@@ -52,6 +52,124 @@ __load_new_mm_context(struct mm_struct *next_mm)
+ __reload_thread(pcb);
+ }
+
++#ifdef CONFIG_PAX_PAGEEXEC
++/*
++ * PaX: decide what to do with offenders (regs->pc = fault address)
++ *
++ * returns 1 when task should be killed (always, without CONFIG_PAX_EMUPLT)
++ * 2 when patched PLT trampoline was detected (regs updated to emulate it)
++ * 3 when unpatched PLT trampoline was detected (regs updated to emulate it)
++ */
++static int pax_handle_fetch_fault(struct pt_regs *regs)
++{
++
++#ifdef CONFIG_PAX_EMUPLT
++ int err;
++
++ do { /* PaX: patched PLT emulation #1 */
++ unsigned int ldah, ldq, jmp;
++
++ err = get_user(ldah, (unsigned int *)regs->pc);
++ err |= get_user(ldq, (unsigned int *)(regs->pc+4));
++ err |= get_user(jmp, (unsigned int *)(regs->pc+8));
++
++ if (err)
++ break; /* could not read the three instruction words at pc */
++
++ if ((ldah & 0xFFFF0000U) == 0x277B0000U &&
++ (ldq & 0xFFFF0000U) == 0xA77B0000U &&
++ jmp == 0x6BFB0000U)
++ {
++ unsigned long r27, addr;
++ unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16;
++ unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL;
++
++ addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL);
++ err = get_user(r27, (unsigned long *)addr); /* fetch the jump target from user memory */
++ if (err)
++ break;
++
++ regs->r27 = r27;
++ regs->pc = r27; /* resume execution at the fetched target */
++ return 2;
++ }
++ } while (0);
++
++ do { /* PaX: patched PLT emulation #2 */
++ unsigned int ldah, lda, br;
++
++ err = get_user(ldah, (unsigned int *)regs->pc);
++ err |= get_user(lda, (unsigned int *)(regs->pc+4));
++ err |= get_user(br, (unsigned int *)(regs->pc+8));
++
++ if (err)
++ break; /* could not read the three instruction words at pc */
++
++ if ((ldah & 0xFFFF0000U) == 0x277B0000U &&
++ (lda & 0xFFFF0000U) == 0xA77B0000U &&
++ (br & 0xFFE00000U) == 0xC3E00000U)
++ {
++ unsigned long addr = br | 0xFFFFFFFFFFE00000UL;
++ unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16;
++ unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL;
++
++ regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL);
++ regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); /* step over the 3 words, then add the word displacement taken from br */
++ return 2;
++ }
++ } while (0);
++
++ do { /* PaX: unpatched PLT emulation */
++ unsigned int br;
++
++ err = get_user(br, (unsigned int *)regs->pc);
++
++ if (!err && (br & 0xFFE00000U) == 0xC3800000U) {
++ unsigned int br2, ldq, nop, jmp;
++ unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver;
++
++ addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2);
++ err = get_user(br2, (unsigned int *)addr);
++ err |= get_user(ldq, (unsigned int *)(addr+4));
++ err |= get_user(nop, (unsigned int *)(addr+8));
++ err |= get_user(jmp, (unsigned int *)(addr+12));
++ err |= get_user(resolver, (unsigned long *)(addr+16));
++
++ if (err)
++ break; /* branch target words or resolver slot unreadable */
++
++ if (br2 == 0xC3600000U &&
++ ldq == 0xA77B000CU &&
++ nop == 0x47FF041FU &&
++ jmp == 0x6B7B0000U)
++ {
++ regs->r28 = regs->pc+4;
++ regs->r27 = addr+16;
++ regs->pc = resolver; /* continue at the value read from addr+16 */
++ return 3;
++ }
++ }
++ } while (0);
++#endif
++
++ return 1; /* no trampoline pattern was emulated */
++}
++
++void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
++{
++ unsigned long i;
++
++ printk(KERN_ERR "PAX: bytes at PC: ");
++ for (i = 0; i < 5; i++) { /* dump five 32-bit words starting at pc */
++ unsigned int c;
++ if (get_user(c, (unsigned int *)pc+i))
++ printk(KERN_CONT "???????? "); /* placeholder: this word could not be read */
++ else
++ printk(KERN_CONT "%08x ", c);
++ }
++ printk("\n");
++}
++#endif
+
+ /*
+ * This routine handles page faults. It determines the address,
+@@ -132,8 +250,29 @@ retry:
+ good_area:
+ si_code = SEGV_ACCERR;
+ if (cause < 0) {
+- if (!(vma->vm_flags & VM_EXEC))
++ if (!(vma->vm_flags & VM_EXEC)) {
++
++#ifdef CONFIG_PAX_PAGEEXEC
++ if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc)
++ goto bad_area;
++
++ up_read(&mm->mmap_sem);
++ switch (pax_handle_fetch_fault(regs)) {
++
++#ifdef CONFIG_PAX_EMUPLT
++ case 2:
++ case 3:
++ return;
++#endif
++
++ }
++ pax_report_fault(regs, (void *)regs->pc, (void *)rdusp());
++ do_group_exit(SIGKILL);
++#else
+ goto bad_area;
++#endif
++
++ }
+ } else if (!cause) {
+ /* Allow reads even for write-only mappings */
+ if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
+diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
+index 0d3e59f..4418d65 100644
+--- a/arch/arc/Kconfig
++++ b/arch/arc/Kconfig
+@@ -541,6 +541,7 @@ config ARC_DBG_TLB_MISS_COUNT
+ bool "Profile TLB Misses"
+ default n
+ select DEBUG_FS
++ depends on !GRKERNSEC_KMEM
+ help
+ Counts number of I and D TLB Misses and exports them via Debugfs
+ The counters can be cleared via Debugfs as well
+diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
+index a9c4e48..75bc9c9 100644
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -1621,6 +1621,7 @@ config AEABI
+ config OABI_COMPAT
+ bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)"
+ depends on AEABI && !THUMB2_KERNEL
++ depends on !GRKERNSEC
+ help
+ This option preserves the old syscall interface along with the
+ new (ARM EABI) one. It also provides a compatibility layer to
+@@ -1689,6 +1690,7 @@ config HIGHPTE
+ config CPU_SW_DOMAIN_PAN
+ bool "Enable use of CPU domains to implement privileged no-access"
+ depends on MMU && !ARM_LPAE
++ depends on !PAX_KERNEXEC && !PAX_MEMORY_UDEREF
+ default y
+ help
+ Increase kernel security by ensuring that normal kernel accesses
+@@ -1765,7 +1767,7 @@ config ALIGNMENT_TRAP
+
+ config UACCESS_WITH_MEMCPY
+ bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user()"
+- depends on MMU
++ depends on MMU && !PAX_MEMORY_UDEREF
+ default y if CPU_FEROCEON
+ help
+ Implement faster copy_to_user and clear_user methods for CPU
+@@ -2020,6 +2022,7 @@ config KEXEC
+ depends on (!SMP || PM_SLEEP_SMP)
+ depends on !CPU_V7M
+ select KEXEC_CORE
++ depends on !GRKERNSEC_KMEM
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+@@ -2064,7 +2067,7 @@ config EFI_STUB
+
+ config EFI
+ bool "UEFI runtime support"
+- depends on OF && !CPU_BIG_ENDIAN && MMU && AUTO_ZRELADDR && !XIP_KERNEL
++ depends on OF && !CPU_BIG_ENDIAN && MMU && AUTO_ZRELADDR && !XIP_KERNEL && !PAX_KERNEXEC
+ select UCS2_STRING
+ select EFI_PARAMS_FROM_FDT
+ select EFI_STUB
+diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
+index a9693b6..87d8936 100644
+--- a/arch/arm/Kconfig.debug
++++ b/arch/arm/Kconfig.debug
+@@ -7,6 +7,7 @@ config ARM_PTDUMP
+ depends on DEBUG_KERNEL
+ depends on MMU
+ select DEBUG_FS
++ depends on !GRKERNSEC_KMEM
+ ---help---
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
+index d50430c..01cc53b 100644
+--- a/arch/arm/boot/compressed/Makefile
++++ b/arch/arm/boot/compressed/Makefile
+@@ -103,6 +103,8 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
+ KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
+ endif
+
++KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
++
+ # -fstack-protector-strong triggers protection checks in this code,
+ # but it is being used too early to link to meaningful stack_chk logic.
+ nossp_flags := $(call cc-option, -fno-stack-protector)
+diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
+index 6fc73bf..d0af3c7b 100644
+--- a/arch/arm/crypto/sha1_glue.c
++++ b/arch/arm/crypto/sha1_glue.c
+@@ -27,8 +27,8 @@
+
+ #include "sha1.h"
+
+-asmlinkage void sha1_block_data_order(u32 *digest,
+- const unsigned char *data, unsigned int rounds);
++asmlinkage void sha1_block_data_order(struct sha1_state *digest,
++ const u8 *data, int rounds);
+
+ int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+@@ -36,22 +36,20 @@ int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ /* make sure casting to sha1_block_fn() is safe */
+ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
+
+- return sha1_base_do_update(desc, data, len,
+- (sha1_block_fn *)sha1_block_data_order);
++ return sha1_base_do_update(desc, data, len, sha1_block_data_order);
+ }
+ EXPORT_SYMBOL_GPL(sha1_update_arm);
+
+ static int sha1_final(struct shash_desc *desc, u8 *out)
+ {
+- sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
++ sha1_base_do_finalize(desc, sha1_block_data_order);
+ return sha1_base_finish(desc, out);
+ }
+
+ int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+ {
+- sha1_base_do_update(desc, data, len,
+- (sha1_block_fn *)sha1_block_data_order);
++ sha1_base_do_update(desc, data, len, sha1_block_data_order);
+ return sha1_final(desc, out);
+ }
+ EXPORT_SYMBOL_GPL(sha1_finup_arm);
+diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
+index 4e22f12..49902aa 100644
+--- a/arch/arm/crypto/sha1_neon_glue.c
++++ b/arch/arm/crypto/sha1_neon_glue.c
+@@ -31,8 +31,8 @@
+
+ #include "sha1.h"
+
+-asmlinkage void sha1_transform_neon(void *state_h, const char *data,
+- unsigned int rounds);
++asmlinkage void sha1_transform_neon(struct sha1_state *state_h, const u8 *data,
++ int rounds);
+
+ static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+@@ -45,7 +45,7 @@ static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
+
+ kernel_neon_begin();
+ sha1_base_do_update(desc, data, len,
+- (sha1_block_fn *)sha1_transform_neon);
++ sha1_transform_neon);
+ kernel_neon_end();
+
+ return 0;
+@@ -60,8 +60,8 @@ static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
+ kernel_neon_begin();
+ if (len)
+ sha1_base_do_update(desc, data, len,
+- (sha1_block_fn *)sha1_transform_neon);
+- sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
++ sha1_transform_neon);
++ sha1_base_do_finalize(desc, sha1_transform_neon);
+ kernel_neon_end();
+
+ return sha1_base_finish(desc, out);
+diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
+index a84e869..53a0c61 100644
+--- a/arch/arm/crypto/sha256_glue.c
++++ b/arch/arm/crypto/sha256_glue.c
+@@ -30,8 +30,8 @@
+
+ #include "sha256_glue.h"
+
+-asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+- unsigned int num_blks);
++asmlinkage void sha256_block_data_order(struct sha256_state *digest, const u8 *data,
++ int num_blks);
+
+ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+@@ -39,23 +39,20 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
+ /* make sure casting to sha256_block_fn() is safe */
+ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
+
+- return sha256_base_do_update(desc, data, len,
+- (sha256_block_fn *)sha256_block_data_order);
++ return sha256_base_do_update(desc, data, len, sha256_block_data_order);
+ }
+ EXPORT_SYMBOL(crypto_sha256_arm_update);
+
+ static int sha256_final(struct shash_desc *desc, u8 *out)
+ {
+- sha256_base_do_finalize(desc,
+- (sha256_block_fn *)sha256_block_data_order);
++ sha256_base_do_finalize(desc, sha256_block_data_order);
+ return sha256_base_finish(desc, out);
+ }
+
+ int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+ {
+- sha256_base_do_update(desc, data, len,
+- (sha256_block_fn *)sha256_block_data_order);
++ sha256_base_do_update(desc, data, len, sha256_block_data_order);
+ return sha256_final(desc, out);
+ }
+ EXPORT_SYMBOL(crypto_sha256_arm_finup);
+diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
+index 39ccd65..f9511cb 100644
+--- a/arch/arm/crypto/sha256_neon_glue.c
++++ b/arch/arm/crypto/sha256_neon_glue.c
+@@ -26,8 +26,8 @@
+
+ #include "sha256_glue.h"
+
+-asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
+- unsigned int num_blks);
++asmlinkage void sha256_block_data_order_neon(struct sha256_state *digest, const u8 *data,
++ int num_blks);
+
+ static int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+@@ -39,8 +39,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
+ return crypto_sha256_arm_update(desc, data, len);
+
+ kernel_neon_begin();
+- sha256_base_do_update(desc, data, len,
+- (sha256_block_fn *)sha256_block_data_order_neon);
++ sha256_base_do_update(desc, data, len, sha256_block_data_order_neon);
+ kernel_neon_end();
+
+ return 0;
+@@ -54,10 +53,8 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data,
+
+ kernel_neon_begin();
+ if (len)
+- sha256_base_do_update(desc, data, len,
+- (sha256_block_fn *)sha256_block_data_order_neon);
+- sha256_base_do_finalize(desc,
+- (sha256_block_fn *)sha256_block_data_order_neon);
++ sha256_base_do_update(desc, data, len, sha256_block_data_order_neon);
++ sha256_base_do_finalize(desc, sha256_block_data_order_neon);
+ kernel_neon_end();
+
+ return sha256_base_finish(desc, out);
+diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
+index 269a394..c7a91f1 100644
+--- a/arch/arm/crypto/sha512-glue.c
++++ b/arch/arm/crypto/sha512-glue.c
+@@ -28,27 +28,24 @@ MODULE_ALIAS_CRYPTO("sha512");
+ MODULE_ALIAS_CRYPTO("sha384-arm");
+ MODULE_ALIAS_CRYPTO("sha512-arm");
+
+-asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks);
++asmlinkage void sha512_block_data_order(struct sha512_state *state, u8 const *src, int blocks);
+
+ int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+ {
+- return sha512_base_do_update(desc, data, len,
+- (sha512_block_fn *)sha512_block_data_order);
++ return sha512_base_do_update(desc, data, len, sha512_block_data_order);
+ }
+
+ int sha512_arm_final(struct shash_desc *desc, u8 *out)
+ {
+- sha512_base_do_finalize(desc,
+- (sha512_block_fn *)sha512_block_data_order);
++ sha512_base_do_finalize(desc, sha512_block_data_order);
+ return sha512_base_finish(desc, out);
+ }
+
+ int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+ {
+- sha512_base_do_update(desc, data, len,
+- (sha512_block_fn *)sha512_block_data_order);
++ sha512_base_do_update(desc, data, len, sha512_block_data_order);
+ return sha512_arm_final(desc, out);
+ }
+
+diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
+index 3269368..9fcbc00 100644
+--- a/arch/arm/crypto/sha512-neon-glue.c
++++ b/arch/arm/crypto/sha512-neon-glue.c
+@@ -22,7 +22,7 @@
+ MODULE_ALIAS_CRYPTO("sha384-neon");
+ MODULE_ALIAS_CRYPTO("sha512-neon");
+
+-asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src,
++asmlinkage void sha512_block_data_order_neon(struct sha512_state *state, u8 const *src,
+ int blocks);
+
+ static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+@@ -35,8 +35,7 @@ static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ return sha512_arm_update(desc, data, len);
+
+ kernel_neon_begin();
+- sha512_base_do_update(desc, data, len,
+- (sha512_block_fn *)sha512_block_data_order_neon);
++ sha512_base_do_update(desc, data, len, sha512_block_data_order_neon);
+ kernel_neon_end();
+
+ return 0;
+@@ -50,10 +49,8 @@ static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
+
+ kernel_neon_begin();
+ if (len)
+- sha512_base_do_update(desc, data, len,
+- (sha512_block_fn *)sha512_block_data_order_neon);
+- sha512_base_do_finalize(desc,
+- (sha512_block_fn *)sha512_block_data_order_neon);
++ sha512_base_do_update(desc, data, len, sha512_block_data_order_neon);
++ sha512_base_do_finalize(desc, sha512_block_data_order_neon);
+ kernel_neon_end();
+
+ return sha512_base_finish(desc, out);
+diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
+index 66d0e21..8fa3237 100644
+--- a/arch/arm/include/asm/atomic.h
++++ b/arch/arm/include/asm/atomic.h
+@@ -18,17 +18,41 @@
+ #include <asm/barrier.h>
+ #include <asm/cmpxchg.h>
+
++#ifdef CONFIG_GENERIC_ATOMIC64
++#include <asm-generic/atomic64.h>
++#endif
++
+ #define ATOMIC_INIT(i) { (i) }
+
+ #ifdef __KERNEL__
+
++#ifdef CONFIG_THUMB2_KERNEL
++#define REFCOUNT_TRAP_INSN "bkpt 0xf1"
++#else
++#define REFCOUNT_TRAP_INSN "bkpt 0xf103"
++#endif
++
++#define _ASM_EXTABLE(from, to) \
++" .pushsection __ex_table,\"a\"\n"\
++" .align 3\n" \
++" .long " #from ", " #to"\n" \
++" .popsection"
++
+ /*
+ * On ARM, ordinary assignment (str instruction) doesn't clear the local
+ * strex/ldrex monitor on some implementations. The reason we can use it for
+ * atomic_set() is the clrex or dummy strex done on every exception return.
+ */
+ #define atomic_read(v) READ_ONCE((v)->counter)
++static inline int atomic_read_unchecked(const atomic_unchecked_t *v)
++{
++ return READ_ONCE(v->counter); /* same READ_ONCE load of ->counter as atomic_read() */
++}
+ #define atomic_set(v,i) WRITE_ONCE(((v)->counter), (i))
++static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i)
++{
++ WRITE_ONCE(v->counter, i); /* same WRITE_ONCE store to ->counter as atomic_set() */
++}
+
+ #if __LINUX_ARM_ARCH__ >= 6
+
+@@ -38,45 +62,74 @@
+ * to ensure that the update happens.
+ */
+
+-#define ATOMIC_OP(op, c_op, asm_op) \
+-static inline void atomic_##op(int i, atomic_t *v) \
++#ifdef CONFIG_PAX_REFCOUNT
++#define __OVERFLOW_POST \
++ " bvc 3f\n" \
++ "2: " REFCOUNT_TRAP_INSN "\n"\
++ "3:\n"
++#define __OVERFLOW_POST_RETURN \
++ " bvc 3f\n" \
++ " mov %1, %0\n" \
++ "2: " REFCOUNT_TRAP_INSN "\n"\
++ "3:\n"
++#define __OVERFLOW_EXTABLE \
++ "4:\n" \
++ _ASM_EXTABLE(2b, 4b)
++#else
++#define __OVERFLOW_POST
++#define __OVERFLOW_POST_RETURN
++#define __OVERFLOW_EXTABLE
++#endif
++
++#define __ATOMIC_OP(op, suffix, c_op, asm_op, post_op, extable) \
++static inline void atomic_##op##suffix(int i, atomic##suffix##_t *v) \
+ { \
+ unsigned long tmp; \
+ int result; \
+ \
+ prefetchw(&v->counter); \
+- __asm__ __volatile__("@ atomic_" #op "\n" \
++ __asm__ __volatile__("@ atomic_" #op #suffix "\n" \
+ "1: ldrex %0, [%3]\n" \
+ " " #asm_op " %0, %0, %4\n" \
++ post_op /* overflow trap; empty for _unchecked, whose asm_op leaves V untouched */ \
+ " strex %1, %0, [%3]\n" \
+ " teq %1, #0\n" \
+-" bne 1b" \
++" bne 1b\n" \
++ extable /* fixup entry for the trap above; empty for _unchecked */ \
+ : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \
+ : "r" (&v->counter), "Ir" (i) \
+ : "cc"); \
+ } \
+
+-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+-static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \
++#define ATOMIC_OP(op, c_op, asm_op) __ATOMIC_OP(op, _unchecked, c_op, asm_op, , )\
++ __ATOMIC_OP(op, , c_op, asm_op##s, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
++#define __ATOMIC_OP_RETURN(op, suffix, c_op, asm_op, post_op, extable) \
++static inline int atomic_##op##_return##suffix##_relaxed(int i, atomic##suffix##_t *v)\
+ { \
+- unsigned long tmp; \
++ int tmp; \
+ int result; \
+ \
+ prefetchw(&v->counter); \
+ \
+- __asm__ __volatile__("@ atomic_" #op "_return\n" \
++ __asm__ __volatile__("@ atomic_" #op "_return" #suffix "\n" \
+ "1: ldrex %0, [%3]\n" \
+-" " #asm_op " %0, %0, %4\n" \
+-" strex %1, %0, [%3]\n" \
+-" teq %1, #0\n" \
+-" bne 1b" \
+- : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \
++" " #asm_op " %1, %0, %4\n" \
++ post_op /* overflow trap; empty for _unchecked, whose asm_op leaves V untouched */ \
++" strex %0, %1, [%3]\n" \
++" teq %0, #0\n" \
++" bne 1b\n" \
++ extable /* fixup entry for the trap above; empty for _unchecked */ \
++ : "=&r" (tmp), "=&r" (result), "+Qo" (v->counter) \
+ : "r" (&v->counter), "Ir" (i) \
+ : "cc"); \
+ \
+ return result; \
+ }
+
++#define ATOMIC_OP_RETURN(op, c_op, asm_op) __ATOMIC_OP_RETURN(op, _unchecked, c_op, asm_op, , )\
++ __ATOMIC_OP_RETURN(op, , c_op, asm_op##s, __OVERFLOW_POST_RETURN, __OVERFLOW_EXTABLE)
++
+ #define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+ { \
+@@ -99,6 +152,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+ }
+
+ #define atomic_add_return_relaxed atomic_add_return_relaxed
++#define atomic_add_return_unchecked_relaxed atomic_add_return_unchecked_relaxed
+ #define atomic_sub_return_relaxed atomic_sub_return_relaxed
+ #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+ #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+@@ -141,12 +195,17 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+ __asm__ __volatile__ ("@ atomic_add_unless\n"
+ "1: ldrex %0, [%4]\n"
+ " teq %0, %5\n"
+-" beq 2f\n"
+-" add %1, %0, %6\n"
++" beq 4f\n"
++" adds %1, %0, %6\n"
++
++ __OVERFLOW_POST
++
+ " strex %2, %1, [%4]\n"
+ " teq %2, #0\n"
+ " bne 1b\n"
+-"2:"
++
++ __OVERFLOW_EXTABLE
++
+ : "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
+ : "r" (&v->counter), "r" (u), "r" (a)
+ : "cc");
+@@ -157,14 +216,36 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+ return oldval;
+ }
+
++static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *ptr, int old, int new)
++{ /* Compare-and-swap on ptr->counter, bracketed by smp_mb() on both sides. */
++ unsigned long oldval, res; /* value loaded from the counter; strex status */
++
++ smp_mb(); /* barrier before the exchange */
++
++ do { /* retry while the exclusive store reports failure */
++ __asm__ __volatile__("@ atomic_cmpxchg_unchecked\n"
++ "ldrex %1, [%3]\n"
++ "mov %0, #0\n"
++ "teq %1, %4\n"
++ "strexeq %0, %5, [%3]\n"
++ : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter)
++ : "r" (&ptr->counter), "Ir" (old), "r" (new)
++ : "cc");
++ } while (res); /* res stays 0 when the compare fails, which also ends the loop */
++
++ smp_mb(); /* barrier after the exchange */
++
++ return oldval; /* equals `old` exactly when `new` was stored */
++}
++
+ #else /* ARM_ARCH_6 */
+
+ #ifdef CONFIG_SMP
+ #error SMP not supported on pre-ARMv6 CPUs
+ #endif
+
+-#define ATOMIC_OP(op, c_op, asm_op) \
+-static inline void atomic_##op(int i, atomic_t *v) \
++#define __ATOMIC_OP(op, suffix, c_op, asm_op) \
++static inline void atomic_##op##suffix(int i, atomic##suffix##_t *v) \
+ { \
+ unsigned long flags; \
+ \
+@@ -173,8 +254,11 @@ static inline void atomic_##op(int i, atomic_t *v) \
+ raw_local_irq_restore(flags); \
+ } \
+
+-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+-static inline int atomic_##op##_return(int i, atomic_t *v) \
++#define ATOMIC_OP(op, c_op, asm_op) __ATOMIC_OP(op, , c_op, asm_op) \
++ __ATOMIC_OP(op, _unchecked, c_op, asm_op)
++
++#define __ATOMIC_OP_RETURN(op, suffix, c_op, asm_op) \
++static inline int atomic_##op##_return##suffix(int i, atomic##suffix##_t *v)\
+ { \
+ unsigned long flags; \
+ int val; \
+@@ -201,6 +285,9 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \
+ return val; \
+ }
+
++#define ATOMIC_OP_RETURN(op, c_op, asm_op) __ATOMIC_OP_RETURN(op, , c_op, asm_op)\
++ __ATOMIC_OP_RETURN(op, _unchecked, c_op, asm_op)
++
+ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+ {
+ int ret;
+@@ -215,6 +302,11 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+ return ret;
+ }
+
++static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new)
++{
++ return atomic_cmpxchg((atomic_t *)v, old, new);
++}
++
+ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+ {
+ int c, old;
+@@ -250,16 +342,29 @@ ATOMIC_OPS(xor, ^=, eor)
+ #undef ATOMIC_OPS
+ #undef ATOMIC_FETCH_OP
+ #undef ATOMIC_OP_RETURN
++#undef __ATOMIC_OP_RETURN
+ #undef ATOMIC_OP
++#undef __ATOMIC_OP
+
+ #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
++#define atomic_xchg_unchecked(v, new) (xchg_unchecked(&((v)->counter), new))
+
+ #define atomic_inc(v) atomic_add(1, v)
++static inline void atomic_inc_unchecked(atomic_unchecked_t *v)
++{
++ atomic_add_unchecked(1, v);
++}
+ #define atomic_dec(v) atomic_sub(1, v)
++static inline void atomic_dec_unchecked(atomic_unchecked_t *v)
++{
++ atomic_sub_unchecked(1, v);
++}
+
+ #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
++#define atomic_inc_and_test_unchecked(v) (atomic_add_return_unchecked(1, v) == 0)
+ #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
+ #define atomic_inc_return_relaxed(v) (atomic_add_return_relaxed(1, v))
++#define atomic_inc_return_unchecked_relaxed(v) (atomic_add_return_unchecked_relaxed(1, v))
+ #define atomic_dec_return_relaxed(v) (atomic_sub_return_relaxed(1, v))
+ #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
+
+@@ -270,6 +375,14 @@ typedef struct {
+ long long counter;
+ } atomic64_t;
+
++#ifdef CONFIG_PAX_REFCOUNT
++typedef struct {
++ long long counter;
++} atomic64_unchecked_t;
++#else
++typedef atomic64_t atomic64_unchecked_t;
++#endif
++
+ #define ATOMIC64_INIT(i) { (i) }
+
+ #ifdef CONFIG_ARM_LPAE
+@@ -286,6 +399,19 @@ static inline long long atomic64_read(const atomic64_t *v)
+ return result;
+ }
+
++static inline long long atomic64_read_unchecked(const atomic64_unchecked_t *v)
++{
++ long long result;
++
++ __asm__ __volatile__("@ atomic64_read_unchecked\n"
++" ldrd %0, %H0, [%1]"
++ : "=&r" (result)
++ : "r" (&v->counter), "Qo" (v->counter)
++ );
++
++ return result;
++}
++
+ static inline void atomic64_set(atomic64_t *v, long long i)
+ {
+ __asm__ __volatile__("@ atomic64_set\n"
+@@ -294,6 +420,15 @@ static inline void atomic64_set(atomic64_t *v, long long i)
+ : "r" (&v->counter), "r" (i)
+ );
+ }
++
++static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long long i)
++{
++ __asm__ __volatile__("@ atomic64_set_unchecked\n"
++" strd %2, %H2, [%1]"
++ : "=Qo" (v->counter)
++ : "r" (&v->counter), "r" (i)
++ );
++}
+ #else
+ static inline long long atomic64_read(const atomic64_t *v)
+ {
+@@ -308,6 +443,19 @@ static inline long long atomic64_read(const atomic64_t *v)
+ return result;
+ }
+
++static inline long long atomic64_read_unchecked(const atomic64_unchecked_t *v)
++{
++ long long result;
++
++ __asm__ __volatile__("@ atomic64_read_unchecked\n"
++" ldrexd %0, %H0, [%1]"
++ : "=&r" (result)
++ : "r" (&v->counter), "Qo" (v->counter)
++ );
++
++ return result;
++}
++
+ static inline void atomic64_set(atomic64_t *v, long long i)
+ {
+ long long tmp;
+@@ -322,50 +470,82 @@ static inline void atomic64_set(atomic64_t *v, long long i)
+ : "r" (&v->counter), "r" (i)
+ : "cc");
+ }
++
++static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long long i)
++{
++ long long tmp;
++
++ prefetchw(&v->counter);
++ __asm__ __volatile__("@ atomic64_set_unchecked\n"
++"1: ldrexd %0, %H0, [%2]\n"
++" strexd %0, %3, %H3, [%2]\n"
++" teq %0, #0\n"
++" bne 1b"
++ : "=&r" (tmp), "=Qo" (v->counter)
++ : "r" (&v->counter), "r" (i)
++ : "cc");
++}
+ #endif
+
+-#define ATOMIC64_OP(op, op1, op2) \
+-static inline void atomic64_##op(long long i, atomic64_t *v) \
++#ifdef CONFIG_PAX_REFCOUNT /* 64-bit form: copy old value %0 into result %1, then trap */
++#define __OVERFLOW_POST_RETURN64 " bvc 3f\n mov %Q1, %Q0\n mov %R1, %R0\n" \
++ "2: " REFCOUNT_TRAP_INSN "\n3:\n"
++#else /* !CONFIG_PAX_REFCOUNT: emit nothing, no trap without an exception-table entry */
++#define __OVERFLOW_POST_RETURN64
++#endif /* CONFIG_PAX_REFCOUNT */
++
++#define __ATOMIC64_OP(op, suffix, op1, op2, post_op, extable) \
++static inline void atomic64_##op##suffix(long long i, atomic64##suffix##_t *v)\
+ { \
+ long long result; \
+ unsigned long tmp; \
+ \
+ prefetchw(&v->counter); \
+- __asm__ __volatile__("@ atomic64_" #op "\n" \
++ __asm__ __volatile__("@ atomic64_" #op #suffix "\n" \
+ "1: ldrexd %0, %H0, [%3]\n" \
+ " " #op1 " %Q0, %Q0, %Q4\n" \
+ " " #op2 " %R0, %R0, %R4\n" \
++ post_op /* overflow trap; empty for _unchecked, where V only reflects the low word */ \
+ " strexd %1, %0, %H0, [%3]\n" \
+ " teq %1, #0\n" \
+-" bne 1b" \
++" bne 1b\n" \
++ extable /* fixup entry for the trap above; empty for _unchecked */ \
+ : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \
+ : "r" (&v->counter), "r" (i) \
+ : "cc"); \
+ } \
+
+-#define ATOMIC64_OP_RETURN(op, op1, op2) \
++#define ATOMIC64_OP(op, op1, op2) __ATOMIC64_OP(op, _unchecked, op1, op2, , ) \
++ __ATOMIC64_OP(op, , op1, op2##s, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
++#define __ATOMIC64_OP_RETURN(op, suffix, op1, op2, post_op, extable) \
+ static inline long long \
+-atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
++atomic64_##op##_return##suffix##_relaxed(long long i, atomic64##suffix##_t *v) \
+ { \
+ long long result; \
+- unsigned long tmp; \
++ long long tmp; \
+ \
+ prefetchw(&v->counter); \
+ \
+- __asm__ __volatile__("@ atomic64_" #op "_return\n" \
++ __asm__ __volatile__("@ atomic64_" #op "_return" #suffix "\n" \
+ "1: ldrexd %0, %H0, [%3]\n" \
+-" " #op1 " %Q0, %Q0, %Q4\n" \
+-" " #op2 " %R0, %R0, %R4\n" \
+-" strexd %1, %0, %H0, [%3]\n" \
+-" teq %1, #0\n" \
+-" bne 1b" \
+- : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) \
++" " #op1 " %Q1, %Q0, %Q4\n" \
++" " #op2 " %R1, %R0, %R4\n" \
++ post_op /* overflow trap; empty for _unchecked, where V only reflects the low word */ \
++" strexd %0, %1, %H1, [%3]\n" \
++" teq %0, #0\n" \
++" bne 1b\n" \
++ extable /* fixup entry for the trap above; empty for _unchecked */ \
++ : "=&r" (tmp), "=&r" (result), "+Qo" (v->counter) \
+ : "r" (&v->counter), "r" (i) \
+ : "cc"); \
+ \
+ return result; \
+ }
+
++#define ATOMIC64_OP_RETURN(op, op1, op2) __ATOMIC64_OP_RETURN(op, _unchecked, op1, op2, , ) \
++ __ATOMIC64_OP_RETURN(op, , op1, op2##s, __OVERFLOW_POST_RETURN64, __OVERFLOW_EXTABLE)
++
+ #define ATOMIC64_FETCH_OP(op, op1, op2) \
+ static inline long long \
+ atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \
+@@ -398,6 +578,7 @@ ATOMIC64_OPS(add, adds, adc)
+ ATOMIC64_OPS(sub, subs, sbc)
+
+ #define atomic64_add_return_relaxed atomic64_add_return_relaxed
++#define atomic64_add_return_unchecked_relaxed atomic64_add_return_unchecked_relaxed
+ #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+ #define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+ #define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+@@ -422,7 +603,10 @@ ATOMIC64_OPS(xor, eor, eor)
+ #undef ATOMIC64_OPS
+ #undef ATOMIC64_FETCH_OP
+ #undef ATOMIC64_OP_RETURN
++#undef __ATOMIC64_OP_RETURN
+ #undef ATOMIC64_OP
++#undef __ATOMIC64_OP
++#undef __OVERFLOW_POST_RETURN
+
+ static inline long long
+ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
+@@ -448,6 +632,13 @@ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
+ }
+ #define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
+
++static inline long long
++atomic64_cmpxchg_unchecked_relaxed(atomic64_unchecked_t *ptr, long long old, long long new)
++{
++ return atomic64_cmpxchg_relaxed((atomic64_t *)ptr, old, new);
++}
++#define atomic64_cmpxchg_unchecked_relaxed atomic64_cmpxchg_unchecked_relaxed
++
+ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
+ {
+ long long result;
+@@ -468,25 +659,36 @@ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
+ }
+ #define atomic64_xchg_relaxed atomic64_xchg_relaxed
+
++static inline long long atomic64_xchg_unchecked_relaxed(atomic64_unchecked_t *ptr, long long new)
++{
++ return atomic64_xchg_relaxed((atomic64_t *)ptr, new);
++}
++#define atomic64_xchg_unchecked_relaxed atomic64_xchg_unchecked_relaxed
++
+ static inline long long atomic64_dec_if_positive(atomic64_t *v)
+ {
+ long long result;
+- unsigned long tmp;
++ u64 tmp;
+
+ smp_mb();
+ prefetchw(&v->counter);
+
+ __asm__ __volatile__("@ atomic64_dec_if_positive\n"
+ "1: ldrexd %0, %H0, [%3]\n"
+-" subs %Q0, %Q0, #1\n"
+-" sbc %R0, %R0, #0\n"
+-" teq %R0, #0\n"
+-" bmi 2f\n"
+-" strexd %1, %0, %H0, [%3]\n"
+-" teq %1, #0\n"
++" subs %Q1, %Q0, #1\n"
++" sbcs %R1, %R0, #0\n"
++
++ __OVERFLOW_POST_RETURN64
++
++" teq %R1, #0\n"
++" bmi 4f\n"
++" strexd %0, %1, %H1, [%3]\n"
++" teq %0, #0\n"
+ " bne 1b\n"
+-"2:"
+- : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
++
++ __OVERFLOW_EXTABLE
++
++ : "=&r" (tmp), "=&r" (result), "+Qo" (v->counter)
+ : "r" (&v->counter)
+ : "cc");
+
+@@ -509,13 +711,18 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+ " teq %0, %5\n"
+ " teqeq %H0, %H5\n"
+ " moveq %1, #0\n"
+-" beq 2f\n"
++" beq 4f\n"
+ " adds %Q0, %Q0, %Q6\n"
+-" adc %R0, %R0, %R6\n"
++" adcs %R0, %R0, %R6\n"
++
++ __OVERFLOW_POST
++
+ " strexd %2, %0, %H0, [%4]\n"
+ " teq %2, #0\n"
+ " bne 1b\n"
+-"2:"
++
++ __OVERFLOW_EXTABLE
++
+ : "=&r" (val), "+r" (ret), "=&r" (tmp), "+Qo" (v->counter)
+ : "r" (&v->counter), "r" (u), "r" (a)
+ : "cc");
+@@ -526,12 +733,19 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+ return ret;
+ }
+
++#undef __OVERFLOW_EXTABLE
++#undef __OVERFLOW_POST_RETURN64
++#undef __OVERFLOW_POST
++
+ #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
+ #define atomic64_inc(v) atomic64_add(1LL, (v))
++#define atomic64_inc_unchecked(v) atomic64_add_unchecked(1LL, (v))
+ #define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1LL, (v))
++#define atomic64_inc_return_unchecked_relaxed(v) atomic64_add_return_unchecked_relaxed(1LL, (v))
+ #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+ #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
+ #define atomic64_dec(v) atomic64_sub(1LL, (v))
++#define atomic64_dec_unchecked(v) atomic64_sub_unchecked(1LL, (v))
+ #define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1LL, (v))
+ #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
+ #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
+diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
+index 75fe66b..2255c86 100644
+--- a/arch/arm/include/asm/cache.h
++++ b/arch/arm/include/asm/cache.h
+@@ -4,8 +4,10 @@
+ #ifndef __ASMARM_CACHE_H
+ #define __ASMARM_CACHE_H
+
++#include <linux/const.h>
++
+ #define L1_CACHE_SHIFT CONFIG_ARM_L1_CACHE_SHIFT
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ /*
+ * Memory returned by kmalloc() may be used for DMA, so we must make
+diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
+index 9156fc3..0521e3e 100644
+--- a/arch/arm/include/asm/cacheflush.h
++++ b/arch/arm/include/asm/cacheflush.h
+@@ -116,7 +116,7 @@ struct cpu_cache_fns {
+ void (*dma_unmap_area)(const void *, size_t, int);
+
+ void (*dma_flush_range)(const void *, const void *);
+-};
++} __no_const __no_randomize_layout;
+
+ /*
+ * Select the calling method
+diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h
+index 524692f..a8871ec 100644
+--- a/arch/arm/include/asm/checksum.h
++++ b/arch/arm/include/asm/checksum.h
+@@ -37,7 +37,19 @@ __wsum
+ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
+
+ __wsum
+-csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
++__csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
++
++/* Checksum-and-copy from user space: calls __csum_partial_copy_from_user()
++ * between pax_open_userland() and pax_close_userland() and returns its result. */
++static inline __wsum
++csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr)
++{
++ __wsum ret;
++ pax_open_userland();
++ ret = __csum_partial_copy_from_user(src, dst, len, sum, err_ptr);
++ pax_close_userland();
++ return ret;
++}
+
+ /*
+ * Fold a partial checksum without adding pseudo headers
+diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
+index 97882f9..ff9d6ac 100644
+--- a/arch/arm/include/asm/cmpxchg.h
++++ b/arch/arm/include/asm/cmpxchg.h
+@@ -117,6 +117,10 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
+ (__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \
+ sizeof(*(ptr))); \
+ })
++#define xchg_unchecked_relaxed(ptr, x) ({ \
++ (__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \
++ sizeof(*(ptr))); \
++})
+
+ #include <asm-generic/cmpxchg-local.h>
+
+@@ -128,6 +132,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
+ #endif
+
+ #define xchg xchg_relaxed
++#define xchg_unchecked xchg_unchecked_relaxed
+
+ /*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
+index baefe1d..29cb35a 100644
+--- a/arch/arm/include/asm/cpuidle.h
++++ b/arch/arm/include/asm/cpuidle.h
+@@ -32,7 +32,7 @@ struct device_node;
+ struct cpuidle_ops {
+ int (*suspend)(unsigned long arg);
+ int (*init)(struct device_node *, int cpu);
+-};
++} __no_const;
+
+ struct of_cpuidle_method {
+ const char *method;
+diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
+index 99d9f63..ec44cb5 100644
+--- a/arch/arm/include/asm/domain.h
++++ b/arch/arm/include/asm/domain.h
+@@ -42,7 +42,6 @@
+ #define DOMAIN_USER 1
+ #define DOMAIN_IO 0
+ #endif
+-#define DOMAIN_VECTORS 3
+
+ /*
+ * Domain types
+@@ -51,9 +50,28 @@
+ #define DOMAIN_CLIENT 1
+ #ifdef CONFIG_CPU_USE_DOMAINS
+ #define DOMAIN_MANAGER 3
++#define DOMAIN_VECTORS 3
++#define DOMAIN_USERCLIENT DOMAIN_CLIENT
+ #else
++
++#ifdef CONFIG_PAX_KERNEXEC
+ #define DOMAIN_MANAGER 1
++#define DOMAIN_KERNEXEC 3
++#else
++#define DOMAIN_MANAGER 1
++#endif
++
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++#define DOMAIN_USERCLIENT 0
++#define DOMAIN_UDEREF 1
++#define DOMAIN_VECTORS DOMAIN_KERNEL
++#else
++#define DOMAIN_USERCLIENT 1
++#define DOMAIN_VECTORS DOMAIN_USER
++#endif
++
+ #endif
++#define DOMAIN_KERNELCLIENT 1
+
+ #define domain_mask(dom) ((3) << (2 * (dom)))
+ #define domain_val(dom,type) ((type) << (2 * (dom)))
+@@ -62,13 +80,19 @@
+ #define DACR_INIT \
+ (domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \
+ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
+- domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \
++ domain_val(DOMAIN_IO, DOMAIN_KERNELCLIENT) | \
+ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT))
++#elif defined(CONFIG_PAX_MEMORY_UDEREF)
++ /* DOMAIN_VECTORS is defined to DOMAIN_KERNEL */
++#define DACR_INIT \
++ (domain_val(DOMAIN_USER, DOMAIN_USERCLIENT) | \
++ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
++ domain_val(DOMAIN_IO, DOMAIN_KERNELCLIENT))
+ #else
+ #define DACR_INIT \
+- (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \
++ (domain_val(DOMAIN_USER, DOMAIN_USERCLIENT) | \
+ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
+- domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \
++ domain_val(DOMAIN_IO, DOMAIN_KERNELCLIENT) | \
+ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT))
+ #endif
+
+@@ -124,6 +148,17 @@ static inline void set_domain(unsigned val)
+ set_domain(domain); \
+ } while (0)
+
++#elif defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++#define modify_domain(dom,type) \
++ do { \
++ struct thread_info *thread = current_thread_info(); \
++ unsigned int domain = get_domain(); \
++ domain &= ~domain_mask(dom); \
++ domain = domain | domain_val(dom, type); \
++ thread->cpu_domain = domain; \
++ set_domain(domain); \
++ } while (0)
++
+ #else
+ static inline void modify_domain(unsigned dom, unsigned type) { }
+ #endif
+diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
+index d2315ff..f60b47b 100644
+--- a/arch/arm/include/asm/elf.h
++++ b/arch/arm/include/asm/elf.h
+@@ -117,7 +117,14 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. */
+
+-#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
++#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
++
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE 0x00008000UL
++
++#define PAX_DELTA_MMAP_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10)
++#define PAX_DELTA_STACK_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10)
++#endif
+
+ /* When the program starts, a1 contains a pointer to a function to be
+ registered with atexit, as per the SVR4 ABI. A value of 0 means we
+diff --git a/arch/arm/include/asm/fncpy.h b/arch/arm/include/asm/fncpy.h
+index de53547..52b9a28 100644
+--- a/arch/arm/include/asm/fncpy.h
++++ b/arch/arm/include/asm/fncpy.h
+@@ -81,7 +81,9 @@
+ BUG_ON((uintptr_t)(dest_buf) & (FNCPY_ALIGN - 1) || \
+ (__funcp_address & ~(uintptr_t)1 & (FNCPY_ALIGN - 1))); \
+ \
++ pax_open_kernel(); \
+ memcpy(dest_buf, (void const *)(__funcp_address & ~1), size); \
++ pax_close_kernel(); \
+ flush_icache_range((unsigned long)(dest_buf), \
+ (unsigned long)(dest_buf) + (size)); \
+ \
+diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
+index 6795368..6c4d749 100644
+--- a/arch/arm/include/asm/futex.h
++++ b/arch/arm/include/asm/futex.h
+@@ -107,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ return -EFAULT;
+
+ preempt_disable();
++
+ __ua_flags = uaccess_save_and_enable();
+ __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
+ "1: " TUSER(ldr) " %1, [%4]\n"
+diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h
+index 83eb2f7..ed77159 100644
+--- a/arch/arm/include/asm/kmap_types.h
++++ b/arch/arm/include/asm/kmap_types.h
+@@ -4,6 +4,6 @@
+ /*
+ * This is the "bare minimum". AIO seems to require this.
+ */
+-#define KM_TYPE_NR 16
++#define KM_TYPE_NR 17
+
+ #endif
+diff --git a/arch/arm/include/asm/mach/dma.h b/arch/arm/include/asm/mach/dma.h
+index 9e614a1..3302cca 100644
+--- a/arch/arm/include/asm/mach/dma.h
++++ b/arch/arm/include/asm/mach/dma.h
+@@ -22,7 +22,7 @@ struct dma_ops {
+ int (*residue)(unsigned int, dma_t *); /* optional */
+ int (*setspeed)(unsigned int, dma_t *, int); /* optional */
+ const char *type;
+-};
++} __do_const;
+
+ struct dma_struct {
+ void *addr; /* single DMA address */
+diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
+index 9b7c328..2dfe68b 100644
+--- a/arch/arm/include/asm/mach/map.h
++++ b/arch/arm/include/asm/mach/map.h
+@@ -23,17 +23,19 @@ struct map_desc {
+
+ /* types 0-3 are defined in asm/io.h */
+ enum {
+- MT_UNCACHED = 4,
+- MT_CACHECLEAN,
+- MT_MINICLEAN,
++ MT_UNCACHED_RW = 4,
++ MT_CACHECLEAN_RO,
++ MT_MINICLEAN_RO,
+ MT_LOW_VECTORS,
+ MT_HIGH_VECTORS,
+- MT_MEMORY_RWX,
++ __MT_MEMORY_RWX,
+ MT_MEMORY_RW,
+- MT_ROM,
+- MT_MEMORY_RWX_NONCACHED,
++ MT_MEMORY_RX,
++ MT_ROM_RX,
++ MT_MEMORY_RW_NONCACHED,
++ MT_MEMORY_RX_NONCACHED,
+ MT_MEMORY_RW_DTCM,
+- MT_MEMORY_RWX_ITCM,
++ MT_MEMORY_RX_ITCM,
+ MT_MEMORY_RW_SO,
+ MT_MEMORY_DMA_READY,
+ };
+diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
+index c2bf24f..69e437c 100644
+--- a/arch/arm/include/asm/outercache.h
++++ b/arch/arm/include/asm/outercache.h
+@@ -39,7 +39,7 @@ struct outer_cache_fns {
+ /* This is an ARM L2C thing */
+ void (*write_sec)(unsigned long, unsigned);
+ void (*configure)(const struct l2x0_regs *);
+-};
++} __no_const;
+
+ extern struct outer_cache_fns outer_cache;
+
+diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
+index 4355f0e..cd9168e 100644
+--- a/arch/arm/include/asm/page.h
++++ b/arch/arm/include/asm/page.h
+@@ -23,6 +23,7 @@
+
+ #else
+
++#include <linux/compiler.h>
+ #include <asm/glue.h>
+
+ /*
+@@ -114,7 +115,7 @@ struct cpu_user_fns {
+ void (*cpu_clear_user_highpage)(struct page *page, unsigned long vaddr);
+ void (*cpu_copy_user_highpage)(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+-};
++} __no_const;
+
+ #ifdef MULTI_USER
+ extern struct cpu_user_fns cpu_user;
+diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
+index b2902a5..da11e4d 100644
+--- a/arch/arm/include/asm/pgalloc.h
++++ b/arch/arm/include/asm/pgalloc.h
+@@ -17,6 +17,7 @@
+ #include <asm/processor.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
++#include <asm/system_info.h>
+
+ #define check_pgt_cache() do { } while (0)
+
+@@ -43,6 +44,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+ }
+
++static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
++{
++ pud_populate(mm, pud, pmd);
++}
++
+ #else /* !CONFIG_ARM_LPAE */
+
+ /*
+@@ -51,6 +57,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); })
+ #define pmd_free(mm, pmd) do { } while (0)
+ #define pud_populate(mm,pmd,pte) BUG()
++#define pud_populate_kernel(mm,pmd,pte) BUG()
+
+ #endif /* CONFIG_ARM_LPAE */
+
+@@ -128,6 +135,19 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+ __free_page(pte);
+ }
+
++static inline void __section_update(pmd_t *pmdp, unsigned long addr, pmdval_t prot)
++{
++#ifdef CONFIG_ARM_LPAE
++ pmdp[0] = __pmd(pmd_val(pmdp[0]) | prot);
++#else
++ if (addr & SECTION_SIZE)
++ pmdp[1] = __pmd(pmd_val(pmdp[1]) | prot);
++ else
++ pmdp[0] = __pmd(pmd_val(pmdp[0]) | prot);
++#endif
++ flush_pmd_entry(pmdp);
++}
++
+ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+ pmdval_t prot)
+ {
+diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h
+index 3f82e9d..2a85e8b 100644
+--- a/arch/arm/include/asm/pgtable-2level-hwdef.h
++++ b/arch/arm/include/asm/pgtable-2level-hwdef.h
+@@ -28,7 +28,7 @@
+ /*
+ * - section
+ */
+-#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 0) /* v7 */
++#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 0) /* v7 */
+ #define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2)
+ #define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3)
+ #define PMD_SECT_XN (_AT(pmdval_t, 1) << 4) /* v6 */
+@@ -40,6 +40,7 @@
+ #define PMD_SECT_nG (_AT(pmdval_t, 1) << 17) /* v6 */
+ #define PMD_SECT_SUPER (_AT(pmdval_t, 1) << 18) /* v6 */
+ #define PMD_SECT_AF (_AT(pmdval_t, 0))
++#define PMD_SECT_RDONLY (_AT(pmdval_t, 0))
+
+ #define PMD_SECT_UNCACHED (_AT(pmdval_t, 0))
+ #define PMD_SECT_BUFFERED (PMD_SECT_BUFFERABLE)
+@@ -70,6 +71,7 @@
+ * - extended small page/tiny page
+ */
+ #define PTE_EXT_XN (_AT(pteval_t, 1) << 0) /* v6 */
++#define PTE_EXT_PXN (_AT(pteval_t, 1) << 2) /* v7 */
+ #define PTE_EXT_AP_MASK (_AT(pteval_t, 3) << 4)
+ #define PTE_EXT_AP0 (_AT(pteval_t, 1) << 4)
+ #define PTE_EXT_AP1 (_AT(pteval_t, 2) << 4)
+diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
+index 92fd2c8..061dae1 100644
+--- a/arch/arm/include/asm/pgtable-2level.h
++++ b/arch/arm/include/asm/pgtable-2level.h
+@@ -127,6 +127,9 @@
+ #define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */
+ #define L_PTE_NONE (_AT(pteval_t, 1) << 11)
+
++/* Two-level page tables only have PXN in the PGD, not in the PTE. */
++#define L_PTE_PXN (_AT(pteval_t, 0))
++
+ /*
+ * These are the memory types, defined to be compatible with
+ * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B
+diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
+index 2a029bc..a0524c7 100644
+--- a/arch/arm/include/asm/pgtable-3level.h
++++ b/arch/arm/include/asm/pgtable-3level.h
+@@ -80,6 +80,7 @@
+ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
+ #define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
+ #define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */
++#define L_PTE_PXN (_AT(pteval_t, 1) << 53) /* PXN */
+ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */
+ #define L_PTE_DIRTY (_AT(pteval_t, 1) << 55)
+ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56)
+@@ -90,10 +91,12 @@
+ #define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
+ #define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
+ #define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58)
++#define PMD_SECT_RDONLY PMD_SECT_AP2
+
+ /*
+ * To be used in assembly code with the upper page attributes.
+ */
++#define L_PTE_PXN_HIGH (1 << (53 - 32))
+ #define L_PTE_XN_HIGH (1 << (54 - 32))
+ #define L_PTE_DIRTY_HIGH (1 << (55 - 32))
+
+diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
+index a8d656d..2febb8a 100644
+--- a/arch/arm/include/asm/pgtable.h
++++ b/arch/arm/include/asm/pgtable.h
+@@ -33,6 +33,9 @@
+ #include <asm/pgtable-2level.h>
+ #endif
+
++#define ktla_ktva(addr) (addr)
++#define ktva_ktla(addr) (addr)
++
+ /*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+@@ -48,6 +51,9 @@
+ #define LIBRARY_TEXT_START 0x0c000000
+
+ #ifndef __ASSEMBLY__
++extern pteval_t __supported_pte_mask;
++extern pmdval_t __supported_pmd_mask;
++
+ extern void __pte_error(const char *file, int line, pte_t);
+ extern void __pmd_error(const char *file, int line, pmd_t);
+ extern void __pgd_error(const char *file, int line, pgd_t);
+@@ -56,6 +62,48 @@ extern void __pgd_error(const char *file, int line, pgd_t);
+ #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd)
+ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd)
+
++#define __HAVE_ARCH_PAX_OPEN_KERNEL
++#define __HAVE_ARCH_PAX_CLOSE_KERNEL
++
++#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++#include <asm/domain.h>
++#include <linux/thread_info.h>
++#include <linux/preempt.h>
++
++static inline int test_domain(int domain, int domaintype)
++{ /* non-zero iff the saved cpu_domain holds `domaintype` in the field for `domain` */
++ return (current_thread_info()->cpu_domain & domain_mask(domain)) == domain_val(domain, domaintype);
++}
++#endif
++
++#ifdef CONFIG_PAX_KERNEXEC
++static inline unsigned long pax_open_kernel(void) {
++#ifdef CONFIG_ARM_LPAE
++ /* TODO: not implemented for LPAE; this branch does nothing and returns 0 */
++#else
++ preempt_disable(); /* re-enabled by pax_close_kernel() */
++ BUG_ON(test_domain(DOMAIN_KERNEL, DOMAIN_KERNEXEC)); /* must not already be open */
++ modify_domain(DOMAIN_KERNEL, DOMAIN_KERNEXEC); /* switch the kernel domain to DOMAIN_KERNEXEC */
++#endif
++ return 0; /* always 0 in every configuration */
++}
++static inline unsigned long pax_close_kernel(void) {
++#ifdef CONFIG_ARM_LPAE
++ /* TODO: not implemented for LPAE; this branch does nothing and returns 0 */
++#else
++ BUG_ON(test_domain(DOMAIN_KERNEL, DOMAIN_MANAGER)); /* must not already be closed */
++ /* DOMAIN_MANAGER is defined as 1 here, the same value as DOMAIN_CLIENT */
++ modify_domain(DOMAIN_KERNEL, DOMAIN_MANAGER);
++ preempt_enable_no_resched(); /* pairs with preempt_disable() in pax_open_kernel() */
++#endif
++ return 0; /* always 0 in every configuration */
++}
++#else
++static inline unsigned long pax_open_kernel(void) { return 0; }
++static inline unsigned long pax_close_kernel(void) { return 0; }
++#endif
++
+ /*
+ * This is the lowest virtual address we can permit any user space
+ * mapping to be mapped at. This is particularly important for
+@@ -75,8 +123,8 @@ extern void __pgd_error(const char *file, int line, pgd_t);
+ /*
+ * The pgprot_* and protection_map entries will be fixed up in runtime
+ * to include the cachable and bufferable bits based on memory policy,
+- * as well as any architecture dependent bits like global/ASID and SMP
+- * shared mapping bits.
++ * as well as any architecture dependent bits like global/ASID, PXN,
++ * and SMP shared mapping bits.
+ */
+ #define _L_PTE_DEFAULT L_PTE_PRESENT | L_PTE_YOUNG
+
+@@ -308,7 +356,7 @@ static inline pte_t pte_mknexec(pte_t pte)
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+ const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |
+- L_PTE_NONE | L_PTE_VALID;
++ L_PTE_NONE | L_PTE_VALID | __supported_pte_mask;
+ pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+ return pte;
+ }
+diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
+index 3d6dc8b..1262ad3 100644
+--- a/arch/arm/include/asm/smp.h
++++ b/arch/arm/include/asm/smp.h
+@@ -108,7 +108,7 @@ struct smp_operations {
+ int (*cpu_disable)(unsigned int cpu);
+ #endif
+ #endif
+-};
++} __no_const;
+
+ struct of_cpu_method {
+ const char *method;
+diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
+index cf4f3aa..8f2f2d9 100644
+--- a/arch/arm/include/asm/string.h
++++ b/arch/arm/include/asm/string.h
+@@ -7,19 +7,19 @@
+ */
+
+ #define __HAVE_ARCH_STRRCHR
+-extern char * strrchr(const char * s, int c);
++extern char * strrchr(const char * s, int c) __nocapture(-1);
+
+ #define __HAVE_ARCH_STRCHR
+-extern char * strchr(const char * s, int c);
++extern char * strchr(const char * s, int c) __nocapture(-1);
+
+ #define __HAVE_ARCH_MEMCPY
+-extern void * memcpy(void *, const void *, __kernel_size_t);
++extern void * memcpy(void *, const void *, __kernel_size_t) __nocapture(2);
+
+ #define __HAVE_ARCH_MEMMOVE
+-extern void * memmove(void *, const void *, __kernel_size_t);
++extern void * memmove(void *, const void *, __kernel_size_t) __nocapture(2);
+
+ #define __HAVE_ARCH_MEMCHR
+-extern void * memchr(const void *, int, __kernel_size_t);
++extern void * memchr(const void *, int, __kernel_size_t) __nocapture(-1);
+
+ #define __HAVE_ARCH_MEMSET
+ extern void * memset(void *, int, __kernel_size_t);
+diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
+index 776757d..a552c1d 100644
+--- a/arch/arm/include/asm/thread_info.h
++++ b/arch/arm/include/asm/thread_info.h
+@@ -73,6 +73,9 @@ struct thread_info {
+ .flags = 0, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .addr_limit = KERNEL_DS, \
++ .cpu_domain = domain_val(DOMAIN_USER, DOMAIN_USERCLIENT) | \
++ domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT) | \
++ domain_val(DOMAIN_IO, DOMAIN_KERNELCLIENT), \
+ }
+
+ #define init_thread_info (init_thread_union.thread_info)
+@@ -143,6 +146,10 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
+ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
+ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
+ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */
++/* within 8 bits of TIF_SYSCALL_TRACE
++ * to meet flexible second operand requirements
++ */
++#define TIF_GRSEC_SETXID 8
+
+ #define TIF_NOHZ 12 /* in adaptive nohz mode */
+ #define TIF_USING_IWMMXT 17
+@@ -158,10 +165,11 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
+ #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
+ #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
++#define _TIF_GRSEC_SETXID (1 << TIF_GRSEC_SETXID)
+
+ /* Checks for any syscall work in entry-common.S */
+ #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+- _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
++ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | _TIF_GRSEC_SETXID)
+
+ /*
+ * Change these and you break ASM code in entry-common.S
+diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
+index f6fcc67..5895d62 100644
+--- a/arch/arm/include/asm/timex.h
++++ b/arch/arm/include/asm/timex.h
+@@ -13,6 +13,7 @@
+ #define _ASMARM_TIMEX_H
+
+ typedef unsigned long cycles_t;
++extern int read_current_timer(unsigned long *timer_val);
+ #define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+
+ #endif
+diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
+index 5f833f7..76e6644 100644
+--- a/arch/arm/include/asm/tls.h
++++ b/arch/arm/include/asm/tls.h
+@@ -3,6 +3,7 @@
+
+ #include <linux/compiler.h>
+ #include <asm/thread_info.h>
++#include <asm/pgtable.h>
+
+ #ifdef __ASSEMBLY__
+ #include <asm/asm-offsets.h>
+@@ -89,7 +90,9 @@ static inline void set_tls(unsigned long val)
+ * at 0xffff0fe0 must be used instead. (see
+ * entry-armv.S for details)
+ */
++ pax_open_kernel();
+ *((unsigned int *)0xffff0ff0) = val;
++ pax_close_kernel();
+ #endif
+ }
+
+diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
+index a93c0f9..5c31bbb 100644
+--- a/arch/arm/include/asm/uaccess.h
++++ b/arch/arm/include/asm/uaccess.h
+@@ -18,6 +18,7 @@
+ #include <asm/domain.h>
+ #include <asm/unified.h>
+ #include <asm/compiler.h>
++#include <asm/pgtable.h>
+
+ #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ #include <asm-generic/uaccess-unaligned.h>
+@@ -50,6 +51,59 @@ struct exception_table_entry
+ extern int fixup_exception(struct pt_regs *regs);
+
+ /*
++ * These two are intentionally not defined anywhere - if the kernel
++ * code generates any references to them, that's a bug.
++ */
++extern int __get_user_bad(void);
++extern int __put_user_bad(void);
++
++/*
++ * Note that this is actually 0x1,0000,0000
++ */
++#define KERNEL_DS 0x00000000
++#define get_ds() (KERNEL_DS)
++
++#ifdef CONFIG_MMU
++
++#define USER_DS TASK_SIZE
++#define get_fs() (current_thread_info()->addr_limit)
++
++static inline void set_fs(mm_segment_t fs)
++{
++ current_thread_info()->addr_limit = fs;
++ modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_KERNELCLIENT : DOMAIN_MANAGER);
++}
++
++#define segment_eq(a, b) ((a) == (b))
++
++#define __HAVE_ARCH_PAX_OPEN_USERLAND
++#define __HAVE_ARCH_PAX_CLOSE_USERLAND
++
++static inline void pax_open_userland(void)
++{
++
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ if (segment_eq(get_fs(), USER_DS)) {
++ BUG_ON(test_domain(DOMAIN_USER, DOMAIN_UDEREF));
++ modify_domain(DOMAIN_USER, DOMAIN_UDEREF);
++ }
++#endif
++
++}
++
++static inline void pax_close_userland(void)
++{
++
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ if (segment_eq(get_fs(), USER_DS)) {
++ BUG_ON(test_domain(DOMAIN_USER, DOMAIN_NOACCESS));
++ modify_domain(DOMAIN_USER, DOMAIN_NOACCESS);
++ }
++#endif
++
++}
++
++/*
+ * These two functions allow hooking accesses to userspace to increase
+ * system integrity by ensuring that the kernel can not inadvertantly
+ * perform such accesses (eg, via list poison values) which could then
+@@ -66,6 +120,7 @@ static inline unsigned int uaccess_save_and_enable(void)
+
+ return old_domain;
+ #else
++ pax_open_userland();
+ return 0;
+ #endif
+ }
+@@ -75,35 +130,11 @@ static inline void uaccess_restore(unsigned int flags)
+ #ifdef CONFIG_CPU_SW_DOMAIN_PAN
+ /* Restore the user access mask */
+ set_domain(flags);
++#else
++ pax_close_userland();
+ #endif
+ }
+
+-/*
+- * These two are intentionally not defined anywhere - if the kernel
+- * code generates any references to them, that's a bug.
+- */
+-extern int __get_user_bad(void);
+-extern int __put_user_bad(void);
+-
+-/*
+- * Note that this is actually 0x1,0000,0000
+- */
+-#define KERNEL_DS 0x00000000
+-#define get_ds() (KERNEL_DS)
+-
+-#ifdef CONFIG_MMU
+-
+-#define USER_DS TASK_SIZE
+-#define get_fs() (current_thread_info()->addr_limit)
+-
+-static inline void set_fs(mm_segment_t fs)
+-{
+- current_thread_info()->addr_limit = fs;
+- modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
+-}
+-
+-#define segment_eq(a, b) ((a) == (b))
+-
+ /* We use 33-bit arithmetic here... */
+ #define __range_ok(addr, size) ({ \
+ unsigned long flag, roksum; \
+@@ -268,6 +299,7 @@ static inline void set_fs(mm_segment_t fs)
+
+ #endif /* CONFIG_MMU */
+
++#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
+ #define access_ok(type, addr, size) (__range_ok(addr, size) == 0)
+
+ #define user_addr_max() \
+@@ -474,10 +506,10 @@ do { \
+
+
+ #ifdef CONFIG_MMU
+-extern unsigned long __must_check
++extern unsigned long __must_check __size_overflow(3)
+ arm_copy_from_user(void *to, const void __user *from, unsigned long n);
+
+-static inline unsigned long __must_check
++static inline unsigned long __must_check __size_overflow(3)
+ __copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
+ unsigned int __ua_flags;
+@@ -489,9 +521,9 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
+ return n;
+ }
+
+-extern unsigned long __must_check
++extern unsigned long __must_check __size_overflow(3)
+ arm_copy_to_user(void __user *to, const void *from, unsigned long n);
+-extern unsigned long __must_check
++extern unsigned long __must_check __size_overflow(3)
+ __copy_to_user_std(void __user *to, const void *from, unsigned long n);
+
+ static inline unsigned long __must_check
+@@ -511,9 +543,9 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
+ #endif
+ }
+
+-extern unsigned long __must_check
++extern unsigned long __must_check __size_overflow(2)
+ arm_clear_user(void __user *addr, unsigned long n);
+-extern unsigned long __must_check
++extern unsigned long __must_check __size_overflow(2)
+ __clear_user_std(void __user *addr, unsigned long n);
+
+ static inline unsigned long __must_check
+@@ -533,6 +565,9 @@ __clear_user(void __user *addr, unsigned long n)
+
+ static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
++ if ((long)n < 0)
++ return n;
++
+ if (access_ok(VERIFY_READ, from, n))
+ n = __copy_from_user(to, from, n);
+ else /* security hole - plug it */
+@@ -542,6 +577,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u
+
+ static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
+ {
++ if ((long)n < 0)
++ return n;
++
+ if (access_ok(VERIFY_WRITE, to, n))
+ n = __copy_to_user(to, from, n);
+ return n;
+diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h
+index 5af0ed1..cea83883 100644
+--- a/arch/arm/include/uapi/asm/ptrace.h
++++ b/arch/arm/include/uapi/asm/ptrace.h
+@@ -92,7 +92,7 @@
+ * ARMv7 groups of PSR bits
+ */
+ #define APSR_MASK 0xf80f0000 /* N, Z, C, V, Q and GE flags */
+-#define PSR_ISET_MASK 0x01000010 /* ISA state (J, T) mask */
++#define PSR_ISET_MASK 0x01000020 /* ISA state (J, T) mask */
+ #define PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */
+ #define PSR_ENDIAN_MASK 0x00000200 /* Endianness state mask */
+
+diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
+index 7e45f69..2c047db 100644
+--- a/arch/arm/kernel/armksyms.c
++++ b/arch/arm/kernel/armksyms.c
+@@ -59,7 +59,7 @@ EXPORT_SYMBOL(arm_delay_ops);
+
+ /* networking */
+ EXPORT_SYMBOL(csum_partial);
+-EXPORT_SYMBOL(csum_partial_copy_from_user);
++EXPORT_SYMBOL(__csum_partial_copy_from_user);
+ EXPORT_SYMBOL(csum_partial_copy_nocheck);
+ EXPORT_SYMBOL(__csum_ipv6_magic);
+
+diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
+index 7dccc96..84da243 100644
+--- a/arch/arm/kernel/cpuidle.c
++++ b/arch/arm/kernel/cpuidle.c
+@@ -19,7 +19,7 @@ extern struct of_cpuidle_method __cpuidle_method_of_table[];
+ static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel
+ __used __section(__cpuidle_method_of_table_end);
+
+-static struct cpuidle_ops cpuidle_ops[NR_CPUS];
++static struct cpuidle_ops cpuidle_ops[NR_CPUS] __read_only;
+
+ /**
+ * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle()
+diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c
+index 9f43ba0..1cee475 100644
+--- a/arch/arm/kernel/efi.c
++++ b/arch/arm/kernel/efi.c
+@@ -60,9 +60,9 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
+ * preference.
+ */
+ if (md->attribute & EFI_MEMORY_WB)
+- desc.type = MT_MEMORY_RWX;
++ desc.type = __MT_MEMORY_RWX;
+ else if (md->attribute & EFI_MEMORY_WT)
+- desc.type = MT_MEMORY_RWX_NONCACHED;
++ desc.type = MT_MEMORY_RW_NONCACHED;
+ else if (md->attribute & EFI_MEMORY_WC)
+ desc.type = MT_DEVICE_WC;
+ else
+diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
+index 9f157e7..8e3f857 100644
+--- a/arch/arm/kernel/entry-armv.S
++++ b/arch/arm/kernel/entry-armv.S
+@@ -50,6 +50,87 @@
+ 9997:
+ .endm
+
++ .macro pax_enter_kernel
++#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++ @ make aligned space for saved DACR
++ sub sp, sp, #8
++ @ save regs
++ stmdb sp!, {r1, r2}
++ @ read DACR from cpu_domain into r1
++ mov r2, sp
++ @ assume 8K kernel stacks (not pages); the 0x1fff mask needs two immediates
++ bic r2, r2, #(0x1fc0)
++ bic r2, r2, #(0x3f)
++ ldr r1, [r2, #TI_CPU_DOMAIN]
++ @ store old DACR on stack
++ str r1, [sp, #8]
++#ifdef CONFIG_PAX_KERNEXEC
++ @ set type of DOMAIN_KERNEL to DOMAIN_KERNELCLIENT
++ bic r1, r1, #(domain_val(DOMAIN_KERNEL, 3))
++ orr r1, r1, #(domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT))
++#endif
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ @ set current DOMAIN_USER to DOMAIN_NOACCESS
++ bic r1, r1, #(domain_val(DOMAIN_USER, 3))
++#endif
++ @ write r1 to current_thread_info()->cpu_domain
++ str r1, [r2, #TI_CPU_DOMAIN]
++ @ write r1 to DACR
++ mcr p15, 0, r1, c3, c0, 0
++ @ instruction sync
++ instr_sync
++ @ restore regs
++ ldmia sp!, {r1, r2}
++#endif
++ .endm
++
++ .macro pax_open_userland
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ @ save regs
++ stmdb sp!, {r0, r1}
++ @ read DACR from cpu_domain into r1
++ mov r0, sp
++ @ assume 8K kernel stacks (not pages); the 0x1fff mask needs two immediates
++ bic r0, r0, #(0x1fc0)
++ bic r0, r0, #(0x3f)
++ ldr r1, [r0, #TI_CPU_DOMAIN]
++ @ set current DOMAIN_USER to DOMAIN_UDEREF
++ bic r1, r1, #(domain_val(DOMAIN_USER, 3))
++ orr r1, r1, #(domain_val(DOMAIN_USER, DOMAIN_UDEREF))
++ @ write r1 to current_thread_info()->cpu_domain
++ str r1, [r0, #TI_CPU_DOMAIN]
++ @ write r1 to DACR
++ mcr p15, 0, r1, c3, c0, 0
++ @ instruction sync
++ instr_sync
++ @ restore regs
++ ldmia sp!, {r0, r1}
++#endif
++ .endm
++
++ .macro pax_close_userland
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ @ save regs
++ stmdb sp!, {r0, r1}
++ @ read DACR from cpu_domain into r1
++ mov r0, sp
++ @ assume 8K kernel stacks (not pages); the 0x1fff mask needs two immediates
++ bic r0, r0, #(0x1fc0)
++ bic r0, r0, #(0x3f)
++ ldr r1, [r0, #TI_CPU_DOMAIN]
++ @ set current DOMAIN_USER to DOMAIN_NOACCESS
++ bic r1, r1, #(domain_val(DOMAIN_USER, 3))
++ @ write r1 to current_thread_info()->cpu_domain
++ str r1, [r0, #TI_CPU_DOMAIN]
++ @ write r1 to DACR
++ mcr p15, 0, r1, c3, c0, 0
++ @ instruction sync
++ instr_sync
++ @ restore regs
++ ldmia sp!, {r0, r1}
++#endif
++ .endm
++
+ .macro pabt_helper
+ @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
+ #ifdef MULTI_PABORT
+@@ -92,11 +173,15 @@
+ * Invalid mode handlers
+ */
+ .macro inv_entry, reason
++
++ pax_enter_kernel
++
+ sub sp, sp, #PT_REGS_SIZE
+ ARM( stmib sp, {r1 - lr} )
+ THUMB( stmia sp, {r0 - r12} )
+ THUMB( str sp, [sp, #S_SP] )
+ THUMB( str lr, [sp, #S_LR] )
++
+ mov r1, #\reason
+ .endm
+
+@@ -152,6 +237,9 @@ ENDPROC(__und_invalid)
+ .macro svc_entry, stack_hole=0, trace=1, uaccess=1
+ UNWIND(.fnstart )
+ UNWIND(.save {r0 - pc} )
++
++ pax_enter_kernel
++
+ sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4)
+ #ifdef CONFIG_THUMB2_KERNEL
+ SPFIX( str r0, [sp] ) @ temporarily saved
+@@ -167,7 +255,12 @@ ENDPROC(__und_invalid)
+ ldmia r0, {r3 - r5}
+ add r7, sp, #S_SP - 4 @ here for interlock avoidance
+ mov r6, #-1 @ "" "" "" ""
++#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++ @ offset sp by 8 as done in pax_enter_kernel
++ add r2, sp, #(SVC_REGS_SIZE + \stack_hole + 4)
++#else
+ add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4)
++#endif
+ SPFIX( addeq r2, r2, #4 )
+ str r3, [sp, #-4]! @ save the "real" r0 copied
+ @ from the exception stack
+@@ -382,6 +475,9 @@ ENDPROC(__fiq_abt)
+ .macro usr_entry, trace=1, uaccess=1
+ UNWIND(.fnstart )
+ UNWIND(.cantunwind ) @ don't unwind the user space
++
++ pax_enter_kernel_user
++
+ sub sp, sp, #PT_REGS_SIZE
+ ARM( stmib sp, {r1 - r12} )
+ THUMB( stmia sp, {r0 - r12} )
+@@ -495,7 +591,9 @@ __und_usr:
+ tst r3, #PSR_T_BIT @ Thumb mode?
+ bne __und_usr_thumb
+ sub r4, r2, #4 @ ARM instr at LR - 4
++ pax_open_userland
+ 1: ldrt r0, [r4]
++ pax_close_userland
+ ARM_BE8(rev r0, r0) @ little endian instruction
+
+ uaccess_disable ip
+@@ -531,11 +629,15 @@ __und_usr_thumb:
+ */
+ .arch armv6t2
+ #endif
++ pax_open_userland
+ 2: ldrht r5, [r4]
++ pax_close_userland
+ ARM_BE8(rev16 r5, r5) @ little endian instruction
+ cmp r5, #0xe800 @ 32bit instruction if xx != 0
+ blo __und_usr_fault_16_pan @ 16bit undefined instruction
++ pax_open_userland
+ 3: ldrht r0, [r2]
++ pax_close_userland
+ ARM_BE8(rev16 r0, r0) @ little endian instruction
+ uaccess_disable ip
+ add r2, r2, #2 @ r2 is PC + 2, make it PC + 4
+@@ -566,7 +668,8 @@ ENDPROC(__und_usr)
+ */
+ .pushsection .text.fixup, "ax"
+ .align 2
+-4: str r4, [sp, #S_PC] @ retry current instruction
++4: pax_close_userland
++ str r4, [sp, #S_PC] @ retry current instruction
+ ret r9
+ .popsection
+ .pushsection __ex_table,"a"
+@@ -788,7 +891,7 @@ ENTRY(__switch_to)
+ THUMB( str lr, [ip], #4 )
+ ldr r4, [r2, #TI_TP_VALUE]
+ ldr r5, [r2, #TI_TP_VALUE + 4]
+-#ifdef CONFIG_CPU_USE_DOMAINS
++#if defined(CONFIG_CPU_USE_DOMAINS) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+ mrc p15, 0, r6, c3, c0, 0 @ Get domain register
+ str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register
+ ldr r6, [r2, #TI_CPU_DOMAIN]
+@@ -799,7 +902,7 @@ ENTRY(__switch_to)
+ ldr r8, =__stack_chk_guard
+ ldr r7, [r7, #TSK_STACK_CANARY]
+ #endif
+-#ifdef CONFIG_CPU_USE_DOMAINS
++#if defined(CONFIG_CPU_USE_DOMAINS) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+ mcr p15, 0, r6, c3, c0, 0 @ Set domain register
+ #endif
+ mov r5, r0
+diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
+index 10c3283..c47cdf5 100644
+--- a/arch/arm/kernel/entry-common.S
++++ b/arch/arm/kernel/entry-common.S
+@@ -11,18 +11,46 @@
+ #include <asm/assembler.h>
+ #include <asm/unistd.h>
+ #include <asm/ftrace.h>
++#include <asm/domain.h>
+ #include <asm/unwind.h>
+
++#include "entry-header.S"
++
+ #ifdef CONFIG_NEED_RET_TO_USER
+ #include <mach/entry-macro.S>
+ #else
+ .macro arch_ret_to_user, tmp1, tmp2
++#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++ @ save regs
++ stmdb sp!, {r1, r2}
++ @ read DACR from cpu_domain into r1
++ mov r2, sp
++ @ assume 8K kernel stacks (not pages); the 0x1fff mask needs two immediates
++ bic r2, r2, #(0x1fc0)
++ bic r2, r2, #(0x3f)
++ ldr r1, [r2, #TI_CPU_DOMAIN]
++#ifdef CONFIG_PAX_KERNEXEC
++ @ set type of DOMAIN_KERNEL to DOMAIN_KERNELCLIENT
++ bic r1, r1, #(domain_val(DOMAIN_KERNEL, 3))
++ orr r1, r1, #(domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT))
++#endif
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ @ set current DOMAIN_USER to DOMAIN_UDEREF
++ bic r1, r1, #(domain_val(DOMAIN_USER, 3))
++ orr r1, r1, #(domain_val(DOMAIN_USER, DOMAIN_UDEREF))
++#endif
++ @ write r1 to current_thread_info()->cpu_domain
++ str r1, [r2, #TI_CPU_DOMAIN]
++ @ write r1 to DACR
++ mcr p15, 0, r1, c3, c0, 0
++ @ instruction sync
++ instr_sync
++ @ restore regs
++ ldmia sp!, {r1, r2}
++#endif
+ .endm
+ #endif
+
+-#include "entry-header.S"
+-
+-
+ .align 5
+ #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
+ /*
+@@ -36,7 +64,9 @@ ret_fast_syscall:
+ UNWIND(.cantunwind )
+ disable_irq_notrace @ disable interrupts
+ ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
+- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
++ tst r1, #_TIF_SYSCALL_WORK
++ bne fast_work_pending
++ tst r1, #_TIF_WORK_MASK
+ bne fast_work_pending
+
+ /* perform architecture specific actions before user return */
+@@ -62,7 +92,9 @@ ret_fast_syscall:
+ str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
+ disable_irq_notrace @ disable interrupts
+ ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
+- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
++ tst r1, #_TIF_SYSCALL_WORK
++ bne __sys_trace_return_nosave
++ tst r1, #_TIF_WORK_MASK
+ beq no_work_pending
+ UNWIND(.fnend )
+ ENDPROC(ret_fast_syscall)
+@@ -199,6 +231,12 @@ ENTRY(vector_swi)
+
+ uaccess_disable tbl
+
++ /*
++ * do this here to avoid a performance hit of wrapping the code above
++ * that directly dereferences userland to parse the SWI instruction
++ */
++ pax_enter_kernel_user
++
+ adr tbl, sys_call_table @ load syscall table pointer
+
+ #if defined(CONFIG_OABI_COMPAT)
+diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
+index 6391728..6bf90b8 100644
+--- a/arch/arm/kernel/entry-header.S
++++ b/arch/arm/kernel/entry-header.S
+@@ -196,6 +196,59 @@
+ msr cpsr_c, \rtemp @ switch back to the SVC mode
+ .endm
+
++ .macro pax_enter_kernel_user
++#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++ @ save regs
++ stmdb sp!, {r0, r1}
++ @ read DACR from cpu_domain into r1
++ mov r0, sp
++ @ assume 8K kernel stacks (not pages); the 0x1fff mask needs two immediates
++ bic r0, r0, #(0x1fc0)
++ bic r0, r0, #(0x3f)
++ ldr r1, [r0, #TI_CPU_DOMAIN]
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ @ set current DOMAIN_USER to DOMAIN_NOACCESS
++ bic r1, r1, #(domain_val(DOMAIN_USER, 3))
++#endif
++#ifdef CONFIG_PAX_KERNEXEC
++ @ set current DOMAIN_KERNEL to DOMAIN_KERNELCLIENT
++ bic r1, r1, #(domain_val(DOMAIN_KERNEL, 3))
++ orr r1, r1, #(domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT))
++#endif
++ @ write r1 to current_thread_info()->cpu_domain
++ str r1, [r0, #TI_CPU_DOMAIN]
++ @ write r1 to DACR
++ mcr p15, 0, r1, c3, c0, 0
++ @ instruction sync
++ instr_sync
++ @ restore regs
++ ldmia sp!, {r0, r1}
++#endif
++ .endm
++
++ .macro pax_exit_kernel
++#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++ @ save regs
++ stmdb sp!, {r0, r1}
++ @ read old DACR from stack into r1
++ ldr r1, [sp, #(8 + S_SP)]
++ sub r1, r1, #8
++ ldr r1, [r1]
++
++ @ write r1 to current_thread_info()->cpu_domain
++ mov r0, sp
++ @ assume 8K kernel stacks (not pages); the 0x1fff mask needs two immediates
++ bic r0, r0, #(0x1fc0)
++ bic r0, r0, #(0x3f)
++ str r1, [r0, #TI_CPU_DOMAIN]
++ @ write r1 to DACR
++ mcr p15, 0, r1, c3, c0, 0
++ @ instruction sync
++ instr_sync
++ @ restore regs
++ ldmia sp!, {r0, r1}
++#endif
++ .endm
+
+ .macro svc_exit, rpsr, irq = 0
+ .if \irq != 0
+@@ -219,6 +272,8 @@
+ uaccess_restore
+ str r1, [tsk, #TI_ADDR_LIMIT]
+
++ pax_exit_kernel
++
+ #ifndef CONFIG_THUMB2_KERNEL
+ @ ARM mode SVC restore
+ msr spsr_cxsf, \rpsr
+diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
+index 059c3da..8e45cfc 100644
+--- a/arch/arm/kernel/fiq.c
++++ b/arch/arm/kernel/fiq.c
+@@ -95,7 +95,10 @@ void set_fiq_handler(void *start, unsigned int length)
+ void *base = vectors_page;
+ unsigned offset = FIQ_OFFSET;
+
++ pax_open_kernel();
+ memcpy(base + offset, start, length);
++ pax_close_kernel();
++
+ if (!cache_is_vipt_nonaliasing())
+ flush_icache_range((unsigned long)base + offset, offset +
+ length);
+diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
+index 0c7efc3..3927085 100644
+--- a/arch/arm/kernel/module-plts.c
++++ b/arch/arm/kernel/module-plts.c
+@@ -30,17 +30,12 @@ struct plt_entries {
+ u32 lit[PLT_ENT_COUNT];
+ };
+
+-static bool in_init(const struct module *mod, u32 addr)
+-{
+- return addr - (u32)mod->init_layout.base < mod->init_layout.size;
+-}
+-
+ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
+ {
+ struct plt_entries *plt, *plt_end;
+ int c, *count;
+
+- if (in_init(mod, loc)) {
++ if (within_module_init(loc, mod)) {
+ plt = (void *)mod->arch.init_plt->sh_addr;
+ plt_end = (void *)plt + mod->arch.init_plt->sh_size;
+ count = &mod->arch.init_plt_count;
+diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
+index 4f14b5c..91ff261 100644
+--- a/arch/arm/kernel/module.c
++++ b/arch/arm/kernel/module.c
+@@ -38,17 +38,47 @@
+ #endif
+
+ #ifdef CONFIG_MMU
+-void *module_alloc(unsigned long size)
++static inline void *__module_alloc(unsigned long size, pgprot_t prot)
+ {
+- void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+- GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
++ void *p;
++
++ if (!size || (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && PAGE_ALIGN(size) > MODULES_END - MODULES_VADDR))
++ return NULL;
++
++ p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
++ GFP_KERNEL, prot, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
+ return p;
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+- GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
++ GFP_KERNEL, prot, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ }
++
++void *module_alloc(unsigned long size)
++{
++
++#ifdef CONFIG_PAX_KERNEXEC
++ return __module_alloc(size, PAGE_KERNEL);
++#else
++ return __module_alloc(size, PAGE_KERNEL_EXEC);
++#endif
++
++}
++
++#ifdef CONFIG_PAX_KERNEXEC
++void module_memfree_exec(void *module_region)
++{
++ module_memfree(module_region);
++}
++EXPORT_SYMBOL(module_memfree_exec);
++
++void *module_alloc_exec(unsigned long size)
++{
++ return __module_alloc(size, PAGE_KERNEL_EXEC);
++}
++EXPORT_SYMBOL(module_alloc_exec);
++#endif
+ #endif
+
+ int
+diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
+index 69bda1a..755113a 100644
+--- a/arch/arm/kernel/patch.c
++++ b/arch/arm/kernel/patch.c
+@@ -66,6 +66,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
+ else
+ __acquire(&patch_lock);
+
++ pax_open_kernel();
+ if (thumb2 && __opcode_is_thumb16(insn)) {
+ *(u16 *)waddr = __opcode_to_mem_thumb16(insn);
+ size = sizeof(u16);
+@@ -97,6 +98,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
+ *(u32 *)waddr = insn;
+ size = sizeof(u32);
+ }
++ pax_close_kernel();
+
+ if (waddr != addr) {
+ flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
+diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
+index 612eb53..5a44c8c 100644
+--- a/arch/arm/kernel/process.c
++++ b/arch/arm/kernel/process.c
+@@ -118,8 +118,8 @@ void __show_regs(struct pt_regs *regs)
+
+ show_regs_print_info(KERN_DEFAULT);
+
+- print_symbol("PC is at %s\n", instruction_pointer(regs));
+- print_symbol("LR is at %s\n", regs->ARM_lr);
++ printk("PC is at %pA\n", (void *)instruction_pointer(regs));
++ printk("LR is at %pA\n", (void *)regs->ARM_lr);
+ printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n"
+ "sp : %08lx ip : %08lx fp : %08lx\n",
+ regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr,
+@@ -233,7 +233,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
+
+ memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
+
+-#ifdef CONFIG_CPU_USE_DOMAINS
++#if defined(CONFIG_CPU_USE_DOMAINS) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+ /*
+ * Copy the initial value of the domain access control register
+ * from the current thread: thread->addr_limit will have been
+@@ -337,7 +337,7 @@ static struct vm_area_struct gate_vma = {
+
+ static int __init gate_vma_init(void)
+ {
+- gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
++ gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags);
+ return 0;
+ }
+ arch_initcall(gate_vma_init);
+@@ -366,92 +366,14 @@ const char *arch_vma_name(struct vm_area_struct *vma)
+ return is_gate_vma(vma) ? "[vectors]" : NULL;
+ }
+
+-/* If possible, provide a placement hint at a random offset from the
+- * stack for the sigpage and vdso pages.
+- */
+-static unsigned long sigpage_addr(const struct mm_struct *mm,
+- unsigned int npages)
+-{
+- unsigned long offset;
+- unsigned long first;
+- unsigned long last;
+- unsigned long addr;
+- unsigned int slots;
+-
+- first = PAGE_ALIGN(mm->start_stack);
+-
+- last = TASK_SIZE - (npages << PAGE_SHIFT);
+-
+- /* No room after stack? */
+- if (first > last)
+- return 0;
+-
+- /* Just enough room? */
+- if (first == last)
+- return first;
+-
+- slots = ((last - first) >> PAGE_SHIFT) + 1;
+-
+- offset = get_random_int() % slots;
+-
+- addr = first + (offset << PAGE_SHIFT);
+-
+- return addr;
+-}
+-
+-static struct page *signal_page;
+-extern struct page *get_signal_page(void);
+-
+-static const struct vm_special_mapping sigpage_mapping = {
+- .name = "[sigpage]",
+- .pages = &signal_page,
+-};
+-
+ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+ {
+ struct mm_struct *mm = current->mm;
+- struct vm_area_struct *vma;
+- unsigned long npages;
+- unsigned long addr;
+- unsigned long hint;
+- int ret = 0;
+-
+- if (!signal_page)
+- signal_page = get_signal_page();
+- if (!signal_page)
+- return -ENOMEM;
+-
+- npages = 1; /* for sigpage */
+- npages += vdso_total_pages;
+
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+- hint = sigpage_addr(mm, npages);
+- addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0);
+- if (IS_ERR_VALUE(addr)) {
+- ret = addr;
+- goto up_fail;
+- }
+-
+- vma = _install_special_mapping(mm, addr, PAGE_SIZE,
+- VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+- &sigpage_mapping);
+-
+- if (IS_ERR(vma)) {
+- ret = PTR_ERR(vma);
+- goto up_fail;
+- }
+-
+- mm->context.sigpage = addr;
+-
+- /* Unlike the sigpage, failure to install the vdso is unlikely
+- * to be fatal to the process, so no error check needed
+- * here.
+- */
+- arm_install_vdso(mm, addr + PAGE_SIZE);
+-
+- up_fail:
++ mm->context.sigpage = (PAGE_OFFSET + (get_random_int() % 0x3FFEFFE0)) & 0xFFFFFFFC;
+ up_write(&mm->mmap_sem);
+- return ret;
++ return 0;
+ }
+ #endif
+diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
+index ce131ed..26f9765 100644
+--- a/arch/arm/kernel/ptrace.c
++++ b/arch/arm/kernel/ptrace.c
+@@ -928,10 +928,19 @@ static void tracehook_report_syscall(struct pt_regs *regs,
+ regs->ARM_ip = ip;
+ }
+
++#ifdef CONFIG_GRKERNSEC_SETXID
++extern void gr_delayed_cred_worker(void);
++#endif
++
+ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
+ {
+ current_thread_info()->syscall = scno;
+
++#ifdef CONFIG_GRKERNSEC_SETXID
++ if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID)))
++ gr_delayed_cred_worker();
++#endif
++
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
+index 3fa867a..d610607 100644
+--- a/arch/arm/kernel/reboot.c
++++ b/arch/arm/kernel/reboot.c
+@@ -120,6 +120,7 @@ void machine_power_off(void)
+
+ if (pm_power_off)
+ pm_power_off();
++ while (1);
+ }
+
+ /*
+diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
+index df7f2a7..d9d2bc1 100644
+--- a/arch/arm/kernel/setup.c
++++ b/arch/arm/kernel/setup.c
+@@ -112,21 +112,23 @@ EXPORT_SYMBOL(elf_hwcap);
+ unsigned int elf_hwcap2 __read_mostly;
+ EXPORT_SYMBOL(elf_hwcap2);
+
++pteval_t __supported_pte_mask __read_only;
++pmdval_t __supported_pmd_mask __read_only;
+
+ #ifdef MULTI_CPU
+-struct processor processor __read_mostly;
++struct processor processor __read_only;
+ #endif
+ #ifdef MULTI_TLB
+-struct cpu_tlb_fns cpu_tlb __read_mostly;
++struct cpu_tlb_fns cpu_tlb __read_only;
+ #endif
+ #ifdef MULTI_USER
+-struct cpu_user_fns cpu_user __read_mostly;
++struct cpu_user_fns cpu_user __read_only;
+ #endif
+ #ifdef MULTI_CACHE
+-struct cpu_cache_fns cpu_cache __read_mostly;
++struct cpu_cache_fns cpu_cache __read_only;
+ #endif
+ #ifdef CONFIG_OUTER_CACHE
+-struct outer_cache_fns outer_cache __read_mostly;
++struct outer_cache_fns outer_cache __read_only;
+ EXPORT_SYMBOL(outer_cache);
+ #endif
+
+@@ -257,9 +259,13 @@ static int __get_cpu_architecture(void)
+ * Register 0 and check for VMSAv7 or PMSAv7 */
+ unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0);
+ if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
+- (mmfr0 & 0x000000f0) >= 0x00000030)
++ (mmfr0 & 0x000000f0) >= 0x00000030) {
+ cpu_arch = CPU_ARCH_ARMv7;
+- else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
++ if ((mmfr0 & 0x0000000f) == 0x00000005 || (mmfr0 & 0x0000000f) == 0x00000004) {
++ __supported_pte_mask |= L_PTE_PXN;
++ __supported_pmd_mask |= PMD_PXNTABLE;
++ }
++ } else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
+ (mmfr0 & 0x000000f0) == 0x00000020)
+ cpu_arch = CPU_ARCH_ARMv6;
+ else
+diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
+index 7b8f214..ece8e28 100644
+--- a/arch/arm/kernel/signal.c
++++ b/arch/arm/kernel/signal.c
+@@ -24,8 +24,6 @@
+
+ extern const unsigned long sigreturn_codes[7];
+
+-static unsigned long signal_return_offset;
+-
+ #ifdef CONFIG_CRUNCH
+ static int preserve_crunch_context(struct crunch_sigframe __user *frame)
+ {
+@@ -388,8 +386,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig,
+ * except when the MPU has protected the vectors
+ * page from PL0
+ */
+- retcode = mm->context.sigpage + signal_return_offset +
+- (idx << 2) + thumb;
++ retcode = mm->context.sigpage + (idx << 2) + thumb;
+ } else
+ #endif
+ {
+@@ -601,33 +598,3 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+ } while (thread_flags & _TIF_WORK_MASK);
+ return 0;
+ }
+-
+-struct page *get_signal_page(void)
+-{
+- unsigned long ptr;
+- unsigned offset;
+- struct page *page;
+- void *addr;
+-
+- page = alloc_pages(GFP_KERNEL, 0);
+-
+- if (!page)
+- return NULL;
+-
+- addr = page_address(page);
+-
+- /* Give the signal return code some randomness */
+- offset = 0x200 + (get_random_int() & 0x7fc);
+- signal_return_offset = offset;
+-
+- /*
+- * Copy signal return handlers into the vector page, and
+- * set sigreturn to be a pointer to these.
+- */
+- memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes));
+-
+- ptr = (unsigned long)addr + offset;
+- flush_icache_range(ptr, ptr + sizeof(sigreturn_codes));
+-
+- return page;
+-}
+diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
+index 8615216..f5be307 100644
+--- a/arch/arm/kernel/smp.c
++++ b/arch/arm/kernel/smp.c
+@@ -82,7 +82,7 @@ enum ipi_msg_type {
+
+ static DECLARE_COMPLETION(cpu_running);
+
+-static struct smp_operations smp_ops;
++static struct smp_operations smp_ops __read_only;
+
+ void __init smp_set_ops(const struct smp_operations *ops)
+ {
+diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
+index b10e136..cb5edf9 100644
+--- a/arch/arm/kernel/tcm.c
++++ b/arch/arm/kernel/tcm.c
+@@ -64,7 +64,7 @@ static struct map_desc itcm_iomap[] __initdata = {
+ .virtual = ITCM_OFFSET,
+ .pfn = __phys_to_pfn(ITCM_OFFSET),
+ .length = 0,
+- .type = MT_MEMORY_RWX_ITCM,
++ .type = MT_MEMORY_RX_ITCM,
+ }
+ };
+
+@@ -362,7 +362,9 @@ no_dtcm:
+ start = &__sitcm_text;
+ end = &__eitcm_text;
+ ram = &__itcm_start;
++ pax_open_kernel();
+ memcpy(start, ram, itcm_code_sz);
++ pax_close_kernel();
+ pr_debug("CPU ITCM: copied code from %p - %p\n",
+ start, end);
+ itcm_present = true;
+diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
+index bc69838..e5dfdd4 100644
+--- a/arch/arm/kernel/traps.c
++++ b/arch/arm/kernel/traps.c
+@@ -65,7 +65,7 @@ static void dump_mem(const char *, const char *, unsigned long, unsigned long);
+ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame)
+ {
+ #ifdef CONFIG_KALLSYMS
+- printk("[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from);
++ printk("[<%08lx>] (%pA) from [<%08lx>] (%pA)\n", where, (void *)where, from, (void *)from);
+ #else
+ printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from);
+ #endif
+@@ -267,6 +267,8 @@ static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+ static int die_owner = -1;
+ static unsigned int die_nest_count;
+
++extern void gr_handle_kernel_exploit(void);
++
+ static unsigned long oops_begin(void)
+ {
+ int cpu;
+@@ -309,6 +311,9 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
++
++ gr_handle_kernel_exploit();
++
+ if (signr)
+ do_exit(signr);
+ }
+diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
+index d24e5dd..77cf6cf 100644
+--- a/arch/arm/kernel/vmlinux.lds.S
++++ b/arch/arm/kernel/vmlinux.lds.S
+@@ -44,7 +44,8 @@
+ #endif
+
+ #if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
+- defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL)
++ defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL) || \
++ defined(CONFIG_PAX_REFCOUNT)
+ #define ARM_EXIT_KEEP(x) x
+ #define ARM_EXIT_DISCARD(x)
+ #else
+diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
+index c94b90d..0cc6830 100644
+--- a/arch/arm/kvm/arm.c
++++ b/arch/arm/kvm/arm.c
+@@ -59,7 +59,7 @@ static unsigned long hyp_default_vectors;
+ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
+
+ /* The VMID used in the VTTBR */
+-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
++static atomic64_unchecked_t kvm_vmid_gen = ATOMIC64_INIT(1);
+ static u32 kvm_next_vmid;
+ static unsigned int kvm_vmid_bits __read_mostly;
+ static DEFINE_SPINLOCK(kvm_vmid_lock);
+@@ -388,7 +388,7 @@ void force_vm_exit(const cpumask_t *mask)
+ */
+ static bool need_new_vmid_gen(struct kvm *kvm)
+ {
+- return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
++ return unlikely(kvm->arch.vmid_gen != atomic64_read_unchecked(&kvm_vmid_gen));
+ }
+
+ /**
+@@ -421,7 +421,7 @@ static void update_vttbr(struct kvm *kvm)
+
+ /* First user of a new VMID generation? */
+ if (unlikely(kvm_next_vmid == 0)) {
+- atomic64_inc(&kvm_vmid_gen);
++ atomic64_inc_unchecked(&kvm_vmid_gen);
+ kvm_next_vmid = 1;
+
+ /*
+@@ -438,7 +438,7 @@ static void update_vttbr(struct kvm *kvm)
+ kvm_call_hyp(__kvm_flush_vm_context);
+ }
+
+- kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
++ kvm->arch.vmid_gen = atomic64_read_unchecked(&kvm_vmid_gen);
+ kvm->arch.vmid = kvm_next_vmid;
+ kvm_next_vmid++;
+ kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
+diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
+index 6ee2f67..d1cce76 100644
+--- a/arch/arm/lib/copy_page.S
++++ b/arch/arm/lib/copy_page.S
+@@ -10,6 +10,7 @@
+ * ASM optimised string functions
+ */
+ #include <linux/linkage.h>
++#include <linux/const.h>
+ #include <asm/assembler.h>
+ #include <asm/asm-offsets.h>
+ #include <asm/cache.h>
+diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
+index 1712f13..a3165dc 100644
+--- a/arch/arm/lib/csumpartialcopyuser.S
++++ b/arch/arm/lib/csumpartialcopyuser.S
+@@ -71,8 +71,8 @@
+ * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT
+ */
+
+-#define FN_ENTRY ENTRY(csum_partial_copy_from_user)
+-#define FN_EXIT ENDPROC(csum_partial_copy_from_user)
++#define FN_ENTRY ENTRY(__csum_partial_copy_from_user)
++#define FN_EXIT ENDPROC(__csum_partial_copy_from_user)
+
+ #include "csumpartialcopygeneric.S"
+
+diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
+index 8044591..c9b2609 100644
+--- a/arch/arm/lib/delay.c
++++ b/arch/arm/lib/delay.c
+@@ -29,7 +29,7 @@
+ /*
+ * Default to the loop-based delay implementation.
+ */
+-struct arm_delay_ops arm_delay_ops = {
++struct arm_delay_ops arm_delay_ops __read_only = {
+ .delay = __loop_delay,
+ .const_udelay = __loop_const_udelay,
+ .udelay = __loop_udelay,
+diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
+index 6bd1089..e999400 100644
+--- a/arch/arm/lib/uaccess_with_memcpy.c
++++ b/arch/arm/lib/uaccess_with_memcpy.c
+@@ -84,7 +84,7 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
+ return 1;
+ }
+
+-static unsigned long noinline
++static unsigned long noinline __size_overflow(3)
+ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
+ {
+ unsigned long ua_flags;
+@@ -157,7 +157,7 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n)
+ return n;
+ }
+
+-static unsigned long noinline
++static unsigned long noinline __size_overflow(2)
+ __clear_user_memset(void __user *addr, unsigned long n)
+ {
+ unsigned long ua_flags;
+diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
+index 06332f6..1fa0c71 100644
+--- a/arch/arm/mach-exynos/suspend.c
++++ b/arch/arm/mach-exynos/suspend.c
+@@ -724,8 +724,10 @@ void __init exynos_pm_init(void)
+ tmp |= pm_data->wake_disable_mask;
+ pmu_raw_writel(tmp, S5P_WAKEUP_MASK);
+
+- exynos_pm_syscore_ops.suspend = pm_data->pm_suspend;
+- exynos_pm_syscore_ops.resume = pm_data->pm_resume;
++ pax_open_kernel();
++ const_cast(exynos_pm_syscore_ops.suspend) = pm_data->pm_suspend;
++ const_cast(exynos_pm_syscore_ops.resume) = pm_data->pm_resume;
++ pax_close_kernel();
+
+ register_syscore_ops(&exynos_pm_syscore_ops);
+ suspend_set_ops(&exynos_suspend_ops);
+diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c
+index afba546..9e5403d 100644
+--- a/arch/arm/mach-mmp/mmp2.c
++++ b/arch/arm/mach-mmp/mmp2.c
+@@ -98,7 +98,9 @@ void __init mmp2_init_irq(void)
+ {
+ mmp2_init_icu();
+ #ifdef CONFIG_PM
+- icu_irq_chip.irq_set_wake = mmp2_set_wake;
++ pax_open_kernel();
++ const_cast(icu_irq_chip.irq_set_wake) = mmp2_set_wake;
++ pax_close_kernel();
+ #endif
+ }
+
+diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
+index 1ccbba9..7a95c29 100644
+--- a/arch/arm/mach-mmp/pxa910.c
++++ b/arch/arm/mach-mmp/pxa910.c
+@@ -84,7 +84,9 @@ void __init pxa910_init_irq(void)
+ {
+ icu_init_irq();
+ #ifdef CONFIG_PM
+- icu_irq_chip.irq_set_wake = pxa910_set_wake;
++ pax_open_kernel();
++ const_cast(icu_irq_chip.irq_set_wake) = pxa910_set_wake;
++ pax_close_kernel();
+ #endif
+ }
+
+diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
+index ae2a018..297ad08 100644
+--- a/arch/arm/mach-mvebu/coherency.c
++++ b/arch/arm/mach-mvebu/coherency.c
+@@ -156,7 +156,7 @@ exit:
+
+ /*
+ * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
+- * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
++ * areas are mapped as MT_UNCACHED_RW instead of MT_DEVICE. This is
+ * needed for the HW I/O coherency mechanism to work properly without
+ * deadlock.
+ */
+@@ -164,7 +164,7 @@ static void __iomem *
+ armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
+ unsigned int mtype, void *caller)
+ {
+- mtype = MT_UNCACHED;
++ mtype = MT_UNCACHED_RW;
+ return __arm_ioremap_caller(phys_addr, size, mtype, caller);
+ }
+
+@@ -174,7 +174,7 @@ static void __init armada_375_380_coherency_init(struct device_node *np)
+
+ coherency_cpu_base = of_iomap(np, 0);
+ arch_ioremap_caller = armada_wa_ioremap_caller;
+- pci_ioremap_set_mem_type(MT_UNCACHED);
++ pci_ioremap_set_mem_type(MT_UNCACHED_RW);
+
+ /*
+ * We should switch the PL310 to I/O coherency mode only if
+diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
+index f39bd51..866c780 100644
+--- a/arch/arm/mach-mvebu/pmsu.c
++++ b/arch/arm/mach-mvebu/pmsu.c
+@@ -93,7 +93,7 @@
+ #define ARMADA_370_CRYPT0_ENG_ATTR 0x1
+
+ extern void ll_disable_coherency(void);
+-extern void ll_enable_coherency(void);
++extern int ll_enable_coherency(void);
+
+ extern void armada_370_xp_cpu_resume(void);
+ extern void armada_38x_cpu_resume(void);
+diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
+index b6443a4..20a0b74 100644
+--- a/arch/arm/mach-omap2/board-n8x0.c
++++ b/arch/arm/mach-omap2/board-n8x0.c
+@@ -569,7 +569,7 @@ static int n8x0_menelaus_late_init(struct device *dev)
+ }
+ #endif
+
+-struct menelaus_platform_data n8x0_menelaus_platform_data __initdata = {
++struct menelaus_platform_data n8x0_menelaus_platform_data __initconst = {
+ .late_init = n8x0_menelaus_late_init,
+ };
+
+diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+index ad98246..69437a8 100644
+--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
++++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+@@ -88,7 +88,7 @@ struct cpu_pm_ops {
+ void (*resume)(void);
+ void (*scu_prepare)(unsigned int cpu_id, unsigned int cpu_state);
+ void (*hotplug_restart)(void);
+-};
++} __no_const;
+
+ static DEFINE_PER_CPU(struct omap4_cpu_pm_info, omap4_pm_info);
+ static struct powerdomain *mpuss_pd;
+@@ -106,7 +106,7 @@ static void dummy_cpu_resume(void)
+ static void dummy_scu_prepare(unsigned int cpu_id, unsigned int cpu_state)
+ {}
+
+-static struct cpu_pm_ops omap_pm_ops = {
++static struct cpu_pm_ops omap_pm_ops __read_only = {
+ .finish_suspend = default_finish_suspend,
+ .resume = dummy_cpu_resume,
+ .scu_prepare = dummy_scu_prepare,
+diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
+index b4de3da..e027393 100644
+--- a/arch/arm/mach-omap2/omap-smp.c
++++ b/arch/arm/mach-omap2/omap-smp.c
+@@ -19,6 +19,7 @@
+ #include <linux/device.h>
+ #include <linux/smp.h>
+ #include <linux/io.h>
++#include <linux/irq.h>
+ #include <linux/irqchip/arm-gic.h>
+
+ #include <asm/smp_scu.h>
+diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
+index e920dd8..ef999171 100644
+--- a/arch/arm/mach-omap2/omap_device.c
++++ b/arch/arm/mach-omap2/omap_device.c
+@@ -530,7 +530,7 @@ void omap_device_delete(struct omap_device *od)
+ struct platform_device __init *omap_device_build(const char *pdev_name,
+ int pdev_id,
+ struct omap_hwmod *oh,
+- void *pdata, int pdata_len)
++ const void *pdata, int pdata_len)
+ {
+ struct omap_hwmod *ohs[] = { oh };
+
+@@ -558,7 +558,7 @@ struct platform_device __init *omap_device_build(const char *pdev_name,
+ struct platform_device __init *omap_device_build_ss(const char *pdev_name,
+ int pdev_id,
+ struct omap_hwmod **ohs,
+- int oh_cnt, void *pdata,
++ int oh_cnt, const void *pdata,
+ int pdata_len)
+ {
+ int ret = -ENOMEM;
+diff --git a/arch/arm/mach-omap2/omap_device.h b/arch/arm/mach-omap2/omap_device.h
+index 78c02b3..c94109a 100644
+--- a/arch/arm/mach-omap2/omap_device.h
++++ b/arch/arm/mach-omap2/omap_device.h
+@@ -72,12 +72,12 @@ int omap_device_idle(struct platform_device *pdev);
+ /* Core code interface */
+
+ struct platform_device *omap_device_build(const char *pdev_name, int pdev_id,
+- struct omap_hwmod *oh, void *pdata,
++ struct omap_hwmod *oh, const void *pdata,
+ int pdata_len);
+
+ struct platform_device *omap_device_build_ss(const char *pdev_name, int pdev_id,
+ struct omap_hwmod **oh, int oh_cnt,
+- void *pdata, int pdata_len);
++ const void *pdata, int pdata_len);
+
+ struct omap_device *omap_device_alloc(struct platform_device *pdev,
+ struct omap_hwmod **ohs, int oh_cnt);
+diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
+index 1052b29..54669b0 100644
+--- a/arch/arm/mach-omap2/omap_hwmod.c
++++ b/arch/arm/mach-omap2/omap_hwmod.c
+@@ -206,10 +206,10 @@ struct omap_hwmod_soc_ops {
+ void (*update_context_lost)(struct omap_hwmod *oh);
+ int (*get_context_lost)(struct omap_hwmod *oh);
+ int (*disable_direct_prcm)(struct omap_hwmod *oh);
+-};
++} __no_const;
+
+ /* soc_ops: adapts the omap_hwmod code to the currently-booted SoC */
+-static struct omap_hwmod_soc_ops soc_ops;
++static struct omap_hwmod_soc_ops soc_ops __read_only;
+
+ /* omap_hwmod_list contains all registered struct omap_hwmods */
+ static LIST_HEAD(omap_hwmod_list);
+diff --git a/arch/arm/mach-omap2/powerdomains43xx_data.c b/arch/arm/mach-omap2/powerdomains43xx_data.c
+index 95fee54..b5dd79d 100644
+--- a/arch/arm/mach-omap2/powerdomains43xx_data.c
++++ b/arch/arm/mach-omap2/powerdomains43xx_data.c
+@@ -10,6 +10,7 @@
+
+ #include <linux/kernel.h>
+ #include <linux/init.h>
++#include <asm/pgtable.h>
+
+ #include "powerdomain.h"
+
+@@ -129,7 +130,9 @@ static int am43xx_check_vcvp(void)
+
+ void __init am43xx_powerdomains_init(void)
+ {
+- omap4_pwrdm_operations.pwrdm_has_voltdm = am43xx_check_vcvp;
++ pax_open_kernel();
++ const_cast(omap4_pwrdm_operations.pwrdm_has_voltdm) = am43xx_check_vcvp;
++ pax_close_kernel();
+ pwrdm_register_platform_funcs(&omap4_pwrdm_operations);
+ pwrdm_register_pwrdms(powerdomains_am43xx);
+ pwrdm_complete_init();
+diff --git a/arch/arm/mach-omap2/wd_timer.c b/arch/arm/mach-omap2/wd_timer.c
+index ff0a68c..b312aa0 100644
+--- a/arch/arm/mach-omap2/wd_timer.c
++++ b/arch/arm/mach-omap2/wd_timer.c
+@@ -110,7 +110,9 @@ static int __init omap_init_wdt(void)
+ struct omap_hwmod *oh;
+ char *oh_name = "wd_timer2";
+ char *dev_name = "omap_wdt";
+- struct omap_wd_timer_platform_data pdata;
++ static struct omap_wd_timer_platform_data pdata = {
++ .read_reset_sources = prm_read_reset_sources
++ };
+
+ if (!cpu_class_is_omap2() || of_have_populated_dt())
+ return 0;
+@@ -121,8 +123,6 @@ static int __init omap_init_wdt(void)
+ return -EINVAL;
+ }
+
+- pdata.read_reset_sources = prm_read_reset_sources;
+-
+ pdev = omap_device_build(dev_name, id, oh, &pdata,
+ sizeof(struct omap_wd_timer_platform_data));
+ WARN(IS_ERR(pdev), "Can't build omap_device for %s:%s.\n",
+diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
+index 92ec8c3..3b09472 100644
+--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
++++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
+@@ -240,7 +240,7 @@ static struct platform_device smdk6410_b_pwr_5v = {
+ };
+ #endif
+
+-static struct s3c_ide_platdata smdk6410_ide_pdata __initdata = {
++static const struct s3c_ide_platdata smdk6410_ide_pdata __initconst = {
+ .setup_gpio = s3c64xx_ide_setup_gpio,
+ };
+
+diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c
+index 0c6bb45..0f18d70 100644
+--- a/arch/arm/mach-shmobile/platsmp-apmu.c
++++ b/arch/arm/mach-shmobile/platsmp-apmu.c
+@@ -22,6 +22,7 @@
+ #include <asm/proc-fns.h>
+ #include <asm/smp_plat.h>
+ #include <asm/suspend.h>
++#include <asm/pgtable.h>
+ #include "common.h"
+ #include "platsmp-apmu.h"
+ #include "rcar-gen2.h"
+@@ -316,6 +317,8 @@ static int shmobile_smp_apmu_enter_suspend(suspend_state_t state)
+
+ void __init shmobile_smp_apmu_suspend_init(void)
+ {
+- shmobile_suspend_ops.enter = shmobile_smp_apmu_enter_suspend;
++ pax_open_kernel();
++ const_cast(shmobile_suspend_ops.enter) = shmobile_smp_apmu_enter_suspend;
++ pax_close_kernel();
+ }
+ #endif
+diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
+index afcee04..63e52ac 100644
+--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
++++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
+@@ -178,7 +178,7 @@ static int tegra20_idle_lp2_coupled(struct cpuidle_device *dev,
+ bool entered_lp2 = false;
+
+ if (tegra_pending_sgi())
+- ACCESS_ONCE(abort_flag) = true;
++ ACCESS_ONCE_RW(abort_flag) = true;
+
+ cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
+
+diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c
+index a69b22d..8523a03 100644
+--- a/arch/arm/mach-tegra/irq.c
++++ b/arch/arm/mach-tegra/irq.c
+@@ -20,6 +20,7 @@
+ #include <linux/cpu_pm.h>
+ #include <linux/interrupt.h>
+ #include <linux/io.h>
++#include <linux/irq.h>
+ #include <linux/irqchip/arm-gic.h>
+ #include <linux/irq.h>
+ #include <linux/kernel.h>
+diff --git a/arch/arm/mach-ux500/pm.c b/arch/arm/mach-ux500/pm.c
+index 8538910..2f39bc4 100644
+--- a/arch/arm/mach-ux500/pm.c
++++ b/arch/arm/mach-ux500/pm.c
+@@ -10,6 +10,7 @@
+ */
+
+ #include <linux/kernel.h>
++#include <linux/irq.h>
+ #include <linux/irqchip/arm-gic.h>
+ #include <linux/delay.h>
+ #include <linux/io.h>
+diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c
+index 7cd9865..a00b6ab 100644
+--- a/arch/arm/mach-zynq/platsmp.c
++++ b/arch/arm/mach-zynq/platsmp.c
+@@ -24,6 +24,7 @@
+ #include <linux/io.h>
+ #include <asm/cacheflush.h>
+ #include <asm/smp_scu.h>
++#include <linux/irq.h>
+ #include <linux/irqchip/arm-gic.h>
+ #include "common.h"
+
+diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
+index d15a7fe..6cc4fc9 100644
+--- a/arch/arm/mm/Kconfig
++++ b/arch/arm/mm/Kconfig
+@@ -445,6 +445,7 @@ config CPU_32v5
+
+ config CPU_32v6
+ bool
++ select CPU_USE_DOMAINS if CPU_V6 && MMU && !PAX_KERNEXEC && !PAX_MEMORY_UDEREF
+ select TLS_REG_EMUL if !CPU_32v6K && !MMU
+
+ config CPU_32v6K
+@@ -599,6 +600,7 @@ config CPU_CP15_MPU
+
+ config CPU_USE_DOMAINS
+ bool
++ depends on !ARM_LPAE && !PAX_KERNEXEC && !PAX_MEMORY_UDEREF
+ help
+ This option enables or disables the use of domain switching
+ via the set_fs() function.
+@@ -809,7 +811,7 @@ config NEED_KUSER_HELPERS
+
+ config KUSER_HELPERS
+ bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+- depends on MMU
++ depends on MMU && (!(CPU_V6 || CPU_V6K || CPU_V7) || GRKERNSEC_OLD_ARM_USERLAND)
+ default y
+ help
+ Warning: disabling this option may break user programs.
+@@ -823,7 +825,7 @@ config KUSER_HELPERS
+ See Documentation/arm/kernel_user_helpers.txt for details.
+
+ However, the fixed address nature of these helpers can be used
+- by ROP (return orientated programming) authors when creating
++ by ROP (Return Oriented Programming) authors when creating
+ exploits.
+
+ If all of the binaries and libraries which run on your platform
+@@ -838,7 +840,7 @@ config KUSER_HELPERS
+
+ config VDSO
+ bool "Enable VDSO for acceleration of some system calls"
+- depends on AEABI && MMU && CPU_V7
++ depends on AEABI && MMU && CPU_V7 && !PAX_KERNEXEC && !PAX_MEMORY_UDEREF
+ default y if ARM_ARCH_TIMER
+ select GENERIC_TIME_VSYSCALL
+ help
+diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
+index 7d5f4c7..c6a0816 100644
+--- a/arch/arm/mm/alignment.c
++++ b/arch/arm/mm/alignment.c
+@@ -778,6 +778,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ u16 tinstr = 0;
+ int isize = 4;
+ int thumb2_32b = 0;
++ bool is_user_mode = user_mode(regs);
+
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+@@ -786,14 +787,24 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+
+ if (thumb_mode(regs)) {
+ u16 *ptr = (u16 *)(instrptr & ~1);
+- fault = probe_kernel_address(ptr, tinstr);
++ if (is_user_mode) {
++ pax_open_userland();
++ fault = probe_kernel_address(ptr, tinstr);
++ pax_close_userland();
++ } else
++ fault = probe_kernel_address(ptr, tinstr);
+ tinstr = __mem_to_opcode_thumb16(tinstr);
+ if (!fault) {
+ if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
+ IS_T32(tinstr)) {
+ /* Thumb-2 32-bit */
+ u16 tinst2 = 0;
+- fault = probe_kernel_address(ptr + 1, tinst2);
++ if (is_user_mode) {
++ pax_open_userland();
++ fault = probe_kernel_address(ptr + 1, tinst2);
++ pax_close_userland();
++ } else
++ fault = probe_kernel_address(ptr + 1, tinst2);
+ tinst2 = __mem_to_opcode_thumb16(tinst2);
+ instr = __opcode_thumb32_compose(tinstr, tinst2);
+ thumb2_32b = 1;
+@@ -803,7 +814,12 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ }
+ }
+ } else {
+- fault = probe_kernel_address((void *)instrptr, instr);
++ if (is_user_mode) {
++ pax_open_userland();
++ fault = probe_kernel_address((void *)instrptr, instr);
++ pax_close_userland();
++ } else
++ fault = probe_kernel_address((void *)instrptr, instr);
+ instr = __mem_to_opcode_arm(instr);
+ }
+
+@@ -812,7 +828,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ goto bad_or_fault;
+ }
+
+- if (user_mode(regs))
++ if (is_user_mode)
+ goto user;
+
+ ai_sys += 1;
+diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
+index cc12905..88463b3 100644
+--- a/arch/arm/mm/cache-l2x0.c
++++ b/arch/arm/mm/cache-l2x0.c
+@@ -44,7 +44,7 @@ struct l2c_init_data {
+ void (*configure)(void __iomem *);
+ void (*unlock)(void __iomem *, unsigned);
+ struct outer_cache_fns outer_cache;
+-};
++} __do_const;
+
+ #define CACHE_LINE_SIZE 32
+
+diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
+index c8c8b9e..c55cc79 100644
+--- a/arch/arm/mm/context.c
++++ b/arch/arm/mm/context.c
+@@ -43,7 +43,7 @@
+ #define NUM_USER_ASIDS ASID_FIRST_VERSION
+
+ static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
+-static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
++static atomic64_unchecked_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
+ static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
+
+ static DEFINE_PER_CPU(atomic64_t, active_asids);
+@@ -193,7 +193,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
+ {
+ static u32 cur_idx = 1;
+ u64 asid = atomic64_read(&mm->context.id);
+- u64 generation = atomic64_read(&asid_generation);
++ u64 generation = atomic64_read_unchecked(&asid_generation);
+
+ if (asid != 0) {
+ u64 newasid = generation | (asid & ~ASID_MASK);
+@@ -225,7 +225,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
+ */
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+ if (asid == NUM_USER_ASIDS) {
+- generation = atomic64_add_return(ASID_FIRST_VERSION,
++ generation = atomic64_add_return_unchecked(ASID_FIRST_VERSION,
+ &asid_generation);
+ flush_context(cpu);
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+@@ -254,14 +254,14 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
+ cpu_set_reserved_ttbr0();
+
+ asid = atomic64_read(&mm->context.id);
+- if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS)
++ if (!((asid ^ atomic64_read_unchecked(&asid_generation)) >> ASID_BITS)
+ && atomic64_xchg(&per_cpu(active_asids, cpu), asid))
+ goto switch_mm_fastpath;
+
+ raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ asid = atomic64_read(&mm->context.id);
+- if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) {
++ if ((asid ^ atomic64_read_unchecked(&asid_generation)) >> ASID_BITS) {
+ asid = new_context(mm, cpu);
+ atomic64_set(&mm->context.id, asid);
+ }
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index 3a2e678..ebdbf80 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -25,6 +25,7 @@
+ #include <asm/system_misc.h>
+ #include <asm/system_info.h>
+ #include <asm/tlbflush.h>
++#include <asm/sections.h>
+
+ #include "fault.h"
+
+@@ -138,6 +139,31 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+ if (fixup_exception(regs))
+ return;
+
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ if (addr < TASK_SIZE) {
++ if (current->signal->curr_ip)
++ printk(KERN_EMERG "PAX: From %pI4: %s:%d, uid/euid: %u/%u, attempted to access userland memory at %08lx\n", &current->signal->curr_ip, current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()), addr);
++ else
++ printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to access userland memory at %08lx\n", current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()), addr);
++ }
++#endif
++
++#ifdef CONFIG_PAX_KERNEXEC
++ if ((fsr & FSR_WRITE) &&
++ (((unsigned long)_stext <= addr && addr < init_mm.end_code) ||
++ (MODULES_VADDR <= addr && addr < MODULES_END)))
++ {
++ if (current->signal->curr_ip)
++ printk(KERN_EMERG "PAX: From %pI4: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", &current->signal->curr_ip, current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()));
++ else
++ printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()));
++ }
++#endif
++
+ /*
+ * No handler, we'll have to terminate things with extreme prejudice.
+ */
+@@ -173,6 +199,13 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr,
+ }
+ #endif
+
++#ifdef CONFIG_PAX_PAGEEXEC
++ if ((tsk->mm->pax_flags & MF_PAX_PAGEEXEC) && (fsr & FSR_LNX_PF)) {
++ pax_report_fault(regs, (void *)regs->ARM_pc, (void *)regs->ARM_sp);
++ do_group_exit(SIGKILL);
++ }
++#endif
++
+ tsk->thread.address = addr;
+ tsk->thread.error_code = fsr;
+ tsk->thread.trap_no = 14;
+@@ -400,6 +433,33 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ }
+ #endif /* CONFIG_MMU */
+
++#ifdef CONFIG_PAX_PAGEEXEC
++void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
++{
++ long i;
++
++ printk(KERN_ERR "PAX: bytes at PC: ");
++ for (i = 0; i < 20; i++) {
++ unsigned char c;
++ if (get_user(c, (__force unsigned char __user *)pc+i))
++ printk(KERN_CONT "?? ");
++ else
++ printk(KERN_CONT "%02x ", c);
++ }
++ printk("\n");
++
++ printk(KERN_ERR "PAX: bytes at SP-4: ");
++ for (i = -1; i < 20; i++) {
++ unsigned long c;
++ if (get_user(c, (__force unsigned long __user *)sp+i))
++ printk(KERN_CONT "???????? ");
++ else
++ printk(KERN_CONT "%08lx ", c);
++ }
++ printk("\n");
++}
++#endif
++
+ /*
+ * First Level Translation Fault Handler
+ *
+@@ -547,9 +607,22 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
+ struct siginfo info;
+
++#ifdef CONFIG_PAX_MEMORY_UDEREF
++ if (addr < TASK_SIZE && is_domain_fault(fsr)) {
++ if (current->signal->curr_ip)
++ printk(KERN_EMERG "PAX: From %pI4: %s:%d, uid/euid: %u/%u, attempted to access userland memory at %08lx\n", &current->signal->curr_ip, current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()), addr);
++ else
++ printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to access userland memory at %08lx\n", current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()), addr);
++ goto die;
++ }
++#endif
++
+ if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
+ return;
+
++die:
+ pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
+ inf->name, fsr, addr);
+ show_pte(current->mm, addr);
+@@ -574,15 +647,118 @@ hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *
+ ifsr_info[nr].name = name;
+ }
+
++asmlinkage int sys_sigreturn(struct pt_regs *regs);
++asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
++
+ asmlinkage void __exception
+ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
+ {
+ const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
+ struct siginfo info;
++ unsigned long pc = instruction_pointer(regs);
++
++ if (user_mode(regs)) {
++ unsigned long sigpage = current->mm->context.sigpage;
++
++ if (sigpage <= pc && pc < sigpage + 7*4) {
++ if (pc < sigpage + 3*4)
++ sys_sigreturn(regs);
++ else
++ sys_rt_sigreturn(regs);
++ return;
++ }
++ if (pc == 0xffff0f60UL) {
++ /*
++ * PaX: __kuser_cmpxchg64 emulation
++ */
++ // TODO
++ //regs->ARM_pc = regs->ARM_lr;
++ //return;
++ }
++ if (pc == 0xffff0fa0UL) {
++ /*
++ * PaX: __kuser_memory_barrier emulation
++ */
++ // dmb(); implied by the exception
++ regs->ARM_pc = regs->ARM_lr;
++#ifdef CONFIG_ARM_THUMB
++ if (regs->ARM_lr & 1) {
++ regs->ARM_cpsr |= PSR_T_BIT;
++ regs->ARM_pc &= ~0x1U;
++ } else
++ regs->ARM_cpsr &= ~PSR_T_BIT;
++#endif
++ return;
++ }
++ if (pc == 0xffff0fc0UL) {
++ /*
++ * PaX: __kuser_cmpxchg emulation
++ */
++ // TODO
++ //long new;
++ //int op;
++
++ //op = FUTEX_OP_SET << 28;
++ //new = futex_atomic_op_inuser(op, regs->ARM_r2);
++ //regs->ARM_r0 = old != new;
++ //regs->ARM_pc = regs->ARM_lr;
++ //return;
++ }
++ if (pc == 0xffff0fe0UL) {
++ /*
++ * PaX: __kuser_get_tls emulation
++ */
++ regs->ARM_r0 = current_thread_info()->tp_value[0];
++ regs->ARM_pc = regs->ARM_lr;
++#ifdef CONFIG_ARM_THUMB
++ if (regs->ARM_lr & 1) {
++ regs->ARM_cpsr |= PSR_T_BIT;
++ regs->ARM_pc &= ~0x1U;
++ } else
++ regs->ARM_cpsr &= ~PSR_T_BIT;
++#endif
++ return;
++ }
++ }
++
++#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
++ else if (is_domain_fault(ifsr) || is_xn_fault(ifsr)) {
++ if (current->signal->curr_ip)
++ printk(KERN_EMERG "PAX: From %pI4: %s:%d, uid/euid: %u/%u, attempted to execute %s memory at %08lx\n", &current->signal->curr_ip, current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()),
++ pc >= TASK_SIZE ? "non-executable kernel" : "userland", pc);
++ else
++ printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to execute %s memory at %08lx\n", current->comm, task_pid_nr(current),
++ from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()),
++ pc >= TASK_SIZE ? "non-executable kernel" : "userland", pc);
++ goto die;
++ }
++#endif
++
++#ifdef CONFIG_PAX_REFCOUNT
++ if (fsr_fs(ifsr) == FAULT_CODE_DEBUG) {
++#ifdef CONFIG_THUMB2_KERNEL
++ unsigned short bkpt;
++
++ if (!probe_kernel_address((const unsigned short *)pc, bkpt) && cpu_to_le16(bkpt) == 0xbef1) {
++#else
++ unsigned int bkpt;
++
++ if (!probe_kernel_address((const unsigned int *)pc, bkpt) && cpu_to_le32(bkpt) == 0xe12f1073) {
++#endif
++ current->thread.error_code = ifsr;
++ current->thread.trap_no = 0;
++ pax_report_refcount_error(regs, NULL);
++ fixup_exception(regs);
++ return;
++ }
++ }
++#endif
+
+ if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
+ return;
+
++die:
+ pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
+ inf->name, ifsr, addr);
+
+diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
+index 05ec5e0..0b70277 100644
+--- a/arch/arm/mm/fault.h
++++ b/arch/arm/mm/fault.h
+@@ -3,6 +3,7 @@
+
+ /*
+ * Fault status register encodings. We steal bit 31 for our own purposes.
++ * Set when the FSR value is from an instruction fault.
+ */
+ #define FSR_LNX_PF (1 << 31)
+ #define FSR_WRITE (1 << 11)
+@@ -22,6 +23,17 @@ static inline int fsr_fs(unsigned int fsr)
+ }
+ #endif
+
++/* valid for LPAE and !LPAE */
++static inline int is_xn_fault(unsigned int fsr)
++{
++ return ((fsr_fs(fsr) & 0x3c) == 0xc);
++}
++
++static inline int is_domain_fault(unsigned int fsr)
++{
++ return ((fsr_fs(fsr) & 0xD) == 0x9);
++}
++
+ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
+ unsigned long search_exception_table(unsigned long addr);
+ void early_abt_enable(void);
+diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
+index 370581a..b985cc1 100644
+--- a/arch/arm/mm/init.c
++++ b/arch/arm/mm/init.c
+@@ -747,7 +747,46 @@ void free_tcmmem(void)
+ {
+ #ifdef CONFIG_HAVE_TCM
+ extern char __tcm_start, __tcm_end;
++#endif
+
++#ifdef CONFIG_PAX_KERNEXEC
++ unsigned long addr;
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++ int cpu_arch = cpu_architecture();
++ unsigned int cr = get_cr();
++
++ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
++ /* make page tables, etc. before .text NX */
++ for (addr = PAGE_OFFSET; addr < (unsigned long)_stext; addr += SECTION_SIZE) {
++ pgd = pgd_offset_k(addr);
++ pud = pud_offset(pgd, addr);
++ pmd = pmd_offset(pud, addr);
++ __section_update(pmd, addr, PMD_SECT_XN);
++ }
++ /* make init NX */
++ for (addr = (unsigned long)__init_begin; addr < (unsigned long)_sdata; addr += SECTION_SIZE) {
++ pgd = pgd_offset_k(addr);
++ pud = pud_offset(pgd, addr);
++ pmd = pmd_offset(pud, addr);
++ __section_update(pmd, addr, PMD_SECT_XN);
++ }
++ /* make kernel code/rodata RX */
++ for (addr = (unsigned long)_stext; addr < (unsigned long)__init_begin; addr += SECTION_SIZE) {
++ pgd = pgd_offset_k(addr);
++ pud = pud_offset(pgd, addr);
++ pmd = pmd_offset(pud, addr);
++#ifdef CONFIG_ARM_LPAE
++ __section_update(pmd, addr, PMD_SECT_RDONLY);
++#else
++ __section_update(pmd, addr, PMD_SECT_APX|PMD_SECT_AP_WRITE);
++#endif
++ }
++ }
++#endif
++
++#ifdef CONFIG_HAVE_TCM
+ poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
+ free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
+ #endif
+diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
+index ff0eed2..f17f1c9 100644
+--- a/arch/arm/mm/ioremap.c
++++ b/arch/arm/mm/ioremap.c
+@@ -411,9 +411,9 @@ __arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached)
+ unsigned int mtype;
+
+ if (cached)
+- mtype = MT_MEMORY_RWX;
++ mtype = MT_MEMORY_RX;
+ else
+- mtype = MT_MEMORY_RWX_NONCACHED;
++ mtype = MT_MEMORY_RX_NONCACHED;
+
+ return __arm_ioremap_caller(phys_addr, size, mtype,
+ __builtin_return_address(0));
+diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
+index 66353ca..8aad9f8 100644
+--- a/arch/arm/mm/mmap.c
++++ b/arch/arm/mm/mmap.c
+@@ -59,6 +59,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ struct vm_area_struct *vma;
+ int do_align = 0;
+ int aliasing = cache_is_vipt_aliasing();
++ unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags);
+ struct vm_unmapped_area_info info;
+
+ /*
+@@ -81,6 +82,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (addr) {
+ if (do_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+@@ -88,8 +93,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+- if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len, offset))
+ return addr;
+ }
+
+@@ -99,19 +103,21 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ info.high_limit = TASK_SIZE;
+ info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
++ info.threadstack_offset = offset;
+ return vm_unmapped_area(&info);
+ }
+
+ unsigned long
+-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+- const unsigned long len, const unsigned long pgoff,
+- const unsigned long flags)
++arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0,
++ unsigned long len, unsigned long pgoff,
++ unsigned long flags)
+ {
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ int do_align = 0;
+ int aliasing = cache_is_vipt_aliasing();
++ unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags);
+ struct vm_unmapped_area_info info;
+
+ /*
+@@ -132,6 +138,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ return addr;
+ }
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ /* requesting a specific address */
+ if (addr) {
+ if (do_align)
+@@ -139,8 +149,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ else
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+- if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len, offset))
+ return addr;
+ }
+
+@@ -150,6 +159,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ info.high_limit = mm->mmap_base;
+ info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
++ info.threadstack_offset = offset;
+ addr = vm_unmapped_area(&info);
+
+ /*
+@@ -182,14 +192,30 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ {
+ unsigned long random_factor = 0UL;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+ if (mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base += mm->delta_mmap;
++#endif
++
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor);
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
++#endif
++
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ }
+ }
+diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
+index 30fe03f..738d54e 100644
+--- a/arch/arm/mm/mmu.c
++++ b/arch/arm/mm/mmu.c
+@@ -243,7 +243,15 @@ __setup("noalign", noalign_setup);
+ #define PROT_PTE_S2_DEVICE PROT_PTE_DEVICE
+ #define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE
+
+-static struct mem_type mem_types[] = {
++#ifdef CONFIG_PAX_KERNEXEC
++#define L_PTE_KERNEXEC L_PTE_RDONLY
++#define PMD_SECT_KERNEXEC PMD_SECT_RDONLY
++#else
++#define L_PTE_KERNEXEC L_PTE_DIRTY
++#define PMD_SECT_KERNEXEC PMD_SECT_AP_WRITE
++#endif
++
++static struct mem_type mem_types[] __read_only = {
+ [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
+ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
+ L_PTE_SHARED,
+@@ -272,19 +280,19 @@ static struct mem_type mem_types[] = {
+ .prot_sect = PROT_SECT_DEVICE,
+ .domain = DOMAIN_IO,
+ },
+- [MT_UNCACHED] = {
++ [MT_UNCACHED_RW] = {
+ .prot_pte = PROT_PTE_DEVICE,
+ .prot_l1 = PMD_TYPE_TABLE,
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
+ .domain = DOMAIN_IO,
+ },
+- [MT_CACHECLEAN] = {
+- .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
++ [MT_CACHECLEAN_RO] = {
++ .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_RDONLY,
+ .domain = DOMAIN_KERNEL,
+ },
+ #ifndef CONFIG_ARM_LPAE
+- [MT_MINICLEAN] = {
+- .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
++ [MT_MINICLEAN_RO] = {
++ .prot_sect = PMD_TYPE_SECT | PMD_SECT_MINICACHE | PMD_SECT_XN | PMD_SECT_RDONLY,
+ .domain = DOMAIN_KERNEL,
+ },
+ #endif
+@@ -300,7 +308,7 @@ static struct mem_type mem_types[] = {
+ .prot_l1 = PMD_TYPE_TABLE,
+ .domain = DOMAIN_VECTORS,
+ },
+- [MT_MEMORY_RWX] = {
++ [__MT_MEMORY_RWX] = {
+ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
+ .prot_l1 = PMD_TYPE_TABLE,
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+@@ -313,17 +321,30 @@ static struct mem_type mem_types[] = {
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+ .domain = DOMAIN_KERNEL,
+ },
+- [MT_ROM] = {
+- .prot_sect = PMD_TYPE_SECT,
++ [MT_MEMORY_RX] = {
++ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_KERNEXEC,
++ .prot_l1 = PMD_TYPE_TABLE,
++ .prot_sect = PMD_TYPE_SECT | PMD_SECT_KERNEXEC,
++ .domain = DOMAIN_KERNEL,
++ },
++ [MT_ROM_RX] = {
++ .prot_sect = PMD_TYPE_SECT | PMD_SECT_RDONLY,
+ .domain = DOMAIN_KERNEL,
+ },
+- [MT_MEMORY_RWX_NONCACHED] = {
++ [MT_MEMORY_RW_NONCACHED] = {
+ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+ L_PTE_MT_BUFFERABLE,
+ .prot_l1 = PMD_TYPE_TABLE,
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+ .domain = DOMAIN_KERNEL,
+ },
++ [MT_MEMORY_RX_NONCACHED] = {
++ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_KERNEXEC |
++ L_PTE_MT_BUFFERABLE,
++ .prot_l1 = PMD_TYPE_TABLE,
++ .prot_sect = PMD_TYPE_SECT | PMD_SECT_KERNEXEC,
++ .domain = DOMAIN_KERNEL,
++ },
+ [MT_MEMORY_RW_DTCM] = {
+ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+ L_PTE_XN,
+@@ -331,9 +352,10 @@ static struct mem_type mem_types[] = {
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
+ .domain = DOMAIN_KERNEL,
+ },
+- [MT_MEMORY_RWX_ITCM] = {
+- .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
++ [MT_MEMORY_RX_ITCM] = {
++ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_KERNEXEC,
+ .prot_l1 = PMD_TYPE_TABLE,
++ .prot_sect = PMD_TYPE_SECT | PMD_SECT_KERNEXEC,
+ .domain = DOMAIN_KERNEL,
+ },
+ [MT_MEMORY_RW_SO] = {
+@@ -586,9 +608,14 @@ static void __init build_mem_type_table(void)
+ * Mark cache clean areas and XIP ROM read only
+ * from SVC mode and no access from userspace.
+ */
+- mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+- mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+- mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
++ mem_types[MT_ROM_RX].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
++#ifdef CONFIG_PAX_KERNEXEC
++ mem_types[MT_MEMORY_RX].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
++ mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
++ mem_types[MT_MEMORY_RX_ITCM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
++#endif
++ mem_types[MT_MINICLEAN_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
++ mem_types[MT_CACHECLEAN_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+ #endif
+
+ /*
+@@ -605,13 +632,17 @@ static void __init build_mem_type_table(void)
+ mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
+ mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
+- mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
+- mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
++ mem_types[__MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
++ mem_types[__MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
+ mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
++ mem_types[MT_MEMORY_RX].prot_sect |= PMD_SECT_S;
++ mem_types[MT_MEMORY_RX].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
+- mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
+- mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
++ mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |= PMD_SECT_S;
++ mem_types[MT_MEMORY_RW_NONCACHED].prot_pte |= L_PTE_SHARED;
++ mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= PMD_SECT_S;
++ mem_types[MT_MEMORY_RX_NONCACHED].prot_pte |= L_PTE_SHARED;
+ }
+ }
+
+@@ -622,15 +653,20 @@ static void __init build_mem_type_table(void)
+ if (cpu_arch >= CPU_ARCH_ARMv6) {
+ if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+ /* Non-cacheable Normal is XCB = 001 */
+- mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
++ mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |=
++ PMD_SECT_BUFFERED;
++ mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |=
+ PMD_SECT_BUFFERED;
+ } else {
+ /* For both ARMv6 and non-TEX-remapping ARMv7 */
+- mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
++ mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |=
++ PMD_SECT_TEX(1);
++ mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |=
+ PMD_SECT_TEX(1);
+ }
+ } else {
+- mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
++ mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
++ mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+ }
+
+ #ifdef CONFIG_ARM_LPAE
+@@ -651,6 +687,8 @@ static void __init build_mem_type_table(void)
+ user_pgprot |= PTE_EXT_PXN;
+ #endif
+
++ user_pgprot |= __supported_pte_mask;
++
+ for (i = 0; i < 16; i++) {
+ pteval_t v = pgprot_val(protection_map[i]);
+ protection_map[i] = __pgprot(v | user_pgprot);
+@@ -668,21 +706,24 @@ static void __init build_mem_type_table(void)
+
+ mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
+ mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
+- mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
+- mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
++ mem_types[__MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
++ mem_types[__MT_MEMORY_RWX].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
+ mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
++ mem_types[MT_MEMORY_RX].prot_sect |= ecc_mask | cp->pmd;
++ mem_types[MT_MEMORY_RX].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
+- mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
+- mem_types[MT_ROM].prot_sect |= cp->pmd;
++ mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |= ecc_mask;
++ mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= ecc_mask;
++ mem_types[MT_ROM_RX].prot_sect |= cp->pmd;
+
+ switch (cp->pmd) {
+ case PMD_SECT_WT:
+- mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
++ mem_types[MT_CACHECLEAN_RO].prot_sect |= PMD_SECT_WT;
+ break;
+ case PMD_SECT_WB:
+ case PMD_SECT_WBWA:
+- mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
++ mem_types[MT_CACHECLEAN_RO].prot_sect |= PMD_SECT_WB;
+ break;
+ }
+ pr_info("Memory policy: %sData cache %s\n",
+@@ -959,7 +1000,7 @@ static void __init create_mapping(struct map_desc *md)
+ return;
+ }
+
+- if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
++ if ((md->type == MT_DEVICE || md->type == MT_ROM_RX) &&
+ md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START &&
+ (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
+ pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
+@@ -1320,18 +1361,15 @@ void __init arm_mm_memblock_reserve(void)
+ * Any other function or debugging method which may touch any device _will_
+ * crash the kernel.
+ */
++
++static char vectors[PAGE_SIZE * 2] __read_only __aligned(PAGE_SIZE);
++
+ static void __init devicemaps_init(const struct machine_desc *mdesc)
+ {
+ struct map_desc map;
+ unsigned long addr;
+- void *vectors;
+
+- /*
+- * Allocate the vector page early.
+- */
+- vectors = early_alloc(PAGE_SIZE * 2);
+-
+- early_trap_init(vectors);
++ early_trap_init(&vectors);
+
+ /*
+ * Clear page table except top pmd used by early fixmaps
+@@ -1347,7 +1385,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
+ map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
+ map.virtual = MODULES_VADDR;
+ map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK;
+- map.type = MT_ROM;
++ map.type = MT_ROM_RX;
+ create_mapping(&map);
+ #endif
+
+@@ -1358,14 +1396,14 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
+ map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
+ map.virtual = FLUSH_BASE;
+ map.length = SZ_1M;
+- map.type = MT_CACHECLEAN;
++ map.type = MT_CACHECLEAN_RO;
+ create_mapping(&map);
+ #endif
+ #ifdef FLUSH_BASE_MINICACHE
+ map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
+ map.virtual = FLUSH_BASE_MINICACHE;
+ map.length = SZ_1M;
+- map.type = MT_MINICLEAN;
++ map.type = MT_MINICLEAN_RO;
+ create_mapping(&map);
+ #endif
+
+@@ -1374,7 +1412,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
+ * location (0xffff0000). If we aren't using high-vectors, also
+ * create a mapping at the low-vectors virtual address.
+ */
+- map.pfn = __phys_to_pfn(virt_to_phys(vectors));
++ map.pfn = __phys_to_pfn(virt_to_phys(&vectors));
+ map.virtual = 0xffff0000;
+ map.length = PAGE_SIZE;
+ #ifdef CONFIG_KUSER_HELPERS
+@@ -1437,12 +1475,14 @@ static void __init kmap_init(void)
+ static void __init map_lowmem(void)
+ {
+ struct memblock_region *reg;
++#ifndef CONFIG_PAX_KERNEXEC
+ #ifdef CONFIG_XIP_KERNEL
+ phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE);
+ #else
+ phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+ #endif
+ phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
++#endif
+
+ /* Map all the lowmem memory banks. */
+ for_each_memblock(memory, reg) {
+@@ -1458,11 +1498,48 @@ static void __init map_lowmem(void)
+ if (start >= end)
+ break;
+
++#ifdef CONFIG_PAX_KERNEXEC
++ map.pfn = __phys_to_pfn(start);
++ map.virtual = __phys_to_virt(start);
++ map.length = end - start;
++
++ if (map.virtual <= (unsigned long)_stext && ((unsigned long)_end < (map.virtual + map.length))) {
++ struct map_desc kernel;
++ struct map_desc initmap;
++
++ /* when freeing initmem we will make this RW */
++ initmap.pfn = __phys_to_pfn(__pa(__init_begin));
++ initmap.virtual = (unsigned long)__init_begin;
++ initmap.length = _sdata - __init_begin;
++ initmap.type = __MT_MEMORY_RWX;
++ create_mapping(&initmap);
++
++ /* when freeing initmem we will make this RX */
++ kernel.pfn = __phys_to_pfn(__pa(_stext));
++ kernel.virtual = (unsigned long)_stext;
++ kernel.length = __init_begin - _stext;
++ kernel.type = __MT_MEMORY_RWX;
++ create_mapping(&kernel);
++
++ if (map.virtual < (unsigned long)_stext) {
++ map.length = (unsigned long)_stext - map.virtual;
++ map.type = __MT_MEMORY_RWX;
++ create_mapping(&map);
++ }
++
++ map.pfn = __phys_to_pfn(__pa(_sdata));
++ map.virtual = (unsigned long)_sdata;
++ map.length = end - __pa(_sdata);
++ }
++
++ map.type = MT_MEMORY_RW;
++ create_mapping(&map);
++#else
+ if (end < kernel_x_start) {
+ map.pfn = __phys_to_pfn(start);
+ map.virtual = __phys_to_virt(start);
+ map.length = end - start;
+- map.type = MT_MEMORY_RWX;
++ map.type = __MT_MEMORY_RWX;
+
+ create_mapping(&map);
+ } else if (start >= kernel_x_end) {
+@@ -1486,7 +1563,7 @@ static void __init map_lowmem(void)
+ map.pfn = __phys_to_pfn(kernel_x_start);
+ map.virtual = __phys_to_virt(kernel_x_start);
+ map.length = kernel_x_end - kernel_x_start;
+- map.type = MT_MEMORY_RWX;
++ map.type = __MT_MEMORY_RWX;
+
+ create_mapping(&map);
+
+@@ -1499,6 +1576,7 @@ static void __init map_lowmem(void)
+ create_mapping(&map);
+ }
+ }
++#endif
+ }
+ }
+
+diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
+index 93d0b6d..2db6d99 100644
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -20,6 +20,7 @@
+ #include <asm/cacheflush.h>
+ #include <asm/hwcap.h>
+ #include <asm/opcodes.h>
++#include <asm/pgtable.h>
+
+ #include "bpf_jit_32.h"
+
+@@ -72,54 +73,38 @@ struct jit_ctx {
+ #endif
+ };
+
++#ifdef CONFIG_GRKERNSEC_BPF_HARDEN
++int bpf_jit_enable __read_only;
++#else
+ int bpf_jit_enable __read_mostly;
++#endif
+
+-static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
+- unsigned int size)
+-{
+- void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
+-
+- if (!ptr)
+- return -EFAULT;
+- memcpy(ret, ptr, size);
+- return 0;
+-}
+-
+-static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
++static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
+ {
+ u8 ret;
+ int err;
+
+- if (offset < 0)
+- err = call_neg_helper(skb, offset, &ret, 1);
+- else
+- err = skb_copy_bits(skb, offset, &ret, 1);
++ err = skb_copy_bits(skb, offset, &ret, 1);
+
+ return (u64)err << 32 | ret;
+ }
+
+-static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
++static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
+ {
+ u16 ret;
+ int err;
+
+- if (offset < 0)
+- err = call_neg_helper(skb, offset, &ret, 2);
+- else
+- err = skb_copy_bits(skb, offset, &ret, 2);
++ err = skb_copy_bits(skb, offset, &ret, 2);
+
+ return (u64)err << 32 | ntohs(ret);
+ }
+
+-static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
++static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
+ {
+ u32 ret;
+ int err;
+
+- if (offset < 0)
+- err = call_neg_helper(skb, offset, &ret, 4);
+- else
+- err = skb_copy_bits(skb, offset, &ret, 4);
++ err = skb_copy_bits(skb, offset, &ret, 4);
+
+ return (u64)err << 32 | ntohl(ret);
+ }
+@@ -191,8 +176,10 @@ static void jit_fill_hole(void *area, unsigned int size)
+ {
+ u32 *ptr;
+ /* We are guaranteed to have aligned memory. */
++ pax_open_kernel();
+ for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
+ *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
++ pax_close_kernel();
+ }
+
+ static void build_prologue(struct jit_ctx *ctx)
+@@ -554,6 +541,9 @@ static int build_body(struct jit_ctx *ctx)
+ case BPF_LD | BPF_B | BPF_ABS:
+ load_order = 0;
+ load:
++ /* the interpreter will deal with the negative K */
++ if ((int)k < 0)
++ return -ENOTSUPP;
+ emit_mov_i(r_off, k, ctx);
+ load_common:
+ ctx->seen |= SEEN_DATA | SEEN_CALL;
+@@ -568,18 +558,6 @@ load_common:
+ condt = ARM_COND_HI;
+ }
+
+- /*
+- * test for negative offset, only if we are
+- * currently scheduled to take the fast
+- * path. this will update the flags so that
+- * the slowpath instruction are ignored if the
+- * offset is negative.
+- *
+- * for loard_order == 0 the HI condition will
+- * make loads at offset 0 take the slow path too.
+- */
+- _emit(condt, ARM_CMP_I(r_off, 0), ctx);
+-
+ _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
+ ctx);
+
+diff --git a/arch/arm/plat-iop/setup.c b/arch/arm/plat-iop/setup.c
+index 8151bde..9be301f 100644
+--- a/arch/arm/plat-iop/setup.c
++++ b/arch/arm/plat-iop/setup.c
+@@ -24,7 +24,7 @@ static struct map_desc iop3xx_std_desc[] __initdata = {
+ .virtual = IOP3XX_PERIPHERAL_VIRT_BASE,
+ .pfn = __phys_to_pfn(IOP3XX_PERIPHERAL_PHYS_BASE),
+ .length = IOP3XX_PERIPHERAL_SIZE,
+- .type = MT_UNCACHED,
++ .type = MT_UNCACHED_RW,
+ },
+ };
+
+diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
+index a5bc92d..0bb4730 100644
+--- a/arch/arm/plat-omap/sram.c
++++ b/arch/arm/plat-omap/sram.c
+@@ -93,6 +93,8 @@ void __init omap_map_sram(unsigned long start, unsigned long size,
+ * Looks like we need to preserve some bootloader code at the
+ * beginning of SRAM for jumping to flash for reboot to work...
+ */
++ pax_open_kernel();
+ memset_io(omap_sram_base + omap_sram_skip, 0,
+ omap_sram_size - omap_sram_skip);
++ pax_close_kernel();
+ }
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index bc3f00f..88ded6a 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -891,6 +891,7 @@ config RELOCATABLE
+
+ config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
++ depends on BROKEN_SECURITY
+ select ARM64_MODULE_PLTS if MODULES
+ select RELOCATABLE
+ help
+diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
+index 0cc758c..de67415 100644
+--- a/arch/arm64/Kconfig.debug
++++ b/arch/arm64/Kconfig.debug
+@@ -6,6 +6,7 @@ config ARM64_PTDUMP
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ select DEBUG_FS
++ depends on !GRKERNSEC_KMEM
+ help
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
+index aefda98..2937874 100644
+--- a/arch/arm64/crypto/sha1-ce-glue.c
++++ b/arch/arm64/crypto/sha1-ce-glue.c
+@@ -29,7 +29,7 @@ struct sha1_ce_state {
+ u32 finalize;
+ };
+
+-asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
++asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
+ int blocks);
+
+ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+@@ -39,8 +39,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+
+ sctx->finalize = 0;
+ kernel_neon_begin_partial(16);
+- sha1_base_do_update(desc, data, len,
+- (sha1_block_fn *)sha1_ce_transform);
++ sha1_base_do_update(desc, data, len, sha1_ce_transform);
+ kernel_neon_end();
+
+ return 0;
+@@ -64,10 +63,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+ sctx->finalize = finalize;
+
+ kernel_neon_begin_partial(16);
+- sha1_base_do_update(desc, data, len,
+- (sha1_block_fn *)sha1_ce_transform);
++ sha1_base_do_update(desc, data, len, sha1_ce_transform);
+ if (!finalize)
+- sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
++ sha1_base_do_finalize(desc, sha1_ce_transform);
+ kernel_neon_end();
+ return sha1_base_finish(desc, out);
+ }
+@@ -78,7 +76,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
+
+ sctx->finalize = 0;
+ kernel_neon_begin_partial(16);
+- sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
++ sha1_base_do_finalize(desc, sha1_ce_transform);
+ kernel_neon_end();
+ return sha1_base_finish(desc, out);
+ }
+diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
+index c0235e0..86eb684 100644
+--- a/arch/arm64/include/asm/atomic.h
++++ b/arch/arm64/include/asm/atomic.h
+@@ -57,11 +57,13 @@
+ #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+
+ #define atomic_add_return_relaxed atomic_add_return_relaxed
++#define atomic_add_return_unchecked_relaxed atomic_add_return_relaxed
+ #define atomic_add_return_acquire atomic_add_return_acquire
+ #define atomic_add_return_release atomic_add_return_release
+ #define atomic_add_return atomic_add_return
+
+ #define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
++#define atomic_inc_return_unchecked_relaxed(v) atomic_add_return_relaxed(1, (v))
+ #define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v))
+ #define atomic_inc_return_release(v) atomic_add_return_release(1, (v))
+ #define atomic_inc_return(v) atomic_add_return(1, (v))
+@@ -128,6 +130,8 @@
+ #define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,)
+ #define atomic_andnot atomic_andnot
+
++#define atomic_inc_return_unchecked_relaxed(v) atomic_add_return_relaxed(1, (v))
++
+ /*
+ * 64-bit atomic operations.
+ */
+@@ -206,5 +210,16 @@
+
+ #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
++#define atomic64_read_unchecked(v) atomic64_read(v)
++#define atomic64_set_unchecked(v, i) atomic64_set((v), (i))
++#define atomic64_add_unchecked(a, v) atomic64_add((a), (v))
++#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v))
++#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v))
++#define atomic64_inc_unchecked(v) atomic64_inc(v)
++#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v)
++#define atomic64_dec_unchecked(v) atomic64_dec(v)
++#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n))
++#define atomic64_xchg_unchecked(v, n) atomic64_xchg((v), (n))
++
+ #endif
+ #endif
+diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
+index 5082b30..9ef38c2 100644
+--- a/arch/arm64/include/asm/cache.h
++++ b/arch/arm64/include/asm/cache.h
+@@ -16,10 +16,14 @@
+ #ifndef __ASM_CACHE_H
+ #define __ASM_CACHE_H
+
++#include <linux/const.h>
++
+ #include <asm/cachetype.h>
+
++#include <linux/const.h>
++
+ #define L1_CACHE_SHIFT 7
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ /*
+ * Memory returned by kmalloc() may be used for DMA, so we must make
+diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
+index 5394c84..05e5a95 100644
+--- a/arch/arm64/include/asm/percpu.h
++++ b/arch/arm64/include/asm/percpu.h
+@@ -123,16 +123,16 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size)
+ {
+ switch (size) {
+ case 1:
+- ACCESS_ONCE(*(u8 *)ptr) = (u8)val;
++ ACCESS_ONCE_RW(*(u8 *)ptr) = (u8)val;
+ break;
+ case 2:
+- ACCESS_ONCE(*(u16 *)ptr) = (u16)val;
++ ACCESS_ONCE_RW(*(u16 *)ptr) = (u16)val;
+ break;
+ case 4:
+- ACCESS_ONCE(*(u32 *)ptr) = (u32)val;
++ ACCESS_ONCE_RW(*(u32 *)ptr) = (u32)val;
+ break;
+ case 8:
+- ACCESS_ONCE(*(u64 *)ptr) = (u64)val;
++ ACCESS_ONCE_RW(*(u64 *)ptr) = (u64)val;
+ break;
+ default:
+ BUILD_BUG();
+diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
+index d25f4f1..61d52da 100644
+--- a/arch/arm64/include/asm/pgalloc.h
++++ b/arch/arm64/include/asm/pgalloc.h
+@@ -51,6 +51,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ {
+ __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+ }
++
++static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
++{
++ pud_populate(mm, pud, pmd);
++}
+ #else
+ static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+ {
+@@ -80,6 +85,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+ {
+ __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+ }
++
++static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
++{
++ pgd_populate(mm, pgd, pud);
++}
+ #else
+ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+ {
+diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
+index e20bd43..7e476da 100644
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -23,6 +23,9 @@
+ #include <asm/pgtable-hwdef.h>
+ #include <asm/pgtable-prot.h>
+
++#define ktla_ktva(addr) (addr)
++#define ktva_ktla(addr) (addr)
++
+ /*
+ * VMALLOC range.
+ *
+@@ -718,6 +721,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
+ #define kc_vaddr_to_offset(v) ((v) & ~VA_START)
+ #define kc_offset_to_vaddr(o) ((o) | VA_START)
+
++#define ktla_ktva(addr) (addr)
++#define ktva_ktla(addr) (addr)
++
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* __ASM_PGTABLE_H */
+diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
+index ace0a96..c7c4d3c 100644
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -194,4 +194,11 @@ void cpu_enable_pan(void *__unused);
+ void cpu_enable_uao(void *__unused);
+ void cpu_enable_cache_maint_trap(void *__unused);
+
++#ifdef CONFIG_PAX_RAP
++static inline void pax_reload_rap_cookie(unsigned long *rap_cookie)
++{
++ asm volatile("mov\tx19, %0\n\t" : : "r"(*rap_cookie) : "r19");
++}
++#endif
++
+ #endif /* __ASM_PROCESSOR_H */
+diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
+index 2eb714c..3a10471 100644
+--- a/arch/arm64/include/asm/string.h
++++ b/arch/arm64/include/asm/string.h
+@@ -17,40 +17,40 @@
+ #define __ASM_STRING_H
+
+ #define __HAVE_ARCH_STRRCHR
+-extern char *strrchr(const char *, int c);
++extern char *strrchr(const char *, int c) __nocapture(-1);
+
+ #define __HAVE_ARCH_STRCHR
+-extern char *strchr(const char *, int c);
++extern char *strchr(const char *, int c) __nocapture(-1);
+
+ #define __HAVE_ARCH_STRCMP
+-extern int strcmp(const char *, const char *);
++extern int strcmp(const char *, const char *) __nocapture();
+
+ #define __HAVE_ARCH_STRNCMP
+-extern int strncmp(const char *, const char *, __kernel_size_t);
++extern int strncmp(const char *, const char *, __kernel_size_t) __nocapture(1, 2);
+
+ #define __HAVE_ARCH_STRLEN
+-extern __kernel_size_t strlen(const char *);
++extern __kernel_size_t strlen(const char *) __nocapture(1);
+
+ #define __HAVE_ARCH_STRNLEN
+-extern __kernel_size_t strnlen(const char *, __kernel_size_t);
++extern __kernel_size_t strnlen(const char *, __kernel_size_t) __nocapture(1);
+
+ #define __HAVE_ARCH_MEMCPY
+-extern void *memcpy(void *, const void *, __kernel_size_t);
+-extern void *__memcpy(void *, const void *, __kernel_size_t);
++extern void *memcpy(void *, const void *, __kernel_size_t) __nocapture(2);
++extern void *__memcpy(void *, const void *, __kernel_size_t) __nocapture(2);
+
+ #define __HAVE_ARCH_MEMMOVE
+-extern void *memmove(void *, const void *, __kernel_size_t);
+-extern void *__memmove(void *, const void *, __kernel_size_t);
++extern void *memmove(void *, const void *, __kernel_size_t) __nocapture(2);
++extern void *__memmove(void *, const void *, __kernel_size_t) __nocapture(2);
+
+ #define __HAVE_ARCH_MEMCHR
+-extern void *memchr(const void *, int, __kernel_size_t);
++extern void *memchr(const void *, int, __kernel_size_t) __nocapture(-1);
+
+ #define __HAVE_ARCH_MEMSET
+ extern void *memset(void *, int, __kernel_size_t);
+ extern void *__memset(void *, int, __kernel_size_t);
+
+ #define __HAVE_ARCH_MEMCMP
+-extern int memcmp(const void *, const void *, size_t);
++extern int memcmp(const void *, const void *, size_t) __nocapture(1, 2);
+
+
+ #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
+index db84983..d256a3edc 100644
+--- a/arch/arm64/include/asm/uaccess.h
++++ b/arch/arm64/include/asm/uaccess.h
+@@ -110,6 +110,7 @@ static inline void set_fs(mm_segment_t fs)
+ */
+ #define untagged_addr(addr) sign_extend64(addr, 55)
+
++#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
+ #define access_ok(type, addr, size) __range_ok(addr, size)
+ #define user_addr_max get_fs
+
+@@ -279,6 +280,9 @@ static inline unsigned long __must_check __copy_from_user(void *to, const void _
+
+ static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
+ {
++ if ((long)n < 0)
++ return n;
++
+ kasan_check_read(from, n);
+ check_object_size(from, n, true);
+ return __arch_copy_to_user(to, from, n);
+@@ -286,6 +290,9 @@ static inline unsigned long __must_check __copy_to_user(void __user *to, const v
+
+ static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
++ if ((long)n < 0)
++ return n;
++
+ kasan_check_write(to, n);
+
+ if (access_ok(VERIFY_READ, from, n)) {
+@@ -298,6 +305,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u
+
+ static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
+ {
++ if ((long)n < 0)
++ return n;
++
+ kasan_check_read(from, n);
+
+ if (access_ok(VERIFY_WRITE, to, n)) {
+diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
+index 65d81f9..6a46f09 100644
+--- a/arch/arm64/kernel/hibernate.c
++++ b/arch/arm64/kernel/hibernate.c
+@@ -166,7 +166,7 @@ EXPORT_SYMBOL(arch_hibernation_header_restore);
+ static int create_safe_exec_page(void *src_start, size_t length,
+ unsigned long dst_addr,
+ phys_addr_t *phys_dst_addr,
+- void *(*allocator)(gfp_t mask),
++ unsigned long (*allocator)(gfp_t mask),
+ gfp_t mask)
+ {
+ int rc = 0;
+@@ -174,7 +174,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+- unsigned long dst = (unsigned long)allocator(mask);
++ unsigned long dst = allocator(mask);
+
+ if (!dst) {
+ rc = -ENOMEM;
+@@ -184,9 +184,9 @@ static int create_safe_exec_page(void *src_start, size_t length,
+ memcpy((void *)dst, src_start, length);
+ flush_icache_range(dst, dst + length);
+
+- pgd = pgd_offset_raw(allocator(mask), dst_addr);
++ pgd = pgd_offset_raw((pgd_t *)allocator(mask), dst_addr);
+ if (pgd_none(*pgd)) {
+- pud = allocator(mask);
++ pud = (pud_t *)allocator(mask);
+ if (!pud) {
+ rc = -ENOMEM;
+ goto out;
+@@ -196,7 +196,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
+
+ pud = pud_offset(pgd, dst_addr);
+ if (pud_none(*pud)) {
+- pmd = allocator(mask);
++ pmd = (pmd_t *)allocator(mask);
+ if (!pmd) {
+ rc = -ENOMEM;
+ goto out;
+@@ -206,7 +206,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
+
+ pmd = pmd_offset(pud, dst_addr);
+ if (pmd_none(*pmd)) {
+- pte = allocator(mask);
++ pte = (pte_t *)allocator(mask);
+ if (!pte) {
+ rc = -ENOMEM;
+ goto out;
+@@ -449,7 +449,7 @@ int swsusp_arch_resume(void)
+ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
+ (unsigned long)hibernate_exit,
+ &phys_hibernate_exit,
+- (void *)get_safe_page, GFP_ATOMIC);
++ get_safe_page, GFP_ATOMIC);
+ if (rc) {
+ pr_err("Failed to create safe executable page for hibernate_exit code.");
+ goto out;
+diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
+index 37e47a9..f8597fc 100644
+--- a/arch/arm64/kernel/probes/decode-insn.c
++++ b/arch/arm64/kernel/probes/decode-insn.c
+@@ -157,10 +157,10 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
+ mod = __module_address((unsigned long)addr);
+ if (mod && within_module_init((unsigned long)addr, mod) &&
+ !within_module_init((unsigned long)scan_end, mod))
+- scan_end = (kprobe_opcode_t *)mod->init_layout.base;
++ scan_end = (kprobe_opcode_t *)mod->init_layout.base_rx;
+ else if (mod && within_module_core((unsigned long)addr, mod) &&
+ !within_module_core((unsigned long)scan_end, mod))
+- scan_end = (kprobe_opcode_t *)mod->core_layout.base;
++ scan_end = (kprobe_opcode_t *)mod->core_layout.base_rx;
+ preempt_enable();
+ }
+ #endif
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index 6cd2612..56d72e5c 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -63,7 +63,7 @@ EXPORT_SYMBOL(__stack_chk_guard);
+ /*
+ * Function pointers to optional machine specific functions
+ */
+-void (*pm_power_off)(void);
++void (* pm_power_off)(void);
+ EXPORT_SYMBOL_GPL(pm_power_off);
+
+ void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
+@@ -109,7 +109,7 @@ void machine_shutdown(void)
+ * activity (executing tasks, handling interrupts). smp_send_stop()
+ * achieves this.
+ */
+-void machine_halt(void)
++void __noreturn machine_halt(void)
+ {
+ local_irq_disable();
+ smp_send_stop();
+@@ -122,12 +122,13 @@ void machine_halt(void)
+ * achieves this. When the system power is turned off, it will take all CPUs
+ * with it.
+ */
+-void machine_power_off(void)
++void __noreturn machine_power_off(void)
+ {
+ local_irq_disable();
+ smp_send_stop();
+ if (pm_power_off)
+ pm_power_off();
++ while(1);
+ }
+
+ /*
+@@ -139,7 +140,7 @@ void machine_power_off(void)
+ * executing pre-reset code, and using RAM that the primary CPU's code wishes
+ * to use. Implementing such co-ordination would be essentially impossible.
+ */
+-void machine_restart(char *cmd)
++void __noreturn machine_restart(char *cmd)
+ {
+ /* Disable interrupts first */
+ local_irq_disable();
+diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
+index d34fd72..8b6faee 100644
+--- a/arch/arm64/kernel/stacktrace.c
++++ b/arch/arm64/kernel/stacktrace.c
+@@ -95,8 +95,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
+ struct pt_regs *irq_args;
+ unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+
+- if (object_is_on_stack((void *)orig_sp) &&
+- object_is_on_stack((void *)frame->fp)) {
++ if (object_starts_on_stack((void *)orig_sp) &&
++ object_starts_on_stack((void *)frame->fp)) {
+ frame->sp = orig_sp;
+
+ /* orig_sp is the saved pt_regs, find the elr */
+diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
+index 771a01a7f..db6d9cc 100644
+--- a/arch/arm64/kernel/traps.c
++++ b/arch/arm64/kernel/traps.c
+@@ -511,7 +511,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)
+ __show_regs(regs);
+ }
+
+- return sys_ni_syscall();
++ return -ENOSYS;
+ }
+
+ static const char *esr_class_str[] = {
+diff --git a/arch/avr32/include/asm/cache.h b/arch/avr32/include/asm/cache.h
+index c3a58a1..78fbf54 100644
+--- a/arch/avr32/include/asm/cache.h
++++ b/arch/avr32/include/asm/cache.h
+@@ -1,8 +1,10 @@
+ #ifndef __ASM_AVR32_CACHE_H
+ #define __ASM_AVR32_CACHE_H
+
++#include <linux/const.h>
++
+ #define L1_CACHE_SHIFT 5
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ /*
+ * Memory returned by kmalloc() may be used for DMA, so we must make
+diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h
+index 0388ece..87c8df1 100644
+--- a/arch/avr32/include/asm/elf.h
++++ b/arch/avr32/include/asm/elf.h
+@@ -84,8 +84,14 @@ typedef struct user_fpu_struct elf_fpregset_t;
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. */
+
+-#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
++#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE 0x00001000UL
++
++#define PAX_DELTA_MMAP_LEN 15
++#define PAX_DELTA_STACK_LEN 15
++#endif
+
+ /* This yields a mask that user programs can use to figure out what
+ instruction set this CPU supports. This could be done in user space,
+diff --git a/arch/avr32/include/asm/kmap_types.h b/arch/avr32/include/asm/kmap_types.h
+index 479330b..53717a8 100644
+--- a/arch/avr32/include/asm/kmap_types.h
++++ b/arch/avr32/include/asm/kmap_types.h
+@@ -2,9 +2,9 @@
+ #define __ASM_AVR32_KMAP_TYPES_H
+
+ #ifdef CONFIG_DEBUG_HIGHMEM
+-# define KM_TYPE_NR 29
++# define KM_TYPE_NR 30
+ #else
+-# define KM_TYPE_NR 14
++# define KM_TYPE_NR 15
+ #endif
+
+ #endif /* __ASM_AVR32_KMAP_TYPES_H */
+diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
+index a4b7eda..d057f9e 100644
+--- a/arch/avr32/mm/fault.c
++++ b/arch/avr32/mm/fault.c
+@@ -41,6 +41,23 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap)
+
+ int exception_trace = 1;
+
++#ifdef CONFIG_PAX_PAGEEXEC
++void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
++{
++ unsigned long i;
++
++ printk(KERN_ERR "PAX: bytes at PC: ");
++ for (i = 0; i < 20; i++) {
++ unsigned char c;
++ if (get_user(c, (unsigned char *)pc+i))
++ printk(KERN_CONT "???????? ");
++ else
++ printk(KERN_CONT "%02x ", c);
++ }
++ printk("\n");
++}
++#endif
++
+ /*
+ * This routine handles page faults. It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+@@ -178,6 +195,16 @@ bad_area:
+ up_read(&mm->mmap_sem);
+
+ if (user_mode(regs)) {
++
++#ifdef CONFIG_PAX_PAGEEXEC
++ if (mm->pax_flags & MF_PAX_PAGEEXEC) {
++ if (ecr == ECR_PROTECTION_X || ecr == ECR_TLB_MISS_X) {
++ pax_report_fault(regs, (void *)regs->pc, (void *)regs->sp);
++ do_group_exit(SIGKILL);
++ }
++ }
++#endif
++
+ if (exception_trace && printk_ratelimit())
+ printk("%s%s[%d]: segfault at %08lx pc %08lx "
+ "sp %08lx ecr %lu\n",
+diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
+index f3337ee..15b6f8d 100644
+--- a/arch/blackfin/Kconfig.debug
++++ b/arch/blackfin/Kconfig.debug
+@@ -18,6 +18,7 @@ config DEBUG_VERBOSE
+ config DEBUG_MMRS
+ tristate "Generate Blackfin MMR tree"
+ select DEBUG_FS
++ depends on !GRKERNSEC_KMEM
+ help
+ Create a tree of Blackfin MMRs via the debugfs tree. If
+ you enable this, you will find all MMRs laid out in the
+diff --git a/arch/blackfin/include/asm/cache.h b/arch/blackfin/include/asm/cache.h
+index 568885a..f8008df 100644
+--- a/arch/blackfin/include/asm/cache.h
++++ b/arch/blackfin/include/asm/cache.h
+@@ -7,6 +7,7 @@
+ #ifndef __ARCH_BLACKFIN_CACHE_H
+ #define __ARCH_BLACKFIN_CACHE_H
+
++#include <linux/const.h>
+ #include <linux/linkage.h> /* for asmlinkage */
+
+ /*
+@@ -14,7 +15,7 @@
+ * Blackfin loads 32 bytes for cache
+ */
+ #define L1_CACHE_SHIFT 5
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+ #define SMP_CACHE_BYTES L1_CACHE_BYTES
+
+ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+diff --git a/arch/cris/include/arch-v10/arch/cache.h b/arch/cris/include/arch-v10/arch/cache.h
+index aea2718..3639a60 100644
+--- a/arch/cris/include/arch-v10/arch/cache.h
++++ b/arch/cris/include/arch-v10/arch/cache.h
+@@ -1,8 +1,9 @@
+ #ifndef _ASM_ARCH_CACHE_H
+ #define _ASM_ARCH_CACHE_H
+
++#include <linux/const.h>
+ /* Etrax 100LX have 32-byte cache-lines. */
+-#define L1_CACHE_BYTES 32
+ #define L1_CACHE_SHIFT 5
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #endif /* _ASM_ARCH_CACHE_H */
+diff --git a/arch/cris/include/arch-v32/arch/cache.h b/arch/cris/include/arch-v32/arch/cache.h
+index 7caf25d..ee65ac5 100644
+--- a/arch/cris/include/arch-v32/arch/cache.h
++++ b/arch/cris/include/arch-v32/arch/cache.h
+@@ -1,11 +1,12 @@
+ #ifndef _ASM_CRIS_ARCH_CACHE_H
+ #define _ASM_CRIS_ARCH_CACHE_H
+
++#include <linux/const.h>
+ #include <arch/hwregs/dma.h>
+
+ /* A cache-line is 32 bytes. */
+-#define L1_CACHE_BYTES 32
+ #define L1_CACHE_SHIFT 5
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
+index 1c2a5e2..2579e5f 100644
+--- a/arch/frv/include/asm/atomic.h
++++ b/arch/frv/include/asm/atomic.h
+@@ -146,6 +146,16 @@ static inline void atomic64_dec(atomic64_t *v)
+ #define atomic64_cmpxchg(v, old, new) (__cmpxchg_64(old, new, &(v)->counter))
+ #define atomic64_xchg(v, new) (__xchg_64(new, &(v)->counter))
+
++#define atomic64_read_unchecked(v) atomic64_read(v)
++#define atomic64_set_unchecked(v, i) atomic64_set((v), (i))
++#define atomic64_add_unchecked(a, v) atomic64_add((a), (v))
++#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v))
++#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v))
++#define atomic64_inc_unchecked(v) atomic64_inc(v)
++#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v)
++#define atomic64_dec_unchecked(v) atomic64_dec(v)
++#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n))
++
+ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+ {
+ int c, old;
+diff --git a/arch/frv/include/asm/cache.h b/arch/frv/include/asm/cache.h
+index 2797163..c2a401df9 100644
+--- a/arch/frv/include/asm/cache.h
++++ b/arch/frv/include/asm/cache.h
+@@ -12,10 +12,11 @@
+ #ifndef __ASM_CACHE_H
+ #define __ASM_CACHE_H
+
++#include <linux/const.h>
+
+ /* bytes per L1 cache line */
+ #define L1_CACHE_SHIFT (CONFIG_FRV_L1_CACHE_SHIFT)
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define __cacheline_aligned __attribute__((aligned(L1_CACHE_BYTES)))
+ #define ____cacheline_aligned __attribute__((aligned(L1_CACHE_BYTES)))
+diff --git a/arch/frv/include/asm/kmap_types.h b/arch/frv/include/asm/kmap_types.h
+index 43901f2..0d8b865 100644
+--- a/arch/frv/include/asm/kmap_types.h
++++ b/arch/frv/include/asm/kmap_types.h
+@@ -2,6 +2,6 @@
+ #ifndef _ASM_KMAP_TYPES_H
+ #define _ASM_KMAP_TYPES_H
+
+-#define KM_TYPE_NR 17
++#define KM_TYPE_NR 18
+
+ #endif
+diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c
+index 836f1470..4cf23f5 100644
+--- a/arch/frv/mm/elf-fdpic.c
++++ b/arch/frv/mm/elf-fdpic.c
+@@ -61,6 +61,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
+ {
+ struct vm_area_struct *vma;
+ struct vm_unmapped_area_info info;
++ unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags);
+
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+@@ -73,8 +74,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(current->mm, addr);
+- if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len, offset))
+ goto success;
+ }
+
+@@ -85,6 +85,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
+ info.high_limit = (current->mm->start_stack - 0x00200000);
+ info.align_mask = 0;
+ info.align_offset = 0;
++ info.threadstack_offset = offset;
+ addr = vm_unmapped_area(&info);
+ if (!(addr & ~PAGE_MASK))
+ goto success;
+diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h
+index 69952c18..4fa2908 100644
+--- a/arch/hexagon/include/asm/cache.h
++++ b/arch/hexagon/include/asm/cache.h
+@@ -21,9 +21,11 @@
+ #ifndef __ASM_CACHE_H
+ #define __ASM_CACHE_H
+
++#include <linux/const.h>
++
+ /* Bytes per L1 cache line */
+-#define L1_CACHE_SHIFT (5)
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_SHIFT 5
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+
+diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
+index 18ca6a9..77b0e0d 100644
+--- a/arch/ia64/Kconfig
++++ b/arch/ia64/Kconfig
+@@ -519,6 +519,7 @@ config KEXEC
+ bool "kexec system call"
+ depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+ select KEXEC_CORE
++ depends on !GRKERNSEC_KMEM
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
+index c100d78..07538cc 100644
+--- a/arch/ia64/Makefile
++++ b/arch/ia64/Makefile
+@@ -98,5 +98,6 @@ endef
+ archprepare: make_nr_irqs_h
+ PHONY += make_nr_irqs_h
+
++make_nr_irqs_h: KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
+ make_nr_irqs_h:
+ $(Q)$(MAKE) $(build)=arch/ia64/kernel include/generated/nr-irqs.h
+diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
+index f565ad3..484af46 100644
+--- a/arch/ia64/include/asm/atomic.h
++++ b/arch/ia64/include/asm/atomic.h
+@@ -307,4 +307,14 @@ atomic64_add_negative (__s64 i, atomic64_t *v)
+ #define atomic64_inc(v) atomic64_add(1, (v))
+ #define atomic64_dec(v) atomic64_sub(1, (v))
+
++#define atomic64_read_unchecked(v) atomic64_read(v)
++#define atomic64_set_unchecked(v, i) atomic64_set((v), (i))
++#define atomic64_add_unchecked(a, v) atomic64_add((a), (v))
++#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v))
++#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v))
++#define atomic64_inc_unchecked(v) atomic64_inc(v)
++#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v)
++#define atomic64_dec_unchecked(v) atomic64_dec(v)
++#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n))
++
+ #endif /* _ASM_IA64_ATOMIC_H */
+diff --git a/arch/ia64/include/asm/cache.h b/arch/ia64/include/asm/cache.h
+index 988254a..e1ee885 100644
+--- a/arch/ia64/include/asm/cache.h
++++ b/arch/ia64/include/asm/cache.h
+@@ -1,6 +1,7 @@
+ #ifndef _ASM_IA64_CACHE_H
+ #define _ASM_IA64_CACHE_H
+
++#include <linux/const.h>
+
+ /*
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+@@ -9,7 +10,7 @@
+
+ /* Bytes per L1 (data) cache line. */
+ #define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #ifdef CONFIG_SMP
+ # define SMP_CACHE_SHIFT L1_CACHE_SHIFT
+diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
+index 5a83c5c..4d7f553 100644
+--- a/arch/ia64/include/asm/elf.h
++++ b/arch/ia64/include/asm/elf.h
+@@ -42,6 +42,13 @@
+ */
+ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL)
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL)
++
++#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13)
++#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13)
++#endif
++
+ #define PT_IA_64_UNWIND 0x70000001
+
+ /* IA-64 relocations: */
+diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
+index f5e70e9..624fad5 100644
+--- a/arch/ia64/include/asm/pgalloc.h
++++ b/arch/ia64/include/asm/pgalloc.h
+@@ -39,6 +39,12 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
+ pgd_val(*pgd_entry) = __pa(pud);
+ }
+
++static inline void
++pgd_populate_kernel(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
++{
++ pgd_populate(mm, pgd_entry, pud);
++}
++
+ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
+@@ -57,6 +63,12 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
+ pud_val(*pud_entry) = __pa(pmd);
+ }
+
++static inline void
++pud_populate_kernel(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
++{
++ pud_populate(mm, pud_entry, pmd);
++}
++
+ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+ return quicklist_alloc(0, GFP_KERNEL, NULL);
+diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
+index 9f3ed9e..c99b418 100644
+--- a/arch/ia64/include/asm/pgtable.h
++++ b/arch/ia64/include/asm/pgtable.h
+@@ -12,7 +12,7 @@
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+-
++#include <linux/const.h>
+ #include <asm/mman.h>
+ #include <asm/page.h>
+ #include <asm/processor.h>
+@@ -139,6 +139,17 @@
+ #define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+ #define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+ #define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
++
++#ifdef CONFIG_PAX_PAGEEXEC
++# define PAGE_SHARED_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
++# define PAGE_READONLY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
++# define PAGE_COPY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
++#else
++# define PAGE_SHARED_NOEXEC PAGE_SHARED
++# define PAGE_READONLY_NOEXEC PAGE_READONLY
++# define PAGE_COPY_NOEXEC PAGE_COPY
++#endif
++
+ #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
+ #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX)
+ #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
+index ca9e761..40dffaf 100644
+--- a/arch/ia64/include/asm/spinlock.h
++++ b/arch/ia64/include/asm/spinlock.h
+@@ -73,7 +73,7 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
+ unsigned short *p = (unsigned short *)&lock->lock + 1, tmp;
+
+ asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
+- ACCESS_ONCE(*p) = (tmp + 2) & ~1;
++ ACCESS_ONCE_RW(*p) = (tmp + 2) & ~1;
+ }
+
+ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
+diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
+index bfe1319..da0014b 100644
+--- a/arch/ia64/include/asm/uaccess.h
++++ b/arch/ia64/include/asm/uaccess.h
+@@ -70,6 +70,7 @@
+ && ((segment).seg == KERNEL_DS.seg \
+ || likely(REGION_OFFSET((unsigned long) (addr)) < RGN_MAP_LIMIT))); \
+ })
++#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
+ #define access_ok(type, addr, size) __access_ok((addr), (size), get_fs())
+
+ /*
+@@ -241,17 +242,23 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use
+ static inline unsigned long
+ __copy_to_user (void __user *to, const void *from, unsigned long count)
+ {
++ if (count > INT_MAX)
++ return count;
++
+ check_object_size(from, count, true);
+
+- return __copy_user(to, (__force void __user *) from, count);
++ return __copy_user(to, (void __force_user *) from, count);
+ }
+
+ static inline unsigned long
+ __copy_from_user (void *to, const void __user *from, unsigned long count)
+ {
++ if (count > INT_MAX)
++ return count;
++
+ check_object_size(to, count, false);
+
+- return __copy_user((__force void __user *) to, from, count);
++ return __copy_user((void __force_user *) to, from, count);
+ }
+
+ #define __copy_to_user_inatomic __copy_to_user
+@@ -260,11 +267,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
+ ({ \
+ void __user *__cu_to = (to); \
+ const void *__cu_from = (from); \
+- long __cu_len = (n); \
++ unsigned long __cu_len = (n); \
+ \
+- if (__access_ok(__cu_to, __cu_len, get_fs())) { \
+- check_object_size(__cu_from, __cu_len, true); \
+- __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \
++ if (__cu_len <= INT_MAX && __access_ok(__cu_to, __cu_len, get_fs())) { \
++ check_object_size(__cu_from, __cu_len, true); \
++ __cu_len = __copy_user(__cu_to, (void __force_user *) __cu_from, __cu_len); \
+ } \
+ __cu_len; \
+ })
+@@ -272,10 +279,10 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
+ static inline unsigned long
+ copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
+- check_object_size(to, n, false);
+- if (likely(__access_ok(from, n, get_fs())))
+- n = __copy_user((__force void __user *) to, from, n);
+- else
++ if (likely(__access_ok(from, n, get_fs()))) {
++ check_object_size(to, n, false);
++ n = __copy_user((void __force_user *) to, from, n);
++ } else if ((long)n > 0)
+ memset(to, 0, n);
+ return n;
+ }
+diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
+index 6ab0ae7..88f1b60 100644
+--- a/arch/ia64/kernel/module.c
++++ b/arch/ia64/kernel/module.c
+@@ -486,13 +486,13 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
+ static inline int
+ in_init (const struct module *mod, uint64_t addr)
+ {
+- return addr - (uint64_t) mod->init_layout.base < mod->init_layout.size;
++ return within_module_init(addr, mod);
+ }
+
+ static inline int
+ in_core (const struct module *mod, uint64_t addr)
+ {
+- return addr - (uint64_t) mod->core_layout.base < mod->core_layout.size;
++ return within_module_core(addr, mod);
+ }
+
+ static inline int
+@@ -676,6 +676,14 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
+
+ case RV_BDREL:
+ val -= (uint64_t) (in_init(mod, val) ? mod->init_layout.base : mod->core_layout.base);
++ if (within_module_rx(val, &mod->init_layout))
++ val -= mod->init_layout.base_rx;
++ else if (within_module_rw(val, &mod->init_layout))
++ val -= mod->init_layout.base_rw;
++ else if (within_module_rx(val, &mod->core_layout))
++ val -= mod->core_layout.base_rx;
++ else if (within_module_rw(val, &mod->core_layout))
++ val -= mod->core_layout.base_rw;
+ break;
+
+ case RV_LTV:
+@@ -810,15 +818,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
+ * addresses have been selected...
+ */
+ uint64_t gp;
+- if (mod->core_layout.size > MAX_LTOFF)
++ if (mod->core_layout.size_rx + mod->core_layout.size_rw > MAX_LTOFF)
+ /*
+ * This takes advantage of fact that SHF_ARCH_SMALL gets allocated
+ * at the end of the module.
+ */
+- gp = mod->core_layout.size - MAX_LTOFF / 2;
++ gp = mod->core_layout.size_rx + mod->core_layout.size_rw - MAX_LTOFF / 2;
+ else
+- gp = mod->core_layout.size / 2;
+- gp = (uint64_t) mod->core_layout.base + ((gp + 7) & -8);
++ gp = (mod->core_layout.size_rx + mod->core_layout.size_rw) / 2;
++ gp = (uint64_t) mod->core_layout.base_rx + ((gp + 7) & -8);
+ mod->arch.gp = gp;
+ DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp);
+ }
+diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
+index c39c3cd..3c77738 100644
+--- a/arch/ia64/kernel/palinfo.c
++++ b/arch/ia64/kernel/palinfo.c
+@@ -980,7 +980,7 @@ static int palinfo_cpu_callback(struct notifier_block *nfb,
+ return NOTIFY_OK;
+ }
+
+-static struct notifier_block __refdata palinfo_cpu_notifier =
++static struct notifier_block palinfo_cpu_notifier =
+ {
+ .notifier_call = palinfo_cpu_callback,
+ .priority = 0,
+diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
+index 41e33f8..65180b2a 100644
+--- a/arch/ia64/kernel/sys_ia64.c
++++ b/arch/ia64/kernel/sys_ia64.c
+@@ -28,6 +28,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
+ unsigned long align_mask = 0;
+ struct mm_struct *mm = current->mm;
+ struct vm_unmapped_area_info info;
++ unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags);
+
+ if (len > RGN_MAP_LIMIT)
+ return -ENOMEM;
+@@ -43,6 +44,13 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
+ if (REGION_NUMBER(addr) == RGN_HPAGE)
+ addr = 0;
+ #endif
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ addr = mm->free_area_cache;
++ else
++#endif
++
+ if (!addr)
+ addr = TASK_UNMAPPED_BASE;
+
+@@ -61,6 +69,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
+ info.high_limit = TASK_SIZE;
+ info.align_mask = align_mask;
+ info.align_offset = 0;
++ info.threadstack_offset = offset;
+ return vm_unmapped_area(&info);
+ }
+
+diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
+index dc506b0..39baade 100644
+--- a/arch/ia64/kernel/vmlinux.lds.S
++++ b/arch/ia64/kernel/vmlinux.lds.S
+@@ -171,7 +171,7 @@ SECTIONS {
+ /* Per-cpu data: */
+ . = ALIGN(PERCPU_PAGE_SIZE);
+ PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
+- __phys_per_cpu_start = __per_cpu_load;
++ __phys_per_cpu_start = per_cpu_load;
+ /*
+ * ensure percpu data fits
+ * into percpu page size
+diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
+index fa6ad95..b46bd89 100644
+--- a/arch/ia64/mm/fault.c
++++ b/arch/ia64/mm/fault.c
+@@ -72,6 +72,23 @@ mapped_kernel_page_is_present (unsigned long address)
+ return pte_present(pte);
+ }
+
++#ifdef CONFIG_PAX_PAGEEXEC
++void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
++{
++ unsigned long i;
++
++ printk(KERN_ERR "PAX: bytes at PC: ");
++ for (i = 0; i < 8; i++) {
++ unsigned int c;
++ if (get_user(c, (unsigned int *)pc+i))
++ printk(KERN_CONT "???????? ");
++ else
++ printk(KERN_CONT "%08x ", c);
++ }
++ printk("\n");
++}
++#endif
++
+ # define VM_READ_BIT 0
+ # define VM_WRITE_BIT 1
+ # define VM_EXEC_BIT 2
+@@ -151,8 +168,21 @@ retry:
+ if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE))))
+ goto bad_area;
+
+- if ((vma->vm_flags & mask) != mask)
++ if ((vma->vm_flags & mask) != mask) {
++
++#ifdef CONFIG_PAX_PAGEEXEC
++ if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) {
++ if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip)
++ goto bad_area;
++
++ up_read(&mm->mmap_sem);
++ pax_report_fault(regs, (void *)regs->cr_iip, (void *)regs->r12);
++ do_group_exit(SIGKILL);
++ }
++#endif
++
+ goto bad_area;
++ }
+
+ /*
+ * If for any reason at all we couldn't handle the fault, make
+diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
+index 85de86d..db7f6b8 100644
+--- a/arch/ia64/mm/hugetlbpage.c
++++ b/arch/ia64/mm/hugetlbpage.c
+@@ -138,6 +138,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
+ unsigned long pgoff, unsigned long flags)
+ {
+ struct vm_unmapped_area_info info;
++ unsigned long offset = gr_rand_threadstack_offset(current->mm, file, flags);
+
+ if (len > RGN_MAP_LIMIT)
+ return -ENOMEM;
+@@ -161,6 +162,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
+ info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT;
+ info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1);
+ info.align_offset = 0;
++ info.threadstack_offset = offset;
+ return vm_unmapped_area(&info);
+ }
+
+diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
+index 1841ef6..74d8330 100644
+--- a/arch/ia64/mm/init.c
++++ b/arch/ia64/mm/init.c
+@@ -119,6 +119,19 @@ ia64_init_addr_space (void)
+ vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
+ vma->vm_end = vma->vm_start + PAGE_SIZE;
+ vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
++
++#ifdef CONFIG_PAX_PAGEEXEC
++ if (current->mm->pax_flags & MF_PAX_PAGEEXEC) {
++ vma->vm_flags &= ~VM_EXEC;
++
++#ifdef CONFIG_PAX_MPROTECT
++ if (current->mm->pax_flags & MF_PAX_MPROTECT)
++ vma->vm_flags &= ~VM_MAYEXEC;
++#endif
++
++ }
++#endif
++
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ down_write(&current->mm->mmap_sem);
+ if (insert_vm_struct(current->mm, vma)) {
+@@ -279,7 +292,7 @@ static int __init gate_vma_init(void)
+ gate_vma.vm_start = FIXADDR_USER_START;
+ gate_vma.vm_end = FIXADDR_USER_END;
+ gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+- gate_vma.vm_page_prot = __P101;
++ gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags);
+
+ return 0;
+ }
+diff --git a/arch/m32r/include/asm/cache.h b/arch/m32r/include/asm/cache.h
+index 40b3ee98..8c2c112 100644
+--- a/arch/m32r/include/asm/cache.h
++++ b/arch/m32r/include/asm/cache.h
+@@ -1,8 +1,10 @@
+ #ifndef _ASM_M32R_CACHE_H
+ #define _ASM_M32R_CACHE_H
+
++#include <linux/const.h>
++
+ /* L1 cache line size */
+ #define L1_CACHE_SHIFT 4
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #endif /* _ASM_M32R_CACHE_H */
+diff --git a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c
+index 82abd15..d95ae5d 100644
+--- a/arch/m32r/lib/usercopy.c
++++ b/arch/m32r/lib/usercopy.c
+@@ -14,6 +14,9 @@
+ unsigned long
+ __generic_copy_to_user(void __user *to, const void *from, unsigned long n)
+ {
++ if ((long)n < 0)
++ return n;
++
+ prefetch(from);
+ if (access_ok(VERIFY_WRITE, to, n))
+ __copy_user(to,from,n);
+@@ -23,6 +26,9 @@ __generic_copy_to_user(void __user *to, const void *from, unsigned long n)
+ unsigned long
+ __generic_copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
++ if ((long)n < 0)
++ return n;
++
+ prefetchw(to);
+ if (access_ok(VERIFY_READ, from, n))
+ __copy_user_zeroing(to,from,n);
+diff --git a/arch/m68k/include/asm/cache.h b/arch/m68k/include/asm/cache.h
+index 0395c51..5f26031 100644
+--- a/arch/m68k/include/asm/cache.h
++++ b/arch/m68k/include/asm/cache.h
+@@ -4,9 +4,11 @@
+ #ifndef __ARCH_M68K_CACHE_H
+ #define __ARCH_M68K_CACHE_H
+
++#include <linux/const.h>
++
+ /* bytes per L1 cache line */
+ #define L1_CACHE_SHIFT 4
+-#define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+
+diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
+index 4e5aa2f..172c469 100644
+--- a/arch/m68k/kernel/time.c
++++ b/arch/m68k/kernel/time.c
+@@ -107,6 +107,7 @@ static int rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+
+ switch (cmd) {
+ case RTC_PLL_GET:
++ memset(&pll, 0, sizeof(pll));
+ if (!mach_get_rtc_pll || mach_get_rtc_pll(&pll))
+ return -EINVAL;
+ return copy_to_user(argp, &pll, sizeof pll) ? -EFAULT : 0;
+diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
+index db1b7da..8e13684 100644
+--- a/arch/metag/mm/hugetlbpage.c
++++ b/arch/metag/mm/hugetlbpage.c
+@@ -189,6 +189,7 @@ hugetlb_get_unmapped_area_new_pmd(unsigned long len)
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & HUGEPT_MASK;
+ info.align_offset = 0;
++ info.threadstack_offset = 0;
+ return vm_unmapped_area(&info);
+ }
+
+diff --git a/arch/microblaze/include/asm/cache.h b/arch/microblaze/include/asm/cache.h
+index 4efe96a..60e8699 100644
+--- a/arch/microblaze/include/asm/cache.h
++++ b/arch/microblaze/include/asm/cache.h
+@@ -13,11 +13,12 @@
+ #ifndef _ASM_MICROBLAZE_CACHE_H
+ #define _ASM_MICROBLAZE_CACHE_H
+
++#include <linux/const.h>
+ #include <asm/registers.h>
+
+ #define L1_CACHE_SHIFT 5
+ /* word-granular cache in microblaze */
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define SMP_CACHE_BYTES L1_CACHE_BYTES
+
+diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild
+index 5c3f688..f8cc1b3 100644
+--- a/arch/mips/Kbuild
++++ b/arch/mips/Kbuild
+@@ -1,7 +1,7 @@
+ # Fail on warnings - also for files referenced in subdirs
+ # -Werror can be disabled for specific files using:
+ # CFLAGS_<file.o> := -Wno-error
+-subdir-ccflags-y := -Werror
++# subdir-ccflags-y := -Werror
+
+ # platform specific definitions
+ include arch/mips/Kbuild.platforms
+diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
+index 212ff92..36b3437 100644
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -50,6 +50,7 @@ config MIPS
+ select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_NMI
+ select VIRT_TO_BUS
++ select HAVE_GCC_PLUGINS
+ select MODULES_USE_ELF_REL if MODULES
+ select MODULES_USE_ELF_RELA if MODULES && 64BIT
+ select CLONE_BACKWARDS
+@@ -2561,7 +2562,7 @@ config RELOCATION_TABLE_SIZE
+
+ config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+- depends on RELOCATABLE
++ depends on RELOCATABLE && BROKEN_SECURITY
+ ---help---
+ Randomizes the physical and virtual address at which the
+ kernel image is loaded, as a security feature that
+@@ -2777,6 +2778,7 @@ source "kernel/Kconfig.preempt"
+ config KEXEC
+ bool "Kexec system call"
+ select KEXEC_CORE
++ depends on !GRKERNSEC_KMEM
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
+index 0ab176b..c4469a4 100644
+--- a/arch/mips/include/asm/atomic.h
++++ b/arch/mips/include/asm/atomic.h
+@@ -22,15 +22,39 @@
+ #include <asm/cmpxchg.h>
+ #include <asm/war.h>
+
++#ifdef CONFIG_GENERIC_ATOMIC64
++#include <asm-generic/atomic64.h>
++#endif
++
+ #define ATOMIC_INIT(i) { (i) }
+
++#ifdef CONFIG_64BIT
++#define _ASM_EXTABLE(from, to) \
++" .section __ex_table,\"a\"\n" \
++" .dword " #from ", " #to"\n" \
++" .previous\n"
++#else
++#define _ASM_EXTABLE(from, to) \
++" .section __ex_table,\"a\"\n" \
++" .word " #from ", " #to"\n" \
++" .previous\n"
++#endif
++
+ /*
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+-#define atomic_read(v) READ_ONCE((v)->counter)
++static inline int atomic_read(const atomic_t *v)
++{
++ return READ_ONCE(v->counter);
++}
++
++static inline int atomic_read_unchecked(const atomic_unchecked_t *v)
++{
++ return READ_ONCE(v->counter);
++}
+
+ /*
+ * atomic_set - set atomic variable
+@@ -39,47 +63,77 @@
+ *
+ * Atomically sets the value of @v to @i.
+ */
+-#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
++static inline void atomic_set(atomic_t *v, int i)
++{
++ WRITE_ONCE(v->counter, i);
++}
+
+-#define ATOMIC_OP(op, c_op, asm_op) \
+-static __inline__ void atomic_##op(int i, atomic_t * v) \
++static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i)
++{
++ WRITE_ONCE(v->counter, i);
++}
++
++#ifdef CONFIG_PAX_REFCOUNT
++#define __OVERFLOW_POST \
++ " b 4f \n" \
++ " .set noreorder \n" \
++ "3: b 5f \n" \
++ " move %0, %1 \n" \
++ " .set reorder \n"
++#define __OVERFLOW_EXTABLE \
++ "3:\n" \
++ _ASM_EXTABLE(2b, 3b)
++#else
++#define __OVERFLOW_POST
++#define __OVERFLOW_EXTABLE
++#endif
++
++#define __ATOMIC_OP(op, suffix, asm_op, extable) \
++static inline void atomic_##op##suffix(int i, atomic##suffix##_t * v) \
+ { \
+ if (kernel_uses_llsc && R10000_LLSC_WAR) { \
+ int temp; \
+ \
+ __asm__ __volatile__( \
+- " .set arch=r4000 \n" \
+- "1: ll %0, %1 # atomic_" #op " \n" \
+- " " #asm_op " %0, %2 \n" \
++ " .set mips3 \n" \
++ "1: ll %0, %1 # atomic_" #op #suffix "\n" \
++ "2: " #asm_op " %0, %2 \n" \
+ " sc %0, %1 \n" \
+ " beqzl %0, 1b \n" \
++ extable \
+ " .set mips0 \n" \
+ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } else if (kernel_uses_llsc) { \
+ int temp; \
+ \
+- do { \
+- __asm__ __volatile__( \
+- " .set "MIPS_ISA_LEVEL" \n" \
+- " ll %0, %1 # atomic_" #op "\n" \
+- " " #asm_op " %0, %2 \n" \
+- " sc %0, %1 \n" \
+- " .set mips0 \n" \
+- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
+- : "Ir" (i)); \
+- } while (unlikely(!temp)); \
++ __asm__ __volatile__( \
++ " .set "MIPS_ISA_LEVEL" \n" \
++ "1: ll %0, %1 # atomic_" #op #suffix "\n" \
++ "2: " #asm_op " %0, %2 \n" \
++ " sc %0, %1 \n" \
++ " beqz %0, 1b \n" \
++ extable \
++ " .set mips0 \n" \
++ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
++ : "Ir" (i)); \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+- v->counter c_op i; \
++ __asm__ __volatile__( \
++ "2: " #asm_op " %0, %1 \n" \
++ extable \
++ : "+r" (v->counter) : "Ir" (i)); \
+ raw_local_irq_restore(flags); \
+ } \
+ }
+
+-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+-static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
++#define ATOMIC_OP(op, asm_op) __ATOMIC_OP(op, _unchecked, asm_op##u, ) \
++ __ATOMIC_OP(op, , asm_op, __OVERFLOW_EXTABLE)
++
++#define __ATOMIC_OP_RETURN(op, suffix, asm_op, post_op, extable) \
++static inline int atomic_##op##_return##suffix##_relaxed(int i, atomic##suffix##_t * v) \
+ { \
+ int result; \
+ \
+@@ -87,12 +141,15 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
+ int temp; \
+ \
+ __asm__ __volatile__( \
+- " .set arch=r4000 \n" \
+- "1: ll %1, %2 # atomic_" #op "_return \n" \
+- " " #asm_op " %0, %1, %3 \n" \
++ " .set mips3 \n" \
++ "1: ll %1, %2 # atomic_" #op "_return" #suffix"\n" \
++ "2: " #asm_op " %0, %1, %3 \n" \
+ " sc %0, %2 \n" \
+ " beqzl %0, 1b \n" \
+- " " #asm_op " %0, %1, %3 \n" \
++ post_op \
++ extable \
++ "4: " #asm_op " %0, %1, %3 \n" \
++ "5: \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+@@ -100,32 +157,40 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
+ } else if (kernel_uses_llsc) { \
+ int temp; \
+ \
+- do { \
+- __asm__ __volatile__( \
+- " .set "MIPS_ISA_LEVEL" \n" \
+- " ll %1, %2 # atomic_" #op "_return \n" \
+- " " #asm_op " %0, %1, %3 \n" \
+- " sc %0, %2 \n" \
+- " .set mips0 \n" \
+- : "=&r" (result), "=&r" (temp), \
+- "+" GCC_OFF_SMALL_ASM() (v->counter) \
+- : "Ir" (i)); \
+- } while (unlikely(!result)); \
+- \
+- result = temp; result c_op i; \
++ __asm__ __volatile__( \
++ " .set "MIPS_ISA_LEVEL" \n" \
++ "1: ll %1, %2 # atomic_" #op "_return" #suffix "\n" \
++ "2: " #asm_op " %0, %1, %3 \n" \
++ " sc %0, %2 \n" " beqz %0, 1b \n" /* retry when sc fails; replaces the removed do/while loop */ \
++ post_op \
++ extable \
++ "4: " #asm_op " %0, %1, %3 \n" \
++ "5: \n" \
++ " .set mips0 \n" \
++ : "=&r" (result), "=&r" (temp), \
++ "+" GCC_OFF_SMALL_ASM() (v->counter) \
++ : "Ir" (i)); \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+- result = v->counter; \
+- result c_op i; \
+- v->counter = result; \
++ __asm__ __volatile__( \
++ " lw %0, %1 \n" \
++ "2: " #asm_op " %0, %0, %2 \n" /* %1 is a memory operand; operate on the loaded register */ \
++ " sw %0, %1 \n" \
++ "3: \n" \
++ extable \
++ : "=&r" (result), "+" GCC_OFF_SMALL_ASM() (v->counter) \
++ : "Ir" (i)); \
+ raw_local_irq_restore(flags); \
+ } \
+ \
+ return result; \
+ }
+
++#define ATOMIC_OP_RETURN(op, asm_op) __ATOMIC_OP_RETURN(op, _unchecked, asm_op##u, , ) \
++ __ATOMIC_OP_RETURN(op, , asm_op, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
+ #define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
+ { \
+@@ -173,13 +238,13 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
+ return result; \
+ }
+
+-#define ATOMIC_OPS(op, c_op, asm_op) \
+- ATOMIC_OP(op, c_op, asm_op) \
+- ATOMIC_OP_RETURN(op, c_op, asm_op) \
+- ATOMIC_FETCH_OP(op, c_op, asm_op)
++#define ATOMIC_OPS(op, c_op, asm_op) \
++ ATOMIC_OP(op, asm_op) \
++ ATOMIC_OP_RETURN(op, asm_op) \
++ ATOMIC_FETCH_OP(op, c_op, asm_op) /* ATOMIC_FETCH_OP still takes c_op */
+
+-ATOMIC_OPS(add, +=, addu)
+-ATOMIC_OPS(sub, -=, subu)
++ATOMIC_OPS(add, +=, addu)
++ATOMIC_OPS(sub, -=, subu)
+
+ #define atomic_add_return_relaxed atomic_add_return_relaxed
+ #define atomic_sub_return_relaxed atomic_sub_return_relaxed
+@@ -187,13 +252,13 @@ ATOMIC_OPS(sub, -=, subu)
+ #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+ #undef ATOMIC_OPS
+-#define ATOMIC_OPS(op, c_op, asm_op) \
+- ATOMIC_OP(op, c_op, asm_op) \
+- ATOMIC_FETCH_OP(op, c_op, asm_op)
++#define ATOMIC_OPS(op, c_op, asm_op) \
++ ATOMIC_OP(op, asm_op) \
++ ATOMIC_FETCH_OP(op, c_op, asm_op) /* ATOMIC_FETCH_OP still takes c_op */
+
+-ATOMIC_OPS(and, &=, and)
+-ATOMIC_OPS(or, |=, or)
+-ATOMIC_OPS(xor, ^=, xor)
++ATOMIC_OPS(and, &=, and)
++ATOMIC_OPS(or, |=, or)
++ATOMIC_OPS(xor, ^=, xor)
+
+ #define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+ #define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+@@ -202,7 +267,9 @@ ATOMIC_OPS(xor, ^=, xor)
+ #undef ATOMIC_OPS
+ #undef ATOMIC_FETCH_OP
+ #undef ATOMIC_OP_RETURN
++#undef __ATOMIC_OP_RETURN
+ #undef ATOMIC_OP
++#undef __ATOMIC_OP
+
+ /*
+ * atomic_sub_if_positive - conditionally subtract integer from atomic variable
+@@ -212,7 +279,7 @@ ATOMIC_OPS(xor, ^=, xor)
+ * Atomically test @v and subtract @i if @v is greater or equal than @i.
+ * The function returns the old value of @v minus @i.
+ */
+-static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
++static __inline__ int atomic_sub_if_positive(int i, atomic_t *v)
+ {
+ int result;
+
+@@ -222,7 +289,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
+ int temp;
+
+ __asm__ __volatile__(
+- " .set arch=r4000 \n"
++ " .set "MIPS_ISA_LEVEL" \n"
+ "1: ll %1, %2 # atomic_sub_if_positive\n"
+ " subu %0, %1, %3 \n"
+ " bltz %0, 1f \n"
+@@ -271,8 +338,26 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
+ return result;
+ }
+
+-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+-#define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
++static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
++{
++ return cmpxchg(&v->counter, old, new);
++}
++
++static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old,
++ int new)
++{
++ return cmpxchg(&(v->counter), old, new);
++}
++
++static inline int atomic_xchg(atomic_t *v, int new)
++{
++ return xchg(&v->counter, new);
++}
++
++static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new)
++{
++ return xchg(&(v->counter), new);
++}
+
+ /**
+ * __atomic_add_unless - add unless the number is a given value
+@@ -300,6 +385,10 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+
+ #define atomic_dec_return(v) atomic_sub_return(1, (v))
+ #define atomic_inc_return(v) atomic_add_return(1, (v))
++static __inline__ int atomic_inc_return_unchecked(atomic_unchecked_t *v)
++{
++ return atomic_add_return_unchecked(1, v);
++}
+
+ /*
+ * atomic_sub_and_test - subtract value from variable and test result
+@@ -321,6 +410,10 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+ * other cases.
+ */
+ #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
++static __inline__ int atomic_inc_and_test_unchecked(atomic_unchecked_t *v)
++{
++ return atomic_add_return_unchecked(1, v) == 0;
++}
+
+ /*
+ * atomic_dec_and_test - decrement by 1 and test
+@@ -345,6 +438,10 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+ * Atomically increments @v by 1.
+ */
+ #define atomic_inc(v) atomic_add(1, (v))
++static __inline__ void atomic_inc_unchecked(atomic_unchecked_t *v)
++{
++ atomic_add_unchecked(1, v);
++}
+
+ /*
+ * atomic_dec - decrement and test
+@@ -353,6 +450,10 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+ * Atomically decrements @v by 1.
+ */
+ #define atomic_dec(v) atomic_sub(1, (v))
++static __inline__ void atomic_dec_unchecked(atomic_unchecked_t *v)
++{
++ atomic_sub_unchecked(1, v);
++}
+
+ /*
+ * atomic_add_negative - add and test if negative
+@@ -374,54 +475,77 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+ * @v: pointer of type atomic64_t
+ *
+ */
+-#define atomic64_read(v) READ_ONCE((v)->counter)
++static inline long atomic64_read(const atomic64_t *v)
++{
++ return READ_ONCE(v->counter);
++}
++
++static inline long atomic64_read_unchecked(const atomic64_unchecked_t *v)
++{
++ return READ_ONCE(v->counter);
++}
+
+ /*
+ * atomic64_set - set atomic variable
+ * @v: pointer of type atomic64_t
+ * @i: required value
+ */
+-#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
++static inline void atomic64_set(atomic64_t *v, long i)
++{
++ WRITE_ONCE(v->counter, i);
++}
+
+-#define ATOMIC64_OP(op, c_op, asm_op) \
+-static __inline__ void atomic64_##op(long i, atomic64_t * v) \
++static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i)
++{
++ WRITE_ONCE(v->counter, i);
++}
++
++#define __ATOMIC64_OP(op, suffix, asm_op, extable) \
++static inline void atomic64_##op##suffix(long i, atomic64##suffix##_t * v) \
+ { \
+ if (kernel_uses_llsc && R10000_LLSC_WAR) { \
+ long temp; \
+ \
+ __asm__ __volatile__( \
+- " .set arch=r4000 \n" \
+- "1: lld %0, %1 # atomic64_" #op " \n" \
+- " " #asm_op " %0, %2 \n" \
++ " .set "MIPS_ISA_LEVEL" \n" \
++ "1: lld %0, %1 # atomic64_" #op #suffix "\n" \
++ "2: " #asm_op " %0, %2 \n" \
+ " scd %0, %1 \n" \
+ " beqzl %0, 1b \n" \
++ extable \
+ " .set mips0 \n" \
+ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } else if (kernel_uses_llsc) { \
+ long temp; \
+ \
+- do { \
+- __asm__ __volatile__( \
+- " .set "MIPS_ISA_LEVEL" \n" \
+- " lld %0, %1 # atomic64_" #op "\n" \
+- " " #asm_op " %0, %2 \n" \
+- " scd %0, %1 \n" \
+- " .set mips0 \n" \
+- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
+- : "Ir" (i)); \
+- } while (unlikely(!temp)); \
++ __asm__ __volatile__( \
++ " .set "MIPS_ISA_LEVEL" \n" \
++ "1: lld %0, %1 # atomic64_" #op #suffix "\n" \
++ "2: " #asm_op " %0, %2 \n" \
++ " scd %0, %1 \n" \
++ " beqz %0, 1b \n" \
++ extable \
++ " .set mips0 \n" \
++ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
++ : "Ir" (i)); \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+- v->counter c_op i; \
++ __asm__ __volatile__( \
++ "2: " #asm_op " %0, %1 \n" \
++ extable \
++ : "+r" (v->counter) : "Ir" (i)); /* register operand, as in the 32-bit variant */ \
+ raw_local_irq_restore(flags); \
+ } \
+ }
+
+-#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \
+-static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
++#define ATOMIC64_OP(op, asm_op) __ATOMIC64_OP(op, _unchecked, asm_op##u, ) \
++ __ATOMIC64_OP(op, , asm_op, __OVERFLOW_EXTABLE)
++
++#define __ATOMIC64_OP_RETURN(op, suffix, asm_op, post_op, extable) \
++static inline long atomic64_##op##_return##suffix##_relaxed(long i, atomic64##suffix##_t * v)\
+ { \
+ long result; \
+ \
+@@ -429,12 +553,15 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
+ long temp; \
+ \
+ __asm__ __volatile__( \
+- " .set arch=r4000 \n" \
++ " .set mips3 \n" \
+ "1: lld %1, %2 # atomic64_" #op "_return\n" \
+- " " #asm_op " %0, %1, %3 \n" \
++ "2: " #asm_op " %0, %1, %3 \n" \
+ " scd %0, %2 \n" \
+ " beqzl %0, 1b \n" \
+- " " #asm_op " %0, %1, %3 \n" \
++ post_op \
++ extable \
++ "4: " #asm_op " %0, %1, %3 \n" \
++ "5: \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+@@ -442,33 +569,42 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
+ } else if (kernel_uses_llsc) { \
+ long temp; \
+ \
+- do { \
+- __asm__ __volatile__( \
+- " .set "MIPS_ISA_LEVEL" \n" \
+- " lld %1, %2 # atomic64_" #op "_return\n" \
+- " " #asm_op " %0, %1, %3 \n" \
+- " scd %0, %2 \n" \
+- " .set mips0 \n" \
+- : "=&r" (result), "=&r" (temp), \
+- "=" GCC_OFF_SMALL_ASM() (v->counter) \
+- : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \
+- : "memory"); \
+- } while (unlikely(!result)); \
+- \
+- result = temp; result c_op i; \
++ __asm__ __volatile__( \
++ " .set "MIPS_ISA_LEVEL" \n" \
++ "1: lld %1, %2 # atomic64_" #op "_return" #suffix "\n"\
++ "2: " #asm_op " %0, %1, %3 \n" \
++ " scd %0, %2 \n" \
++ " beqz %0, 1b \n" \
++ post_op \
++ extable \
++ "4: " #asm_op " %0, %1, %3 \n" \
++ "5: \n" \
++ " .set mips0 \n" \
++ : "=&r" (result), "=&r" (temp), \
++ "=" GCC_OFF_SMALL_ASM() (v->counter) \
++ : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \
++ : "memory"); \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+- result = v->counter; \
+- result c_op i; \
+- v->counter = result; \
++ __asm__ __volatile__( \
++ " ld %0, %1 \n" \
++ "2: " #asm_op " %0, %0, %2 \n" /* %1 is a memory operand; operate on the loaded register */ \
++ " sd %0, %1 \n" \
++ "3: \n" \
++ extable \
++ : "=&r" (result), "+" GCC_OFF_SMALL_ASM() (v->counter) \
++ : "Ir" (i)); \
+ raw_local_irq_restore(flags); \
+ } \
+ \
+ return result; \
+ }
+
++#define ATOMIC64_OP_RETURN(op, asm_op) __ATOMIC64_OP_RETURN(op, _unchecked, asm_op##u, , ) \
++ __ATOMIC64_OP_RETURN(op, , asm_op, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
+ #define ATOMIC64_FETCH_OP(op, c_op, asm_op) \
+ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+ { \
+@@ -517,13 +653,13 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+ return result; \
+ }
+
+-#define ATOMIC64_OPS(op, c_op, asm_op) \
+- ATOMIC64_OP(op, c_op, asm_op) \
+- ATOMIC64_OP_RETURN(op, c_op, asm_op) \
+- ATOMIC64_FETCH_OP(op, c_op, asm_op)
++#define ATOMIC64_OPS(op, c_op, asm_op) \
++ ATOMIC64_OP(op, asm_op) \
++ ATOMIC64_OP_RETURN(op, asm_op) \
++ ATOMIC64_FETCH_OP(op, c_op, asm_op) /* ATOMIC64_FETCH_OP still takes c_op */
+
+-ATOMIC64_OPS(add, +=, daddu)
+-ATOMIC64_OPS(sub, -=, dsubu)
++ATOMIC64_OPS(add, +=, daddu)
++ATOMIC64_OPS(sub, -=, dsubu)
+
+ #define atomic64_add_return_relaxed atomic64_add_return_relaxed
+ #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+@@ -531,13 +667,13 @@ ATOMIC64_OPS(sub, -=, dsubu)
+ #define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+ #undef ATOMIC64_OPS
+-#define ATOMIC64_OPS(op, c_op, asm_op) \
+- ATOMIC64_OP(op, c_op, asm_op) \
+- ATOMIC64_FETCH_OP(op, c_op, asm_op)
++#define ATOMIC64_OPS(op, c_op, asm_op) \
++ ATOMIC64_OP(op, asm_op) \
++ ATOMIC64_FETCH_OP(op, c_op, asm_op) /* ATOMIC64_FETCH_OP still takes c_op */
+
+-ATOMIC64_OPS(and, &=, and)
+-ATOMIC64_OPS(or, |=, or)
+-ATOMIC64_OPS(xor, ^=, xor)
++ATOMIC64_OPS(and, &=, and)
++ATOMIC64_OPS(or, |=, or)
++ATOMIC64_OPS(xor, ^=, xor)
+
+ #define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+ #define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+@@ -546,7 +682,11 @@ ATOMIC64_OPS(xor, ^=, xor)
+ #undef ATOMIC64_OPS
+ #undef ATOMIC64_FETCH_OP
+ #undef ATOMIC64_OP_RETURN
++#undef __ATOMIC64_OP_RETURN
+ #undef ATOMIC64_OP
++#undef __ATOMIC64_OP
++#undef __OVERFLOW_EXTABLE
++#undef __OVERFLOW_POST
+
+ /*
+ * atomic64_sub_if_positive - conditionally subtract integer from atomic
+@@ -557,7 +697,7 @@ ATOMIC64_OPS(xor, ^=, xor)
+ * Atomically test @v and subtract @i if @v is greater or equal than @i.
+ * The function returns the old value of @v minus @i.
+ */
+-static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
++static __inline__ long atomic64_sub_if_positive(long i, atomic64_t *v)
+ {
+ long result;
+
+@@ -567,7 +707,7 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
+ long temp;
+
+ __asm__ __volatile__(
+- " .set arch=r4000 \n"
++ " .set "MIPS_ISA_LEVEL" \n"
+ "1: lld %1, %2 # atomic64_sub_if_positive\n"
+ " dsubu %0, %1, %3 \n"
+ " bltz %0, 1f \n"
+@@ -616,9 +756,26 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
+ return result;
+ }
+
+-#define atomic64_cmpxchg(v, o, n) \
+- ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
+-#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
++static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
++{
++ return cmpxchg(&v->counter, old, new);
++}
++
++static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old,
++ long new)
++{
++ return cmpxchg(&(v->counter), old, new);
++}
++
++static inline long atomic64_xchg(atomic64_t *v, long new)
++{
++ return xchg(&v->counter, new);
++}
++
++static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new)
++{
++ return xchg(&(v->counter), new);
++}
+
+ /**
+ * atomic64_add_unless - add unless the number is a given value
+@@ -648,6 +805,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
+
+ #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
+ #define atomic64_inc_return(v) atomic64_add_return(1, (v))
++#define atomic64_inc_return_unchecked(v) atomic64_add_return_unchecked(1, (v))
+
+ /*
+ * atomic64_sub_and_test - subtract value from variable and test result
+@@ -669,6 +827,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
+ * other cases.
+ */
+ #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
++#define atomic64_inc_and_test_unchecked(v) (atomic64_add_return_unchecked(1, (v)) == 0) /* true when the incremented value is 0 */
+
+ /*
+ * atomic64_dec_and_test - decrement by 1 and test
+@@ -693,6 +852,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
+ * Atomically increments @v by 1.
+ */
+ #define atomic64_inc(v) atomic64_add(1, (v))
++#define atomic64_inc_unchecked(v) atomic64_add_unchecked(1, (v))
+
+ /*
+ * atomic64_dec - decrement and test
+@@ -701,6 +861,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
+ * Atomically decrements @v by 1.
+ */
+ #define atomic64_dec(v) atomic64_sub(1, (v))
++#define atomic64_dec_unchecked(v) atomic64_sub_unchecked(1, (v))
+
+ /*
+ * atomic64_add_negative - add and test if negative
+diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
+index b4db69f..8f3b093 100644
+--- a/arch/mips/include/asm/cache.h
++++ b/arch/mips/include/asm/cache.h
+@@ -9,10 +9,11 @@
+ #ifndef _ASM_CACHE_H
+ #define _ASM_CACHE_H
+
++#include <linux/const.h>
+ #include <kmalloc.h>
+
+ #define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define SMP_CACHE_SHIFT L1_CACHE_SHIFT
+ #define SMP_CACHE_BYTES L1_CACHE_BYTES
+diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
+index 2b3dc29..1f7bdc4 100644
+--- a/arch/mips/include/asm/elf.h
++++ b/arch/mips/include/asm/elf.h
+@@ -458,6 +458,13 @@ extern const char *__elf_platform;
+ #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
+ #endif
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE (TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL)
++
++#define PAX_DELTA_MMAP_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
++#define PAX_DELTA_STACK_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
++#endif
++
+ /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+ #define ARCH_DLINFO \
+ do { \
+diff --git a/arch/mips/include/asm/exec.h b/arch/mips/include/asm/exec.h
+index c1f6afa..38cc6e9 100644
+--- a/arch/mips/include/asm/exec.h
++++ b/arch/mips/include/asm/exec.h
+@@ -12,6 +12,6 @@
+ #ifndef _ASM_EXEC_H
+ #define _ASM_EXEC_H
+
+-extern unsigned long arch_align_stack(unsigned long sp);
++#define arch_align_stack(x) ((x) & ~0xfUL)
+
+ #endif /* _ASM_EXEC_H */
+diff --git a/arch/mips/include/asm/hw_irq.h b/arch/mips/include/asm/hw_irq.h
+index 9e8ef59..1139d6b 100644
+--- a/arch/mips/include/asm/hw_irq.h
++++ b/arch/mips/include/asm/hw_irq.h
+@@ -10,7 +10,7 @@
+
+ #include <linux/atomic.h>
+
+-extern atomic_t irq_err_count;
++extern atomic_unchecked_t irq_err_count;
+
+ /*
+ * interrupt-retrigger: NOP for now. This may not be appropriate for all
+diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
+index 15e0fec..3ee3eec 100644
+--- a/arch/mips/include/asm/irq.h
++++ b/arch/mips/include/asm/irq.h
+@@ -11,7 +11,6 @@
+
+ #include <linux/linkage.h>
+ #include <linux/smp.h>
+-#include <linux/irqdomain.h>
+
+ #include <asm/mipsmtregs.h>
+
+diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
+index 8feaed6..1bd8a64 100644
+--- a/arch/mips/include/asm/local.h
++++ b/arch/mips/include/asm/local.h
+@@ -13,15 +13,25 @@ typedef struct
+ atomic_long_t a;
+ } local_t;
+
++typedef struct {
++ atomic_long_unchecked_t a;
++} local_unchecked_t;
++
+ #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
+
+ #define local_read(l) atomic_long_read(&(l)->a)
++#define local_read_unchecked(l) atomic_long_read_unchecked(&(l)->a)
+ #define local_set(l, i) atomic_long_set(&(l)->a, (i))
++#define local_set_unchecked(l, i) atomic_long_set_unchecked(&(l)->a, (i))
+
+ #define local_add(i, l) atomic_long_add((i), (&(l)->a))
++#define local_add_unchecked(i, l) atomic_long_add_unchecked((i), (&(l)->a))
+ #define local_sub(i, l) atomic_long_sub((i), (&(l)->a))
++#define local_sub_unchecked(i, l) atomic_long_sub_unchecked((i), (&(l)->a))
+ #define local_inc(l) atomic_long_inc(&(l)->a)
++#define local_inc_unchecked(l) atomic_long_inc_unchecked(&(l)->a)
+ #define local_dec(l) atomic_long_dec(&(l)->a)
++#define local_dec_unchecked(l) atomic_long_dec_unchecked(&(l)->a)
+
+ /*
+ * Same as above, but return the result value
+@@ -71,6 +81,51 @@ static __inline__ long local_add_return(long i, local_t * l)
+ return result;
+ }
+
++static __inline__ long local_add_return_unchecked(long i, local_unchecked_t * l)
++{
++ unsigned long result;
++
++ if (kernel_uses_llsc && R10000_LLSC_WAR) {
++ unsigned long temp;
++
++ __asm__ __volatile__(
++ " .set mips3 \n"
++ "1:" __LL "%1, %2 # local_add_return \n"
++ " addu %0, %1, %3 \n"
++ __SC "%0, %2 \n"
++ " beqzl %0, 1b \n"
++ " addu %0, %1, %3 \n"
++ " .set mips0 \n"
++ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
++ : "Ir" (i), "m" (l->a.counter)
++ : "memory");
++ } else if (kernel_uses_llsc) {
++ unsigned long temp;
++
++ __asm__ __volatile__(
++ " .set mips3 \n"
++ "1:" __LL "%1, %2 # local_add_return \n"
++ " addu %0, %1, %3 \n"
++ __SC "%0, %2 \n"
++ " beqz %0, 1b \n"
++ " addu %0, %1, %3 \n"
++ " .set mips0 \n"
++ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
++ : "Ir" (i), "m" (l->a.counter)
++ : "memory");
++ } else {
++ unsigned long flags;
++
++ local_irq_save(flags);
++ result = l->a.counter;
++ result += i;
++ l->a.counter = result;
++ local_irq_restore(flags);
++ }
++
++ return result;
++}
++
+ static __inline__ long local_sub_return(long i, local_t * l)
+ {
+ unsigned long result;
+@@ -118,6 +173,8 @@ static __inline__ long local_sub_return(long i, local_t * l)
+
+ #define local_cmpxchg(l, o, n) \
+ ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
++#define local_cmpxchg_unchecked(l, o, n) \
++ ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+ #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
+
+ /**
+diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
+index 5f98759..a3a7cb2 100644
+--- a/arch/mips/include/asm/page.h
++++ b/arch/mips/include/asm/page.h
+@@ -118,7 +118,7 @@ extern void copy_user_highpage(struct page *to, struct page *from,
+ #ifdef CONFIG_CPU_MIPS32
+ typedef struct { unsigned long pte_low, pte_high; } pte_t;
+ #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
+- #define __pte(x) ({ pte_t __pte = {(x), ((unsigned long long)(x)) >> 32}; __pte; })
++ #define __pte(x) ({ pte_t __pte = {(x), (x) >> 32}; __pte; })
+ #else
+ typedef struct { unsigned long long pte; } pte_t;
+ #define pte_val(x) ((x).pte)
+diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
+index 93c079a..1d6bf7c 100644
+--- a/arch/mips/include/asm/pgalloc.h
++++ b/arch/mips/include/asm/pgalloc.h
+@@ -37,6 +37,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ {
+ set_pud(pud, __pud((unsigned long)pmd));
+ }
++
++static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
++{
++ pud_populate(mm, pud, pmd);
++}
+ #endif
+
+ /*
+diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
+index 70128d3..471bc25 100644
+--- a/arch/mips/include/asm/pgtable.h
++++ b/arch/mips/include/asm/pgtable.h
+@@ -20,6 +20,9 @@
+ #include <asm/io.h>
+ #include <asm/pgtable-bits.h>
+
++#define ktla_ktva(addr) (addr)
++#define ktva_ktla(addr) (addr)
++
+ struct mm_struct;
+ struct vm_area_struct;
+
+diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
+index e309d8f..20eefec 100644
+--- a/arch/mips/include/asm/thread_info.h
++++ b/arch/mips/include/asm/thread_info.h
+@@ -101,6 +101,9 @@ static inline struct thread_info *current_thread_info(void)
+ #define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
+ #define TIF_UPROBE 6 /* breakpointed or singlestepping */
+ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
++/* li takes a 32bit immediate */
++#define TIF_GRSEC_SETXID 10 /* update credentials on syscall entry/exit */
++
+ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
+ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */
+ #define TIF_NOHZ 19 /* in adaptive nohz mode */
+@@ -137,14 +140,16 @@ static inline struct thread_info *current_thread_info(void)
+ #define _TIF_USEDMSA (1<<TIF_USEDMSA)
+ #define _TIF_MSA_CTX_LIVE (1<<TIF_MSA_CTX_LIVE)
+ #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
++#define _TIF_GRSEC_SETXID (1<<TIF_GRSEC_SETXID)
+
+ #define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \
+ _TIF_SYSCALL_AUDIT | \
+- _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
++ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
++ _TIF_GRSEC_SETXID)
+
+ /* work to do in syscall_trace_leave() */
+ #define _TIF_WORK_SYSCALL_EXIT (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \
+- _TIF_SYSCALL_AUDIT | _TIF_SYSCALL_TRACEPOINT)
++ _TIF_SYSCALL_AUDIT | _TIF_SYSCALL_TRACEPOINT | _TIF_GRSEC_SETXID)
+
+ /* work to do on interrupt/exception return */
+ #define _TIF_WORK_MASK \
+@@ -153,7 +158,7 @@ static inline struct thread_info *current_thread_info(void)
+ /* work to do on any return to u-space */
+ #define _TIF_ALLWORK_MASK (_TIF_NOHZ | _TIF_WORK_MASK | \
+ _TIF_WORK_SYSCALL_EXIT | \
+- _TIF_SYSCALL_TRACEPOINT)
++ _TIF_SYSCALL_TRACEPOINT | _TIF_GRSEC_SETXID)
+
+ /*
+ * We stash processor id into a COP0 register to retrieve it fast
+diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
+index 21a2aab..c00b80d 100644
+--- a/arch/mips/include/asm/uaccess.h
++++ b/arch/mips/include/asm/uaccess.h
+@@ -147,6 +147,7 @@ static inline bool eva_kernel_access(void)
+ __ok == 0; \
+ })
+
++#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
+ #define access_ok(type, addr, size) \
+ likely(__access_ok((addr), (size), __access_mask))
+
+diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
+index 58ad63d..051b4b7 100644
+--- a/arch/mips/kernel/binfmt_elfn32.c
++++ b/arch/mips/kernel/binfmt_elfn32.c
+@@ -36,6 +36,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+ #undef ELF_ET_DYN_BASE
+ #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2)
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE (TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL)
++
++#define PAX_DELTA_MMAP_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
++#define PAX_DELTA_STACK_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
++#endif
++
+ #include <asm/processor.h>
+ #include <linux/module.h>
+ #include <linux/elfcore.h>
+diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
+index 49fb881..b9ab7c2 100644
+--- a/arch/mips/kernel/binfmt_elfo32.c
++++ b/arch/mips/kernel/binfmt_elfo32.c
+@@ -40,6 +40,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+ #undef ELF_ET_DYN_BASE
+ #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2)
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE (TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL)
++
++#define PAX_DELTA_MMAP_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
++#define PAX_DELTA_STACK_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
++#endif
++
+ #include <asm/processor.h>
+
+ #include <linux/module.h>
+diff --git a/arch/mips/kernel/irq-gt641xx.c b/arch/mips/kernel/irq-gt641xx.c
+index 44a1f79..2bd6aa3 100644
+--- a/arch/mips/kernel/irq-gt641xx.c
++++ b/arch/mips/kernel/irq-gt641xx.c
+@@ -110,7 +110,7 @@ void gt641xx_irq_dispatch(void)
+ }
+ }
+
+- atomic_inc(&irq_err_count);
++ atomic_inc_unchecked(&irq_err_count);
+ }
+
+ void __init gt641xx_irq_init(void)
+diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
+index f25f7ea..19e1c62 100644
+--- a/arch/mips/kernel/irq.c
++++ b/arch/mips/kernel/irq.c
+@@ -34,17 +34,17 @@ void ack_bad_irq(unsigned int irq)
+ printk("unexpected IRQ # %d\n", irq);
+ }
+
+-atomic_t irq_err_count;
++atomic_unchecked_t irq_err_count;
+
+ int arch_show_interrupts(struct seq_file *p, int prec)
+ {
+- seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
++ seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read_unchecked(&irq_err_count));
+ return 0;
+ }
+
+ asmlinkage void spurious_interrupt(void)
+ {
+- atomic_inc(&irq_err_count);
++ atomic_inc_unchecked(&irq_err_count);
+ }
+
+ void __init init_IRQ(void)
+@@ -61,6 +61,8 @@ void __init init_IRQ(void)
+ }
+
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
++
++extern void gr_handle_kernel_exploit(void);
+ static inline void check_stack_overflow(void)
+ {
+ unsigned long sp;
+@@ -76,6 +78,7 @@ static inline void check_stack_overflow(void)
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
++ gr_handle_kernel_exploit();
+ }
+ }
+ #else
+diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
+index 5b31a94..15ac4a1 100644
+--- a/arch/mips/kernel/pm-cps.c
++++ b/arch/mips/kernel/pm-cps.c
+@@ -172,7 +172,7 @@ int cps_pm_enter_state(enum cps_pm_state state)
+ nc_core_ready_count = nc_addr;
+
+ /* Ensure ready_count is zero-initialised before the assembly runs */
+- ACCESS_ONCE(*nc_core_ready_count) = 0;
++ ACCESS_ONCE_RW(*nc_core_ready_count) = 0;
+ coupled_barrier(&per_cpu(pm_barrier, core), online);
+
+ /* Run the generated entry code */
+diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
+index d2d0615..46c1803 100644
+--- a/arch/mips/kernel/process.c
++++ b/arch/mips/kernel/process.c
+@@ -545,18 +545,6 @@ out:
+ return pc;
+ }
+
+-/*
+- * Don't forget that the stack pointer must be aligned on a 8 bytes
+- * boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
+- */
+-unsigned long arch_align_stack(unsigned long sp)
+-{
+- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+- sp -= get_random_int() & ~PAGE_MASK;
+-
+- return sp & ALMASK;
+-}
+-
+ static void arch_dump_stack(void *info)
+ {
+ struct pt_regs *regs;
+diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
+index 6103b24..8253315 100644
+--- a/arch/mips/kernel/ptrace.c
++++ b/arch/mips/kernel/ptrace.c
+@@ -882,6 +882,10 @@ long arch_ptrace(struct task_struct *child, long request,
+ return ret;
+ }
+
++#ifdef CONFIG_GRKERNSEC_SETXID
++extern void gr_delayed_cred_worker(void);
++#endif
++
+ /*
+ * Notification of system call entry/exit
+ * - triggered by current->work.syscall_trace
+@@ -899,6 +903,11 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+ if (secure_computing(NULL) == -1)
+ return -1;
+
++#ifdef CONFIG_GRKERNSEC_SETXID
++ if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID)))
++ gr_delayed_cred_worker();
++#endif
++
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->regs[2]);
+
+diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
+index 4472a7f..c5905e6 100644
+--- a/arch/mips/kernel/sync-r4k.c
++++ b/arch/mips/kernel/sync-r4k.c
+@@ -18,8 +18,8 @@
+ #include <asm/mipsregs.h>
+
+ static unsigned int initcount = 0;
+-static atomic_t count_count_start = ATOMIC_INIT(0);
+-static atomic_t count_count_stop = ATOMIC_INIT(0);
++static atomic_unchecked_t count_count_start = ATOMIC_INIT(0);
++static atomic_unchecked_t count_count_stop = ATOMIC_INIT(0);
+
+ #define COUNTON 100
+ #define NR_LOOPS 3
+@@ -46,13 +46,13 @@ void synchronise_count_master(int cpu)
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ /* slaves loop on '!= 2' */
+- while (atomic_read(&count_count_start) != 1)
++ while (atomic_read_unchecked(&count_count_start) != 1)
+ mb();
+- atomic_set(&count_count_stop, 0);
++ atomic_set_unchecked(&count_count_stop, 0);
+ smp_wmb();
+
+ /* Let the slave writes its count register */
+- atomic_inc(&count_count_start);
++ atomic_inc_unchecked(&count_count_start);
+
+ /* Count will be initialised to current timer */
+ if (i == 1)
+@@ -67,11 +67,11 @@ void synchronise_count_master(int cpu)
+ /*
+ * Wait for slave to leave the synchronization point:
+ */
+- while (atomic_read(&count_count_stop) != 1)
++ while (atomic_read_unchecked(&count_count_stop) != 1)
+ mb();
+- atomic_set(&count_count_start, 0);
++ atomic_set_unchecked(&count_count_start, 0);
+ smp_wmb();
+- atomic_inc(&count_count_stop);
++ atomic_inc_unchecked(&count_count_stop);
+ }
+ /* Arrange for an interrupt in a short while */
+ write_c0_compare(read_c0_count() + COUNTON);
+@@ -96,8 +96,8 @@ void synchronise_count_slave(int cpu)
+ */
+
+ for (i = 0; i < NR_LOOPS; i++) {
+- atomic_inc(&count_count_start);
+- while (atomic_read(&count_count_start) != 2)
++ atomic_inc_unchecked(&count_count_start);
++ while (atomic_read_unchecked(&count_count_start) != 2)
+ mb();
+
+ /*
+@@ -106,8 +106,8 @@ void synchronise_count_slave(int cpu)
+ if (i == NR_LOOPS-1)
+ write_c0_count(initcount);
+
+- atomic_inc(&count_count_stop);
+- while (atomic_read(&count_count_stop) != 2)
++ atomic_inc_unchecked(&count_count_stop);
++ while (atomic_read_unchecked(&count_count_stop) != 2)
+ mb();
+ }
+ /* Arrange for an interrupt in a short while */
+diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
+index 3de85be..73560ec 100644
+--- a/arch/mips/kernel/traps.c
++++ b/arch/mips/kernel/traps.c
+@@ -695,7 +695,18 @@ asmlinkage void do_ov(struct pt_regs *regs)
+ };
+
+ prev_state = exception_enter();
+- die_if_kernel("Integer overflow", regs);
++ if (unlikely(!user_mode(regs))) {
++
++#ifdef CONFIG_PAX_REFCOUNT
++ if (fixup_exception(regs)) {
++ pax_report_refcount_error(regs, NULL);
++ exception_exit(prev_state);
++ return;
++ }
++#endif
++
++ die("Integer overflow", regs);
++ }
+
+ force_sig_info(SIGFPE, &info, current);
+ exception_exit(prev_state);
+diff --git a/arch/mips/lib/ashldi3.c b/arch/mips/lib/ashldi3.c
+index 927dc94..27269ee 100644
+--- a/arch/mips/lib/ashldi3.c
++++ b/arch/mips/lib/ashldi3.c
+@@ -2,7 +2,11 @@
+
+ #include "libgcc.h"
+
+-long long notrace __ashldi3(long long u, word_type b)
++#ifdef CONFIG_64BIT
++DWtype notrace __ashlti3(DWtype u, word_type b)
++#else
++DWtype notrace __ashldi3(DWtype u, word_type b)
++#endif
+ {
+ DWunion uu, w;
+ word_type bm;
+@@ -11,19 +15,22 @@ long long notrace __ashldi3(long long u, word_type b)
+ return u;
+
+ uu.ll = u;
+- bm = 32 - b;
++ bm = BITS_PER_LONG - b;
+
+ if (bm <= 0) {
+ w.s.low = 0;
+- w.s.high = (unsigned int) uu.s.low << -bm;
++ w.s.high = (unsigned long) uu.s.low << -bm;
+ } else {
+- const unsigned int carries = (unsigned int) uu.s.low >> bm;
++ const unsigned long carries = (unsigned long) uu.s.low >> bm;
+
+- w.s.low = (unsigned int) uu.s.low << b;
+- w.s.high = ((unsigned int) uu.s.high << b) | carries;
++ w.s.low = (unsigned long) uu.s.low << b;
++ w.s.high = ((unsigned long) uu.s.high << b) | carries;
+ }
+
+ return w.ll;
+ }
+-
++#ifdef CONFIG_64BIT
++EXPORT_SYMBOL(__ashlti3);
++#else
+ EXPORT_SYMBOL(__ashldi3);
++#endif
+diff --git a/arch/mips/lib/ashrdi3.c b/arch/mips/lib/ashrdi3.c
+index 9fdf1a5..6741f0e 100644
+--- a/arch/mips/lib/ashrdi3.c
++++ b/arch/mips/lib/ashrdi3.c
+@@ -2,7 +2,11 @@
+
+ #include "libgcc.h"
+
+-long long notrace __ashrdi3(long long u, word_type b)
++#ifdef CONFIG_64BIT
++DWtype notrace __ashrti3(DWtype u, word_type b)
++#else
++DWtype notrace __ashrdi3(DWtype u, word_type b)
++#endif
+ {
+ DWunion uu, w;
+ word_type bm;
+@@ -11,21 +15,24 @@ long long notrace __ashrdi3(long long u, word_type b)
+ return u;
+
+ uu.ll = u;
+- bm = 32 - b;
++ bm = BITS_PER_LONG - b;
+
+ if (bm <= 0) {
+ /* w.s.high = 1..1 or 0..0 */
+ w.s.high =
+- uu.s.high >> 31;
++ uu.s.high >> (BITS_PER_LONG - 1);
+ w.s.low = uu.s.high >> -bm;
+ } else {
+- const unsigned int carries = (unsigned int) uu.s.high << bm;
++ const unsigned long carries = (unsigned long) uu.s.high << bm;
+
+ w.s.high = uu.s.high >> b;
+- w.s.low = ((unsigned int) uu.s.low >> b) | carries;
++ w.s.low = ((unsigned long) uu.s.low >> b) | carries;
+ }
+
+ return w.ll;
+ }
+-
++#ifdef CONFIG_64BIT
++EXPORT_SYMBOL(__ashrti3);
++#else
+ EXPORT_SYMBOL(__ashrdi3);
++#endif
+diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h
+index 05909d58..b03284b 100644
+--- a/arch/mips/lib/libgcc.h
++++ b/arch/mips/lib/libgcc.h
+@@ -5,13 +5,19 @@
+
+ typedef int word_type __attribute__ ((mode (__word__)));
+
++#ifdef CONFIG_64BIT
++typedef int DWtype __attribute__((mode(TI)));
++#else
++typedef long long DWtype;
++#endif
++
+ #ifdef __BIG_ENDIAN
+ struct DWstruct {
+- int high, low;
++ long high, low;
+ };
+ #elif defined(__LITTLE_ENDIAN)
+ struct DWstruct {
+- int low, high;
++ long low, high;
+ };
+ #else
+ #error I feel sick.
+@@ -19,7 +25,7 @@ struct DWstruct {
+
+ typedef union {
+ struct DWstruct s;
+- long long ll;
++ DWtype ll;
+ } DWunion;
+
+ #endif /* __ASM_LIBGCC_H */
+diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
+index 9560ad7..da27540 100644
+--- a/arch/mips/mm/fault.c
++++ b/arch/mips/mm/fault.c
+@@ -31,6 +31,23 @@
+
+ int show_unhandled_signals = 1;
+
++#ifdef CONFIG_PAX_PAGEEXEC
++void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
++{
++ unsigned long i;
++
++ printk(KERN_ERR "PAX: bytes at PC: ");
++ for (i = 0; i < 5; i++) {
++ unsigned int c;
++ if (get_user(c, (unsigned int *)pc+i))
++ printk(KERN_CONT "???????? ");
++ else
++ printk(KERN_CONT "%08x ", c);
++ }
++ printk("\n");
++}
++#endif
++
+ /*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+@@ -205,6 +222,14 @@ bad_area:
+ bad_area_nosemaphore:
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
++
++#ifdef CONFIG_PAX_PAGEEXEC
++ if (cpu_has_rixi && (mm->pax_flags & MF_PAX_PAGEEXEC) && !write && address == instruction_pointer(regs)) {
++ pax_report_fault(regs, (void *)address, (void *)user_stack_pointer(regs));
++ do_group_exit(SIGKILL);
++ }
++#endif
++
+ tsk->thread.cp0_badvaddr = address;
+ tsk->thread.error_code = write;
+ if (show_unhandled_signals &&
+diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
+index 72f7478..06abd2a 100644
+--- a/arch/mips/mm/init.c
++++ b/arch/mips/mm/init.c
+@@ -474,10 +474,10 @@ void __init mem_init(void)
+
+ #ifdef CONFIG_64BIT
+ if ((unsigned long) &_text > (unsigned long) CKSEG0)
+- /* The -4 is a hack so that user tools don't have to handle
++ /* The -0x2000-4 is a hack so that user tools don't have to handle
+ the overflow. */
+ kclist_add(&kcore_kseg0, (void *) CKSEG0,
+- 0x80000000 - 4, KCORE_TEXT);
++ 0x80000000 - 0x2000 - 4, KCORE_TEXT);
+ #endif
+ }
+ #endif /* !CONFIG_NEED_MULTIPLE_NODES */
+diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
+index 3530376..754dde3 100644
+--- a/arch/mips/mm/mmap.c
++++ b/arch/mips/mm/mmap.c
+@@ -59,6 +59,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
+ struct vm_area_struct *vma;
+ unsigned long addr = addr0;
+ int do_color_align;
++ unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags);
+ struct vm_unmapped_area_info info;
+
+ if (unlikely(len > TASK_SIZE))
+@@ -84,6 +85,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
+ do_color_align = 1;
+
+ /* requesting a specific address */
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(current->mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (addr) {
+ if (do_color_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+@@ -91,14 +97,14 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+- if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len, offset))
+ return addr;
+ }
+
+ info.length = len;
+ info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
++ info.threadstack_offset = offset;
+
+ if (dir == DOWN) {
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+@@ -160,14 +166,30 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ {
+ unsigned long random_factor = 0UL;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+ if (mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base += mm->delta_mmap;
++#endif
++
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor);
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
++#endif
++
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ }
+ }
+diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c
+index cfceaea..65deeb4 100644
+--- a/arch/mips/sgi-ip27/ip27-nmi.c
++++ b/arch/mips/sgi-ip27/ip27-nmi.c
+@@ -187,9 +187,9 @@ void
+ cont_nmi_dump(void)
+ {
+ #ifndef REAL_NMI_SIGNAL
+- static atomic_t nmied_cpus = ATOMIC_INIT(0);
++ static atomic_unchecked_t nmied_cpus = ATOMIC_INIT(0);
+
+- atomic_inc(&nmied_cpus);
++ atomic_inc_unchecked(&nmied_cpus);
+ #endif
+ /*
+ * Only allow 1 cpu to proceed
+@@ -233,7 +233,7 @@ cont_nmi_dump(void)
+ udelay(10000);
+ }
+ #else
+- while (atomic_read(&nmied_cpus) != num_online_cpus());
++ while (atomic_read_unchecked(&nmied_cpus) != num_online_cpus());
+ #endif
+
+ /*
+diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c
+index 160b880..3b53fdc 100644
+--- a/arch/mips/sni/rm200.c
++++ b/arch/mips/sni/rm200.c
+@@ -270,7 +270,7 @@ spurious_8259A_irq:
+ "spurious RM200 8259A interrupt: IRQ%d.\n", irq);
+ spurious_irq_mask |= irqmask;
+ }
+- atomic_inc(&irq_err_count);
++ atomic_inc_unchecked(&irq_err_count);
+ /*
+ * Theoretically we do not have to handle this IRQ,
+ * but in Linux this does not cause problems and is
+diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c
+index 41e873b..34d33a7 100644
+--- a/arch/mips/vr41xx/common/icu.c
++++ b/arch/mips/vr41xx/common/icu.c
+@@ -653,7 +653,7 @@ static int icu_get_irq(unsigned int irq)
+
+ printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2);
+
+- atomic_inc(&irq_err_count);
++ atomic_inc_unchecked(&irq_err_count);
+
+ return -1;
+ }
+diff --git a/arch/mips/vr41xx/common/irq.c b/arch/mips/vr41xx/common/irq.c
+index ae0e4ee..e8f0692 100644
+--- a/arch/mips/vr41xx/common/irq.c
++++ b/arch/mips/vr41xx/common/irq.c
+@@ -64,7 +64,7 @@ static void irq_dispatch(unsigned int irq)
+ irq_cascade_t *cascade;
+
+ if (irq >= NR_IRQS) {
+- atomic_inc(&irq_err_count);
++ atomic_inc_unchecked(&irq_err_count);
+ return;
+ }
+
+@@ -84,7 +84,7 @@ static void irq_dispatch(unsigned int irq)
+ ret = cascade->get_irq(irq);
+ irq = ret;
+ if (ret < 0)
+- atomic_inc(&irq_err_count);
++ atomic_inc_unchecked(&irq_err_count);
+ else
+ irq_dispatch(irq);
+ if (!irqd_irq_disabled(idata) && chip->irq_unmask)
+diff --git a/arch/mn10300/proc-mn103e010/include/proc/cache.h b/arch/mn10300/proc-mn103e010/include/proc/cache.h
+index 967d144..db12197 100644
+--- a/arch/mn10300/proc-mn103e010/include/proc/cache.h
++++ b/arch/mn10300/proc-mn103e010/include/proc/cache.h
+@@ -11,12 +11,14 @@
+ #ifndef _ASM_PROC_CACHE_H
+ #define _ASM_PROC_CACHE_H
+
++#include <linux/const.h>
++
+ /* L1 cache */
+
+ #define L1_CACHE_NWAYS 4 /* number of ways in caches */
+ #define L1_CACHE_NENTRIES 256 /* number of entries in each way */
+-#define L1_CACHE_BYTES 16 /* bytes per entry */
+ #define L1_CACHE_SHIFT 4 /* shift for bytes per entry */
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* bytes per entry */
+ #define L1_CACHE_WAYDISP 0x1000 /* displacement of one way from the next */
+
+ #define L1_CACHE_TAG_VALID 0x00000001 /* cache tag valid bit */
+diff --git a/arch/mn10300/proc-mn2ws0050/include/proc/cache.h b/arch/mn10300/proc-mn2ws0050/include/proc/cache.h
+index bcb5df2..84fabd2 100644
+--- a/arch/mn10300/proc-mn2ws0050/include/proc/cache.h
++++ b/arch/mn10300/proc-mn2ws0050/include/proc/cache.h
+@@ -16,13 +16,15 @@
+ #ifndef _ASM_PROC_CACHE_H
+ #define _ASM_PROC_CACHE_H
+
++#include <linux/const.h>
++
+ /*
+ * L1 cache
+ */
+ #define L1_CACHE_NWAYS 4 /* number of ways in caches */
+ #define L1_CACHE_NENTRIES 128 /* number of entries in each way */
+-#define L1_CACHE_BYTES 32 /* bytes per entry */
+ #define L1_CACHE_SHIFT 5 /* shift for bytes per entry */
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* bytes per entry */
+ #define L1_CACHE_WAYDISP 0x1000 /* distance from one way to the next */
+
+ #define L1_CACHE_TAG_VALID 0x00000001 /* cache tag valid bit */
+diff --git a/arch/openrisc/include/asm/cache.h b/arch/openrisc/include/asm/cache.h
+index 4ce7a01..449202a 100644
+--- a/arch/openrisc/include/asm/cache.h
++++ b/arch/openrisc/include/asm/cache.h
+@@ -19,11 +19,13 @@
+ #ifndef __ASM_OPENRISC_CACHE_H
+ #define __ASM_OPENRISC_CACHE_H
+
++#include <linux/const.h>
++
+ /* FIXME: How can we replace these with values from the CPU...
+ * they shouldn't be hard-coded!
+ */
+
+-#define L1_CACHE_BYTES 16
+ #define L1_CACHE_SHIFT 4
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #endif /* __ASM_OPENRISC_CACHE_H */
+diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
+index 5394b9c..e77a306 100644
+--- a/arch/parisc/include/asm/atomic.h
++++ b/arch/parisc/include/asm/atomic.h
+@@ -327,6 +327,16 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
+ return dec;
+ }
+
++#define atomic64_read_unchecked(v) atomic64_read(v)
++#define atomic64_set_unchecked(v, i) atomic64_set((v), (i))
++#define atomic64_add_unchecked(a, v) atomic64_add((a), (v))
++#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v))
++#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v))
++#define atomic64_inc_unchecked(v) atomic64_inc(v)
++#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v)
++#define atomic64_dec_unchecked(v) atomic64_dec(v)
++#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n))
++
+ #endif /* !CONFIG_64BIT */
+
+
+diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
+index df0f52b..810699b 100644
+--- a/arch/parisc/include/asm/cache.h
++++ b/arch/parisc/include/asm/cache.h
+@@ -5,6 +5,7 @@
+ #ifndef __ARCH_PARISC_CACHE_H
+ #define __ARCH_PARISC_CACHE_H
+
++#include <linux/const.h>
+
+ /*
+ * PA 2.0 processors have 64 and 128-byte L2 cachelines; PA 1.1 processors
+@@ -14,6 +15,7 @@
+-#define L1_CACHE_BYTES 16
+ #define L1_CACHE_SHIFT 4
+
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
++
+ #ifndef __ASSEMBLY__
+
+ #define SMP_CACHE_BYTES L1_CACHE_BYTES
+diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
+index 78c9fd3..42fa66a 100644
+--- a/arch/parisc/include/asm/elf.h
++++ b/arch/parisc/include/asm/elf.h
+@@ -342,6 +342,13 @@ struct pt_regs; /* forward declaration... */
+
+ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000)
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE 0x10000UL
++
++#define PAX_DELTA_MMAP_LEN 16
++#define PAX_DELTA_STACK_LEN 16
++#endif
++
+ /* This yields a mask that user programs can use to figure out what
+ instruction set this CPU supports. This could be done in user space,
+ but it's not easy, and we've already done it here. */
+diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
+index f08dda3..ea6aa1b 100644
+--- a/arch/parisc/include/asm/pgalloc.h
++++ b/arch/parisc/include/asm/pgalloc.h
+@@ -61,6 +61,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+ (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
+ }
+
++static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
++{
++ pgd_populate(mm, pgd, pmd);
++}
++
+ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+ {
+ pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER);
+@@ -96,6 +101,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+ #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
+ #define pmd_free(mm, x) do { } while (0)
+ #define pgd_populate(mm, pmd, pte) BUG()
++#define pgd_populate_kernel(mm, pmd, pte) BUG()
+
+ #endif
+
+diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
+index c2c43f7..b08ffd9 100644
+--- a/arch/parisc/include/asm/pgtable.h
++++ b/arch/parisc/include/asm/pgtable.h
+@@ -236,6 +236,17 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+ #define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED)
+ #define PAGE_COPY PAGE_EXECREAD
+ #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED)
++
++#ifdef CONFIG_PAX_PAGEEXEC
++# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED)
++# define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED)
++# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED)
++#else
++# define PAGE_SHARED_NOEXEC PAGE_SHARED
++# define PAGE_COPY_NOEXEC PAGE_COPY
++# define PAGE_READONLY_NOEXEC PAGE_READONLY
++#endif
++
+ #define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
+ #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+ #define PAGE_KERNEL_RWX __pgprot(_PAGE_KERNEL_RWX)
+diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
+index 4828478..89b1fbe 100644
+--- a/arch/parisc/include/asm/uaccess.h
++++ b/arch/parisc/include/asm/uaccess.h
+@@ -221,17 +221,17 @@ static inline unsigned long __must_check copy_from_user(void *to,
+ const void __user *from,
+ unsigned long n)
+ {
+- int sz = __compiletime_object_size(to);
++ size_t sz = __compiletime_object_size(to);
+ unsigned long ret = n;
+
+- if (likely(sz == -1 || sz >= n))
++ if (likely(sz == (size_t)-1 || sz >= n))
+ ret = __copy_from_user(to, from, n);
+ else if (!__builtin_constant_p(n))
+ copy_user_overflow(sz, n);
+ else
+ __bad_copy_user();
+
+- if (unlikely(ret))
++ if (unlikely(ret && (long)ret > 0))
+ memset(to + (n - ret), 0, ret);
+ return ret;
+ }
+diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
+index a0ecdb4a..71d2069 100644
+--- a/arch/parisc/kernel/module.c
++++ b/arch/parisc/kernel/module.c
+@@ -100,14 +100,12 @@
+ * or init pieces the location is */
+ static inline int in_init(struct module *me, void *loc)
+ {
+- return (loc >= me->init_layout.base &&
+- loc <= (me->init_layout.base + me->init_layout.size));
++ return within_module_init((unsigned long)loc, me);
+ }
+
+ static inline int in_core(struct module *me, void *loc)
+ {
+- return (loc >= me->core_layout.base &&
+- loc <= (me->core_layout.base + me->core_layout.size));
++ return within_module_core((unsigned long)loc, me);
+ }
+
+ static inline int in_local(struct module *me, void *loc)
+@@ -367,13 +365,13 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
+ }
+
+ /* align things a bit */
+- me->core_layout.size = ALIGN(me->core_layout.size, 16);
+- me->arch.got_offset = me->core_layout.size;
+- me->core_layout.size += gots * sizeof(struct got_entry);
++ me->core_layout.size_rw = ALIGN(me->core_layout.size_rw, 16);
++ me->arch.got_offset = me->core_layout.size_rw;
++ me->core_layout.size_rw += gots * sizeof(struct got_entry);
+
+- me->core_layout.size = ALIGN(me->core_layout.size, 16);
+- me->arch.fdesc_offset = me->core_layout.size;
+- me->core_layout.size += fdescs * sizeof(Elf_Fdesc);
++ me->core_layout.size_rw = ALIGN(me->core_layout.size_rw, 16);
++ me->arch.fdesc_offset = me->core_layout.size_rw;
++ me->core_layout.size_rw += fdescs * sizeof(Elf_Fdesc);
+
+ me->arch.got_max = gots;
+ me->arch.fdesc_max = fdescs;
+@@ -391,7 +389,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend)
+
+ BUG_ON(value == 0);
+
+- got = me->core_layout.base + me->arch.got_offset;
++ got = me->core_layout.base_rw + me->arch.got_offset;
+ for (i = 0; got[i].addr; i++)
+ if (got[i].addr == value)
+ goto out;
+@@ -409,7 +407,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend)
+ #ifdef CONFIG_64BIT
+ static Elf_Addr get_fdesc(struct module *me, unsigned long value)
+ {
+- Elf_Fdesc *fdesc = me->core_layout.base + me->arch.fdesc_offset;
++ Elf_Fdesc *fdesc = me->core_layout.base_rw + me->arch.fdesc_offset;
+
+ if (!value) {
+ printk(KERN_ERR "%s: zero OPD requested!\n", me->name);
+@@ -427,7 +425,7 @@ static Elf_Addr get_fdesc(struct module *me, unsigned long value)
+
+ /* Create new one */
+ fdesc->addr = value;
+- fdesc->gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset;
++ fdesc->gp = (Elf_Addr)me->core_layout.base_rw + me->arch.got_offset;
+ return (Elf_Addr)fdesc;
+ }
+ #endif /* CONFIG_64BIT */
+@@ -847,7 +845,7 @@ register_unwind_table(struct module *me,
+
+ table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr;
+ end = table + sechdrs[me->arch.unwind_section].sh_size;
+- gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset;
++ gp = (Elf_Addr)me->core_layout.base_rw + me->arch.got_offset;
+
+ DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n",
+ me->arch.unwind_section, table, end, gp);
+diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
+index 0a393a0..5b3199e0 100644
+--- a/arch/parisc/kernel/sys_parisc.c
++++ b/arch/parisc/kernel/sys_parisc.c
+@@ -92,6 +92,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long task_size = TASK_SIZE;
+ int do_color_align, last_mmap;
+ struct vm_unmapped_area_info info;
++ unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags);
+
+ if (len > task_size)
+ return -ENOMEM;
+@@ -109,6 +110,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ goto found_addr;
+ }
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (addr) {
+ if (do_color_align && last_mmap)
+ addr = COLOR_ALIGN(addr, last_mmap, pgoff);
+@@ -127,6 +132,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ info.high_limit = mmap_upper_limit();
+ info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
+ info.align_offset = shared_align_offset(last_mmap, pgoff);
++ info.threadstack_offset = offset;
+ addr = vm_unmapped_area(&info);
+
+ found_addr:
+@@ -146,6 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ unsigned long addr = addr0;
+ int do_color_align, last_mmap;
+ struct vm_unmapped_area_info info;
++ unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags);
+
+ #ifdef CONFIG_64BIT
+ /* This should only ever run for 32-bit processes. */
+@@ -170,6 +177,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ }
+
+ /* requesting a specific address */
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (addr) {
+ if (do_color_align && last_mmap)
+ addr = COLOR_ALIGN(addr, last_mmap, pgoff);
+@@ -187,6 +198,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ info.high_limit = mm->mmap_base;
+ info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
+ info.align_offset = shared_align_offset(last_mmap, pgoff);
++ info.threadstack_offset = offset;
+ addr = vm_unmapped_area(&info);
+ if (!(addr & ~PAGE_MASK))
+ goto found_addr;
+@@ -252,6 +264,13 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ mm->mmap_legacy_base = mmap_legacy_base();
+ mm->mmap_base = mmap_upper_limit();
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP) {
++ mm->mmap_legacy_base += mm->delta_mmap;
++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
++ }
++#endif
++
+ if (mmap_is_legacy()) {
+ mm->mmap_base = mm->mmap_legacy_base;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
+index 97d6b20..2ab0232 100644
+--- a/arch/parisc/kernel/traps.c
++++ b/arch/parisc/kernel/traps.c
+@@ -719,9 +719,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
+
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm,regs->iaoq[0]);
+- if (vma && (regs->iaoq[0] >= vma->vm_start)
+- && (vma->vm_flags & VM_EXEC)) {
+-
++ if (vma && (regs->iaoq[0] >= vma->vm_start)) {
+ fault_address = regs->iaoq[0];
+ fault_space = regs->iasq[0];
+
+diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
+index 163af2c..ed77b14 100644
+--- a/arch/parisc/mm/fault.c
++++ b/arch/parisc/mm/fault.c
+@@ -16,6 +16,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/module.h>
+ #include <linux/uaccess.h>
++#include <linux/unistd.h>
+
+ #include <asm/traps.h>
+
+@@ -50,7 +51,7 @@ int show_unhandled_signals = 1;
+ static unsigned long
+ parisc_acctyp(unsigned long code, unsigned int inst)
+ {
+- if (code == 6 || code == 16)
++ if (code == 6 || code == 7 || code == 16)
+ return VM_EXEC;
+
+ switch (inst & 0xf0000000) {
+@@ -136,6 +137,116 @@ parisc_acctyp(unsigned long code, unsigned int inst)
+ }
+ #endif
+
++#ifdef CONFIG_PAX_PAGEEXEC
++/*
++ * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address)
++ *
++ * returns 1 when task should be killed
++ * 2 when rt_sigreturn trampoline was detected
++ * 3 when unpatched PLT trampoline was detected
++ */
++static int pax_handle_fetch_fault(struct pt_regs *regs)
++{
++
++#ifdef CONFIG_PAX_EMUPLT
++ int err;
++
++ do { /* PaX: unpatched PLT emulation */
++ unsigned int bl, depwi;
++
++ err = get_user(bl, (unsigned int *)instruction_pointer(regs));
++ err |= get_user(depwi, (unsigned int *)(instruction_pointer(regs)+4));
++
++ if (err)
++ break;
++
++ if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) {
++ unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12;
++
++ err = get_user(ldw, (unsigned int *)addr);
++ err |= get_user(bv, (unsigned int *)(addr+4));
++ err |= get_user(ldw2, (unsigned int *)(addr+8));
++
++ if (err)
++ break;
++
++ if (ldw == 0x0E801096U &&
++ bv == 0xEAC0C000U &&
++ ldw2 == 0x0E881095U)
++ {
++ unsigned int resolver, map;
++
++ err = get_user(resolver, (unsigned int *)(instruction_pointer(regs)+8));
++ err |= get_user(map, (unsigned int *)(instruction_pointer(regs)+12));
++ if (err)
++ break;
++
++ regs->gr[20] = instruction_pointer(regs)+8;
++ regs->gr[21] = map;
++ regs->gr[22] = resolver;
++ regs->iaoq[0] = resolver | 3UL;
++ regs->iaoq[1] = regs->iaoq[0] + 4;
++ return 3;
++ }
++ }
++ } while (0);
++#endif
++
++#ifdef CONFIG_PAX_EMUTRAMP
++
++#ifndef CONFIG_PAX_EMUSIGRT
++ if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP))
++ return 1;
++#endif
++
++ do { /* PaX: rt_sigreturn emulation */
++ unsigned int ldi1, ldi2, bel, nop;
++
++ err = get_user(ldi1, (unsigned int *)instruction_pointer(regs));
++ err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4));
++ err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8));
++ err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12));
++
++ if (err)
++ break;
++
++ if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) &&
++ ldi2 == 0x3414015AU &&
++ bel == 0xE4008200U &&
++ nop == 0x08000240U)
++ {
++ regs->gr[25] = (ldi1 & 2) >> 1;
++ regs->gr[20] = __NR_rt_sigreturn;
++ regs->gr[31] = regs->iaoq[1] + 16;
++ regs->sr[0] = regs->iasq[1];
++ regs->iaoq[0] = 0x100UL;
++ regs->iaoq[1] = regs->iaoq[0] + 4;
++ regs->iasq[0] = regs->sr[2];
++ regs->iasq[1] = regs->sr[2];
++ return 2;
++ }
++ } while (0);
++#endif
++
++ return 1;
++}
++
++void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
++{
++ unsigned long i;
++
++ printk(KERN_ERR "PAX: bytes at PC: ");
++ for (i = 0; i < 5; i++) {
++ unsigned int c;
++ if (get_user(c, (unsigned int *)pc+i))
++ printk(KERN_CONT "???????? ");
++ else
++ printk(KERN_CONT "%08x ", c);
++ }
++ printk("\n");
++}
++#endif
++
+ int fixup_exception(struct pt_regs *regs)
+ {
+ const struct exception_table_entry *fix;
+@@ -230,8 +341,33 @@ retry:
+
+ good_area:
+
+- if ((vma->vm_flags & acc_type) != acc_type)
++ if ((vma->vm_flags & acc_type) != acc_type) {
++
++#ifdef CONFIG_PAX_PAGEEXEC
++ if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) &&
++ (address & ~3UL) == instruction_pointer(regs))
++ {
++ up_read(&mm->mmap_sem);
++ switch (pax_handle_fetch_fault(regs)) {
++
++#ifdef CONFIG_PAX_EMUPLT
++ case 3:
++ return;
++#endif
++
++#ifdef CONFIG_PAX_EMUTRAMP
++ case 2:
++ return;
++#endif
++
++ }
++ pax_report_fault(regs, (void *)instruction_pointer(regs), (void *)regs->gr[30]);
++ do_group_exit(SIGKILL);
++ }
++#endif
++
+ goto bad_area;
++ }
+
+ /*
+ * If for any reason at all we couldn't handle the fault, make
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index 792cb17..1a96a22 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -146,6 +146,7 @@ config PPC
+ select ARCH_USE_BUILTIN_BSWAP
+ select OLD_SIGSUSPEND
+ select OLD_SIGACTION if PPC32
++ select HAVE_GCC_PLUGINS
+ select HAVE_DEBUG_STACKOVERFLOW
+ select HAVE_IRQ_EXIT_ON_IRQ_STACK
+ select ARCH_USE_CMPXCHG_LOCKREF if PPC64
+@@ -446,6 +447,7 @@ config KEXEC
+ bool "kexec system call"
+ depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) || PPC_BOOK3E
+ select KEXEC_CORE
++ depends on !GRKERNSEC_KMEM
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
+index f08d567..94e5497 100644
+--- a/arch/powerpc/include/asm/atomic.h
++++ b/arch/powerpc/include/asm/atomic.h
+@@ -12,6 +12,11 @@
+
+ #define ATOMIC_INIT(i) { (i) }
+
++#define _ASM_EXTABLE(from, to) \
++" .section __ex_table,\"a\"\n" \
++ PPC_LONG" " #from ", " #to"\n" \
++" .previous\n"
++
+ /*
+ * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
+ * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
+@@ -39,38 +44,79 @@ static __inline__ int atomic_read(const atomic_t *v)
+ return t;
+ }
+
++static __inline__ int atomic_read_unchecked(const atomic_unchecked_t *v)
++{
++ int t;
++
++ __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
++
++ return t;
++}
++
+ static __inline__ void atomic_set(atomic_t *v, int i)
+ {
+ __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+ }
+
+-#define ATOMIC_OP(op, asm_op) \
+-static __inline__ void atomic_##op(int a, atomic_t *v) \
++static __inline__ void atomic_set_unchecked(atomic_unchecked_t *v, int i)
++{
++ __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
++}
++
++#ifdef CONFIG_PAX_REFCOUNT
++#define __REFCOUNT_OP(op) op##o.
++#define __OVERFLOW_PRE \
++ " mcrxr cr0\n"
++#define __OVERFLOW_POST \
++ " bf 4*cr0+so, 3f\n" \
++ "2: .long 0x00c00b00\n" \
++ "3:\n"
++#define __OVERFLOW_EXTABLE \
++ "\n4:\n" \
++ _ASM_EXTABLE(2b, 4b)
++#else
++#define __REFCOUNT_OP(op) op
++#define __OVERFLOW_PRE
++#define __OVERFLOW_POST
++#define __OVERFLOW_EXTABLE
++#endif
++
++#define __ATOMIC_OP(op, suffix, pre_op, asm_op, post_op, extable) \
++static inline void atomic_##op##suffix(int a, atomic##suffix##_t *v) \
+ { \
+ int t; \
+ \
+ __asm__ __volatile__( \
+-"1: lwarx %0,0,%3 # atomic_" #op "\n" \
++"1: lwarx %0,0,%3 # atomic_" #op #suffix "\n" \
++ pre_op \
+ #asm_op " %0,%2,%0\n" \
++ post_op \
+ PPC405_ERR77(0,%3) \
+ " stwcx. %0,0,%3 \n" \
+ " bne- 1b\n" \
++ extable \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ } \
+
+-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
+-static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
++#define ATOMIC_OP(op, asm_op) __ATOMIC_OP(op, , , asm_op, , ) \
++ __ATOMIC_OP(op, _unchecked, __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
++#define __ATOMIC_OP_RETURN(op, suffix, pre_op, asm_op, post_op, extable)\
++static inline int atomic_##op##_return##suffix##_relaxed(int a, atomic##suffix##_t *v)\
+ { \
+ int t; \
+ \
+ __asm__ __volatile__( \
+-"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
++"1: lwarx %0,0,%3 # atomic_" #op "_return" #suffix "_relaxed\n"\
++ pre_op \
+ #asm_op " %0,%2,%0\n" \
++ post_op \
+ PPC405_ERR77(0, %3) \
+ " stwcx. %0,0,%3\n" \
+ " bne- 1b\n" \
++ extable \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+@@ -78,6 +124,9 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
+ return t; \
+ }
+
++#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) __ATOMIC_OP_RETURN(op, , , asm_op, , )\
++ __ATOMIC_OP_RETURN(op, _unchecked, __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
+ #define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
+ static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
+ { \
+@@ -105,6 +154,7 @@ ATOMIC_OPS(add, add)
+ ATOMIC_OPS(sub, subf)
+
+ #define atomic_add_return_relaxed atomic_add_return_relaxed
++#define atomic_add_return_unchecked_relaxed atomic_add_return_unchecked_relaxed
+ #define atomic_sub_return_relaxed atomic_sub_return_relaxed
+
+ #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+@@ -126,41 +176,22 @@ ATOMIC_OPS(xor, xor)
+ #undef ATOMIC_OPS
+ #undef ATOMIC_FETCH_OP_RELAXED
+ #undef ATOMIC_OP_RETURN_RELAXED
++#undef __ATOMIC_OP_RETURN
+ #undef ATOMIC_OP
++#undef __ATOMIC_OP
+
+ #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
+
+-static __inline__ void atomic_inc(atomic_t *v)
+-{
+- int t;
+-
+- __asm__ __volatile__(
+-"1: lwarx %0,0,%2 # atomic_inc\n\
+- addic %0,%0,1\n"
+- PPC405_ERR77(0,%2)
+-" stwcx. %0,0,%2 \n\
+- bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
+-}
+-
+-static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
+-{
+- int t;
+-
+- __asm__ __volatile__(
+-"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n"
+-" addic %0,%0,1\n"
+- PPC405_ERR77(0, %2)
+-" stwcx. %0,0,%2\n"
+-" bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
+-
+- return t;
+-}
++/*
++ * atomic_inc - increment atomic variable
++ * @v: pointer of type atomic_t
++ *
++ * Atomically increments @v by 1
++ */
++#define atomic_inc(v) atomic_add(1, (v))
++#define atomic_inc_unchecked(v) atomic_add_unchecked(1, (v))
++#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
++#define atomic_inc_return_unchecked_relaxed(v) atomic_add_return_unchecked_relaxed(1, (v))
+
+ /*
+ * atomic_inc_and_test - increment and test
+@@ -171,37 +202,20 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
+ * other cases.
+ */
+ #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+-
+-static __inline__ void atomic_dec(atomic_t *v)
+-{
+- int t;
+-
+- __asm__ __volatile__(
+-"1: lwarx %0,0,%2 # atomic_dec\n\
+- addic %0,%0,-1\n"
+- PPC405_ERR77(0,%2)\
+-" stwcx. %0,0,%2\n\
+- bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
+-}
+-
+-static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
++#define atomic_inc_and_test_unchecked(v) (atomic_inc_return_unchecked(v) == 0)
++
++/*
++ * atomic_dec - decrement atomic variable
++ * @v: pointer of type atomic_t
++ *
++ * Atomically decrements @v by 1
++ */
++#define atomic_dec(v) atomic_sub(1, (v))
++#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v))
++
++static __inline__ void atomic_dec_unchecked(atomic_unchecked_t *v)
+ {
+- int t;
+-
+- __asm__ __volatile__(
+-"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n"
+-" addic %0,%0,-1\n"
+- PPC405_ERR77(0, %2)
+-" stwcx. %0,0,%2\n"
+-" bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
+-
+- return t;
++ atomic_sub_unchecked(1, v);
+ }
+
+ #define atomic_inc_return_relaxed atomic_inc_return_relaxed
+@@ -216,6 +230,16 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
+ #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+ #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+
++static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new)
++{
++ return cmpxchg(&(v->counter), old, new);
++}
++
++static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new)
++{
++ return xchg(&(v->counter), new);
++}
++
+ /**
+ * __atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+@@ -233,14 +257,21 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+ PPC_ATOMIC_ENTRY_BARRIER
+ "1: lwarx %0,0,%1 # __atomic_add_unless\n\
+ cmpw 0,%0,%3 \n\
+- beq- 2f \n\
+- add %0,%2,%0 \n"
++ beq- 5f \n"
++
++ __OVERFLOW_PRE
++ __REFCOUNT_OP(add) " %0,%2,%0 \n"
++ __OVERFLOW_POST
++
+ PPC405_ERR77(0,%2)
+ " stwcx. %0,0,%1 \n\
+ bne- 1b \n"
++
++ __OVERFLOW_EXTABLE
++
+ PPC_ATOMIC_EXIT_BARRIER
+ " subf %0,%2,%0 \n\
+-2:"
++5:"
+ : "=&r" (t)
+ : "r" (&v->counter), "r" (a), "r" (u)
+ : "cc", "memory");
+@@ -323,37 +354,59 @@ static __inline__ long atomic64_read(const atomic64_t *v)
+ return t;
+ }
+
++static __inline__ long atomic64_read_unchecked(const atomic64_unchecked_t *v)
++{
++ long t;
++
++ __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
++
++ return t;
++}
++
+ static __inline__ void atomic64_set(atomic64_t *v, long i)
+ {
+ __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+ }
+
+-#define ATOMIC64_OP(op, asm_op) \
+-static __inline__ void atomic64_##op(long a, atomic64_t *v) \
++static __inline__ void atomic64_set_unchecked(atomic64_unchecked_t *v, long i)
++{
++ __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
++}
++
++#define __ATOMIC64_OP(op, suffix, pre_op, asm_op, post_op, extable) \
++static inline void atomic64_##op##suffix(long a, atomic64##suffix##_t *v)\
+ { \
+ long t; \
+ \
+ __asm__ __volatile__( \
+ "1: ldarx %0,0,%3 # atomic64_" #op "\n" \
++ pre_op \
+ #asm_op " %0,%2,%0\n" \
++ post_op \
+ " stdcx. %0,0,%3 \n" \
+ " bne- 1b\n" \
++ extable \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ }
+
+-#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
+-static inline long \
+-atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
++#define ATOMIC64_OP(op, asm_op) __ATOMIC64_OP(op, , , asm_op, , ) \
++ __ATOMIC64_OP(op, _unchecked, __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
++#define __ATOMIC64_OP_RETURN(op, suffix, pre_op, asm_op, post_op, extable)\
++static inline long atomic64_##op##_return##suffix##_relaxed(long a, atomic64##suffix##_t *v)\
+ { \
+ long t; \
+ \
+ __asm__ __volatile__( \
+ "1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
++ pre_op \
+ #asm_op " %0,%2,%0\n" \
++ post_op \
+ " stdcx. %0,0,%3\n" \
+ " bne- 1b\n" \
++ extable \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+@@ -361,6 +414,9 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
+ return t; \
+ }
+
++#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) __ATOMIC64_OP_RETURN(op, , , asm_op, , )\
++ __ATOMIC64_OP_RETURN(op, _unchecked, __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
++
+ #define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
+ static inline long \
+ atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \
+@@ -409,38 +465,33 @@ ATOMIC64_OPS(xor, xor)
+ #undef ATOPIC64_OPS
+ #undef ATOMIC64_FETCH_OP_RELAXED
+ #undef ATOMIC64_OP_RETURN_RELAXED
++#undef __ATOMIC64_OP_RETURN
+ #undef ATOMIC64_OP
++#undef __ATOMIC64_OP
++#undef __OVERFLOW_EXTABLE
++#undef __OVERFLOW_POST
++#undef __OVERFLOW_PRE
++#undef __REFCOUNT_OP
+
+ #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
+
+-static __inline__ void atomic64_inc(atomic64_t *v)
+-{
+- long t;
++/*
++ * atomic64_inc - increment atomic variable
++ * @v: pointer of type atomic64_t
++ *
++ * Atomically increments @v by 1
++ */
++#define atomic64_inc(v) atomic64_add(1, (v))
++#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
+
+- __asm__ __volatile__(
+-"1: ldarx %0,0,%2 # atomic64_inc\n\
+- addic %0,%0,1\n\
+- stdcx. %0,0,%2 \n\
+- bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
++static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v)
++{
++ atomic64_add_unchecked(1, v);
+ }
+
+-static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
++static inline long atomic64_inc_return_unchecked_relaxed(atomic64_unchecked_t *v)
+ {
+- long t;
+-
+- __asm__ __volatile__(
+-"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
+-" addic %0,%0,1\n"
+-" stdcx. %0,0,%2\n"
+-" bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
+-
+- return t;
++ return atomic64_add_return_unchecked_relaxed(1, v);
+ }
+
+ /*
+@@ -453,34 +504,18 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
+ */
+ #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+
+-static __inline__ void atomic64_dec(atomic64_t *v)
++/*
++ * atomic64_dec - decrement atomic variable
++ * @v: pointer of type atomic64_t
++ *
++ * Atomically decrements @v by 1
++ */
++#define atomic64_dec(v) atomic64_sub(1, (v))
++#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
++
++static __inline__ void atomic64_dec_unchecked(atomic64_unchecked_t *v)
+ {
+- long t;
+-
+- __asm__ __volatile__(
+-"1: ldarx %0,0,%2 # atomic64_dec\n\
+- addic %0,%0,-1\n\
+- stdcx. %0,0,%2\n\
+- bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
+-}
+-
+-static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
+-{
+- long t;
+-
+- __asm__ __volatile__(
+-"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
+-" addic %0,%0,-1\n"
+-" stdcx. %0,0,%2\n"
+-" bne- 1b"
+- : "=&r" (t), "+m" (v->counter)
+- : "r" (&v->counter)
+- : "cc", "xer");
+-
+- return t;
++ atomic64_sub_unchecked(1, v);
+ }
+
+ #define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
+@@ -522,6 +557,16 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
+ #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+ #define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+
++static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old, long new)
++{
++ return cmpxchg(&(v->counter), old, new);
++}
++
++static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new)
++{
++ return xchg(&(v->counter), new);
++}
++
+ /**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+@@ -537,15 +582,22 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
+
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+-"1: ldarx %0,0,%1 # __atomic_add_unless\n\
++"1: ldarx %0,0,%1 # atomic64_add_unless\n\
+ cmpd 0,%0,%3 \n\
+- beq- 2f \n\
+- add %0,%2,%0 \n"
++ beq- 5f \n"
++
++ __OVERFLOW_PRE
++ __REFCOUNT_OP(add) " %0,%2,%0 \n"
++ __OVERFLOW_POST
++
+ " stdcx. %0,0,%1 \n\
+ bne- 1b \n"
+ PPC_ATOMIC_EXIT_BARRIER
++
++ __OVERFLOW_EXTABLE
++
+ " subf %0,%2,%0 \n\
+-2:"
++5:"
+ : "=&r" (t)
+ : "r" (&v->counter), "r" (a), "r" (u)
+ : "cc", "memory");
+diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h
+index 880db13..bb4ed4a 100644
+--- a/arch/powerpc/include/asm/book3s/32/hash.h
++++ b/arch/powerpc/include/asm/book3s/32/hash.h
+@@ -20,6 +20,7 @@
+ #define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
+ #define _PAGE_USER 0x004 /* usermode access allowed */
+ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
++#define _PAGE_NX _PAGE_GUARDED
+ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
+ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */
+ #define _PAGE_WRITETHRU 0x040 /* W: cache write-through */
+diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
+index 38b33dc..945d1f1 100644
+--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
++++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
+@@ -226,7 +226,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+ {
+ unsigned long set = pte_val(entry) &
+- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
++ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC | _PAGE_NX);
+ unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+
+ pte_update(ptep, clr, set);
+diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
+index cd5e7aa..7709061 100644
+--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
++++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
+@@ -91,6 +91,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+ pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS);
+ }
+
++static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
++{
++ pgd_populate(mm, pgd, pud);
++}
++
+ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
+@@ -106,6 +111,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS);
+ }
+
++static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
++{
++ pud_populate(mm, pud, pmd);
++}
++
+ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long address)
+ {
+diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
+index ffbafbf..71d037fb 100644
+--- a/arch/powerpc/include/asm/cache.h
++++ b/arch/powerpc/include/asm/cache.h
+@@ -3,6 +3,8 @@
+
+ #ifdef __KERNEL__
+
++#include <asm/reg.h>
++#include <linux/const.h>
+
+ /* bytes per L1 cache line */
+ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
+@@ -22,7 +24,7 @@
+ #define L1_CACHE_SHIFT 7
+ #endif
+
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define SMP_CACHE_BYTES L1_CACHE_BYTES
+
+diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
+index ee46ffe..b36c98c 100644
+--- a/arch/powerpc/include/asm/elf.h
++++ b/arch/powerpc/include/asm/elf.h
+@@ -30,6 +30,18 @@
+
+ #define ELF_ET_DYN_BASE 0x20000000
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE (0x10000000UL)
++
++#ifdef __powerpc64__
++#define PAX_DELTA_MMAP_LEN (is_32bit_task() ? 16 : 28)
++#define PAX_DELTA_STACK_LEN (is_32bit_task() ? 16 : 28)
++#else
++#define PAX_DELTA_MMAP_LEN 15
++#define PAX_DELTA_STACK_LEN 15
++#endif
++#endif
++
+ #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
+
+ /*
+diff --git a/arch/powerpc/include/asm/exec.h b/arch/powerpc/include/asm/exec.h
+index 8196e9c..d83a9f3 100644
+--- a/arch/powerpc/include/asm/exec.h
++++ b/arch/powerpc/include/asm/exec.h
+@@ -4,6 +4,6 @@
+ #ifndef _ASM_POWERPC_EXEC_H
+ #define _ASM_POWERPC_EXEC_H
+
+-extern unsigned long arch_align_stack(unsigned long sp);
++#define arch_align_stack(x) ((x) & ~0xfUL)
+
+ #endif /* _ASM_POWERPC_EXEC_H */
+diff --git a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h
+index 5acabbd..7ea14fa 100644
+--- a/arch/powerpc/include/asm/kmap_types.h
++++ b/arch/powerpc/include/asm/kmap_types.h
+@@ -10,7 +10,7 @@
+ * 2 of the License, or (at your option) any later version.
+ */
+
+-#define KM_TYPE_NR 16
++#define KM_TYPE_NR 17
+
+ #endif /* __KERNEL__ */
+ #endif /* _ASM_POWERPC_KMAP_TYPES_H */
+diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
+index b8da913..c02b593 100644
+--- a/arch/powerpc/include/asm/local.h
++++ b/arch/powerpc/include/asm/local.h
+@@ -9,21 +9,65 @@ typedef struct
+ atomic_long_t a;
+ } local_t;
+
++typedef struct
++{
++ atomic_long_unchecked_t a;
++} local_unchecked_t;
++
+ #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
+
+ #define local_read(l) atomic_long_read(&(l)->a)
++#define local_read_unchecked(l) atomic_long_read_unchecked(&(l)->a)
+ #define local_set(l,i) atomic_long_set(&(l)->a, (i))
++#define local_set_unchecked(l,i) atomic_long_set_unchecked(&(l)->a, (i))
+
+ #define local_add(i,l) atomic_long_add((i),(&(l)->a))
++#define local_add_unchecked(i,l) atomic_long_add_unchecked((i),(&(l)->a))
+ #define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
++#define local_sub_unchecked(i,l) atomic_long_sub_unchecked((i),(&(l)->a))
+ #define local_inc(l) atomic_long_inc(&(l)->a)
++#define local_inc_unchecked(l) atomic_long_inc_unchecked(&(l)->a)
+ #define local_dec(l) atomic_long_dec(&(l)->a)
++#define local_dec_unchecked(l) atomic_long_dec_unchecked(&(l)->a)
+
+ static __inline__ long local_add_return(long a, local_t *l)
+ {
+ long t;
+
+ __asm__ __volatile__(
++"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n"
++
++#ifdef CONFIG_PAX_REFCOUNT
++" mcrxr cr0\n"
++" addo. %0,%1,%0\n"
++" bf 4*cr0+so, 3f\n"
++"2:.long " "0x00c00b00""\n"
++#else
++" add %0,%1,%0\n"
++#endif
++
++"3:\n"
++ PPC405_ERR77(0,%2)
++ PPC_STLCX "%0,0,%2 \n\
++ bne- 1b"
++
++#ifdef CONFIG_PAX_REFCOUNT
++"\n4:\n"
++ _ASM_EXTABLE(2b, 4b)
++#endif
++
++ : "=&r" (t)
++ : "r" (a), "r" (&(l->a.counter))
++ : "cc", "memory");
++
++ return t;
++}
++
++static __inline__ long local_add_return_unchecked(long a, local_unchecked_t *l)
++{
++ long t;
++
++ __asm__ __volatile__(
+ "1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\
+ add %0,%1,%0\n"
+ PPC405_ERR77(0,%2)
+@@ -101,6 +145,8 @@ static __inline__ long local_dec_return(local_t *l)
+
+ #define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
++#define local_cmpxchg_unchecked(l, o, n) \
++ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+ #define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+
+ /**
+diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
+index 30922f6..0bb237c 100644
+--- a/arch/powerpc/include/asm/mman.h
++++ b/arch/powerpc/include/asm/mman.h
+@@ -26,7 +26,7 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
+ }
+ #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+-static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
++static inline pgprot_t arch_vm_get_page_prot(vm_flags_t vm_flags)
+ {
+ return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+ }
+diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
+index 897d2e1..399f34f 100644
+--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
++++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
+@@ -54,6 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+ #ifndef CONFIG_PPC_64K_PAGES
+
+ #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
++#define pgd_populate_kernel(MM, PGD, PUD) pgd_populate((MM), (PGD), (PUD))
+
+ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+ {
+@@ -70,6 +71,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ pud_set(pud, (unsigned long)pmd);
+ }
+
++static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
++{
++ pud_populate(mm, pud, pmd);
++}
++
+ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+ {
+@@ -139,6 +145,7 @@ extern void __tlb_remove_table(void *_table);
+ #endif
+
+ #define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
++#define pud_populate_kernel(mm, pud, pmd) pud_populate((mm), (pud), (pmd))
+
+ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
+index 56398e7..287a772 100644
+--- a/arch/powerpc/include/asm/page.h
++++ b/arch/powerpc/include/asm/page.h
+@@ -230,8 +230,9 @@ extern long long virt_phys_offset;
+ * and needs to be executable. This means the whole heap ends
+ * up being executable.
+ */
+-#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
++#define VM_DATA_DEFAULT_FLAGS32 \
++ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
++ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+ #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+@@ -259,6 +260,9 @@ extern long long virt_phys_offset;
+ #define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
+ #endif
+
++#define ktla_ktva(addr) (addr)
++#define ktva_ktla(addr) (addr)
++
+ #ifndef CONFIG_PPC_BOOK3S_64
+ /*
+ * Use the top bit of the higher-level page table entries to indicate whether
+diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
+index dd5f071..0470718 100644
+--- a/arch/powerpc/include/asm/page_64.h
++++ b/arch/powerpc/include/asm/page_64.h
+@@ -169,15 +169,18 @@ do { \
+ * stack by default, so in the absence of a PT_GNU_STACK program header
+ * we turn execute permission off.
+ */
+-#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
++#define VM_STACK_DEFAULT_FLAGS32 \
++ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
++ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+ #define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
++#ifndef CONFIG_PAX_PAGEEXEC
+ #define VM_STACK_DEFAULT_FLAGS \
+ (is_32bit_task() ? \
+ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
++#endif
+
+ #include <asm-generic/getorder.h>
+
+diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
+index 9bd87f2..f600e6d 100644
+--- a/arch/powerpc/include/asm/pgtable.h
++++ b/arch/powerpc/include/asm/pgtable.h
+@@ -1,6 +1,7 @@
+ #ifndef _ASM_POWERPC_PGTABLE_H
+ #define _ASM_POWERPC_PGTABLE_H
+
++#include <linux/const.h>
+ #ifndef __ASSEMBLY__
+ #include <linux/mmdebug.h>
+ #include <linux/mmzone.h>
+diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
+index 4ba26dd..2d1137d 100644
+--- a/arch/powerpc/include/asm/pte-common.h
++++ b/arch/powerpc/include/asm/pte-common.h
+@@ -16,6 +16,9 @@
+ #ifndef _PAGE_EXEC
+ #define _PAGE_EXEC 0
+ #endif
++#ifndef _PAGE_NX
++#define _PAGE_NX 0
++#endif
+ #ifndef _PAGE_ENDIAN
+ #define _PAGE_ENDIAN 0
+ #endif
+@@ -53,13 +56,13 @@
+ #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE()
+ #endif
+ #ifndef _PAGE_KERNEL_RO
+-#define _PAGE_KERNEL_RO (_PAGE_RO)
++#define _PAGE_KERNEL_RO (_PAGE_RO | _PAGE_NX)
+ #endif
+ #ifndef _PAGE_KERNEL_ROX
+ #define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO)
+ #endif
+ #ifndef _PAGE_KERNEL_RW
+-#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
++#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_NX)
+ #endif
+ #ifndef _PAGE_KERNEL_RWX
+ #define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
+@@ -142,15 +145,12 @@ static inline bool pte_user(pte_t pte)
+ * Note due to the way vm flags are laid out, the bits are XWR
+ */
+ #define PAGE_NONE __pgprot(_PAGE_BASE)
+-#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+-#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
+- _PAGE_EXEC)
+-#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
+-#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
+- _PAGE_EXEC)
+-#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
+-#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
+- _PAGE_EXEC)
++#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_NX)
++#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
++#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | _PAGE_NX)
++#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | _PAGE_EXEC)
++#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | _PAGE_NX)
++#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | _PAGE_EXEC)
+
+ #define __P000 PAGE_NONE
+ #define __P001 PAGE_READONLY
+@@ -171,11 +171,9 @@ static inline bool pte_user(pte_t pte)
+ #define __S111 PAGE_SHARED_X
+
+ /* Permission masks used for kernel mappings */
+-#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+-#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+- _PAGE_NO_CACHE)
+-#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+- _PAGE_NO_CACHE | _PAGE_GUARDED)
++#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW | _PAGE_NX)
++#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
++#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED)
+ #define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+ #define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+ #define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
+index 978dada..5d29335 100644
+--- a/arch/powerpc/include/asm/reg.h
++++ b/arch/powerpc/include/asm/reg.h
+@@ -270,6 +270,7 @@
+ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */
+ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
+ #define DSISR_NOHPTE 0x40000000 /* no translation found */
++#define DSISR_GUARDED 0x10000000 /* fetch from guarded storage */
+ #define DSISR_PROTFAULT 0x08000000 /* protection fault */
+ #define DSISR_ISSTORE 0x02000000 /* access was a store */
+ #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
+diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
+index 0d02c11..33a8f08 100644
+--- a/arch/powerpc/include/asm/smp.h
++++ b/arch/powerpc/include/asm/smp.h
+@@ -51,7 +51,7 @@ struct smp_ops_t {
+ int (*cpu_disable)(void);
+ void (*cpu_die)(unsigned int nr);
+ int (*cpu_bootable)(unsigned int nr);
+-};
++} __no_const;
+
+ extern void smp_send_debugger_break(void);
+ extern void start_secondary_resume(void);
+diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
+index fa37fe9..867d3cf 100644
+--- a/arch/powerpc/include/asm/spinlock.h
++++ b/arch/powerpc/include/asm/spinlock.h
+@@ -27,6 +27,7 @@
+ #include <asm/asm-compat.h>
+ #include <asm/synch.h>
+ #include <asm/ppc-opcode.h>
++#include <asm/atomic.h>
+
+ #ifdef CONFIG_PPC64
+ /* use 0x800000yy when locked, where yy == CPU number */
+@@ -228,13 +229,29 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw)
+ __asm__ __volatile__(
+ "1: " PPC_LWARX(%0,0,%1,1) "\n"
+ __DO_SIGN_EXTEND
+-" addic. %0,%0,1\n\
+- ble- 2f\n"
++
++#ifdef CONFIG_PAX_REFCOUNT
++" mcrxr cr0\n"
++" addico. %0,%0,1\n"
++" bf 4*cr0+so, 3f\n"
++"2:.long " "0x00c00b00""\n"
++#else
++" addic. %0,%0,1\n"
++#endif
++
++"3:\n"
++ "ble- 4f\n"
+ PPC405_ERR77(0,%1)
+ " stwcx. %0,0,%1\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+-"2:" : "=&r" (tmp)
++"4:"
++
++#ifdef CONFIG_PAX_REFCOUNT
++ _ASM_EXTABLE(2b,4b)
++#endif
++
++ : "=&r" (tmp)
+ : "r" (&rw->lock)
+ : "cr0", "xer", "memory");
+
+@@ -310,11 +327,27 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
+ __asm__ __volatile__(
+ "# read_unlock\n\t"
+ PPC_RELEASE_BARRIER
+-"1: lwarx %0,0,%1\n\
+- addic %0,%0,-1\n"
++"1: lwarx %0,0,%1\n"
++
++#ifdef CONFIG_PAX_REFCOUNT
++" mcrxr cr0\n"
++" addico. %0,%0,-1\n"
++" bf 4*cr0+so, 3f\n"
++"2:.long " "0x00c00b00""\n"
++#else
++" addic. %0,%0,-1\n"
++#endif
++
++"3:\n"
+ PPC405_ERR77(0,%1)
+ " stwcx. %0,0,%1\n\
+ bne- 1b"
++
++#ifdef CONFIG_PAX_REFCOUNT
++"\n4:\n"
++ _ASM_EXTABLE(2b, 4b)
++#endif
++
+ : "=&r"(tmp)
+ : "r"(&rw->lock)
+ : "cr0", "xer", "memory");
+diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
+index da3cdff..c774844 100644
+--- a/arch/powerpc/include/asm/string.h
++++ b/arch/powerpc/include/asm/string.h
+@@ -11,17 +11,17 @@
+ #define __HAVE_ARCH_MEMCMP
+ #define __HAVE_ARCH_MEMCHR
+
+-extern char * strcpy(char *,const char *);
+-extern char * strncpy(char *,const char *, __kernel_size_t);
+-extern __kernel_size_t strlen(const char *);
+-extern int strcmp(const char *,const char *);
+-extern int strncmp(const char *, const char *, __kernel_size_t);
+-extern char * strcat(char *, const char *);
++extern char * strcpy(char *,const char *) __nocapture(2);
++extern char * strncpy(char *,const char *, __kernel_size_t) __nocapture(2);
++extern __kernel_size_t strlen(const char *) __nocapture(1);
++extern int strcmp(const char *,const char *) __nocapture();
++extern int strncmp(const char *, const char *, __kernel_size_t) __nocapture(1, 2);
++extern char * strcat(char *, const char *) __nocapture(2);
+ extern void * memset(void *,int,__kernel_size_t);
+-extern void * memcpy(void *,const void *,__kernel_size_t);
+-extern void * memmove(void *,const void *,__kernel_size_t);
+-extern int memcmp(const void *,const void *,__kernel_size_t);
+-extern void * memchr(const void *,int,__kernel_size_t);
++extern void * memcpy(void *,const void *,__kernel_size_t) __nocapture(2);
++extern void * memmove(void *,const void *,__kernel_size_t) __nocapture(2);
++extern int memcmp(const void *,const void *,__kernel_size_t) __nocapture(1, 2);
++extern void * memchr(const void *,int,__kernel_size_t) __nocapture(1);
+
+ #endif /* __KERNEL__ */
+
+diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
+index 87e4b2d..c362390 100644
+--- a/arch/powerpc/include/asm/thread_info.h
++++ b/arch/powerpc/include/asm/thread_info.h
+@@ -107,6 +107,8 @@ static inline struct thread_info *current_thread_info(void)
+ #if defined(CONFIG_PPC64)
+ #define TIF_ELF2ABI 18 /* function descriptors must die! */
+ #endif
++/* mask must be expressible within 16 bits to satisfy 'andi' instruction reqs */
++#define TIF_GRSEC_SETXID 6 /* update credentials on syscall entry/exit */
+
+ /* as above, but as bit values */
+ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+@@ -125,9 +127,10 @@ static inline struct thread_info *current_thread_info(void)
+ #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+ #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
+ #define _TIF_NOHZ (1<<TIF_NOHZ)
++#define _TIF_GRSEC_SETXID (1<<TIF_GRSEC_SETXID)
+ #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
+- _TIF_NOHZ)
++ _TIF_NOHZ | _TIF_GRSEC_SETXID)
+
+ #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
+index c266227..f3dc6bb 100644
+--- a/arch/powerpc/include/asm/uaccess.h
++++ b/arch/powerpc/include/asm/uaccess.h
+@@ -58,6 +58,7 @@
+
+ #endif
+
++#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
+ #define access_ok(type, addr, size) \
+ (__chk_user_ptr(addr), \
+ __access_ok((__force unsigned long)(addr), (size), get_fs()))
+@@ -303,43 +304,6 @@ do { \
+ extern unsigned long __copy_tofrom_user(void __user *to,
+ const void __user *from, unsigned long size);
+
+-#ifndef __powerpc64__
+-
+-static inline unsigned long copy_from_user(void *to,
+- const void __user *from, unsigned long n)
+-{
+- if (likely(access_ok(VERIFY_READ, from, n))) {
+- check_object_size(to, n, false);
+- return __copy_tofrom_user((__force void __user *)to, from, n);
+- }
+- memset(to, 0, n);
+- return n;
+-}
+-
+-static inline unsigned long copy_to_user(void __user *to,
+- const void *from, unsigned long n)
+-{
+- if (access_ok(VERIFY_WRITE, to, n)) {
+- check_object_size(from, n, true);
+- return __copy_tofrom_user(to, (__force void __user *)from, n);
+- }
+- return n;
+-}
+-
+-#else /* __powerpc64__ */
+-
+-#define __copy_in_user(to, from, size) \
+- __copy_tofrom_user((to), (from), (size))
+-
+-extern unsigned long copy_from_user(void *to, const void __user *from,
+- unsigned long n);
+-extern unsigned long copy_to_user(void __user *to, const void *from,
+- unsigned long n);
+-extern unsigned long copy_in_user(void __user *to, const void __user *from,
+- unsigned long n);
+-
+-#endif /* __powerpc64__ */
+-
+ static inline unsigned long __copy_from_user_inatomic(void *to,
+ const void __user *from, unsigned long n)
+ {
+@@ -412,6 +376,70 @@ static inline unsigned long __copy_to_user(void __user *to,
+ return __copy_to_user_inatomic(to, from, size);
+ }
+
++#ifndef __powerpc64__
++
++static inline unsigned long __must_check copy_from_user(void *to,
++ const void __user *from, unsigned long n)
++{
++ if ((long)n < 0)
++ return n;
++
++ if (likely(access_ok(VERIFY_READ, from, n))) {
++ check_object_size(to, n, false);
++ return __copy_tofrom_user((void __force_user *)to, from, n);
++ }
++ memset(to, 0, n);
++ return n;
++}
++
++static inline unsigned long __must_check copy_to_user(void __user *to,
++ const void *from, unsigned long n)
++{
++ if ((long)n < 0)
++ return n;
++
++ if (likely(access_ok(VERIFY_WRITE, to, n))) {
++ check_object_size(from, n, true);
++ return __copy_tofrom_user(to, (void __force_user *)from, n);
++ }
++ return n;
++}
++
++#else /* __powerpc64__ */
++
++#define __copy_in_user(to, from, size) \
++ __copy_tofrom_user((to), (from), (size))
++
++static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
++{
++ if ((long)n < 0 || n > INT_MAX)
++ return n;
++
++ if (likely(access_ok(VERIFY_READ, from, n))) {
++ check_object_size(to, n, false);
++ n = __copy_from_user(to, from, n);
++ } else
++ memset(to, 0, n);
++ return n;
++}
++
++static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
++{
++ if ((long)n < 0 || n > INT_MAX)
++ return n;
++
++ if (likely(access_ok(VERIFY_WRITE, to, n))) {
++ check_object_size(from, n, true);
++ n = __copy_to_user(to, from, n);
++ }
++ return n;
++}
++
++extern unsigned long copy_in_user(void __user *to, const void __user *from,
++ unsigned long n);
++
++#endif /* __powerpc64__ */
++
+ extern unsigned long __clear_user(void __user *addr, unsigned long size);
+
+ static inline unsigned long clear_user(void __user *addr, unsigned long size)
+diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
+index fe4c075..fcb4600 100644
+--- a/arch/powerpc/kernel/Makefile
++++ b/arch/powerpc/kernel/Makefile
+@@ -14,6 +14,11 @@ CFLAGS_prom_init.o += -fPIC
+ CFLAGS_btext.o += -fPIC
+ endif
+
++CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
++CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
++CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
++CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
++
+ ifdef CONFIG_FUNCTION_TRACER
+ # Do not trace early boot code
+ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+@@ -26,6 +31,8 @@ CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+ CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+ endif
+
++CFLAGS_REMOVE_prom_init.o += $(LATENT_ENTROPY_PLUGIN_CFLAGS)
++
+ obj-y := cputable.o ptrace.o syscalls.o \
+ irq.o align.o signal_32.o pmc.o vdso.o \
+ process.o systbl.o idle.o \
+diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
+index 38a1f96..ed94e42 100644
+--- a/arch/powerpc/kernel/exceptions-64e.S
++++ b/arch/powerpc/kernel/exceptions-64e.S
+@@ -1010,6 +1010,7 @@ storage_fault_common:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
++ bl save_nvgprs
+ mr r4,r14
+ mr r5,r15
+ ld r14,PACA_EXGEN+EX_R14(r13)
+@@ -1018,8 +1019,7 @@ storage_fault_common:
+ cmpdi r3,0
+ bne- 1f
+ b ret_from_except_lite
+-1: bl save_nvgprs
+- mr r5,r3
++1: mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r4,_DAR(r1)
+ bl bad_page_fault
+diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
+index bffec73..9cc5a35 100644
+--- a/arch/powerpc/kernel/exceptions-64s.S
++++ b/arch/powerpc/kernel/exceptions-64s.S
+@@ -1520,10 +1520,10 @@ handle_page_fault:
+ 11: ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
++ bl save_nvgprs
+ bl do_page_fault
+ cmpdi r3,0
+ beq+ 12f
+- bl save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ lwz r4,_DAR(r1)
+diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
+index 08887cf..0c98725 100644
+--- a/arch/powerpc/kernel/irq.c
++++ b/arch/powerpc/kernel/irq.c
+@@ -477,6 +477,8 @@ void migrate_irqs(void)
+ }
+ #endif
+
++extern void gr_handle_kernel_exploit(void);
++
+ static inline void check_stack_overflow(void)
+ {
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+@@ -489,6 +491,7 @@ static inline void check_stack_overflow(void)
+ pr_err("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
++ gr_handle_kernel_exploit();
+ }
+ #endif
+ }
+diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
+index 5a7a78f..c0e4207 100644
+--- a/arch/powerpc/kernel/module_32.c
++++ b/arch/powerpc/kernel/module_32.c
+@@ -158,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
+ me->arch.core_plt_section = i;
+ }
+ if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
+- pr_err("Module doesn't contain .plt or .init.plt sections.\n");
++ pr_err("Module %s doesn't contain .plt or .init.plt sections.\n", me->name);
+ return -ENOEXEC;
+ }
+
+@@ -188,11 +188,16 @@ static uint32_t do_plt_call(void *location,
+
+ pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
+ /* Init, or core PLT? */
+- if (location >= mod->core_layout.base
+- && location < mod->core_layout.base + mod->core_layout.size)
++ if ((location >= mod->core_layout.base_rx && location < mod->core_layout.base_rx + mod->core_layout.size_rx) ||
++ (location >= mod->core_layout.base_rw && location < mod->core_layout.base_rw + mod->core_layout.size_rw))
+ entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
+- else
++ else if ((location >= mod->init_layout.base_rx && location < mod->init_layout.base_rx + mod->init_layout.size_rx) ||
++ (location >= mod->init_layout.base_rw && location < mod->init_layout.base_rw + mod->init_layout.size_rw))
+ entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
++ else {
++ printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name);
++ return ~0UL;
++ }
+
+ /* Find this entry, or if that fails, the next avail. entry */
+ while (entry->jump[0]) {
+@@ -301,7 +306,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
+ #ifdef CONFIG_DYNAMIC_FTRACE
+ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
+ {
+- module->arch.tramp = do_plt_call(module->core_layout.base,
++ module->arch.tramp = do_plt_call(module->core_layout.base_rx,
+ (unsigned long)ftrace_caller,
+ sechdrs, module);
+ if (!module->arch.tramp)
+diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
+index ad37aa1..51da6c4 100644
+--- a/arch/powerpc/kernel/process.c
++++ b/arch/powerpc/kernel/process.c
+@@ -1360,8 +1360,8 @@ void show_regs(struct pt_regs * regs)
+ * Lookup NIP late so we have the best change of getting the
+ * above info out without failing
+ */
+- printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+- printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
++ printk("NIP ["REG"] %pA\n", regs->nip, (void *)regs->nip);
++ printk("LR ["REG"] %pA\n", regs->link, (void *)regs->link);
+ #endif
+ show_stack(current, (unsigned long *) regs->gpr[1]);
+ if (!user_mode(regs))
+@@ -1882,10 +1882,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
+ newsp = stack[0];
+ ip = stack[STACK_FRAME_LR_SAVE];
+ if (!firstframe || ip != lr) {
+- printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
++ printk("["REG"] ["REG"] %pA", sp, ip, (void *)ip);
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ if ((ip == rth) && curr_frame >= 0) {
+- printk(" (%pS)",
++ printk(" (%pA)",
+ (void *)current->ret_stack[curr_frame].ret);
+ curr_frame--;
+ }
+@@ -1905,7 +1905,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
+ struct pt_regs *regs = (struct pt_regs *)
+ (sp + STACK_FRAME_OVERHEAD);
+ lr = regs->link;
+- printk("--- interrupt: %lx at %pS\n LR = %pS\n",
++ printk("--- interrupt: %lx at %pA\n LR = %pA\n",
+ regs->trap, (void *)regs->nip, (void *)lr);
+ firstframe = 1;
+ }
+@@ -1942,13 +1942,6 @@ void notrace __ppc64_runlatch_off(void)
+ }
+ #endif /* CONFIG_PPC64 */
+
+-unsigned long arch_align_stack(unsigned long sp)
+-{
+- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+- sp -= get_random_int() & ~PAGE_MASK;
+- return sp & ~0xf;
+-}
+-
+ static inline unsigned long brk_rnd(void)
+ {
+ unsigned long rnd = 0;
+diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
+index bf91658..edd21f8 100644
+--- a/arch/powerpc/kernel/ptrace.c
++++ b/arch/powerpc/kernel/ptrace.c
+@@ -3312,6 +3312,10 @@ static int do_seccomp(struct pt_regs *regs)
+ static inline int do_seccomp(struct pt_regs *regs) { return 0; }
+ #endif /* CONFIG_SECCOMP */
+
++#ifdef CONFIG_GRKERNSEC_SETXID
++extern void gr_delayed_cred_worker(void);
++#endif
++
+ /**
+ * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
+ * @regs: the pt_regs of the task to trace (current)
+@@ -3335,6 +3339,11 @@ long do_syscall_trace_enter(struct pt_regs *regs)
+ {
+ user_exit();
+
++#ifdef CONFIG_GRKERNSEC_SETXID
++ if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID)))
++ gr_delayed_cred_worker();
++#endif
++
+ /*
+ * The tracer may decide to abort the syscall, if so tracehook
+ * will return !0. Note that the tracer may also just change
+@@ -3353,6 +3362,7 @@ long do_syscall_trace_enter(struct pt_regs *regs)
+ if (regs->gpr[0] >= NR_syscalls)
+ goto skip;
+
++
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gpr[0]);
+
+@@ -3384,6 +3394,11 @@ void do_syscall_trace_leave(struct pt_regs *regs)
+ {
+ int step;
+
++#ifdef CONFIG_GRKERNSEC_SETXID
++ if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID)))
++ gr_delayed_cred_worker();
++#endif
++
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
+index a7daf74..d8159e5 100644
+--- a/arch/powerpc/kernel/signal_32.c
++++ b/arch/powerpc/kernel/signal_32.c
+@@ -1000,7 +1000,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
+ /* Save user registers on the stack */
+ frame = &rt_sf->uc.uc_mcontext;
+ addr = frame;
+- if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
++ if (vdso32_rt_sigtramp && current->mm->context.vdso_base != ~0UL) {
+ sigret = 0;
+ tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp;
+ } else {
+diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
+index 70409bb..6cc6990 100644
+--- a/arch/powerpc/kernel/signal_64.c
++++ b/arch/powerpc/kernel/signal_64.c
+@@ -770,7 +770,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
+ current->thread.fp_state.fpscr = 0;
+
+ /* Set up to return from userspace. */
+- if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
++ if (vdso64_rt_sigtramp && current->mm->context.vdso_base != ~0UL) {
+ regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp;
+ } else {
+ err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
+diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
+index 62859eb..035955d 100644
+--- a/arch/powerpc/kernel/traps.c
++++ b/arch/powerpc/kernel/traps.c
+@@ -37,6 +37,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/ratelimit.h>
+ #include <linux/context_tracking.h>
++#include <linux/uaccess.h>
+
+ #include <asm/emulated_ops.h>
+ #include <asm/pgtable.h>
+@@ -145,6 +146,8 @@ static unsigned __kprobes long oops_begin(struct pt_regs *regs)
+ return flags;
+ }
+
++extern void gr_handle_kernel_exploit(void);
++
+ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
+ int signr)
+ {
+@@ -194,6 +197,9 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
++
++ gr_handle_kernel_exploit();
++
+ do_exit(signr);
+ }
+
+@@ -1145,6 +1151,26 @@ void __kprobes program_check_exception(struct pt_regs *regs)
+ enum ctx_state prev_state = exception_enter();
+ unsigned int reason = get_reason(regs);
+
++#ifdef CONFIG_PAX_REFCOUNT
++ unsigned int bkpt;
++ const struct exception_table_entry *entry;
++
++ if (reason & REASON_ILLEGAL) {
++ /* Check if PaX bad instruction */
++ if (!probe_kernel_address((const void *)regs->nip, bkpt) && bkpt == 0xc00b00) {
++ current->thread.trap_nr = 0;
++ pax_report_refcount_error(regs, NULL);
++ /* fixup_exception() for PowerPC does not exist, simulate its job */
++ if ((entry = search_exception_tables(regs->nip)) != NULL) {
++ regs->nip = entry->fixup;
++ return;
++ }
++ /* fixup_exception() could not handle */
++ goto bail;
++ }
++ }
++#endif
++
+ /* We can now get here via a FP Unavailable exception if the core
+ * has no FPU, in that case the reason flags will be 0 */
+
+diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
+index 4111d30..fa5e7be 100644
+--- a/arch/powerpc/kernel/vdso.c
++++ b/arch/powerpc/kernel/vdso.c
+@@ -35,6 +35,7 @@
+ #include <asm/vdso.h>
+ #include <asm/vdso_datapage.h>
+ #include <asm/setup.h>
++#include <asm/mman.h>
+
+ #undef DEBUG
+
+@@ -180,7 +181,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+ vdso_base = VDSO32_MBASE;
+ #endif
+
+- current->mm->context.vdso_base = 0;
++ current->mm->context.vdso_base = ~0UL;
+
+ /* vDSO has a problem and was disabled, just don't "enable" it for the
+ * process
+@@ -201,7 +202,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+ vdso_base = get_unmapped_area(NULL, vdso_base,
+ (vdso_pages << PAGE_SHIFT) +
+ ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
+- 0, 0);
++ 0, MAP_PRIVATE | MAP_EXECUTABLE);
+ if (IS_ERR_VALUE(vdso_base)) {
+ rc = vdso_base;
+ goto fail_mmapsem;
+diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c
+index 5eea6f3..5d10396 100644
+--- a/arch/powerpc/lib/usercopy_64.c
++++ b/arch/powerpc/lib/usercopy_64.c
+@@ -9,22 +9,6 @@
+ #include <linux/module.h>
+ #include <asm/uaccess.h>
+
+-unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
+-{
+- if (likely(access_ok(VERIFY_READ, from, n)))
+- n = __copy_from_user(to, from, n);
+- else
+- memset(to, 0, n);
+- return n;
+-}
+-
+-unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
+-{
+- if (likely(access_ok(VERIFY_WRITE, to, n)))
+- n = __copy_to_user(to, from, n);
+- return n;
+-}
+-
+ unsigned long copy_in_user(void __user *to, const void __user *from,
+ unsigned long n)
+ {
+@@ -35,7 +19,5 @@ unsigned long copy_in_user(void __user *to, const void __user *from,
+ return n;
+ }
+
+-EXPORT_SYMBOL(copy_from_user);
+-EXPORT_SYMBOL(copy_to_user);
+ EXPORT_SYMBOL(copy_in_user);
+
+diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
+index bb1ffc5..9ae5cb6 100644
+--- a/arch/powerpc/mm/fault.c
++++ b/arch/powerpc/mm/fault.c
+@@ -34,6 +34,10 @@
+ #include <linux/context_tracking.h>
+ #include <linux/hugetlb.h>
+ #include <linux/uaccess.h>
++#include <linux/slab.h>
++#include <linux/pagemap.h>
++#include <linux/compiler.h>
++#include <linux/unistd.h>
+
+ #include <asm/firmware.h>
+ #include <asm/page.h>
+@@ -68,6 +72,33 @@ static inline int notify_page_fault(struct pt_regs *regs)
+ }
+ #endif
+
++#ifdef CONFIG_PAX_PAGEEXEC
++/*
++ * PaX: decide what to do with offenders (regs->nip = fault address)
++ *
++ * returns 1 when task should be killed
++ */
++static int pax_handle_fetch_fault(struct pt_regs *regs)
++{
++ return 1;
++}
++
++void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
++{
++ unsigned long i;
++
++ printk(KERN_ERR "PAX: bytes at PC: ");
++ for (i = 0; i < 5; i++) {
++ unsigned int c;
++ if (get_user(c, (unsigned int __user *)pc+i))
++ printk(KERN_CONT "???????? ");
++ else
++ printk(KERN_CONT "%08x ", c);
++ }
++ printk("\n");
++}
++#endif
++
+ /*
+ * Check whether the instruction at regs->nip is a store using
+ * an update addressing form which will update r1.
+@@ -227,7 +258,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+ * indicate errors in DSISR but can validly be set in SRR1.
+ */
+ if (trap == 0x400)
+- error_code &= 0x48200000;
++ error_code &= 0x58200000;
+ else
+ is_write = error_code & DSISR_ISSTORE;
+ #else
+@@ -384,12 +415,16 @@ good_area:
+ * "undefined". Of those that can be set, this is the only
+ * one which seems bad.
+ */
+- if (error_code & 0x10000000)
++ if (error_code & DSISR_GUARDED)
+ /* Guarded storage error. */
+ goto bad_area;
+ #endif /* CONFIG_8xx */
+
+ if (is_exec) {
++#ifdef CONFIG_PPC_STD_MMU
++ if (error_code & DSISR_GUARDED)
++ goto bad_area;
++#endif
+ /*
+ * Allow execution from readable areas if the MMU does not
+ * provide separate controls over reading and executing.
+@@ -484,6 +519,23 @@ bad_area:
+ bad_area_nosemaphore:
+ /* User mode accesses cause a SIGSEGV */
+ if (user_mode(regs)) {
++
++#ifdef CONFIG_PAX_PAGEEXEC
++ if (mm->pax_flags & MF_PAX_PAGEEXEC) {
++#ifdef CONFIG_PPC_STD_MMU
++ if (is_exec && (error_code & (DSISR_PROTFAULT | DSISR_GUARDED))) {
++#else
++ if (is_exec && regs->nip == address) {
++#endif
++ switch (pax_handle_fetch_fault(regs)) {
++ }
++
++ pax_report_fault(regs, (void *)regs->nip, (void *)regs->gpr[PT_R1]);
++ do_group_exit(SIGKILL);
++ }
++ }
++#endif
++
+ _exception(SIGSEGV, regs, code, address);
+ goto bail;
+ }
+diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
+index 2f1e443..de888bf 100644
+--- a/arch/powerpc/mm/mmap.c
++++ b/arch/powerpc/mm/mmap.c
+@@ -194,6 +194,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ {
+ unsigned long random_factor = 0UL;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+@@ -205,9 +209,21 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ */
+ if (mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE;
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base += mm->delta_mmap;
++#endif
++
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor);
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
++#endif
++
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ }
+ }
+diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
+index 2b27458..7c7c59b 100644
+--- a/arch/powerpc/mm/slice.c
++++ b/arch/powerpc/mm/slice.c
+@@ -105,7 +105,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
+ if ((mm->task_size - len) < addr)
+ return 0;
+ vma = find_vma(mm, addr);
+- return (!vma || (addr + len) <= vma->vm_start);
++ return check_heap_stack_gap(vma, addr, len, 0);
+ }
+
+ static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+@@ -276,6 +276,12 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
+ info.align_offset = 0;
+
+ addr = TASK_UNMAPPED_BASE;
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ addr += mm->delta_mmap;
++#endif
++
+ while (addr < TASK_SIZE) {
+ info.low_limit = addr;
+ if (!slice_scan_available(addr, available, 1, &addr))
+@@ -410,6 +416,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ if (fixed && addr > (mm->task_size - len))
+ return -ENOMEM;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!fixed && (mm->pax_flags & MF_PAX_RANDMMAP))
++ addr = 0;
++#endif
++
+ /* If hint, make sure it matches our alignment restrictions */
+ if (!fixed && addr) {
+ addr = _ALIGN_UP(addr, 1ul << pshift);
+@@ -555,10 +566,10 @@ unsigned long arch_get_unmapped_area(struct file *filp,
+ }
+
+ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+- const unsigned long addr0,
+- const unsigned long len,
+- const unsigned long pgoff,
+- const unsigned long flags)
++ unsigned long addr0,
++ unsigned long len,
++ unsigned long pgoff,
++ unsigned long flags)
+ {
+ return slice_get_unmapped_area(addr0, len, flags,
+ current->mm->context.user_psize, 1);
+diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
+index 0625446..139a0aa 100644
+--- a/arch/powerpc/platforms/cell/spufs/file.c
++++ b/arch/powerpc/platforms/cell/spufs/file.c
+@@ -263,9 +263,9 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ return VM_FAULT_NOPAGE;
+ }
+
+-static int spufs_mem_mmap_access(struct vm_area_struct *vma,
++static ssize_t spufs_mem_mmap_access(struct vm_area_struct *vma,
+ unsigned long address,
+- void *buf, int len, int write)
++ void *buf, size_t len, int write)
+ {
+ struct spu_context *ctx = vma->vm_file->private_data;
+ unsigned long offset = address - vma->vm_start;
+diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
+index 26c5d5be..a308c28 100644
+--- a/arch/s390/Kconfig.debug
++++ b/arch/s390/Kconfig.debug
+@@ -9,6 +9,7 @@ config S390_PTDUMP
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ select DEBUG_FS
++ depends on !GRKERNSEC_KMEM
+ ---help---
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
+index d28cc2f..a937312 100644
+--- a/arch/s390/include/asm/atomic.h
++++ b/arch/s390/include/asm/atomic.h
+@@ -342,4 +342,14 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
+ #define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
+ #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
++#define atomic64_read_unchecked(v) atomic64_read(v)
++#define atomic64_set_unchecked(v, i) atomic64_set((v), (i))
++#define atomic64_add_unchecked(a, v) atomic64_add((a), (v))
++#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v))
++#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v))
++#define atomic64_inc_unchecked(v) atomic64_inc(v)
++#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v)
++#define atomic64_dec_unchecked(v) atomic64_dec(v)
++#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n))
++
+ #endif /* __ARCH_S390_ATOMIC__ */
+diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
+index 05219a5..032f5f0 100644
+--- a/arch/s390/include/asm/cache.h
++++ b/arch/s390/include/asm/cache.h
+@@ -9,8 +9,10 @@
+ #ifndef __ARCH_S390_CACHE_H
+ #define __ARCH_S390_CACHE_H
+
+-#define L1_CACHE_BYTES 256
++#include <linux/const.h>
++
+ #define L1_CACHE_SHIFT 8
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+ #define NET_SKB_PAD 32
+
+ #define __read_mostly __section(.data..read_mostly)
+diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
+index 1736c7d..261351c 100644
+--- a/arch/s390/include/asm/elf.h
++++ b/arch/s390/include/asm/elf.h
+@@ -167,6 +167,13 @@ extern unsigned int vdso_enabled;
+ (STACK_TOP / 3 * 2) : \
+ (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_31BIT) ? 0x10000UL : 0x80000000UL)
++
++#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_31BIT) ? 15 : 26)
++#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_31BIT) ? 15 : 26)
++#endif
++
+ /* This yields a mask that user programs can use to figure out what
+ instruction set this CPU supports. */
+
+diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h
+index c4a93d6..4d2a9b4 100644
+--- a/arch/s390/include/asm/exec.h
++++ b/arch/s390/include/asm/exec.h
+@@ -7,6 +7,6 @@
+ #ifndef __ASM_EXEC_H
+ #define __ASM_EXEC_H
+
+-extern unsigned long arch_align_stack(unsigned long sp);
++#define arch_align_stack(x) ((x) & ~0xfUL)
+
+ #endif /* __ASM_EXEC_H */
+diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
+index 52d7c87..577d292 100644
+--- a/arch/s390/include/asm/uaccess.h
++++ b/arch/s390/include/asm/uaccess.h
+@@ -59,6 +59,7 @@ static inline int __range_ok(unsigned long addr, unsigned long size)
+ __range_ok((unsigned long)(addr), (size)); \
+ })
+
++#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size)) /* forwards its arguments unchanged to access_ok() */
+ #define access_ok(type, addr, size) __access_ok(addr, size)
+
+ /*
+@@ -337,6 +338,10 @@ static inline unsigned long __must_check
+ copy_to_user(void __user *to, const void *from, unsigned long n)
+ {
+ might_fault();
++
++ if ((long)n < 0)
++ return n;
++
+ return __copy_to_user(to, from, n);
+ }
+
+@@ -360,10 +365,14 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
+ static inline unsigned long __must_check
+ copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
+- unsigned int sz = __compiletime_object_size(to);
++ size_t sz = __compiletime_object_size(to);
+
+ might_fault();
+- if (unlikely(sz != -1 && sz < n)) {
++
++ if ((long)n < 0)
++ return n;
++
++ if (unlikely(sz != (size_t)-1 && sz < n)) {
+ if (!__builtin_constant_p(n))
+ copy_user_overflow(sz, n);
+ else
+diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
+index fbc0789..e7962a1 100644
+--- a/arch/s390/kernel/module.c
++++ b/arch/s390/kernel/module.c
+@@ -163,11 +163,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+
+ /* Increase core size by size of got & plt and set start
+ offsets for got and plt. */
+- me->core_layout.size = ALIGN(me->core_layout.size, 4);
+- me->arch.got_offset = me->core_layout.size;
+- me->core_layout.size += me->arch.got_size;
+- me->arch.plt_offset = me->core_layout.size;
+- me->core_layout.size += me->arch.plt_size;
++ me->core_layout.size_rw = ALIGN(me->core_layout.size_rw, 4);
++ me->arch.got_offset = me->core_layout.size_rw;
++ me->core_layout.size_rw += me->arch.got_size;
++ me->arch.plt_offset = me->core_layout.size_rx;
++ me->core_layout.size_rx += me->arch.plt_size;
+ return 0;
+ }
+
+@@ -283,7 +283,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ if (info->got_initialized == 0) {
+ Elf_Addr *gotent;
+
+- gotent = me->core_layout.base + me->arch.got_offset +
++ gotent = me->core_layout.base_rw + me->arch.got_offset +
+ info->got_offset;
+ *gotent = val;
+ info->got_initialized = 1;
+@@ -306,7 +306,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ else if (r_type == R_390_GOTENT ||
+ r_type == R_390_GOTPLTENT) {
+- val += (Elf_Addr) me->core_layout.base - loc;
++ val += (Elf_Addr) me->core_layout.base_rw - loc;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ }
+ break;
+@@ -319,7 +319,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
+ if (info->plt_initialized == 0) {
+ unsigned int *ip;
+- ip = me->core_layout.base + me->arch.plt_offset +
++ ip = me->core_layout.base_rx + me->arch.plt_offset +
+ info->plt_offset;
+ ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+ ip[1] = 0x100a0004;
+@@ -338,7 +338,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ val - loc + 0xffffUL < 0x1ffffeUL) ||
+ (r_type == R_390_PLT32DBL &&
+ val - loc + 0xffffffffULL < 0x1fffffffeULL)))
+- val = (Elf_Addr) me->core_layout.base +
++ val = (Elf_Addr) me->core_layout.base_rx +
+ me->arch.plt_offset +
+ info->plt_offset;
+ val += rela->r_addend - loc;
+@@ -360,7 +360,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ case R_390_GOTOFF32: /* 32 bit offset to GOT. */
+ case R_390_GOTOFF64: /* 64 bit offset to GOT. */
+ val = val + rela->r_addend -
+- ((Elf_Addr) me->core_layout.base + me->arch.got_offset);
++ ((Elf_Addr) me->core_layout.base_rw + me->arch.got_offset);
+ if (r_type == R_390_GOTOFF16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_GOTOFF32)
+@@ -370,7 +370,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ break;
+ case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
+ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
+- val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
++ val = (Elf_Addr) me->core_layout.base_rw + me->arch.got_offset +
+ rela->r_addend - loc;
+ if (r_type == R_390_GOTPC)
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
+diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
+index bba4fa7..9c32b3c 100644
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -217,13 +217,6 @@ unsigned long get_wchan(struct task_struct *p)
+ return 0;
+ }
+
+-unsigned long arch_align_stack(unsigned long sp)
+-{
+- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+- sp -= get_random_int() & ~PAGE_MASK;
+- return sp & ~0xf;
+-}
+-
+ static inline unsigned long brk_rnd(void)
+ {
+ return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
+diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
+index eb9df28..7b686ba 100644
+--- a/arch/s390/mm/mmap.c
++++ b/arch/s390/mm/mmap.c
+@@ -201,9 +201,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
+ }
+
+ static unsigned long
+-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
+- const unsigned long len, const unsigned long pgoff,
+- const unsigned long flags)
++s390_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
++ unsigned long len, unsigned long pgoff,
++ unsigned long flags)
+ {
+ struct mm_struct *mm = current->mm;
+ unsigned long area;
+@@ -230,6 +230,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ {
+ unsigned long random_factor = 0UL;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+@@ -239,9 +243,21 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
+ */
+ if (mmap_is_legacy()) {
+ mm->mmap_base = mmap_base_legacy(random_factor);
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base += mm->delta_mmap;
++#endif
++
+ mm->get_unmapped_area = s390_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor);
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
++#endif
++
+ mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+ }
+ }
+diff --git a/arch/score/include/asm/cache.h b/arch/score/include/asm/cache.h
+index ae3d59f..f65f075 100644
+--- a/arch/score/include/asm/cache.h
++++ b/arch/score/include/asm/cache.h
+@@ -1,7 +1,9 @@
+ #ifndef _ASM_SCORE_CACHE_H
+ #define _ASM_SCORE_CACHE_H
+
++#include <linux/const.h>
++
+ #define L1_CACHE_SHIFT 4
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #endif /* _ASM_SCORE_CACHE_H */
+diff --git a/arch/score/include/asm/exec.h b/arch/score/include/asm/exec.h
+index f9f3cd5..58ff438 100644
+--- a/arch/score/include/asm/exec.h
++++ b/arch/score/include/asm/exec.h
+@@ -1,6 +1,6 @@
+ #ifndef _ASM_SCORE_EXEC_H
+ #define _ASM_SCORE_EXEC_H
+
+-extern unsigned long arch_align_stack(unsigned long sp);
++#define arch_align_stack(x) (x) /* identity: the stack pointer value is passed through unmodified */
+
+ #endif /* _ASM_SCORE_EXEC_H */
+diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
+index aae9480..93e40a4 100644
+--- a/arch/score/kernel/process.c
++++ b/arch/score/kernel/process.c
+@@ -114,8 +114,3 @@ unsigned long get_wchan(struct task_struct *task)
+
+ return task_pt_regs(task)->cp0_epc;
+ }
+-
+-unsigned long arch_align_stack(unsigned long sp)
+-{
+- return sp;
+-}
+diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h
+index ef9e555..331bd29 100644
+--- a/arch/sh/include/asm/cache.h
++++ b/arch/sh/include/asm/cache.h
+@@ -9,10 +9,11 @@
+ #define __ASM_SH_CACHE_H
+ #ifdef __KERNEL__
+
++#include <linux/const.h>
+ #include <linux/init.h>
+ #include <cpu/cache.h>
+
+-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
+index 6777177..d44b592 100644
+--- a/arch/sh/mm/mmap.c
++++ b/arch/sh/mm/mmap.c
+@@ -36,6 +36,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int do_colour_align;
++ unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags);
+ struct vm_unmapped_area_info info;
+
+ if (flags & MAP_FIXED) {
+@@ -55,6 +56,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ if (filp || (flags & MAP_SHARED))
+ do_colour_align = 1;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ if (addr) {
+ if (do_colour_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+@@ -62,14 +67,13 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+- if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len, offset))
+ return addr;
+ }
+
+ info.flags = 0;
+ info.length = len;
+- info.low_limit = TASK_UNMAPPED_BASE;
++ info.low_limit = mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+@@ -77,14 +81,15 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ }
+
+ unsigned long
+-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+- const unsigned long len, const unsigned long pgoff,
+- const unsigned long flags)
++arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0,
++ unsigned long len, unsigned long pgoff,
++ unsigned long flags)
+ {
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ int do_colour_align;
++ unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags);
+ struct vm_unmapped_area_info info;
+
+ if (flags & MAP_FIXED) {
+@@ -104,6 +109,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ if (filp || (flags & MAP_SHARED))
+ do_colour_align = 1;
+
++#ifdef CONFIG_PAX_RANDMMAP
++ if (!(mm->pax_flags & MF_PAX_RANDMMAP))
++#endif
++
+ /* requesting a specific address */
+ if (addr) {
+ if (do_colour_align)
+@@ -112,8 +121,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+- if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len, offset))
+ return addr;
+ }
+
+@@ -135,6 +143,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
++
++#ifdef CONFIG_PAX_RANDMMAP
++ if (mm->pax_flags & MF_PAX_RANDMMAP)
++ info.low_limit += mm->delta_mmap;
++#endif
++
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index 59b0960..75a8bcb 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -39,6 +39,7 @@ config SPARC
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
+ select MODULES_USE_ELF_RELA
++ select HAVE_GCC_PLUGINS
+ select ODD_RT_SIGACTION
+ select OLD_SIGSUSPEND
+ select ARCH_HAS_SG_CHAIN
+diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
+index 24827a3..5dd45ac4 100644
+--- a/arch/sparc/include/asm/atomic_64.h
++++ b/arch/sparc/include/asm/atomic_64.h
+@@ -15,18 +15,38 @@
+ #define ATOMIC64_INIT(i) { (i) }
+
+ #define atomic_read(v) READ_ONCE((v)->counter)
++static inline int atomic_read_unchecked(const atomic_unchecked_t *v)
++{ /* Returns v->counter, loaded with READ_ONCE() just as atomic_read() loads it. */
++ return READ_ONCE(v->counter);
++}
+ #define atomic64_read(v) READ_ONCE((v)->counter)
++static inline long atomic64_read_unchecked(const atomic64_unchecked_t *v)
++{ /* Returns v->counter, loaded with READ_ONCE() just as atomic64_read() loads it. */
++ return READ_ONCE(v->counter);
++}
+
+ #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
++static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i)
++{ /* Stores i into v->counter with WRITE_ONCE(), the same store atomic_set() performs. */
++ WRITE_ONCE(v->counter, i);
++}
+ #define atomic64_set(v, i) WRITE_ONCE(((v)->counter), (i))
++static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i)
++{ /* Stores i into v->counter with WRITE_ONCE(), the same store atomic64_set() performs. */
++ WRITE_ONCE(v->counter, i);
++}
+
+-#define ATOMIC_OP(op) \
+-void atomic_##op(int, atomic_t *); \
+-void atomic64_##op(long, atomic64_t *);
++#define __ATOMIC_OP(op, suffix) \
++void atomic_##op##suffix(int, atomic##suffix##_t *); \
++void atomic64_##op##suffix(long, atomic64##suffix##_t *);
+
+-#define ATOMIC_OP_RETURN(op) \
+-int atomic_##op##_return(int, atomic_t *); \
+-long atomic64_##op##_return(long, atomic64_t *);
++#define ATOMIC_OP(op) __ATOMIC_OP(op, ) __ATOMIC_OP(op, _unchecked) /* empty suffix -> atomic_t prototypes, _unchecked -> atomic_unchecked_t prototypes */
++/* The *_return generator below follows the same suffix scheme; only prototypes are emitted, the definitions are not in this header hunk. */
++#define __ATOMIC_OP_RETURN(op, suffix) \
++int atomic_##op##_return##suffix(int, atomic##suffix##_t *); \
++long atomic64_##op##_return##suffix(long, atomic64##suffix##_t *);
++
++#define ATOMIC_OP_RETURN(op) __ATOMIC_OP_RETURN(op, ) __ATOMIC_OP_RETURN(op, _unchecked) /* declares both the plain and the _unchecked *_return prototypes */
+
+ #define ATOMIC_FETCH_OP(op) \
+ int atomic_fetch_##op(int, atomic_t *); \
+@@ -47,13 +67,23 @@ ATOMIC_OPS(xor)
+ #undef ATOMIC_OPS
+ #undef ATOMIC_FETCH_OP
+ #undef ATOMIC_OP_RETURN
++#undef __ATOMIC_OP_RETURN
+ #undef ATOMIC_OP
++#undef __ATOMIC_OP
+
+ #define atomic_dec_return(v) atomic_sub_return(1, v)
+ #define atomic64_dec_return(v) atomic64_sub_return(1, v)
+
+ #define atomic_inc_return(v) atomic_add_return(1, v)
++static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v)
++{ /* Adds 1 via atomic_add_return_unchecked() and returns that call's result. */
++ return atomic_add_return_unchecked(1, v);
++}
+ #define atomic64_inc_return(v) atomic64_add_return(1, v)
++static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v)
++{ /* Adds 1 via atomic64_add_return_unchecked() and returns that call's result. */
++ return atomic64_add_return_unchecked(1, v);
++}
+
+ /*
+ * atomic_inc_and_test - increment and test
+@@ -64,6 +94,10 @@ ATOMIC_OPS(xor)
+ * other cases.
+ */
+ #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
++static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v)
++{ /* Increments *v and returns 1 when atomic_inc_return_unchecked() reports 0, otherwise 0. */
++ return atomic_inc_return_unchecked(v) == 0;
++}
+ #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+
+ #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
+@@ -73,25 +107,60 @@ ATOMIC_OPS(xor)
+ #define atomic64_dec_and_test(v) (atomic64_sub_return(1, v) == 0)
+
+ #define atomic_inc(v) atomic_add(1, v)
++static inline void atomic_inc_unchecked(atomic_unchecked_t *v)
++{ /* Increment expressed as atomic_add_unchecked(1, v); no value is returned. */
++ atomic_add_unchecked(1, v);
++}
+ #define atomic64_inc(v) atomic64_add(1, v)
++static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v)
++{ /* Increment expressed as atomic64_add_unchecked(1, v); no value is returned. */
++ atomic64_add_unchecked(1, v);
++}
+
+ #define atomic_dec(v) atomic_sub(1, v)
++static inline void atomic_dec_unchecked(atomic_unchecked_t *v)
++{ /* Decrement expressed as atomic_sub_unchecked(1, v); no value is returned. */
++ atomic_sub_unchecked(1, v);
++}
+ #define atomic64_dec(v) atomic64_sub(1, v)
++static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v)
++{ /* Decrement expressed as atomic64_sub_unchecked(1, v); no value is returned. */
++ atomic64_sub_unchecked(1, v);
++}
+
+ #define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
+ #define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0)
+
+ #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
++static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new)
++{ /* cmpxchg() on v->counter with (old, new); returns cmpxchg()'s result - presumably the value found in the counter, TODO confirm. */
++ return cmpxchg(&v->counter, old, new);
++}
+ #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
++static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new)
++{ /* xchg() of new into v->counter; returns xchg()'s result - presumably the previous counter value, TODO confirm. */
++ return xchg(&v->counter, new);
++}
+
+ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+ {
+- int c, old;
++ int c, old, new;
+ c = atomic_read(v);
+ for (;;) {
+- if (unlikely(c == (u)))
++ if (unlikely(c == u))
+ break;
+- old = atomic_cmpxchg((v), c, c + (a));
++
++ asm volatile("addcc %2, %0, %0\n"
++
++#ifdef CONFIG_PAX_REFCOUNT
++ "tvs %%icc, 6\n"
++#endif
++
++ : "=r" (new)
++ : "0" (c), "ir" (a)
++ : "cc");
++
++ old = atomic_cmpxchg(v, c, new);
+ if (likely(old == c))
+ break;
+ c = old;
+@@ -101,21 +170,42 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+
+ #define atomic64_cmpxchg(v, o, n) \
+ ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
++static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old,
++ long new)
++{ /* cmpxchg() on v->counter with (old, new); returns cmpxchg()'s result - presumably the value found in the counter, TODO confirm. */
++ return cmpxchg(&(v->counter), old, new);
++}
++
+ #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
++static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new)
++{ /* xchg() of new into v->counter; returns xchg()'s result - presumably the previous counter value, TODO confirm. */
++ return xchg(&v->counter, new);
++}
+
+ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+ {
+- long c, old;
++ long c, old, new; /* new receives c + a from the asm below instead of the former inline "c + (a)" */
+ c = atomic64_read(v);
+ for (;;) {
+- if (unlikely(c == (u)))
++ if (unlikely(c == u))
+ break;
+- old = atomic64_cmpxchg((v), c, c + (a));
++
++ asm volatile("addcc %2, %0, %0\n" /* %0 starts as c (tied by the "0" constraint) and has a (%2) added to it; "cc" is declared clobbered */
++
++#ifdef CONFIG_PAX_REFCOUNT
++ "tvs %%xcc, 6\n" /* emitted only with PAX_REFCOUNT; NOTE(review): presumably traps when the 64-bit add overflowed - confirm against the SPARC V9 manual */
++#endif /* CONFIG_PAX_REFCOUNT */
++
++ : "=r" (new)
++ : "0" (c), "ir" (a)
++ : "cc");
++
++ old = atomic64_cmpxchg(v, c, new); /* the loop ends once the value returned here equals c, otherwise it retries with the returned value */
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+- return c != (u);
++ return c != u; /* nonzero unless the loop stopped because the counter equalled u */
+ }
+
+ #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h
+index 5bb6991..5c2132e 100644
+--- a/arch/sparc/include/asm/cache.h
++++ b/arch/sparc/include/asm/cache.h
+@@ -7,10 +7,12 @@
+ #ifndef _SPARC_CACHE_H
+ #define _SPARC_CACHE_H
+
++#include <linux/const.h>
++
+ #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+
+ #define L1_CACHE_SHIFT 5
+-#define L1_CACHE_BYTES 32
++#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT)
+
+ #ifdef CONFIG_SPARC32
+ #define SMP_CACHE_BYTES_SHIFT 5
+diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h
+index a24e41f..47677ff 100644
+--- a/arch/sparc/include/asm/elf_32.h
++++ b/arch/sparc/include/asm/elf_32.h
+@@ -114,6 +114,13 @@ typedef struct {
+
+ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE)
+
++#ifdef CONFIG_PAX_ASLR
++#define PAX_ELF_ET_DYN_BASE 0x10000UL
++
++#define PAX_DELTA_MMAP_LEN 16
++#define PAX_DELTA_S