diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c840e7d..14c7fb0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1725,6 +1725,11 @@ and is between 256 and 4096 characters. It is defined in the file noresidual [PPC] Don't use residual data on PReP machines. + nordrand [X86] Disable the direct use of the RDRAND + instruction even if it is supported by the + processor. RDRAND is still available to user + space applications. + noresume [SWSUSP] Disables resume and restores original swap space. diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt index e6e482f..3c9d7ac 100644 --- a/Documentation/stable_kernel_rules.txt +++ b/Documentation/stable_kernel_rules.txt @@ -12,6 +12,12 @@ Rules on what kind of patches are accepted, and which ones are not, into the marked CONFIG_BROKEN), an oops, a hang, data corruption, a real security issue, or some "oh, that's not good" issue. In short, something critical. + - Serious issues as reported by a user of a distribution kernel may also + be considered if they fix a notable performance or interactivity issue. + As these fixes are not as obvious and have a higher risk of a subtle + regression they should only be submitted by a distribution kernel + maintainer and include an addendum linking to a bugzilla entry if it + exists and additional information on the user-visible impact. - New device IDs and quirks are also accepted. - No "theoretical race condition" issues, unless an explanation of how the race can be exploited is also provided. diff --git a/MAINTAINERS b/MAINTAINERS index 613da5d..334258c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4379,7 +4379,7 @@ F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c RANDOM NUMBER DRIVER -M: Matt Mackall +M: Theodore Ts'o" S: Maintained F: drivers/char/random.c diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index ae4027b..2dd070f 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -240,7 +240,7 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]) "Ir" (THREAD_START_SP - sizeof(regs)), "r" (®s), "Ir" (sizeof(regs)) - : "r0", "r1", "r2", "r3", "ip", "lr", "memory"); + : "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory"); out: return ret; diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 5a5347f..08a0e5c 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -311,11 +311,12 @@ #define __NR_preadv 1319 #define __NR_pwritev 1320 #define __NR_rt_tgsigqueueinfo 1321 +#define __NR_accept4 1334 #ifdef __KERNEL__ -#define NR_syscalls 298 /* length of syscall table */ +#define NR_syscalls 311 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index d0e7d37..e3be543 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1806,6 +1806,19 @@ sys_call_table: data8 sys_preadv data8 sys_pwritev // 1320 data8 sys_rt_tgsigqueueinfo + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall // 1325 + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall // 1330 + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_accept4 .org sys_call_table + 8*NR_syscalls // guard against failures to increase 
NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index dd9d7b5..463b8a7 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -24,7 +24,6 @@ #include #include #include -#include /* for rand_initialize_irq() */ #include #include #include diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 2eb6365..0edba06 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1185,6 +1185,11 @@ out: #define PALE_RESET_ENTRY 0x80000000ffffffb0UL +bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) +{ + return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { struct kvm_vcpu *v; diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 845da21..0e50757 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -60,6 +60,8 @@ struct thread_info { register struct thread_info *__current_thread_info __asm__("$28"); #define current_thread_info() __current_thread_info +#endif /* !__ASSEMBLY__ */ + /* thread information allocation */ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) #define THREAD_SIZE_ORDER (1) @@ -93,8 +95,6 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define free_thread_info(info) kfree(info) -#endif /* !__ASSEMBLY__ */ - #define PREEMPT_ACTIVE 0x10000000 /* diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 162b299..d5c95d6 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include #include +#include #include #undef mips @@ -70,7 +71,7 @@ SECTIONS .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ - INIT_TASK_DATA(PAGE_SIZE) + INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) DATA_DATA diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 8bc9e96..6ee459d 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -248,7 +248,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0) -#define ATOMIC_INIT(i) ((atomic_t) { (i) }) +#define ATOMIC_INIT(i) { (i) } #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() @@ -257,7 +257,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT -#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) +#define ATOMIC64_INIT(i) { (i) } static __inline__ int __atomic64_add_return(s64 i, atomic64_t *v) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 32a7c30..6ce44dd 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -870,7 +870,8 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ #define mfmsr() ({unsigned long rval; \ - asm volatile("mfmsr %0" : "=r" (rval)); rval;}) + asm volatile("mfmsr %0" : "=r" (rval) : \ + : "memory"); rval;}) #ifdef CONFIG_PPC64 #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \ : : "r" (v) : "memory") diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index ce1f3e4..eda40d2 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -244,9 +244,9 @@ __ftrace_make_nop(struct module *mod, /* * On PPC32 the trampoline looks like: - * 
0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha - * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l - * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 + * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha + * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l + * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 * 0x4e, 0x80, 0x04, 0x20 bctr */ @@ -261,9 +261,9 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %08x %08x ", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d600000) || - ((jmp[1] & 0xffff0000) != 0x396b0000) || - (jmp[2] != 0x7d6903a6) || + if (((jmp[0] & 0xffff0000) != 0x3d800000) || + ((jmp[1] & 0xffff0000) != 0x398c0000) || + (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { printk(KERN_ERR "Not a trampoline\n"); return -EINVAL; diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f832773..449a7e0 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -187,8 +187,8 @@ int apply_relocate(Elf32_Shdr *sechdrs, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) - && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x398c0000 + (val & 0xffff)) return 1; return 0; } @@ -215,10 +215,9 @@ static uint32_t do_plt_call(void *location, entry++; } - /* Stolen from Paul Mackerras as well... */ - entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ - entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ - entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ + entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ + entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index b40c22d..7f66d0c 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -402,7 +402,7 @@ static struct irqaction psurge_irqaction = { static void __init smp_psurge_setup_cpu(int cpu_nr) { - if (cpu_nr != 0) + if (cpu_nr != 0 || !psurge_start) return; /* reset the entry point so if we get another intr we won't diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 113225b..0538555 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -31,7 +31,7 @@ UTS_MACHINE := sparc #KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7 KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 -KBUILD_AFLAGS += -m32 +KBUILD_AFLAGS += -m32 -Wa,-Av8 #LDFLAGS_vmlinux = -N -Ttext 0xf0004000 # Since 2.5.40, the first stage is left not btfix-ed. 
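[Note] The PowerPC hunks above (arch/powerpc/kernel/ftrace.c and arch/powerpc/kernel/module_32.c) switch the module PLT trampoline scratch register from r11 to r12, which is why every instruction constant in the comparison and in do_plt_call() changes. As a rough, hypothetical user-space sketch — not taken from the patch — this is how those four trampoline words are assembled for a target address and why the register swap shows up directly in the opcode constants (the rD/rS register field sits in bits 21-25 of each word, and the rA field of addi in bits 16-20):

/*
 * Hypothetical sketch mirroring do_plt_call(); build_plt_stub() is an
 * illustration helper, not a kernel function.
 */
#include <stdint.h>
#include <stdio.h>

static void build_plt_stub(uint32_t jump[4], uint32_t val)
{
	jump[0] = 0x3d800000 + ((val + 0x8000) >> 16); /* lis   r12,sym@ha */
	jump[1] = 0x398c0000 + (val & 0xffff);         /* addi  r12,r12,sym@l */
	jump[2] = 0x7d8903a6;                          /* mtctr r12 */
	jump[3] = 0x4e800420;                          /* bctr */
}

int main(void)
{
	uint32_t stub[4];
	int i;

	build_plt_stub(stub, 0xc0012345);
	for (i = 0; i < 4; i++)
		printf("%08x\n", stub[i]);
	return 0;
}

With the old r11 encoding the same three constants were 0x3d600000, 0x396b0000 and 0x7d6903a6, which is exactly what the entry_matches() and __ftrace_make_nop() checks are updated to stop expecting.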
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 4a700f4..6a831bd 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -1242,4 +1242,4 @@ static int __init ds_init(void) return vio_register_driver(&ds_driver); } -subsys_initcall(ds_init); +fs_initcall(ds_init); diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index fd3cee4..cc4b1ff 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -20,11 +20,6 @@ .text .align 32 -__handle_softirq: - call do_softirq - nop - ba,a,pt %xcc, __handle_softirq_continue - nop __handle_preemption: call schedule wrpr %g0, RTRAP_PSTATE, %pstate @@ -159,9 +154,7 @@ rtrap: cmp %l1, 0 /* mm/ultra.S:xcall_report_regs KNOWS about this load. */ - bne,pn %icc, __handle_softirq ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 -__handle_softirq_continue: rtrap_xcall: sethi %hi(0xf << 20), %l4 and %l1, %l4, %l4 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 73ae02a..aa889d6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1428,6 +1428,15 @@ config ARCH_USES_PG_UNCACHED def_bool y depends on X86_PAT +config ARCH_RANDOM + def_bool y + prompt "x86 architectural random number generator" if EXPERT + ---help--- + Enable the x86 architectural RDRAND instruction + (Intel Bull Mountain technology) to generate random numbers. + If supported, this is a high bandwidth, cryptographically + secure hardware random number generator. + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h new file mode 100644 index 0000000..0d9ec77 --- /dev/null +++ b/arch/x86/include/asm/archrandom.h @@ -0,0 +1,75 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu , + * H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#ifndef ASM_X86_ARCHRANDOM_H +#define ASM_X86_ARCHRANDOM_H + +#include +#include +#include +#include + +#define RDRAND_RETRY_LOOPS 10 + +#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" +#ifdef CONFIG_X86_64 +# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" +#else +# define RDRAND_LONG RDRAND_INT +#endif + +#ifdef CONFIG_ARCH_RANDOM + +#define GET_RANDOM(name, type, rdrand, nop) \ +static inline int name(type *v) \ +{ \ + int ok; \ + alternative_io("movl $0, %0\n\t" \ + nop, \ + "\n1: " rdrand "\n\t" \ + "jc 2f\n\t" \ + "decl %0\n\t" \ + "jnz 1b\n\t" \ + "2:", \ + X86_FEATURE_RDRAND, \ + ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ + "0" (RDRAND_RETRY_LOOPS)); \ + return ok; \ +} + +#ifdef CONFIG_X86_64 + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); + +#else + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); + +#endif /* CONFIG_X86_64 */ + +#endif /* CONFIG_ARCH_RANDOM */ + +extern void x86_init_rdrand(struct cpuinfo_x86 *c); + +#endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1efb1fa..27929b8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -124,6 +124,8 @@ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */ #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f0746f4..41845d2 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -1,11 +1,13 @@ #ifndef _ASM_X86_K8_H #define _ASM_X86_K8_H +#include #include extern struct pci_device_id k8_nb_ids[]; extern int early_is_k8_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); extern struct pci_dev **k8_northbridges; extern int num_k8_northbridges; extern int cache_k8_northbridges(void); diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 5ed59ec..cc44e3d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -109,6 +109,8 @@ struct x86_emulate_ops { unsigned int bytes, struct kvm_vcpu *vcpu); + bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); }; /* Type, address-of, and value of an instruction's operand. 
*/ @@ -190,6 +192,19 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #endif +/* CPUID vendors */ +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 + +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41 +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 + +#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 +#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e +#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 + int x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index b93a9aa..18e1ca7 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -63,14 +63,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - unsigned long long quot; - unsigned long long rem; int cpu = smp_processor_id(); unsigned long long ns = per_cpu(cyc2ns_offset, cpu); - quot = (cyc >> CYC2NS_SCALE_FACTOR); - rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1); - ns += quot * per_cpu(cyc2ns, cpu) + - ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR); + ns += mult_frac(cyc, per_cpu(cyc2ns, cpu), + (1UL << CYC2NS_SCALE_FACTOR)); return ns; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ff502cc..1f537a2 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -14,6 +14,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o +obj-y += rdrand.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4e34d10..ba1a1dd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -815,6 +816,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #endif init_hypervisor(c); + x86_init_rdrand(c); /* * Clear/Set all flags overriden by options, need do it diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c new file mode 100644 index 0000000..feca286 --- /dev/null +++ b/arch/x86/kernel/cpu/rdrand.c @@ -0,0 +1,73 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu , + * H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#include +#include +#include + +static int __init x86_rdrand_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_RDRAND); + return 1; +} +__setup("nordrand", x86_rdrand_setup); + +/* We can't use arch_get_random_long() here since alternatives haven't run */ +static inline int rdrand_long(unsigned long *v) +{ + int ok; + asm volatile("1: " RDRAND_LONG "\n\t" + "jc 2f\n\t" + "decl %0\n\t" + "jnz 1b\n\t" + "2:" + : "=r" (ok), "=a" (*v) + : "0" (RDRAND_RETRY_LOOPS)); + return ok; +} + +/* + * Force a reseed cycle; we are architecturally guaranteed a reseed + * after no more than 512 128-bit chunks of random data. This also + * acts as a test of the CPU capability. + */ +#define RESEED_LOOP ((512*128)/sizeof(unsigned long)) + +void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_RANDOM + unsigned long tmp; + int i, count, ok; + + if (!cpu_has(c, X86_FEATURE_RDRAND)) + return; /* Nothing to do */ + + for (count = i = 0; i < RESEED_LOOP; i++) { + ok = rdrand_long(&tmp); + if (ok) + count++; + } + + if (count != RESEED_LOOP) + clear_cpu_cap(c, X86_FEATURE_RDRAND); +#endif +} diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 9b89546..2831a32 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -87,6 +87,37 @@ int __init early_is_k8_nb(u32 device) return 0; } +struct resource *amd_get_mmconfig_range(struct resource *res) +{ + u32 address; + u64 base, msr; + unsigned segn_busn_bits; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return NULL; + + /* assume all cpus from fam10h have mmconfig */ + if (boot_cpu_data.x86 < 0x10) + return NULL; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, msr); + + /* mmconfig is not enabled */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return NULL; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + res->flags = IORESOURCE_MEM; + res->start = base; + res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1; + return res; +} + void k8_flush_garts(void) { int flushed, i; diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 6bb7b85..bcfec2d 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -163,7 +163,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset, { const struct desc_struct *tls; - if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || + if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || (count % sizeof(struct user_desc)) != 0) return -EINVAL; @@ -198,7 +198,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; - if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || + if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || (count % sizeof(struct user_desc)) != 0) return -EINVAL; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index bc07543..9972276 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) if (cpu_khz) { *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); + *offset = ns_now - mult_frac(tsc_now, *scale, + (1UL << CYC2NS_SCALE_FACTOR)); } sched_clock_idle_wakeup_event(0); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1350e43..aa2d905 100644 --- 
a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1495,20 +1495,73 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, ss->present = 1; } +static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + u32 eax, ebx, ecx, edx; + + /* + * syscall should always be enabled in longmode - so only become + * vendor specific (cpuid) if other modes are active... + */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + return true; + + eax = 0x00000000; + ecx = 0x00000000; + if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) { + /* + * Intel ("GenuineIntel") + * remark: Intel CPUs only support "syscall" in 64bit + * longmode. Also an 64bit guest with a + * 32bit compat-app running will #UD !! While this + * behaviour can be fixed (by emulating) into AMD + * response - CPUs of AMD can't behave like Intel. + */ + if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && + ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && + edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx) + return false; + + /* AMD ("AuthenticAMD") */ + if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx && + ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx && + edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) + return true; + + /* AMD ("AMDisbetter!") */ + if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx && + ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx && + edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx) + return true; + } + + /* default: (not Intel, not AMD), apply Intel's stricter rules... */ + return false; +} + static int -emulate_syscall(struct x86_emulate_ctxt *ctxt) +emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; struct kvm_segment cs, ss; u64 msr_data; + u64 efer = 0; /* syscall is not available in real mode */ if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) return -1; + if (!(em_syscall_is_enabled(ctxt, ops))) + return -1; + + kvm_x86_ops->get_msr(ctxt->vcpu, MSR_EFER, &efer); setup_syscalls_segments(ctxt, &cs, &ss); + if (!(efer & EFER_SCE)) + return -1; + kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); msr_data >>= 32; cs.selector = (u16)(msr_data & 0xfffc); @@ -2342,7 +2395,7 @@ twobyte_insn: } break; case 0x05: /* syscall */ - if (emulate_syscall(ctxt) == -1) + if (emulate_syscall(ctxt, ops) == -1) goto cannot_emulate; else goto writeback; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 88ad162..7e361b4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -277,11 +277,15 @@ static struct kvm_timer_ops kpit_ops = { .is_periodic = kpit_is_periodic, }; -static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) +static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) { + struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; struct kvm_timer *pt = &ps->pit_timer; s64 interval; + if (!irqchip_in_kernel(kvm)) + return; + interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); @@ -333,13 +337,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) /* FIXME: enhance mode 4 precision */ case 4: if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { - create_pit_timer(ps, val, 0); + create_pit_timer(kvm, val, 0); } break; case 2: case 3: if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ - create_pit_timer(ps, val, 1); + create_pit_timer(kvm, val, 1); } break; default: diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a..85a8721 100644 --- 
a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -85,7 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) static inline int irqchip_in_kernel(struct kvm *kvm) { - return pic_irqchip(kvm) != NULL; + int ret; + + ret = (pic_irqchip(kvm) != NULL); + smp_rmb(); + return ret; } void kvm_pic_reset(struct kvm_kpic_state *s); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index df1cefb..271fddf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2273,25 +2273,42 @@ long kvm_arch_vm_ioctl(struct file *filp, if (r) goto out; break; - case KVM_CREATE_IRQCHIP: + case KVM_CREATE_IRQCHIP: { + struct kvm_pic *vpic; + + mutex_lock(&kvm->lock); + r = -EEXIST; + if (kvm->arch.vpic) + goto create_irqchip_unlock; + r = -EINVAL; + if (atomic_read(&kvm->online_vcpus)) + goto create_irqchip_unlock; r = -ENOMEM; - kvm->arch.vpic = kvm_create_pic(kvm); - if (kvm->arch.vpic) { + vpic = kvm_create_pic(kvm); + if (vpic) { r = kvm_ioapic_init(kvm); if (r) { - kfree(kvm->arch.vpic); - kvm->arch.vpic = NULL; - goto out; + kfree(vpic); + goto create_irqchip_unlock; } } else - goto out; + goto create_irqchip_unlock; + smp_wmb(); + kvm->arch.vpic = vpic; + smp_wmb(); r = kvm_setup_default_irq_routing(kvm); if (r) { + mutex_lock(&kvm->irq_lock); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); - goto out; + kvm->arch.vpic = NULL; + kvm->arch.vioapic = NULL; + mutex_unlock(&kvm->irq_lock); } + create_irqchip_unlock: + mutex_unlock(&kvm->lock); break; + } case KVM_CREATE_PIT: u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; goto create_pit; @@ -2871,12 +2888,35 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) } EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); +static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +{ + struct kvm_cpuid_entry2 *cpuid = NULL; + + if (eax && ecx) + cpuid = kvm_find_cpuid_entry(ctxt->vcpu, + *eax, *ecx); + + if (cpuid) { + *eax = cpuid->eax; + *ecx = cpuid->ecx; + if (ebx) + *ebx = cpuid->ebx; + if (edx) + *edx = cpuid->edx; + return true; + } + + return false; +} + static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, .fetch = kvm_fetch_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, + .get_cpuid = emulator_get_cpuid, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -4990,6 +5030,11 @@ void kvm_arch_check_processor_compat(void *rtn) kvm_x86_ops->check_processor_compatibility(rtn); } +bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) +{ + return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { struct page *page; diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index ff485d3..b6372ce 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops) } /* TSC based delay: */ -static void delay_tsc(unsigned long loops) +static void delay_tsc(unsigned long __loops) { - unsigned long bclock, now; + u32 bclock, now, loops = __loops; int cpu; preempt_disable(); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8ac0d76..249ad57 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -223,15 +223,14 @@ void vmalloc_sync_all(void) address >= TASK_SIZE && address < FIXADDR_TOP; address += PMD_SIZE) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, 
&pgd_list, lru) { if (!vmalloc_sync_one(page_address(page), address)) break; } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } @@ -331,13 +330,12 @@ void vmalloc_sync_all(void) address += PGDIR_SIZE) { const pgd_t *pgd_ref = pgd_offset_k(address); - unsigned long flags; struct page *page; if (pgd_none(*pgd_ref)) continue; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; pgd = (pgd_t *)page_address(page) + pgd_index(address); @@ -346,7 +344,7 @@ void vmalloc_sync_all(void) else BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dd38bfb..6d44087 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -56,12 +56,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM]; void update_page_count(int level, unsigned long pages) { - unsigned long flags; - /* Protect against CPA */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); direct_pages_count[level] += pages; - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } static void split_page_count(int level) @@ -354,7 +352,7 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; + unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot; int i, do_split = 1; @@ -363,7 +361,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, if (cpa->force_split) return 1; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up already: @@ -458,14 +456,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } out_unlock: - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return do_split; } static int split_large_page(pte_t *kpte, unsigned long address) { - unsigned long flags, pfn, pfninc = 1; + unsigned long pfn, pfninc = 1; unsigned int i, level; pte_t *pbase, *tmp; pgprot_t ref_prot; @@ -479,7 +477,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (!base) return -ENOMEM; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up for us already: @@ -551,7 +549,7 @@ out_unlock: */ if (base) __free_page(base); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return 0; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e0e6fad..cb7cfc8 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -110,14 +110,12 @@ static void pgd_ctor(pgd_t *pgd) static void pgd_dtor(pgd_t *pgd) { - unsigned long flags; /* can be called from interrupt context */ - if (SHARED_KERNEL_PMD) return; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -248,7 +246,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - unsigned long flags; pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); @@ -268,12 +265,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. 
*/ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_ctor(pgd); pgd_prepopulate_pmd(mm, pgd, pmds); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return pgd; diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 829edf0..b50a280 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -71,9 +71,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) offset = addr & (PAGE_SIZE - 1); size = min(PAGE_SIZE - offset, n - len); - map = kmap_atomic(page, KM_USER0); + map = kmap_atomic(page, KM_NMI); memcpy(to, map+offset, size); - kunmap_atomic(map, KM_USER0); + kunmap_atomic(map, KM_NMI); put_page(page); len += size; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 572ee97..aae9931 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -3,6 +3,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -190,34 +191,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, }; -static u64 __initdata fam10h_mmconf_start; -static u64 __initdata fam10h_mmconf_end; -static void __init get_pci_mmcfg_amd_fam10h_range(void) -{ - u32 address; - u64 base, msr; - unsigned segn_busn_bits; - - /* assume all cpus from fam10h have mmconf */ - if (boot_cpu_data.x86 < 0x10) - return; - - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - - /* mmconfig is not enable */ - if (!(msr & FAM10H_MMIO_CONF_ENABLE)) - return; - - base = msr & (FAM10H_MMIO_CONF_BASE_MASK<> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & - FAM10H_MMIO_CONF_BUSRANGE_MASK; - - fam10h_mmconf_start = base; - fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; -} - /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus @@ -243,6 +216,9 @@ static int __init early_fill_mp_bus_info(void) struct res_range range[RANGE_NUM]; u64 val; u32 address; + struct resource fam10h_mmconf_res, *fam10h_mmconf; + u64 fam10h_mmconf_start; + u64 fam10h_mmconf_end; if (!early_pci_allowed()) return -1; @@ -367,11 +343,16 @@ static int __init early_fill_mp_bus_info(void) update_range(range, 0, end - 1); /* get mmconfig */ - get_pci_mmcfg_amd_fam10h_range(); + fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res); /* need to take out mmconf range */ - if (fam10h_mmconf_end) { - printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); + if (fam10h_mmconf) { + printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf); + fam10h_mmconf_start = fam10h_mmconf->start; + fam10h_mmconf_end = fam10h_mmconf->end; update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); + } else { + fam10h_mmconf_start = 0; + fam10h_mmconf_end = 0; } /* mmio resource */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0087b00..d52f895 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -945,7 +945,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .rdmsr_regs = native_rdmsr_safe_regs, .write_msr = xen_write_msr_safe, + .wrmsr_regs = native_wrmsr_safe_regs, + .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3f90a2c..8f4452c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -987,10 +987,9 @@ static void xen_pgd_pin(struct mm_struct *mm) */ void xen_mm_pin_all(void) { - unsigned long flags; struct page *page; - 
spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (!PagePinned(page)) { @@ -999,7 +998,7 @@ void xen_mm_pin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -1100,10 +1099,9 @@ static void xen_pgd_unpin(struct mm_struct *mm) */ void xen_mm_unpin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (PageSavePinned(page)) { @@ -1113,7 +1111,7 @@ void xen_mm_unpin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 79d7362..3e45aa0 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct) /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f + jnz 1f 2: call check_events 1: ENDPATCH(xen_restore_fl_direct) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index d4ed600..cbdabb0 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -66,22 +66,22 @@ static void cfq_exit(struct io_context *ioc) } /* Called by the exitting task */ -void exit_io_context(void) +void exit_io_context(struct task_struct *task) { struct io_context *ioc; - task_lock(current); - ioc = current->io_context; - current->io_context = NULL; - task_unlock(current); + task_lock(task); + ioc = task->io_context; + task->io_context = NULL; + task_unlock(task); if (atomic_dec_and_test(&ioc->nr_tasks)) { if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); cfq_exit(ioc); - put_io_context(ioc); } + put_io_context(ioc); } struct io_context *alloc_io_context(gfp_t gfp_flags, int node) diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 107f6f7..dd30f40 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -174,7 +174,7 @@ sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) index = sctx->count[0] & 0x7f; /* Update number of bytes */ - if (!(sctx->count[0] += len)) + if ((sctx->count[0] += len) < len) sctx->count[1]++; part_len = 128 - index; diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index b6ed60b..bc3f918 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -287,7 +287,9 @@ static int acpi_ac_add(struct acpi_device *device) ac->charger.properties = ac_props; ac->charger.num_properties = ARRAY_SIZE(ac_props); ac->charger.get_property = get_ac_property; - power_supply_register(&ac->device->dev, &ac->charger); + result = power_supply_register(&ac->device->dev, &ac->charger); + if (result) + goto end; #endif printk(KERN_INFO PREFIX "%s [%s] (%s)\n", diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 3315268..ad8e592 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -747,17 +747,7 @@ complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag) { case CMD_TARGET_STATUS: /* Pass it up to the upper layers... */ - if( ei->ScsiStatus) - { -#if 0 - printk(KERN_WARNING "cciss: cmd %p " - "has SCSI Status = %x\n", - cp, - ei->ScsiStatus); -#endif - cmd->result |= (ei->ScsiStatus < 1); - } - else { /* scsi status is zero??? How??? 
*/ + if (!ei->ScsiStatus) { /* Ordinarily, this case should never happen, but there is a bug in some released firmware revisions that allows it to happen diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index a7c4184..bcbfc20 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1116,7 +1116,7 @@ static inline void carm_handle_resp(struct carm_host *host, break; case MISC_GET_FW_VER: { struct carm_fw_ver *ver = (struct carm_fw_ver *) - mem + sizeof(struct carm_msg_get_fw_ver); + (mem + sizeof(struct carm_msg_get_fw_ver)); if (!error) { host->fw_ver = le32_to_cpu(ver->version); host->flags |= (ver->features & FL_FW_VER_MASK); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 75185a6..a562761 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -470,15 +470,10 @@ static int btusb_submit_isoc_urb(struct hci_dev *hdev, gfp_t mem_flags) pipe = usb_rcvisocpipe(data->udev, data->isoc_rx_ep->bEndpointAddress); - urb->dev = data->udev; - urb->pipe = pipe; - urb->context = hdev; - urb->complete = btusb_isoc_complete; - urb->interval = data->isoc_rx_ep->bInterval; + usb_fill_int_urb(urb, data->udev, pipe, buf, size, btusb_isoc_complete, + hdev, data->isoc_rx_ep->bInterval); urb->transfer_flags = URB_FREE_BUFFER | URB_ISO_ASAP; - urb->transfer_buffer = buf; - urb->transfer_buffer_length = size; __fill_isoc_descriptor(urb, size, le16_to_cpu(data->isoc_rx_ep->wMaxPacketSize)); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index e3d4eda..d68e2f5 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -312,9 +312,11 @@ static void hci_uart_tty_close(struct tty_struct *tty) hci_uart_close(hdev); if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) { + if (hdev) { + hci_unregister_dev(hdev); + hci_free_dev(hdev); + } hu->proto->close(hu); - hci_unregister_dev(hdev); - hci_free_dev(hdev); } } } diff --git a/drivers/char/random.c b/drivers/char/random.c index 3a19e2d..446b20a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -125,20 +125,32 @@ * The current exported interfaces for gathering environmental noise * from the devices are: * + * void add_device_randomness(const void *buf, unsigned int size); * void add_input_randomness(unsigned int type, unsigned int code, * unsigned int value); - * void add_interrupt_randomness(int irq); + * void add_interrupt_randomness(int irq, int irq_flags); + * void add_disk_randomness(struct gendisk *disk); + * + * add_device_randomness() is for adding data to the random pool that + * is likely to differ between two devices (or possibly even per boot). + * This would be things like MAC addresses or serial numbers, or the + * read-out of the RTC. This does *not* add any actual entropy to the + * pool, but it initializes the pool to different values for devices + * that might otherwise be identical and have very little entropy + * available to them (particularly common in the embedded world). * * add_input_randomness() uses the input layer interrupt timing, as well as * the event type information from the hardware. * - * add_interrupt_randomness() uses the inter-interrupt timing as random - * inputs to the entropy pool. Note that not all interrupts are good - * sources of randomness! For example, the timer interrupts is not a - * good choice, because the periodicity of the interrupts is too - * regular, and hence predictable to an attacker. 
Disk interrupts are - * a better measure, since the timing of the disk interrupts are more - * unpredictable. + * add_interrupt_randomness() uses the interrupt timing as random + * inputs to the entropy pool. Using the cycle counters and the irq source + * as inputs, it feeds the randomness roughly once a second. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. * * All of these routines try to estimate how many bits of randomness a * particular randomness source. They do this by keeping track of the @@ -241,6 +253,8 @@ #include #include #include +#include +#include #ifdef CONFIG_GENERIC_HARDIRQS # include @@ -249,6 +263,7 @@ #include #include #include +#include #include /* @@ -257,6 +272,9 @@ #define INPUT_POOL_WORDS 128 #define OUTPUT_POOL_WORDS 32 #define SEC_XFER_SIZE 512 +#define EXTRACT_SIZE 10 + +#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) /* * The minimum number of bits of entropy before we wake up a read on @@ -406,15 +424,17 @@ struct entropy_store { struct poolinfo *poolinfo; __u32 *pool; const char *name; - int limit; struct entropy_store *pull; + int limit; /* read-write data: */ spinlock_t lock; unsigned add_ptr; + unsigned input_rotate; int entropy_count; - int input_rotate; - __u8 *last_data; + int entropy_total; + unsigned int initialized:1; + __u8 last_data[EXTRACT_SIZE]; }; static __u32 input_pool_data[INPUT_POOL_WORDS]; @@ -446,6 +466,10 @@ static struct entropy_store nonblocking_pool = { .pool = nonblocking_pool_data }; +static __u32 const twist_table[8] = { + 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, + 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; + /* * This function adds bytes into the entropy "pool". It does not * update the entropy estimate. The caller should call @@ -456,29 +480,24 @@ static struct entropy_store nonblocking_pool = { * it's cheap to do so and helps slightly in the expected case where * the entropy is concentrated in the low-order bits. */ -static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, - int nbytes, __u8 out[64]) +static void __mix_pool_bytes(struct entropy_store *r, const void *in, + int nbytes, __u8 out[64]) { - static __u32 const twist_table[8] = { - 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, - 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; unsigned long i, j, tap1, tap2, tap3, tap4, tap5; int input_rotate; int wordmask = r->poolinfo->poolwords - 1; const char *bytes = in; __u32 w; - unsigned long flags; - /* Taps are constant, so we can load them without holding r->lock. */ tap1 = r->poolinfo->tap1; tap2 = r->poolinfo->tap2; tap3 = r->poolinfo->tap3; tap4 = r->poolinfo->tap4; tap5 = r->poolinfo->tap5; - spin_lock_irqsave(&r->lock, flags); - input_rotate = r->input_rotate; - i = r->add_ptr; + smp_rmb(); + input_rotate = ACCESS_ONCE(r->input_rotate); + i = ACCESS_ONCE(r->add_ptr); /* mix one byte at a time to simplify size handling and churn faster */ while (nbytes--) { @@ -505,19 +524,53 @@ static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, input_rotate += i ? 
7 : 14; } - r->input_rotate = input_rotate; - r->add_ptr = i; + ACCESS_ONCE(r->input_rotate) = input_rotate; + ACCESS_ONCE(r->add_ptr) = i; + smp_wmb(); if (out) for (j = 0; j < 16; j++) ((__u32 *)out)[j] = r->pool[(i - j) & wordmask]; +} +static void mix_pool_bytes(struct entropy_store *r, const void *in, + int nbytes, __u8 out[64]) +{ + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); + __mix_pool_bytes(r, in, nbytes, out); spin_unlock_irqrestore(&r->lock, flags); } -static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes) +struct fast_pool { + __u32 pool[4]; + unsigned long last; + unsigned short count; + unsigned char rotate; + unsigned char last_timer_intr; +}; + +/* + * This is a fast mixing routine used by the interrupt randomness + * collector. It's hardcoded for an 128 bit pool and assumes that any + * locks that might be needed are taken by the caller. + */ +static void fast_mix(struct fast_pool *f, const void *in, int nbytes) { - mix_pool_bytes_extract(r, in, bytes, NULL); + const char *bytes = in; + __u32 w; + unsigned i = f->count; + unsigned input_rotate = f->rotate; + + while (nbytes--) { + w = rol32(*bytes++, input_rotate & 31) ^ f->pool[i & 3] ^ + f->pool[(i + 1) & 3]; + f->pool[i & 3] = (w >> 3) ^ twist_table[w & 7]; + input_rotate += (i++ & 3) ? 7 : 14; + } + f->count = i; + f->rotate = input_rotate; } /* @@ -525,30 +578,34 @@ static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes) */ static void credit_entropy_bits(struct entropy_store *r, int nbits) { - unsigned long flags; - int entropy_count; + int entropy_count, orig; if (!nbits) return; - spin_lock_irqsave(&r->lock, flags); - DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name); - entropy_count = r->entropy_count; +retry: + entropy_count = orig = ACCESS_ONCE(r->entropy_count); entropy_count += nbits; if (entropy_count < 0) { DEBUG_ENT("negative entropy/overflow\n"); entropy_count = 0; } else if (entropy_count > r->poolinfo->POOLBITS) entropy_count = r->poolinfo->POOLBITS; - r->entropy_count = entropy_count; + if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) + goto retry; + + if (!r->initialized && nbits > 0) { + r->entropy_total += nbits; + if (r->entropy_total > 128) + r->initialized = 1; + } /* should we wake readers? */ if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) { wake_up_interruptible(&random_read_wait); kill_fasync(&fasync, SIGIO, POLL_IN); } - spin_unlock_irqrestore(&r->lock, flags); } /********************************************************************* @@ -564,42 +621,24 @@ struct timer_rand_state { unsigned dont_count_entropy:1; }; -#ifndef CONFIG_GENERIC_HARDIRQS - -static struct timer_rand_state *irq_timer_state[NR_IRQS]; - -static struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - return irq_timer_state[irq]; -} - -static void set_timer_rand_state(unsigned int irq, - struct timer_rand_state *state) -{ - irq_timer_state[irq] = state; -} - -#else - -static struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - return desc->timer_rand_state; -} - -static void set_timer_rand_state(unsigned int irq, - struct timer_rand_state *state) +/* + * Add device- or boot-specific data to the input and nonblocking + * pools to help initialize them to unique values. + * + * None of this adds any entropy, it is meant to avoid the + * problem of the nonblocking pool having similar initial state + * across largely identical devices. 
+ */ +void add_device_randomness(const void *buf, unsigned int size) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); + unsigned long time = get_cycles() ^ jiffies; - desc->timer_rand_state = state; + mix_pool_bytes(&input_pool, buf, size, NULL); + mix_pool_bytes(&input_pool, &time, sizeof(time), NULL); + mix_pool_bytes(&nonblocking_pool, buf, size, NULL); + mix_pool_bytes(&nonblocking_pool, &time, sizeof(time), NULL); } -#endif +EXPORT_SYMBOL(add_device_randomness); static struct timer_rand_state input_timer_state; @@ -616,8 +655,8 @@ static struct timer_rand_state input_timer_state; static void add_timer_randomness(struct timer_rand_state *state, unsigned num) { struct { - cycles_t cycles; long jiffies; + unsigned cycles; unsigned num; } sample; long delta, delta2, delta3; @@ -631,7 +670,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) sample.jiffies = jiffies; sample.cycles = get_cycles(); sample.num = num; - mix_pool_bytes(&input_pool, &sample, sizeof(sample)); + mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL); /* * Calculate number of bits of randomness we probably added. @@ -688,17 +727,48 @@ void add_input_randomness(unsigned int type, unsigned int code, } EXPORT_SYMBOL_GPL(add_input_randomness); -void add_interrupt_randomness(int irq) +static DEFINE_PER_CPU(struct fast_pool, irq_randomness); + +void add_interrupt_randomness(int irq, int irq_flags) { - struct timer_rand_state *state; + struct entropy_store *r; + struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness); + struct pt_regs *regs = get_irq_regs(); + unsigned long now = jiffies; + __u32 input[4], cycles = get_cycles(); + + input[0] = cycles ^ jiffies; + input[1] = irq; + if (regs) { + __u64 ip = instruction_pointer(regs); + input[2] = ip; + input[3] = ip >> 32; + } - state = get_timer_rand_state(irq); + fast_mix(fast_pool, input, sizeof(input)); - if (state == NULL) + if ((fast_pool->count & 1023) && + !time_after(now, fast_pool->last + HZ)) return; - DEBUG_ENT("irq event %d\n", irq); - add_timer_randomness(state, 0x100 + irq); + fast_pool->last = now; + + r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool; + __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL); + /* + * If we don't have a valid cycle counter, and we see + * back-to-back timer interrupts, then skip giving credit for + * any entropy. 
+ */ + if (cycles == 0) { + if (irq_flags & __IRQF_TIMER) { + if (fast_pool->last_timer_intr) + return; + fast_pool->last_timer_intr = 1; + } else + fast_pool->last_timer_intr = 0; + } + credit_entropy_bits(r, 1); } #ifdef CONFIG_BLOCK @@ -714,8 +784,6 @@ void add_disk_randomness(struct gendisk *disk) } #endif -#define EXTRACT_SIZE 10 - /********************************************************************* * * Entropy extraction routines @@ -732,7 +800,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, */ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { - __u32 tmp[OUTPUT_POOL_WORDS]; + __u32 tmp[OUTPUT_POOL_WORDS]; if (r->pull && r->entropy_count < nbytes * 8 && r->entropy_count < r->poolinfo->POOLBITS) { @@ -751,7 +819,7 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) bytes = extract_entropy(r->pull, tmp, bytes, random_read_wakeup_thresh / 8, rsvd); - mix_pool_bytes(r, tmp, bytes); + mix_pool_bytes(r, tmp, bytes, NULL); credit_entropy_bits(r, bytes*8); } } @@ -810,13 +878,19 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, static void extract_buf(struct entropy_store *r, __u8 *out) { int i; - __u32 hash[5], workspace[SHA_WORKSPACE_WORDS]; + union { + __u32 w[5]; + unsigned long l[LONGS(EXTRACT_SIZE)]; + } hash; + __u32 workspace[SHA_WORKSPACE_WORDS]; __u8 extract[64]; + unsigned long flags; /* Generate a hash across the pool, 16 words (512 bits) at a time */ - sha_init(hash); + sha_init(hash.w); + spin_lock_irqsave(&r->lock, flags); for (i = 0; i < r->poolinfo->poolwords; i += 16) - sha_transform(hash, (__u8 *)(r->pool + i), workspace); + sha_transform(hash.w, (__u8 *)(r->pool + i), workspace); /* * We mix the hash back into the pool to prevent backtracking @@ -827,13 +901,14 @@ static void extract_buf(struct entropy_store *r, __u8 *out) * brute-forcing the feedback as hard as brute-forcing the * hash. */ - mix_pool_bytes_extract(r, hash, sizeof(hash), extract); + __mix_pool_bytes(r, hash.w, sizeof(hash.w), extract); + spin_unlock_irqrestore(&r->lock, flags); /* * To avoid duplicates, we atomically extract a portion of the * pool while mixing, and hash one final time. */ - sha_transform(hash, extract, workspace); + sha_transform(hash.w, extract, workspace); memset(extract, 0, sizeof(extract)); memset(workspace, 0, sizeof(workspace)); @@ -842,19 +917,30 @@ static void extract_buf(struct entropy_store *r, __u8 *out) * pattern, we fold it in half. Thus, we always feed back * twice as much data as we output. */ - hash[0] ^= hash[3]; - hash[1] ^= hash[4]; - hash[2] ^= rol32(hash[2], 16); - memcpy(out, hash, EXTRACT_SIZE); - memset(hash, 0, sizeof(hash)); + hash.w[0] ^= hash.w[3]; + hash.w[1] ^= hash.w[4]; + hash.w[2] ^= rol32(hash.w[2], 16); + + /* + * If we have a architectural hardware random number + * generator, mix that in, too. 
+ */ + for (i = 0; i < LONGS(EXTRACT_SIZE); i++) { + unsigned long v; + if (!arch_get_random_long(&v)) + break; + hash.l[i] ^= v; + } + + memcpy(out, &hash, EXTRACT_SIZE); + memset(&hash, 0, sizeof(hash)); } static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int reserved) + size_t nbytes, int min, int reserved) { ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; - unsigned long flags; xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); @@ -862,7 +948,9 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, while (nbytes) { extract_buf(r, tmp); - if (r->last_data) { + if (fips_enabled) { + unsigned long flags; + spin_lock_irqsave(&r->lock, flags); if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) panic("Hardware RNG duplicated output!\n"); @@ -921,8 +1009,9 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, /* * This function is the exported kernel interface. It returns some - * number of good random numbers, suitable for seeding TCP sequence - * numbers, etc. + * number of good random numbers, suitable for key generation, seeding + * TCP sequence numbers, etc. It does not use the hw random number + * generator, if available; use get_random_bytes_arch() for that. */ void get_random_bytes(void *buf, int nbytes) { @@ -931,6 +1020,38 @@ void get_random_bytes(void *buf, int nbytes) EXPORT_SYMBOL(get_random_bytes); /* + * This function will use the architecture-specific hardware random + * number generator if it is available. The arch-specific hw RNG will + * almost certainly be faster than what we can do in software, but it + * is impossible to verify that it is implemented securely (as + * opposed, to, say, the AES encryption of a sequence number using a + * key known by the NSA). So it's useful if we need the speed, but + * only if we're willing to trust the hardware manufacturer not to + * have put in a back door. + */ +void get_random_bytes_arch(void *buf, int nbytes) +{ + char *p = buf; + + while (nbytes) { + unsigned long v; + int chunk = min(nbytes, (int)sizeof(unsigned long)); + + if (!arch_get_random_long(&v)) + break; + + memcpy(p, &v, chunk); + p += chunk; + nbytes -= chunk; + } + + if (nbytes) + extract_entropy(&nonblocking_pool, p, nbytes, 0, 0); +} +EXPORT_SYMBOL(get_random_bytes_arch); + + +/* * init_std_data - initialize pool with system data * * @r: pool to initialize @@ -941,21 +1062,31 @@ EXPORT_SYMBOL(get_random_bytes); */ static void init_std_data(struct entropy_store *r) { - ktime_t now; - unsigned long flags; + int i; + ktime_t now = ktime_get_real(); + unsigned long rv; - spin_lock_irqsave(&r->lock, flags); r->entropy_count = 0; - spin_unlock_irqrestore(&r->lock, flags); - - now = ktime_get_real(); - mix_pool_bytes(r, &now, sizeof(now)); - mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); - /* Enable continuous test in fips mode */ - if (fips_enabled) - r->last_data = kmalloc(EXTRACT_SIZE, GFP_KERNEL); + r->entropy_total = 0; + mix_pool_bytes(r, &now, sizeof(now), NULL); + for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) { + if (!arch_get_random_long(&rv)) + break; + mix_pool_bytes(r, &rv, sizeof(rv), NULL); + } + mix_pool_bytes(r, utsname(), sizeof(*(utsname())), NULL); } +/* + * Note that setup_arch() may call add_device_randomness() + * long before we get here. This allows seeding of the pools + * with some platform dependent data very early in the boot + * process. But it limits our options here. 
We must use + * statically allocated structures that already have all + * initializations complete at compile time. We should also + * take care not to overwrite the precious per platform data + * we were given. + */ static int rand_initialize(void) { init_std_data(&input_pool); @@ -965,24 +1096,6 @@ static int rand_initialize(void) } module_init(rand_initialize); -void rand_initialize_irq(int irq) -{ - struct timer_rand_state *state; - - state = get_timer_rand_state(irq); - - if (state) - return; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. - */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) - set_timer_rand_state(irq, state); -} - #ifdef CONFIG_BLOCK void rand_initialize_disk(struct gendisk *disk) { @@ -1090,7 +1203,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) count -= bytes; p += bytes; - mix_pool_bytes(r, buf, bytes); + mix_pool_bytes(r, buf, bytes, NULL); cond_resched(); } @@ -1231,10 +1344,15 @@ static int proc_do_uuid(ctl_table *table, int write, uuid = table->data; if (!uuid) { uuid = tmp_uuid; - uuid[8] = 0; - } - if (uuid[8] == 0) generate_random_uuid(uuid); + } else { + static DEFINE_SPINLOCK(bootid_spinlock); + + spin_lock(&bootid_spinlock); + if (!uuid[8]) + generate_random_uuid(uuid); + spin_unlock(&bootid_spinlock); + } sprintf(buf, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" "%02x%02x%02x%02x%02x%02x", @@ -1357,9 +1475,14 @@ late_initcall(random_int_secret_init); DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash); unsigned int get_random_int(void) { - __u32 *hash = get_cpu_var(get_random_int_hash); + __u32 *hash; unsigned int ret; + if (arch_get_random_int(&ret)) + return ret; + + hash = get_cpu_var(get_random_int_hash); + hash[0] += current->pid + jiffies + get_cycles(); md5_transform(hash, random_int_secret); ret = hash[0]; diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index ac16fbe..dd4691a 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -94,8 +94,10 @@ static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, { if (buf->valid == 0) return; - if (audit_enabled == 0) + if (audit_enabled == 0) { + buf->valid = 0; return; + } tty_audit_log("tty", tsk, loginuid, sessionid, buf->major, buf->minor, buf->data, buf->valid); buf->valid = 0; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 5cc37af..d1be371 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -51,48 +51,40 @@ MODULE_PARM_DESC(ioat_ring_max_alloc_order, void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) { - void * __iomem reg_base = ioat->base.reg_base; + struct ioat_chan_common *chan = &ioat->base; - ioat->pending = 0; ioat->dmacount += ioat2_ring_pending(ioat); ioat->issued = ioat->head; /* make descriptor updates globally visible before notifying channel */ wmb(); - writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - dev_dbg(to_dev(&ioat->base), + writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x count: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); } -void ioat2_issue_pending(struct dma_chan *chan) +void ioat2_issue_pending(struct dma_chan *c) { - struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - spin_lock_bh(&ioat->ring_lock); - if (ioat->pending == 1) + if (ioat2_ring_pending(ioat)) { + spin_lock_bh(&ioat->ring_lock); 
__ioat2_issue_pending(ioat); - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->ring_lock); + } } /** * ioat2_update_pending - log pending descriptors * @ioat: ioat2+ channel * - * set pending to '1' unless pending is already set to '2', pending == 2 - * indicates that submission is temporarily blocked due to an in-flight - * reset. If we are already above the ioat_pending_level threshold then - * just issue pending. - * - * called with ring_lock held + * Check if the number of unsubmitted descriptors has exceeded the + * watermark. Called with ring_lock held */ static void ioat2_update_pending(struct ioat2_dma_chan *ioat) { - if (unlikely(ioat->pending == 2)) - return; - else if (ioat2_ring_pending(ioat) > ioat_pending_level) + if (ioat2_ring_pending(ioat) > ioat_pending_level) __ioat2_issue_pending(ioat); - else - ioat->pending = 1; } static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) @@ -546,7 +538,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) ioat->head = 0; ioat->issued = 0; ioat->tail = 0; - ioat->pending = 0; ioat->alloc_order = order; spin_unlock_bh(&ioat->ring_lock); @@ -815,7 +806,6 @@ void ioat2_free_chan_resources(struct dma_chan *c) chan->last_completion = 0; chan->completion_dma = 0; - ioat->pending = 0; ioat->dmacount = 0; } diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 3afad8d..d211335 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -47,7 +47,6 @@ extern int ioat_ring_alloc_order; * @head: allocated index * @issued: hardware notification point * @tail: cleanup index - * @pending: lock free indicator for issued != head * @dmacount: identical to 'head' except for occasionally resetting to zero * @alloc_order: log2 of the number of allocated descriptors * @ring: software ring buffer implementation of hardware ring @@ -61,7 +60,6 @@ struct ioat2_dma_chan { u16 tail; u16 dmacount; u16 alloc_order; - int pending; struct ioat_ring_ent **ring; spinlock_t ring_lock; }; diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 3a2ccb0..10a4246 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -6,6 +6,7 @@ #include #include #include +#include #include /* @@ -111,6 +112,8 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, dmi_table(buf, dmi_len, dmi_num, decode, NULL); + add_device_randomness(buf, dmi_len); + dmi_iounmap(buf, dmi_len); return 0; } diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c index 51e0e2d..a330492 100644 --- a/drivers/firmware/pcdp.c +++ b/drivers/firmware/pcdp.c @@ -95,7 +95,7 @@ efi_setup_pcdp_console(char *cmdline) if (efi.hcdp == EFI_INVALID_TABLE_ADDR) return -ENODEV; - pcdp = ioremap(efi.hcdp, 4096); + pcdp = early_ioremap(efi.hcdp, 4096); printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, efi.hcdp); if (strstr(cmdline, "console=hcdp")) { @@ -131,6 +131,6 @@ efi_setup_pcdp_console(char *cmdline) } out: - iounmap(pcdp); + early_iounmap(pcdp, 4096); return rc; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 79cc437..25b3e90 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4355,17 +4355,18 @@ static void intel_init_display(struct drm_device *dev) dev_priv->display.update_wm = g4x_update_wm; else if (IS_I965G(dev)) dev_priv->display.update_wm = i965_update_wm; - else if (IS_I9XX(dev) || IS_MOBILE(dev)) { + else if (IS_I9XX(dev)) { dev_priv->display.update_wm = i9xx_update_wm; 
dev_priv->display.get_fifo_size = i9xx_get_fifo_size; + } else if (IS_I85X(dev)) { + dev_priv->display.update_wm = i9xx_update_wm; + dev_priv->display.get_fifo_size = i85x_get_fifo_size; } else { - if (IS_I85X(dev)) - dev_priv->display.get_fifo_size = i85x_get_fifo_size; - else if (IS_845G(dev)) + dev_priv->display.update_wm = i830_update_wm; + if (IS_845G(dev)) dev_priv->display.get_fifo_size = i845_get_fifo_size; else dev_priv->display.get_fifo_size = i830_get_fifo_size; - dev_priv->display.update_wm = i830_update_wm; } } diff --git a/drivers/mfd/wm831x-otp.c b/drivers/mfd/wm831x-otp.c index f742745..b90f3e0 100644 --- a/drivers/mfd/wm831x-otp.c +++ b/drivers/mfd/wm831x-otp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -66,6 +67,7 @@ static DEVICE_ATTR(unique_id, 0444, wm831x_unique_id_show, NULL); int wm831x_otp_init(struct wm831x *wm831x) { + char uuid[WM831X_UNIQUE_ID_LEN]; int ret; ret = device_create_file(wm831x->dev, &dev_attr_unique_id); @@ -73,6 +75,12 @@ int wm831x_otp_init(struct wm831x *wm831x) dev_err(wm831x->dev, "Unique ID attribute not created: %d\n", ret); + ret = wm831x_unique_id_read(wm831x, uuid); + if (ret == 0) + add_device_randomness(uuid, sizeof(uuid)); + else + dev_err(wm831x->dev, "Failed to read UUID: %d\n", ret); + return ret; } diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index c828d9a..97b9c7b 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -103,7 +103,7 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL }; static int cafe_device_ready(struct mtd_info *mtd) { struct cafe_priv *cafe = mtd->priv; - int result = !!(cafe_readl(cafe, NAND_STATUS) | 0x40000000); + int result = !!(cafe_readl(cafe, NAND_STATUS) & 0x40000000); uint32_t irqs = cafe_readl(cafe, NAND_IRQ); cafe_writel(cafe, irqs, NAND_IRQ); diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 403bfb6..adc862f 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2478,7 +2478,7 @@ static irqreturn_t atl1_intr(int irq, void *data) "pcie phy link down %x\n", status); if (netif_running(adapter->netdev)) { /* reset MAC */ iowrite32(0, adapter->hw.hw_addr + REG_IMR); - schedule_work(&adapter->pcie_dma_to_rst_task); + schedule_work(&adapter->reset_dev_task); return IRQ_HANDLED; } } @@ -2490,7 +2490,7 @@ static irqreturn_t atl1_intr(int irq, void *data) "pcie DMA r/w error (status = 0x%x)\n", status); iowrite32(0, adapter->hw.hw_addr + REG_IMR); - schedule_work(&adapter->pcie_dma_to_rst_task); + schedule_work(&adapter->reset_dev_task); return IRQ_HANDLED; } @@ -2635,10 +2635,10 @@ static void atl1_down(struct atl1_adapter *adapter) atl1_clean_rx_ring(adapter); } -static void atl1_tx_timeout_task(struct work_struct *work) +static void atl1_reset_dev_task(struct work_struct *work) { struct atl1_adapter *adapter = - container_of(work, struct atl1_adapter, tx_timeout_task); + container_of(work, struct atl1_adapter, reset_dev_task); struct net_device *netdev = adapter->netdev; netif_device_detach(netdev); @@ -3050,12 +3050,10 @@ static int __devinit atl1_probe(struct pci_dev *pdev, (unsigned long)adapter); adapter->phy_timer_pending = false; - INIT_WORK(&adapter->tx_timeout_task, atl1_tx_timeout_task); + INIT_WORK(&adapter->reset_dev_task, atl1_reset_dev_task); INIT_WORK(&adapter->link_chg_task, atlx_link_chg_task); - INIT_WORK(&adapter->pcie_dma_to_rst_task, atl1_tx_timeout_task); - err = register_netdev(netdev); if (err) goto err_common; diff --git 
a/drivers/net/atlx/atl1.h b/drivers/net/atlx/atl1.h index 146372f..0494e514 100644 --- a/drivers/net/atlx/atl1.h +++ b/drivers/net/atlx/atl1.h @@ -762,9 +762,8 @@ struct atl1_adapter { u16 link_speed; u16 link_duplex; spinlock_t lock; - struct work_struct tx_timeout_task; + struct work_struct reset_dev_task; struct work_struct link_chg_task; - struct work_struct pcie_dma_to_rst_task; struct timer_list phy_config_timer; bool phy_timer_pending; diff --git a/drivers/net/atlx/atlx.c b/drivers/net/atlx/atlx.c index 3dc0142..ce09b95 100644 --- a/drivers/net/atlx/atlx.c +++ b/drivers/net/atlx/atlx.c @@ -189,7 +189,7 @@ static void atlx_tx_timeout(struct net_device *netdev) { struct atlx_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ - schedule_work(&adapter->tx_timeout_task); + schedule_work(&adapter->reset_dev_task); } /* diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 223990d..05308e6 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1471,8 +1471,11 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, static int agg_device_up(const struct aggregator *agg) { - return (netif_running(agg->slave->dev) && - netif_carrier_ok(agg->slave->dev)); + struct port *port = agg->lag_ports; + if (!port) + return 0; + return (netif_running(port->slave->dev) && + netif_carrier_ok(port->slave->dev)); } /** diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index 7fa7a90..c2f9313 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -1279,55 +1279,21 @@ rio_ioctl (struct net_device *dev, struct ifreq *rq, int cmd) { int phy_addr; struct netdev_private *np = netdev_priv(dev); - struct mii_data *miidata = (struct mii_data *) &rq->ifr_ifru; - - struct netdev_desc *desc; - int i; + struct mii_ioctl_data *miidata = if_mii(rq); phy_addr = np->phy_addr; switch (cmd) { - case SIOCDEVPRIVATE: - break; - - case SIOCDEVPRIVATE + 1: - miidata->out_value = mii_read (dev, phy_addr, miidata->reg_num); + case SIOCGMIIPHY: + miidata->phy_id = phy_addr; break; - case SIOCDEVPRIVATE + 2: - mii_write (dev, phy_addr, miidata->reg_num, miidata->in_value); + case SIOCGMIIREG: + miidata->val_out = mii_read (dev, phy_addr, miidata->reg_num); break; - case SIOCDEVPRIVATE + 3: - break; - case SIOCDEVPRIVATE + 4: - break; - case SIOCDEVPRIVATE + 5: - netif_stop_queue (dev); + case SIOCSMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + mii_write (dev, phy_addr, miidata->reg_num, miidata->val_in); break; - case SIOCDEVPRIVATE + 6: - netif_wake_queue (dev); - break; - case SIOCDEVPRIVATE + 7: - printk - ("tx_full=%x cur_tx=%lx old_tx=%lx cur_rx=%lx old_rx=%lx\n", - netif_queue_stopped(dev), np->cur_tx, np->old_tx, np->cur_rx, - np->old_rx); - break; - case SIOCDEVPRIVATE + 8: - printk("TX ring:\n"); - for (i = 0; i < TX_RING_SIZE; i++) { - desc = &np->tx_ring[i]; - printk - ("%02x:cur:%08x next:%08x status:%08x frag1:%08x frag0:%08x", - i, - (u32) (np->tx_ring_dma + i * sizeof (*desc)), - (u32)le64_to_cpu(desc->next_desc), - (u32)le64_to_cpu(desc->status), - (u32)(le64_to_cpu(desc->fraginfo) >> 32), - (u32)le64_to_cpu(desc->fraginfo)); - printk ("\n"); - } - printk ("\n"); - break; - default: return -EOPNOTSUPP; } @@ -1448,7 +1414,7 @@ mii_wait_link (struct net_device *dev, int wait) do { bmsr = mii_read (dev, phy_addr, MII_BMSR); - if (bmsr & MII_BMSR_LINK_STATUS) + if (bmsr & BMSR_LSTATUS) return 0; mdelay (1); } while (--wait > 0); @@ -1469,60 +1435,60 @@ mii_get_media (struct net_device *dev) 
bmsr = mii_read (dev, phy_addr, MII_BMSR); if (np->an_enable) { - if (!(bmsr & MII_BMSR_AN_COMPLETE)) { + if (!(bmsr & BMSR_ANEGCOMPLETE)) { /* Auto-Negotiation not completed */ return -1; } - negotiate = mii_read (dev, phy_addr, MII_ANAR) & - mii_read (dev, phy_addr, MII_ANLPAR); - mscr = mii_read (dev, phy_addr, MII_MSCR); - mssr = mii_read (dev, phy_addr, MII_MSSR); - if (mscr & MII_MSCR_1000BT_FD && mssr & MII_MSSR_LP_1000BT_FD) { + negotiate = mii_read (dev, phy_addr, MII_ADVERTISE) & + mii_read (dev, phy_addr, MII_LPA); + mscr = mii_read (dev, phy_addr, MII_CTRL1000); + mssr = mii_read (dev, phy_addr, MII_STAT1000); + if (mscr & ADVERTISE_1000FULL && mssr & LPA_1000FULL) { np->speed = 1000; np->full_duplex = 1; printk (KERN_INFO "Auto 1000 Mbps, Full duplex\n"); - } else if (mscr & MII_MSCR_1000BT_HD && mssr & MII_MSSR_LP_1000BT_HD) { + } else if (mscr & ADVERTISE_1000HALF && mssr & LPA_1000HALF) { np->speed = 1000; np->full_duplex = 0; printk (KERN_INFO "Auto 1000 Mbps, Half duplex\n"); - } else if (negotiate & MII_ANAR_100BX_FD) { + } else if (negotiate & ADVERTISE_100FULL) { np->speed = 100; np->full_duplex = 1; printk (KERN_INFO "Auto 100 Mbps, Full duplex\n"); - } else if (negotiate & MII_ANAR_100BX_HD) { + } else if (negotiate & ADVERTISE_100HALF) { np->speed = 100; np->full_duplex = 0; printk (KERN_INFO "Auto 100 Mbps, Half duplex\n"); - } else if (negotiate & MII_ANAR_10BT_FD) { + } else if (negotiate & ADVERTISE_10FULL) { np->speed = 10; np->full_duplex = 1; printk (KERN_INFO "Auto 10 Mbps, Full duplex\n"); - } else if (negotiate & MII_ANAR_10BT_HD) { + } else if (negotiate & ADVERTISE_10HALF) { np->speed = 10; np->full_duplex = 0; printk (KERN_INFO "Auto 10 Mbps, Half duplex\n"); } - if (negotiate & MII_ANAR_PAUSE) { + if (negotiate & ADVERTISE_PAUSE_CAP) { np->tx_flow &= 1; np->rx_flow &= 1; - } else if (negotiate & MII_ANAR_ASYMMETRIC) { + } else if (negotiate & ADVERTISE_PAUSE_ASYM) { np->tx_flow = 0; np->rx_flow &= 1; } /* else tx_flow, rx_flow = user select */ } else { __u16 bmcr = mii_read (dev, phy_addr, MII_BMCR); - switch (bmcr & (MII_BMCR_SPEED_100 | MII_BMCR_SPEED_1000)) { - case MII_BMCR_SPEED_1000: + switch (bmcr & (BMCR_SPEED100 | BMCR_SPEED1000)) { + case BMCR_SPEED1000: printk (KERN_INFO "Operating at 1000 Mbps, "); break; - case MII_BMCR_SPEED_100: + case BMCR_SPEED100: printk (KERN_INFO "Operating at 100 Mbps, "); break; case 0: printk (KERN_INFO "Operating at 10 Mbps, "); } - if (bmcr & MII_BMCR_DUPLEX_MODE) { + if (bmcr & BMCR_FULLDPLX) { printk (KERN_CONT "Full duplex\n"); } else { printk (KERN_CONT "Half duplex\n"); @@ -1556,24 +1522,22 @@ mii_set_media (struct net_device *dev) if (np->an_enable) { /* Advertise capabilities */ bmsr = mii_read (dev, phy_addr, MII_BMSR); - anar = mii_read (dev, phy_addr, MII_ANAR) & - ~MII_ANAR_100BX_FD & - ~MII_ANAR_100BX_HD & - ~MII_ANAR_100BT4 & - ~MII_ANAR_10BT_FD & - ~MII_ANAR_10BT_HD; - if (bmsr & MII_BMSR_100BX_FD) - anar |= MII_ANAR_100BX_FD; - if (bmsr & MII_BMSR_100BX_HD) - anar |= MII_ANAR_100BX_HD; - if (bmsr & MII_BMSR_100BT4) - anar |= MII_ANAR_100BT4; - if (bmsr & MII_BMSR_10BT_FD) - anar |= MII_ANAR_10BT_FD; - if (bmsr & MII_BMSR_10BT_HD) - anar |= MII_ANAR_10BT_HD; - anar |= MII_ANAR_PAUSE | MII_ANAR_ASYMMETRIC; - mii_write (dev, phy_addr, MII_ANAR, anar); + anar = mii_read (dev, phy_addr, MII_ADVERTISE) & + ~(ADVERTISE_100FULL | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_10HALF | + ADVERTISE_100BASE4); + if (bmsr & BMSR_100FULL) + anar |= ADVERTISE_100FULL; + if (bmsr & BMSR_100HALF) + anar |= 
ADVERTISE_100HALF; + if (bmsr & BMSR_100BASE4) + anar |= ADVERTISE_100BASE4; + if (bmsr & BMSR_10FULL) + anar |= ADVERTISE_10FULL; + if (bmsr & BMSR_10HALF) + anar |= ADVERTISE_10HALF; + anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + mii_write (dev, phy_addr, MII_ADVERTISE, anar); /* Enable Auto crossover */ pscr = mii_read (dev, phy_addr, MII_PHY_SCR); @@ -1581,8 +1545,8 @@ mii_set_media (struct net_device *dev) mii_write (dev, phy_addr, MII_PHY_SCR, pscr); /* Soft reset PHY */ - mii_write (dev, phy_addr, MII_BMCR, MII_BMCR_RESET); - bmcr = MII_BMCR_AN_ENABLE | MII_BMCR_RESTART_AN | MII_BMCR_RESET; + mii_write (dev, phy_addr, MII_BMCR, BMCR_RESET); + bmcr = BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET; mii_write (dev, phy_addr, MII_BMCR, bmcr); mdelay(1); } else { @@ -1594,7 +1558,7 @@ mii_set_media (struct net_device *dev) /* 2) PHY Reset */ bmcr = mii_read (dev, phy_addr, MII_BMCR); - bmcr |= MII_BMCR_RESET; + bmcr |= BMCR_RESET; mii_write (dev, phy_addr, MII_BMCR, bmcr); /* 3) Power Down */ @@ -1603,25 +1567,25 @@ mii_set_media (struct net_device *dev) mdelay (100); /* wait a certain time */ /* 4) Advertise nothing */ - mii_write (dev, phy_addr, MII_ANAR, 0); + mii_write (dev, phy_addr, MII_ADVERTISE, 0); /* 5) Set media and Power Up */ - bmcr = MII_BMCR_POWER_DOWN; + bmcr = BMCR_PDOWN; if (np->speed == 100) { - bmcr |= MII_BMCR_SPEED_100; + bmcr |= BMCR_SPEED100; printk (KERN_INFO "Manual 100 Mbps, "); } else if (np->speed == 10) { printk (KERN_INFO "Manual 10 Mbps, "); } if (np->full_duplex) { - bmcr |= MII_BMCR_DUPLEX_MODE; + bmcr |= BMCR_FULLDPLX; printk (KERN_CONT "Full duplex\n"); } else { printk (KERN_CONT "Half duplex\n"); } #if 0 /* Set 1000BaseT Master/Slave setting */ - mscr = mii_read (dev, phy_addr, MII_MSCR); + mscr = mii_read (dev, phy_addr, MII_CTRL1000); mscr |= MII_MSCR_CFG_ENABLE; mscr &= ~MII_MSCR_CFG_VALUE = 0; #endif @@ -1644,7 +1608,7 @@ mii_get_media_pcs (struct net_device *dev) bmsr = mii_read (dev, phy_addr, PCS_BMSR); if (np->an_enable) { - if (!(bmsr & MII_BMSR_AN_COMPLETE)) { + if (!(bmsr & BMSR_ANEGCOMPLETE)) { /* Auto-Negotiation not completed */ return -1; } @@ -1669,7 +1633,7 @@ mii_get_media_pcs (struct net_device *dev) } else { __u16 bmcr = mii_read (dev, phy_addr, PCS_BMCR); printk (KERN_INFO "Operating at 1000 Mbps, "); - if (bmcr & MII_BMCR_DUPLEX_MODE) { + if (bmcr & BMCR_FULLDPLX) { printk (KERN_CONT "Full duplex\n"); } else { printk (KERN_CONT "Half duplex\n"); @@ -1702,7 +1666,7 @@ mii_set_media_pcs (struct net_device *dev) if (np->an_enable) { /* Advertise capabilities */ esr = mii_read (dev, phy_addr, PCS_ESR); - anar = mii_read (dev, phy_addr, MII_ANAR) & + anar = mii_read (dev, phy_addr, MII_ADVERTISE) & ~PCS_ANAR_HALF_DUPLEX & ~PCS_ANAR_FULL_DUPLEX; if (esr & (MII_ESR_1000BT_HD | MII_ESR_1000BX_HD)) @@ -1710,22 +1674,21 @@ mii_set_media_pcs (struct net_device *dev) if (esr & (MII_ESR_1000BT_FD | MII_ESR_1000BX_FD)) anar |= PCS_ANAR_FULL_DUPLEX; anar |= PCS_ANAR_PAUSE | PCS_ANAR_ASYMMETRIC; - mii_write (dev, phy_addr, MII_ANAR, anar); + mii_write (dev, phy_addr, MII_ADVERTISE, anar); /* Soft reset PHY */ - mii_write (dev, phy_addr, MII_BMCR, MII_BMCR_RESET); - bmcr = MII_BMCR_AN_ENABLE | MII_BMCR_RESTART_AN | - MII_BMCR_RESET; + mii_write (dev, phy_addr, MII_BMCR, BMCR_RESET); + bmcr = BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET; mii_write (dev, phy_addr, MII_BMCR, bmcr); mdelay(1); } else { /* Force speed setting */ /* PHY Reset */ - bmcr = MII_BMCR_RESET; + bmcr = BMCR_RESET; mii_write (dev, phy_addr, MII_BMCR, bmcr); mdelay(10); if 
(np->full_duplex) { - bmcr = MII_BMCR_DUPLEX_MODE; + bmcr = BMCR_FULLDPLX; printk (KERN_INFO "Manual full duplex\n"); } else { bmcr = 0; @@ -1735,7 +1698,7 @@ mii_set_media_pcs (struct net_device *dev) mdelay(10); /* Advertise nothing */ - mii_write (dev, phy_addr, MII_ANAR, 0); + mii_write (dev, phy_addr, MII_ADVERTISE, 0); } return 0; } diff --git a/drivers/net/dl2k.h b/drivers/net/dl2k.h index 266ec87..cde8ecd 100644 --- a/drivers/net/dl2k.h +++ b/drivers/net/dl2k.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include /* Processor type for cache alignment. */ #include @@ -271,20 +272,9 @@ enum RFS_bits { #define MII_RESET_TIME_OUT 10000 /* MII register */ enum _mii_reg { - MII_BMCR = 0, - MII_BMSR = 1, - MII_PHY_ID1 = 2, - MII_PHY_ID2 = 3, - MII_ANAR = 4, - MII_ANLPAR = 5, - MII_ANER = 6, - MII_ANNPT = 7, - MII_ANLPRNP = 8, - MII_MSCR = 9, - MII_MSSR = 10, - MII_ESR = 15, MII_PHY_SCR = 16, }; + /* PCS register */ enum _pcs_reg { PCS_BMCR = 0, @@ -297,102 +287,6 @@ enum _pcs_reg { PCS_ESR = 15, }; -/* Basic Mode Control Register */ -enum _mii_bmcr { - MII_BMCR_RESET = 0x8000, - MII_BMCR_LOOP_BACK = 0x4000, - MII_BMCR_SPEED_LSB = 0x2000, - MII_BMCR_AN_ENABLE = 0x1000, - MII_BMCR_POWER_DOWN = 0x0800, - MII_BMCR_ISOLATE = 0x0400, - MII_BMCR_RESTART_AN = 0x0200, - MII_BMCR_DUPLEX_MODE = 0x0100, - MII_BMCR_COL_TEST = 0x0080, - MII_BMCR_SPEED_MSB = 0x0040, - MII_BMCR_SPEED_RESERVED = 0x003f, - MII_BMCR_SPEED_10 = 0, - MII_BMCR_SPEED_100 = MII_BMCR_SPEED_LSB, - MII_BMCR_SPEED_1000 = MII_BMCR_SPEED_MSB, -}; - -/* Basic Mode Status Register */ -enum _mii_bmsr { - MII_BMSR_100BT4 = 0x8000, - MII_BMSR_100BX_FD = 0x4000, - MII_BMSR_100BX_HD = 0x2000, - MII_BMSR_10BT_FD = 0x1000, - MII_BMSR_10BT_HD = 0x0800, - MII_BMSR_100BT2_FD = 0x0400, - MII_BMSR_100BT2_HD = 0x0200, - MII_BMSR_EXT_STATUS = 0x0100, - MII_BMSR_PREAMBLE_SUPP = 0x0040, - MII_BMSR_AN_COMPLETE = 0x0020, - MII_BMSR_REMOTE_FAULT = 0x0010, - MII_BMSR_AN_ABILITY = 0x0008, - MII_BMSR_LINK_STATUS = 0x0004, - MII_BMSR_JABBER_DETECT = 0x0002, - MII_BMSR_EXT_CAP = 0x0001, -}; - -/* ANAR */ -enum _mii_anar { - MII_ANAR_NEXT_PAGE = 0x8000, - MII_ANAR_REMOTE_FAULT = 0x4000, - MII_ANAR_ASYMMETRIC = 0x0800, - MII_ANAR_PAUSE = 0x0400, - MII_ANAR_100BT4 = 0x0200, - MII_ANAR_100BX_FD = 0x0100, - MII_ANAR_100BX_HD = 0x0080, - MII_ANAR_10BT_FD = 0x0020, - MII_ANAR_10BT_HD = 0x0010, - MII_ANAR_SELECTOR = 0x001f, - MII_IEEE8023_CSMACD = 0x0001, -}; - -/* ANLPAR */ -enum _mii_anlpar { - MII_ANLPAR_NEXT_PAGE = MII_ANAR_NEXT_PAGE, - MII_ANLPAR_REMOTE_FAULT = MII_ANAR_REMOTE_FAULT, - MII_ANLPAR_ASYMMETRIC = MII_ANAR_ASYMMETRIC, - MII_ANLPAR_PAUSE = MII_ANAR_PAUSE, - MII_ANLPAR_100BT4 = MII_ANAR_100BT4, - MII_ANLPAR_100BX_FD = MII_ANAR_100BX_FD, - MII_ANLPAR_100BX_HD = MII_ANAR_100BX_HD, - MII_ANLPAR_10BT_FD = MII_ANAR_10BT_FD, - MII_ANLPAR_10BT_HD = MII_ANAR_10BT_HD, - MII_ANLPAR_SELECTOR = MII_ANAR_SELECTOR, -}; - -/* Auto-Negotiation Expansion Register */ -enum _mii_aner { - MII_ANER_PAR_DETECT_FAULT = 0x0010, - MII_ANER_LP_NEXTPAGABLE = 0x0008, - MII_ANER_NETXTPAGABLE = 0x0004, - MII_ANER_PAGE_RECEIVED = 0x0002, - MII_ANER_LP_NEGOTIABLE = 0x0001, -}; - -/* MASTER-SLAVE Control Register */ -enum _mii_mscr { - MII_MSCR_TEST_MODE = 0xe000, - MII_MSCR_CFG_ENABLE = 0x1000, - MII_MSCR_CFG_VALUE = 0x0800, - MII_MSCR_PORT_VALUE = 0x0400, - MII_MSCR_1000BT_FD = 0x0200, - MII_MSCR_1000BT_HD = 0X0100, -}; - -/* MASTER-SLAVE Status Register */ -enum _mii_mssr { - MII_MSSR_CFG_FAULT = 0x8000, - MII_MSSR_CFG_RES = 0x4000, - MII_MSSR_LOCAL_RCV_STATUS = 
0x2000, - MII_MSSR_REMOTE_RCVR = 0x1000, - MII_MSSR_LP_1000BT_FD = 0x0800, - MII_MSSR_LP_1000BT_HD = 0x0400, - MII_MSSR_IDLE_ERR_COUNT = 0x00ff, -}; - /* IEEE Extened Status Register */ enum _mii_esr { MII_ESR_1000BX_FD = 0x8000, @@ -471,13 +365,6 @@ struct ioctl_data { char *data; }; -struct mii_data { - __u16 reserved; - __u16 reg_num; - __u16 in_value; - __u16 out_value; -}; - /* The Rx and Tx buffer descriptors. */ struct netdev_desc { __le64 next_desc; diff --git a/drivers/net/ks8851_mll.c b/drivers/net/ks8851_mll.c index c0ceebc..4e3a69c 100644 --- a/drivers/net/ks8851_mll.c +++ b/drivers/net/ks8851_mll.c @@ -35,7 +35,7 @@ #define DRV_NAME "ks8851_mll" static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 }; -#define MAX_RECV_FRAMES 32 +#define MAX_RECV_FRAMES 255 #define MAX_BUF_SIZE 2048 #define TX_BUF_SIZE 2000 #define RX_BUF_SIZE 2000 diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index e52af5b..50d2af8 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -700,7 +700,8 @@ struct netxen_recv_context { #define NX_CDRP_CMD_READ_PEXQ_PARAMETERS 0x0000001c #define NX_CDRP_CMD_GET_LIC_CAPABILITIES 0x0000001d #define NX_CDRP_CMD_READ_MAX_LRO_PER_BOARD 0x0000001e -#define NX_CDRP_CMD_MAX 0x0000001f +#define NX_CDRP_CMD_CONFIG_GBE_PORT 0x0000001f +#define NX_CDRP_CMD_MAX 0x00000020 #define NX_RCODE_SUCCESS 0 #define NX_RCODE_NO_HOST_MEM 1 @@ -1015,6 +1016,7 @@ typedef struct { #define NX_FW_CAPABILITY_BDG (1 << 8) #define NX_FW_CAPABILITY_FVLANTX (1 << 9) #define NX_FW_CAPABILITY_HW_LRO (1 << 10) +#define NX_FW_CAPABILITY_GBE_LINK_CFG (1 << 11) /* module types */ #define LINKEVENT_MODULE_NOT_PRESENT 1 @@ -1323,6 +1325,9 @@ int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd); int netxen_linkevent_request(struct netxen_adapter *adapter, int enable); void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup); +int nx_fw_cmd_set_gbe_port(struct netxen_adapter *adapter, + u32 speed, u32 duplex, u32 autoneg); + int nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu); int netxen_nic_change_mtu(struct net_device *netdev, int new_mtu); int netxen_config_hw_lro(struct netxen_adapter *adapter, int enable); diff --git a/drivers/net/netxen/netxen_nic_ctx.c b/drivers/net/netxen/netxen_nic_ctx.c index 9cb8f68..f48cdb2 100644 --- a/drivers/net/netxen/netxen_nic_ctx.c +++ b/drivers/net/netxen/netxen_nic_ctx.c @@ -112,6 +112,21 @@ nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu) return 0; } +int +nx_fw_cmd_set_gbe_port(struct netxen_adapter *adapter, + u32 speed, u32 duplex, u32 autoneg) +{ + + return netxen_issue_cmd(adapter, + adapter->ahw.pci_func, + NXHAL_VERSION, + speed, + duplex, + autoneg, + NX_CDRP_CMD_CONFIG_GBE_PORT); + +} + static int nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) { diff --git a/drivers/net/netxen/netxen_nic_ethtool.c b/drivers/net/netxen/netxen_nic_ethtool.c index 714f387..7e34840 100644 --- a/drivers/net/netxen/netxen_nic_ethtool.c +++ b/drivers/net/netxen/netxen_nic_ethtool.c @@ -216,7 +216,6 @@ skip: check_sfp_module = netif_running(dev) && adapter->has_link_events; } else { - ecmd->autoneg = AUTONEG_ENABLE; ecmd->supported |= (SUPPORTED_TP |SUPPORTED_Autoneg); ecmd->advertising |= (ADVERTISED_TP | ADVERTISED_Autoneg); @@ -254,53 +253,24 @@ static int netxen_nic_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct netxen_adapter *adapter = netdev_priv(dev); - __u32 status; + int ret; - /* read which mode */ - 
if (adapter->ahw.port_type == NETXEN_NIC_GBE) { - /* autonegotiation */ - if (adapter->phy_write - && adapter->phy_write(adapter, - NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG, - ecmd->autoneg) != 0) - return -EIO; - else - adapter->link_autoneg = ecmd->autoneg; + if (adapter->ahw.port_type != NETXEN_NIC_GBE) + return -EOPNOTSUPP; - if (adapter->phy_read - && adapter->phy_read(adapter, - NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, - &status) != 0) - return -EIO; + if (!(adapter->capabilities & NX_FW_CAPABILITY_GBE_LINK_CFG)) + return -EOPNOTSUPP; - /* speed */ - switch (ecmd->speed) { - case SPEED_10: - netxen_set_phy_speed(status, 0); - break; - case SPEED_100: - netxen_set_phy_speed(status, 1); - break; - case SPEED_1000: - netxen_set_phy_speed(status, 2); - break; - } - /* set duplex mode */ - if (ecmd->duplex == DUPLEX_HALF) - netxen_clear_phy_duplex(status); - if (ecmd->duplex == DUPLEX_FULL) - netxen_set_phy_duplex(status); - if (adapter->phy_write - && adapter->phy_write(adapter, - NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS, - *((int *)&status)) != 0) - return -EIO; - else { - adapter->link_speed = ecmd->speed; - adapter->link_duplex = ecmd->duplex; - } - } else + ret = nx_fw_cmd_set_gbe_port(adapter, ecmd->speed, ecmd->duplex, + ecmd->autoneg); + if (ret == NX_RCODE_NOT_SUPPORTED) return -EOPNOTSUPP; + else if (ret) + return -EIO; + + adapter->link_speed = ecmd->speed; + adapter->link_duplex = ecmd->duplex; + adapter->link_autoneg = ecmd->autoneg; if (!netif_running(dev)) return 0; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0f77aca..894ad84 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1121,10 +1121,12 @@ static long tun_chr_ioctl(struct file *file, unsigned int cmd, int sndbuf; int ret; - if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) + if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) { if (copy_from_user(&ifr, argp, sizeof ifr)) return -EFAULT; - + } else { + memset(&ifr, 0, sizeof(ifr)); + } if (cmd == TUNGETFEATURES) { /* Currently this just means: "what IFF flags are valid?". * This is needed because we never checked for invalid flags on diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index e391ef9..fd8e335 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1325,7 +1325,7 @@ static int kaweth_internal_control_msg(struct usb_device *usb_dev, int retv; int length = 0; /* shut up GCC */ - urb = usb_alloc_urb(0, GFP_NOIO); + urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return -ENOMEM; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index da33dce..07f69ee 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -584,6 +584,14 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q) entry = (struct skb_data *) skb->cb; urb = entry->urb; + /* + * Get reference count of the URB to avoid it to be + * freed during usb_unlink_urb, which may trigger + * use-after-free problem inside usb_unlink_urb since + * usb_unlink_urb is always racing with .complete + * handler(include defer_bh). 
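The comment above describes the general "pin before unlock" rule; a condensed sketch of that shape with illustrative names (struct obj, do_cancel() and obj_release() are assumptions, not this driver's API):

	/* Assumes the list is non-empty; error handling omitted. */
	struct obj {
		struct list_head node;
		struct kref ref;
	};

	static void cancel_first(struct list_head *pending, spinlock_t *lock)
	{
		struct obj *o;

		spin_lock(lock);
		o = list_first_entry(pending, struct obj, node);
		kref_get(&o->ref);		/* pin: the free path cannot reap it */
		spin_unlock(lock);

		do_cancel(o);			/* may race with the release path */
		kref_put(&o->ref, obj_release);	/* drop our pin */
	}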
+ */ + usb_get_urb(urb); spin_unlock_irqrestore(&q->lock, flags); // during some PM-driven resume scenarios, // these (async) unlinks complete immediately @@ -592,6 +600,7 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q) devdbg (dev, "unlink urb err, %d", retval); else count++; + usb_put_urb(urb); spin_lock_irqsave(&q->lock, flags); } spin_unlock_irqrestore (&q->lock, flags); @@ -989,7 +998,6 @@ static void tx_complete (struct urb *urb) } } - urb->dev = NULL; entry->state = tx_done; defer_bh(dev, skb, &dev->txq); } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1e42381..d0959af 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2550,6 +2550,40 @@ static void __devinit fixup_ti816x_class(struct pci_dev* dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class); +/* + * Some BIOS implementations leave the Intel GPU interrupts enabled, + * even though no one is handling them (f.e. i915 driver is never loaded). + * Additionally the interrupt destination is not set up properly + * and the interrupt ends up -somewhere-. + * + * These spurious interrupts are "sticky" and the kernel disables + * the (shared) interrupt line after 100.000+ generated interrupts. + * + * Fix it by disabling the still enabled interrupts. + * This resolves crashes often seen on monitor unplug. + */ +#define I915_DEIER_REG 0x4400c +static void __devinit disable_igfx_irq(struct pci_dev *dev) +{ + void __iomem *regs = pci_iomap(dev, 0, 0); + if (regs == NULL) { + dev_warn(&dev->dev, "igfx quirk: Can't iomap PCI device\n"); + return; + } + + /* Check if any interrupt line is still enabled */ + if (readl(regs + I915_DEIER_REG) != 0) { + dev_warn(&dev->dev, "BIOS left Intel GPU interrupts enabled; " + "disabling\n"); + + writel(0, regs + I915_DEIER_REG); + } + + pci_iounmap(dev, regs); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index eb39d26..253996c 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -300,9 +300,9 @@ static void quirk_system_pci_resources(struct pnp_dev *dev) } } -#ifdef CONFIG_AMD_NB +#ifdef CONFIG_K8_NB -#include +#include static void quirk_amd_mmconfig_area(struct pnp_dev *dev) { @@ -366,7 +366,7 @@ static struct pnp_fixup pnp_fixups[] = { /* PnP resources that might overlap PCI BARs */ {"PNP0c01", quirk_system_pci_resources}, {"PNP0c02", quirk_system_pci_resources}, -#ifdef CONFIG_AMD_NB +#ifdef CONFIG_K8_NB {"PNP0c01", quirk_amd_mmconfig_area}, #endif {""} diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c index 79795cd..daefe66 100644 --- a/drivers/rtc/rtc-wm831x.c +++ b/drivers/rtc/rtc-wm831x.c @@ -23,7 +23,7 @@ #include #include #include - +#include /* * R16416 (0x4020) - RTC Write Counter @@ -95,6 +95,26 @@ struct wm831x_rtc { unsigned int alarm_enabled:1; }; +static void wm831x_rtc_add_randomness(struct wm831x *wm831x) +{ + int ret; + u16 reg; + + /* + * The write counter contains a pseudo-random number which is + * regenerated every time we set the RTC so it should be a + * useful per-system source of entropy. 
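This is the same add_device_randomness() pattern used for the DMI table and USB device strings elsewhere in this series: device-specific data that is unique but not secret gets mixed into the input pool without crediting any entropy. A minimal sketch of a driver doing the same (foo_device and its serial field are hypothetical):

	#include <linux/random.h>

	static void foo_seed_randomness(struct foo_device *foo)
	{
		/* Unique per board, but not secret: mix in, credit nothing. */
		add_device_randomness(foo->serial, sizeof(foo->serial));
	}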
+ */ + ret = wm831x_reg_read(wm831x, WM831X_RTC_WRITE_COUNTER); + if (ret >= 0) { + reg = ret; + add_device_randomness(®, sizeof(reg)); + } else { + dev_warn(wm831x->dev, "Failed to read RTC write counter: %d\n", + ret); + } +} + /* * Read current time and date in RTC */ @@ -464,6 +484,8 @@ static int wm831x_rtc_probe(struct platform_device *pdev) alm_irq, ret); } + wm831x_rtc_add_randomness(wm831x); + return 0; err: diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index b10ee2a..1bdfde1 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -754,7 +754,7 @@ static struct domain_device *sas_ex_discover_end_dev( } /* See if this phy is part of a wide port */ -static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id) +static bool sas_ex_join_wide_port(struct domain_device *parent, int phy_id) { struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id]; int i; @@ -770,11 +770,11 @@ static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id) sas_port_add_phy(ephy->port, phy->phy); phy->port = ephy->port; phy->phy_state = PHY_DEVICE_DISCOVERED; - return 0; + return true; } } - return -ENODEV; + return false; } static struct domain_device *sas_ex_discover_expander( @@ -912,8 +912,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) return res; } - res = sas_ex_join_wide_port(dev, phy_id); - if (!res) { + if (sas_ex_join_wide_port(dev, phy_id)) { SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", phy_id, SAS_ADDR(ex_phy->attached_sas_addr)); return res; @@ -958,8 +957,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == SAS_ADDR(child->sas_addr)) { ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; - res = sas_ex_join_wide_port(dev, i); - if (!res) + if (sas_ex_join_wide_port(dev, i)) SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); @@ -1812,32 +1810,20 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) { struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; struct domain_device *child; - bool found = false; - int res, i; + int res; SAS_DPRINTK("ex %016llx phy%d new device attached\n", SAS_ADDR(dev->sas_addr), phy_id); res = sas_ex_phy_discover(dev, phy_id); if (res) - goto out; - /* to support the wide port inserted */ - for (i = 0; i < dev->ex_dev.num_phys; i++) { - struct ex_phy *ex_phy_temp = &dev->ex_dev.ex_phy[i]; - if (i == phy_id) - continue; - if (SAS_ADDR(ex_phy_temp->attached_sas_addr) == - SAS_ADDR(ex_phy->attached_sas_addr)) { - found = true; - break; - } - } - if (found) { - sas_ex_join_wide_port(dev, phy_id); + return res; + + if (sas_ex_join_wide_port(dev, phy_id)) return 0; - } + res = sas_ex_discover_devices(dev, phy_id); - if (!res) - goto out; + if (res) + return res; list_for_each_entry(child, &dev->ex_dev.children, siblings) { if (SAS_ADDR(child->sas_addr) == SAS_ADDR(ex_phy->attached_sas_addr)) { @@ -1847,7 +1833,6 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) break; } } -out: return res; } @@ -1946,9 +1931,7 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) struct domain_device *dev = NULL; res = sas_find_bcast_dev(port_dev, &dev); - if (res) - goto out; - if (dev) { + while (res == 0 && dev) { struct expander_device *ex = &dev->ex_dev; int i = 0, phy_id; @@ -1960,8 +1943,10 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) res = sas_rediscover(dev, phy_id); 
i = phy_id + 1; } while (i < ex->num_phys); + + dev = NULL; + res = sas_find_bcast_dev(port_dev, &dev); } -out: return res; } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 573921d..3890793 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1550,6 +1550,20 @@ static void scsi_restart_operations(struct Scsi_Host *shost) * requests are started. */ scsi_run_host_queues(shost); + + /* + * if eh is active and host_eh_scheduled is pending we need to re-run + * recovery. we do this check after scsi_run_host_queues() to allow + * everything pent up since the last eh run a chance to make forward + * progress before we sync again. Either we'll immediately re-run + * recovery or scsi_device_unbusy() will wake us again when these + * pending commands complete. + */ + spin_lock_irqsave(shost->host_lock, flags); + if (shost->host_eh_scheduled) + if (scsi_host_set_state(shost, SHOST_RECOVERY)) + WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)); + spin_unlock_irqrestore(shost->host_lock, flags); } /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8df12522..e28f9b0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -482,15 +482,26 @@ static void scsi_run_queue(struct request_queue *q) */ static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd) { + struct scsi_device *sdev = cmd->device; struct request *req = cmd->request; unsigned long flags; + /* + * We need to hold a reference on the device to avoid the queue being + * killed after the unlock and before scsi_run_queue is invoked which + * may happen because scsi_unprep_request() puts the command which + * releases its reference on the device. + */ + get_device(&sdev->sdev_gendev); + spin_lock_irqsave(q->queue_lock, flags); scsi_unprep_request(req); blk_requeue_request(q, req); spin_unlock_irqrestore(q->queue_lock, flags); scsi_run_queue(q); + + put_device(&sdev->sdev_gendev); } void scsi_next_command(struct scsi_cmnd *cmd) diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 1fbf7c7..11c0085 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -107,6 +107,7 @@ extern void scsi_exit_procfs(void); #endif /* CONFIG_PROC_FS */ /* scsi_scan.c */ +extern int scsi_complete_async_scans(void); extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int, unsigned int, unsigned int, int); extern void scsi_forget_host(struct Scsi_Host *); diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c index 74708fc..5c22bda 100644 --- a/drivers/scsi/scsi_wait_scan.c +++ b/drivers/scsi/scsi_wait_scan.c @@ -13,6 +13,7 @@ #include #include #include +#include "scsi_priv.h" static int __init wait_scan_init(void) { diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 653f853..8ad9dfb 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1120,7 +1120,8 @@ skip_normal_probe: } - if (data_interface->cur_altsetting->desc.bNumEndpoints < 2) + if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 || + control_interface->cur_altsetting->desc.bNumEndpoints == 0) return -EINVAL; epctrl = &control_interface->cur_altsetting->endpoint[0].desc; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index d71514b..37f2899 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -441,6 +441,8 @@ retry: goto retry; } if (!desc->reslength) { /* zero length read */ + dev_dbg(&desc->intf->dev, "%s: zero length - clearing 
WDM_READ\n", __func__); + clear_bit(WDM_READ, &desc->flags); spin_unlock_irq(&desc->iuspin); goto retry; } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index df1e873..48742ff 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1454,10 +1454,14 @@ static int processcompl_compat(struct async *as, void __user * __user *arg) void __user *addr = as->userurb; unsigned int i; - if (as->userbuffer && urb->actual_length) - if (copy_to_user(as->userbuffer, urb->transfer_buffer, - urb->actual_length)) + if (as->userbuffer && urb->actual_length) { + if (urb->number_of_packets > 0) /* Isochronous */ + i = urb->transfer_buffer_length; + else /* Non-Isoc */ + i = urb->actual_length; + if (copy_to_user(as->userbuffer, urb->transfer_buffer, i)) return -EFAULT; + } if (put_user(as->status, &userurb->status)) return -EFAULT; if (put_user(urb->actual_length, &userurb->actual_length)) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 2b428fc..02aad50 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -458,10 +459,8 @@ hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) * talking to TTs must queue control transfers (not just bulk and iso), so * both can talk to the same hub concurrently. */ -static void hub_tt_work(struct work_struct *work) +void _hub_tt_work(struct usb_hub *hub) { - struct usb_hub *hub = - container_of(work, struct usb_hub, tt.clear_work); unsigned long flags; int limit = 100; @@ -496,6 +495,14 @@ static void hub_tt_work(struct work_struct *work) spin_unlock_irqrestore (&hub->tt.lock, flags); } +void hub_tt_work(struct work_struct *work) +{ + struct usb_hub *hub = + container_of(work, struct usb_hub, tt.clear_work); + + _hub_tt_work(hub); +} + /** * usb_hub_clear_tt_buffer - clear control/bulk TT state in high speed hub * @urb: an URB associated with the failed or incomplete split transaction @@ -543,7 +550,20 @@ int usb_hub_clear_tt_buffer(struct urb *urb) /* tell keventd to clear state for this TT */ spin_lock_irqsave (&tt->lock, flags); list_add_tail (&clear->clear_list, &tt->clear_list); - schedule_work(&tt->clear_work); + /* don't schedule on kevent if we're running on keventd (e.g., + * in hid_reset we can get here on kevent) unless on >=2.6.36 + */ + if (!current_is_keventd()) + /* put it on keventd */ + schedule_work(&tt->clear_work); + else { + /* let khubd do it */ + struct usb_hub *hub = + container_of(&tt->clear_work, struct usb_hub, + tt.clear_work); + kick_khubd(hub); + } + spin_unlock_irqrestore (&tt->lock, flags); return 0; } @@ -1812,6 +1832,14 @@ int usb_new_device(struct usb_device *udev) /* Tell the world! */ announce_device(udev); + if (udev->serial) + add_device_randomness(udev->serial, strlen(udev->serial)); + if (udev->product) + add_device_randomness(udev->product, strlen(udev->product)); + if (udev->manufacturer) + add_device_randomness(udev->manufacturer, + strlen(udev->manufacturer)); + /* Register the device. The device driver is responsible * for configuring the device and invoking the add-device * notifier chain (used by usbfs and possibly others). 
@@ -3274,6 +3302,10 @@ static void hub_events(void) if (hub->quiescing) goto loop_autopm; + /* _hub_tt_work usually run on keventd */ + if (!list_empty(&hub->tt.clear_list)) + _hub_tt_work(hub); + if (hub->error) { dev_dbg (hub_dev, "resetting for error %d\n", hub->error); diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c index 1206a26..7565f55 100644 --- a/drivers/usb/early/ehci-dbgp.c +++ b/drivers/usb/early/ehci-dbgp.c @@ -449,7 +449,7 @@ static int dbgp_ehci_startup(void) writel(FLAG_CF, &ehci_regs->configured_flag); /* Wait until the controller is no longer halted */ - loop = 10; + loop = 1000; do { status = readl(&ehci_regs->status); if (!(status & STS_HALT)) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 0ff157a..981b604 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -458,9 +458,13 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev) } } - /* Disable any BIOS SMIs */ - writel(XHCI_LEGACY_DISABLE_SMI, - base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); + val = readl(base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); + /* Mask off (turn off) any enabled SMIs */ + val &= XHCI_LEGACY_DISABLE_SMI; + /* Mask all SMI events bits, RW1C */ + val |= XHCI_LEGACY_SMI_EVENTS; + /* Disable any BIOS SMIs and clear all SMI events*/ + writel(val, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); hc_init: op_reg_base = base + XHCI_HC_LENGTH(readl(base)); diff --git a/drivers/usb/host/xhci-ext-caps.h b/drivers/usb/host/xhci-ext-caps.h index 78c4eda..e2acc97 100644 --- a/drivers/usb/host/xhci-ext-caps.h +++ b/drivers/usb/host/xhci-ext-caps.h @@ -62,8 +62,9 @@ /* USB Legacy Support Control and Status Register - section 7.1.2 */ /* Add this offset, plus the value of xECP in HCCPARAMS to the base address */ #define XHCI_LEGACY_CONTROL_OFFSET (0x04) -/* bits 1:2, 5:12, and 17:19 need to be preserved; bits 21:28 should be zero */ -#define XHCI_LEGACY_DISABLE_SMI ((0x3 << 1) + (0xff << 5) + (0x7 << 17)) +/* bits 1:3, 5:12, and 17:19 need to be preserved; bits 21:28 should be zero */ +#define XHCI_LEGACY_DISABLE_SMI ((0x7 << 1) + (0xff << 5) + (0x7 << 17)) +#define XHCI_LEGACY_SMI_EVENTS (0x7 << 29) /* command register values to disable interrupts and halt the HC */ /* start/stop HC execution - do not write unless HC is halted*/ diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c index 56661a2..0641633 100644 --- a/drivers/usb/host/xhci-hcd.c +++ b/drivers/usb/host/xhci-hcd.c @@ -150,7 +150,7 @@ int xhci_reset(struct xhci_hcd *xhci) xhci_to_hcd(xhci)->state = HC_STATE_HALT; ret = handshake(xhci, &xhci->op_regs->command, - CMD_RESET, 0, 250 * 1000); + CMD_RESET, 0, 10 * 1000 * 1000); if (ret) return ret; diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 8c29073..776fd43 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -934,11 +934,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) int i; /* Free the Event Ring Segment Table and the actual Event Ring */ - if (xhci->ir_set) { - xhci_writel(xhci, 0, &xhci->ir_set->erst_size); - xhci_write_64(xhci, 0, &xhci->ir_set->erst_base); - xhci_write_64(xhci, 0, &xhci->ir_set->erst_dequeue); - } size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); if (xhci->erst.entries) pci_free_consistent(pdev, size, @@ -950,7 +945,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) xhci->event_ring = NULL; xhci_dbg(xhci, "Freed event ring\n"); - xhci_write_64(xhci, 0, 
&xhci->op_regs->cmd_ring); + xhci->cmd_ring_reserved_trbs = 0; if (xhci->cmd_ring) xhci_ring_free(xhci, xhci->cmd_ring); xhci->cmd_ring = NULL; @@ -969,7 +964,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) xhci->device_pool = NULL; xhci_dbg(xhci, "Freed device context pool\n"); - xhci_write_64(xhci, 0, &xhci->op_regs->dcbaa_ptr); if (xhci->dcbaa) pci_free_consistent(pdev, sizeof(*xhci->dcbaa), xhci->dcbaa, xhci->dcbaa->dma); @@ -1146,6 +1140,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) fail: xhci_warn(xhci, "Couldn't initialize memory\n"); + xhci_halt(xhci); + xhci_reset(xhci); xhci_mem_cleanup(xhci); return -ENOMEM; } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 0a1ccaa..c374beb 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1784,7 +1784,8 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial) dbg("%s", __func__); - if (strcmp(udev->manufacturer, "CALAO Systems") == 0) + if ((udev->manufacturer) && + (strcmp(udev->manufacturer, "CALAO Systems") == 0)) return ftdi_jtag_probe(serial); return 0; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 9fdcee2..61829b8 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1181,9 +1181,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty) } spin_lock_irqsave(&mos7840_port->pool_lock, flags); - for (i = 0; i < NUM_URBS; ++i) - if (mos7840_port->busy[i]) - chars += URB_TRANSFER_BUFFER_SIZE; + for (i = 0; i < NUM_URBS; ++i) { + if (mos7840_port->busy[i]) { + struct urb *urb = mos7840_port->write_urb_pool[i]; + chars += urb->transfer_buffer_length; + } + } spin_unlock_irqrestore(&mos7840_port->pool_lock, flags); dbg("%s - returns %d", __func__, chars); return chars; diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index f23f3b4..5429bc5 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1083,6 +1083,12 @@ int usb_serial_probe(struct usb_interface *interface, serial->attached = 1; } + /* Avoid race with tty_open and serial_install by setting the + * disconnected flag and not clearing it until all ports have been + * registered. + */ + serial->disconnected = 1; + if (get_free_serial(serial, num_ports, &minor) == NULL) { dev_err(&interface->dev, "No more free serial devices\n"); goto probe_error; @@ -1105,6 +1111,8 @@ int usb_serial_probe(struct usb_interface *interface, } } + serial->disconnected = 0; + usb_serial_console_init(debug, minor); exit: diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 54fbb29..6623a2e 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -814,8 +814,15 @@ static int __devinit uvesafb_vbe_init(struct fb_info *info) par->pmi_setpal = pmi_setpal; par->ypan = ypan; - if (par->pmi_setpal || par->ypan) - uvesafb_vbe_getpmi(task, par); + if (par->pmi_setpal || par->ypan) { + if (__supported_pte_mask & _PAGE_NX) { + par->pmi_setpal = par->ypan = 0; + printk(KERN_WARNING "uvesafb: NX protection is actively." + "We have better not to use the PMI.\n"); + } else { + uvesafb_vbe_getpmi(task, par); + } + } #else /* The protected mode interface is not available on non-x86. 
*/ par->pmi_setpal = par->ypan = 0; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c0861e7..8aac2d6 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -211,10 +211,17 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, work->ordered_func(work); - /* now take the lock again and call the freeing code */ + /* now take the lock again and drop our item from the list */ spin_lock(&workers->order_lock); list_del(&work->order_list); + spin_unlock(&workers->order_lock); + + /* + * we don't want to call the ordered free functions + * with the lock held though + */ work->ordered_free(work); + spin_lock(&workers->order_lock); } spin_unlock(&workers->order_lock); diff --git a/fs/compat.c b/fs/compat.c index d1e2411..46b93d1 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1208,11 +1208,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_readv(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } @@ -1265,11 +1268,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_writev(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 90a6087..3c1dbc0 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -777,6 +777,9 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, goto out; } crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; + if (crypt_stat->flags & ECRYPTFS_NEW_FILE) + crypt_stat->flags &= ~(ECRYPTFS_NEW_FILE); + /* Set up a fake ecryptfs file, this is used to interface with * the file in the underlying filesystem so that the * truncation has an effect there as well. 
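The btrfs async-thread hunk above is an instance of a general rule: never invoke a callback that may sleep or re-enter the subsystem while holding the spinlock protecting the list it came from. A generic sketch of that loop shape (order_lock, work_item and free_fn are illustrative names, not the btrfs API):

	spin_lock(&order_lock);
	while (!list_empty(&order_list)) {
		struct work_item *work;

		work = list_first_entry(&order_list, struct work_item, list);
		list_del(&work->list);
		spin_unlock(&order_lock);	/* callback runs unlocked */
		work->free_fn(work);
		spin_lock(&order_lock);		/* re-take for the next entry */
	}
	spin_unlock(&order_lock);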
*/ @@ -1035,6 +1038,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value, size, flags); mutex_unlock(&lower_dentry->d_inode->i_mutex); + if (!rc) + fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode, NULL); out: return rc; } diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index e14cf7e..5ffc900 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -148,7 +148,7 @@ int ecryptfs_privileged_open(struct file **lower_file, (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; - if (flags & O_RDONLY) { + if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f539204..ff57421 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -200,6 +200,12 @@ struct eventpoll { /* The user that created the eventpoll descriptor */ struct user_struct *user; + + struct file *file; + + /* used to optimize loop detection check */ + int visited; + struct list_head visited_list_link; }; /* Wait structure used by the poll hooks */ @@ -258,6 +264,15 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; +/* Visited nodes during ep_loop_check(), so we can unset them when we finish */ +static LIST_HEAD(visited_list); + +/* + * List of files with newly added links, where we may need to limit the number + * of emanating paths. Protected by the epmutex. + */ +static LIST_HEAD(tfile_check_list); + #ifdef CONFIG_SYSCTL #include @@ -277,6 +292,12 @@ ctl_table epoll_table[] = { }; #endif /* CONFIG_SYSCTL */ +static const struct file_operations eventpoll_fops; + +static inline int is_file_epoll(struct file *f) +{ + return f->f_op == &eventpoll_fops; +} /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, @@ -300,6 +321,11 @@ static inline int ep_is_linked(struct list_head *p) return !list_empty(p); } +static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p) +{ + return container_of(p, struct eppoll_entry, wait); +} + /* Get the "struct epitem" from a wait queue pointer */ static inline struct epitem *ep_item_from_wait(wait_queue_t *p) { @@ -434,6 +460,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq) put_cpu(); } +static void ep_remove_wait_queue(struct eppoll_entry *pwq) +{ + wait_queue_head_t *whead; + + rcu_read_lock(); + /* If it is cleared by POLLFREE, it should be rcu-safe */ + whead = rcu_dereference(pwq->whead); + if (whead) + remove_wait_queue(whead, &pwq->wait); + rcu_read_unlock(); +} + /* * This function unregisters poll callbacks from the associated file * descriptor. Must be called with "mtx" held (or "epmutex" if called from @@ -448,7 +486,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) pwq = list_first_entry(lsthead, struct eppoll_entry, llink); list_del(&pwq->llink); - remove_wait_queue(pwq->whead, &pwq->wait); + ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } } @@ -698,12 +736,6 @@ static const struct file_operations eventpoll_fops = { .poll = ep_eventpoll_poll }; -/* Fast test to see if the file is an evenpoll file */ -static inline int is_file_epoll(struct file *f) -{ - return f->f_op == &eventpoll_fops; -} - /* * This is called from eventpoll_release() to unlink files from the eventpoll * interface. 
We need to have this facility to cleanup correctly files that are @@ -814,6 +846,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; + if ((unsigned long)key & POLLFREE) { + ep_pwq_from_wait(wait)->whead = NULL; + /* + * whead = NULL above can race with ep_remove_wait_queue() + * which can do another remove_wait_queue() after us, so we + * can't use __remove_wait_queue(). whead->lock is held by + * the caller. + */ + list_del_init(&wait->task_list); + } + spin_lock_irqsave(&ep->lock, flags); /* @@ -913,6 +956,103 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } + + +#define PATH_ARR_SIZE 5 +/* + * These are the number paths of length 1 to 5, that we are allowing to emanate + * from a single file of interest. For example, we allow 1000 paths of length + * 1, to emanate from each file of interest. This essentially represents the + * potential wakeup paths, which need to be limited in order to avoid massive + * uncontrolled wakeup storms. The common use case should be a single ep which + * is connected to n file sources. In this case each file source has 1 path + * of length 1. Thus, the numbers below should be more than sufficient. These + * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify + * and delete can't add additional paths. Protected by the epmutex. + */ +static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; +static int path_count[PATH_ARR_SIZE]; + +static int path_count_inc(int nests) +{ + /* Allow an arbitrary number of depth 1 paths */ + if (nests == 0) + return 0; + + if (++path_count[nests] > path_limits[nests]) + return -1; + return 0; +} + +static void path_count_init(void) +{ + int i; + + for (i = 0; i < PATH_ARR_SIZE; i++) + path_count[i] = 0; +} + +static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) +{ + int error = 0; + struct file *file = priv; + struct file *child_file; + struct epitem *epi; + + list_for_each_entry(epi, &file->f_ep_links, fllink) { + child_file = epi->ep->file; + if (is_file_epoll(child_file)) { + if (list_empty(&child_file->f_ep_links)) { + if (path_count_inc(call_nests)) { + error = -1; + break; + } + } else { + error = ep_call_nested(&poll_loop_ncalls, + EP_MAX_NESTS, + reverse_path_check_proc, + child_file, child_file, + current); + } + if (error != 0) + break; + } else { + printk(KERN_ERR "reverse_path_check_proc: " + "file is not an ep!\n"); + } + } + return error; +} + +/** + * reverse_path_check - The tfile_check_list is list of file *, which have + * links that are proposed to be newly added. We need to + * make sure that those added links don't add too many + * paths such that we will spend all our time waking up + * eventpoll objects. + * + * Returns: Returns zero if the proposed links don't create too many paths, + * -1 otherwise. + */ +static int reverse_path_check(void) +{ + int length = 0; + int error = 0; + struct file *current_file; + + /* let's call this for all tfiles */ + list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { + length++; + path_count_init(); + error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + reverse_path_check_proc, current_file, + current_file, current); + if (error) + break; + } + return error; +} + /* * Must be called with "mtx" held. 
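The loop and wakeup-path checks added above are exercised when one epoll descriptor watches another. A minimal user-space sketch of the nesting scenario they guard against (error handling trimmed); the second add would close a cycle and is expected to fail with ELOOP on kernels carrying this change:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>

int main(void)
{
    int ep1 = epoll_create1(0);
    int ep2 = epoll_create1(0);
    struct epoll_event ev = { .events = EPOLLIN };

    /* ep1 watches ep2: a legal single level of nesting */
    ev.data.fd = ep2;
    if (epoll_ctl(ep1, EPOLL_CTL_ADD, ep2, &ev) == -1)
        perror("add ep2 to ep1");

    /* ep2 watching ep1 would create a cycle; the loop check rejects it */
    ev.data.fd = ep1;
    if (epoll_ctl(ep2, EPOLL_CTL_ADD, ep1, &ev) == -1)
        printf("cycle rejected: %s\n", strerror(errno));

    return 0;
}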
*/ @@ -973,6 +1113,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, */ ep_rbtree_insert(ep, epi); + /* now check if we've created too many backpaths */ + error = -EINVAL; + if (reverse_path_check()) + goto error_remove_epi; + /* We have to drop the new item inside our item list to keep track of it */ spin_lock_irqsave(&ep->lock, flags); @@ -997,6 +1142,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, return 0; +error_remove_epi: + spin_lock(&tfile->f_lock); + if (ep_is_linked(&epi->fllink)) + list_del_init(&epi->fllink); + spin_unlock(&tfile->f_lock); + + rb_erase(&epi->rbn, &ep->rbr); + error_unregister: ep_unregister_pollwait(ep, epi); @@ -1223,18 +1376,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) int error = 0; struct file *file = priv; struct eventpoll *ep = file->private_data; + struct eventpoll *ep_tovisit; struct rb_node *rbp; struct epitem *epi; mutex_lock_nested(&ep->mtx, call_nests + 1); + ep->visited = 1; + list_add(&ep->visited_list_link, &visited_list); for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { + ep_tovisit = epi->ffd.file->private_data; + if (ep_tovisit->visited) + continue; error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, - ep_loop_check_proc, epi->ffd.file, - epi->ffd.file->private_data, current); + ep_loop_check_proc, epi->ffd.file, + ep_tovisit, current); if (error != 0) break; + } else { + /* + * If we've reached a file that is not associated with + * an ep, then we need to check if the newly added + * links are going to add too many wakeup paths. We do + * this by adding it to the tfile_check_list, if it's + * not already there, and calling reverse_path_check() + * during ep_insert(). + */ + if (list_empty(&epi->ffd.file->f_tfile_llink)) + list_add(&epi->ffd.file->f_tfile_llink, + &tfile_check_list); } } mutex_unlock(&ep->mtx); @@ -1255,8 +1426,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + int ret; + struct eventpoll *ep_cur, *ep_next; + + ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, ep_loop_check_proc, file, ep, current); + /* clear visited list */ + list_for_each_entry_safe(ep_cur, ep_next, &visited_list, + visited_list_link) { + ep_cur->visited = 0; + list_del(&ep_cur->visited_list_link); + } + return ret; +} + +static void clear_tfile_check_list(void) +{ + struct file *file; + + /* first clear the tfile_check_list */ + while (!list_empty(&tfile_check_list)) { + file = list_first_entry(&tfile_check_list, struct file, + f_tfile_llink); + list_del_init(&file->f_tfile_llink); + } + INIT_LIST_HEAD(&tfile_check_list); } /* @@ -1264,8 +1458,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file) */ SYSCALL_DEFINE1(epoll_create1, int, flags) { - int error; + int error, fd; struct eventpoll *ep = NULL; + struct file *file; /* Check the EPOLL_* constant for consistency. */ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); @@ -1282,11 +1477,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. 
*/ - error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, - flags & O_CLOEXEC); - if (error < 0) - ep_free(ep); - + fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC)); + if (fd < 0) { + error = fd; + goto out_free_ep; + } + file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep, + O_RDWR | (flags & O_CLOEXEC)); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto out_free_fd; + } + fd_install(fd, file); + ep->file = file; + return fd; + +out_free_fd: + put_unused_fd(fd); +out_free_ep: + ep_free(ep); return error; } @@ -1352,21 +1561,29 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* * When we insert an epoll file descriptor, inside another epoll file * descriptor, there is the change of creating closed loops, which are - * better be handled here, than in more critical paths. + * better be handled here, than in more critical paths. While we are + * checking for loops we also determine the list of files reachable + * and hang them on the tfile_check_list, so we can check that we + * haven't created too many possible wakeup paths. * - * We hold epmutex across the loop check and the insert in this case, in - * order to prevent two separate inserts from racing and each doing the - * insert "at the same time" such that ep_loop_check passes on both - * before either one does the insert, thereby creating a cycle. + * We need to hold the epmutex across both ep_insert and ep_remove + * b/c we want to make sure we are looking at a coherent view of + * epoll network. */ - if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) { + if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) { mutex_lock(&epmutex); did_lock_epmutex = 1; - error = -ELOOP; - if (ep_loop_check(ep, tfile) != 0) - goto error_tgt_fput; } - + if (op == EPOLL_CTL_ADD) { + if (is_file_epoll(tfile)) { + error = -ELOOP; + if (ep_loop_check(ep, tfile) != 0) { + clear_tfile_check_list(); + goto error_tgt_fput; + } + } else + list_add(&tfile->f_tfile_llink, &tfile_check_list); + } mutex_lock_nested(&ep->mtx, 0); @@ -1385,6 +1602,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, error = ep_insert(ep, &epds, tfile, fd); } else error = -EEXIST; + clear_tfile_check_list(); break; case EPOLL_CTL_DEL: if (epi) @@ -1403,7 +1621,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, mutex_unlock(&ep->mtx); error_tgt_fput: - if (unlikely(did_lock_epmutex)) + if (did_lock_epmutex) mutex_unlock(&epmutex); fput(tfile); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index b399912..108f4fc 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -575,8 +575,12 @@ got: if (IS_DIRSYNC(inode)) handle->h_sync = 1; if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. 
+ */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f9d6937..3191a30 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2948,6 +2948,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; + __le32 disksize; again: /* we can't allow multiple procs in here at once, its a bit racey */ @@ -2985,7 +2987,11 @@ again: raw_inode->i_gid_high = 0; } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); + disksize = cpu_to_le32(ei->i_disksize); + if (disksize != raw_inode->i_size) { + need_datasync = 1; + raw_inode->i_size = disksize; + } raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); @@ -3001,8 +3007,11 @@ again: if (!S_ISREG(inode->i_mode)) { raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); } else { - raw_inode->i_size_high = - cpu_to_le32(ei->i_disksize >> 32); + disksize = cpu_to_le32(ei->i_disksize >> 32); + if (disksize != raw_inode->i_size_high) { + raw_inode->i_size_high = disksize; + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, @@ -3055,6 +3064,8 @@ again: ei->i_state &= ~EXT3_STATE_NEW; atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); + if (need_datasync) + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); out_brelse: brelse (bh); ext3_std_error(inode->i_sb, err); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 93f7999..b4402c8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -358,6 +358,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) ext4_fsblk_t block = ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); + if (len == 0) + return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 55a93f5..29d9055 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1015,8 +1015,12 @@ got: if (IS_DIRSYNC(inode)) ext4_handle_sync(handle); if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. 
+ */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 72ba88f..efe6363 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1112,6 +1112,15 @@ void ext4_da_update_reserve_space(struct inode *inode, used = ei->i_reserved_data_blocks; } + if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " + "with only %d reserved metadata blocks\n", __func__, + inode->i_ino, ei->i_allocated_meta_blocks, + ei->i_reserved_meta_blocks); + WARN_ON(1); + ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; + } + /* Update per-inode reservations */ ei->i_reserved_data_blocks -= used; used += ei->i_allocated_meta_blocks; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4787ae6..b359543 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -855,6 +855,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, if (stat) { generic_fillattr(inode, stat); stat->mode = fi->orig_i_mode; + stat->ino = fi->orig_ino; } } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f6104a95..102d582 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1664,7 +1664,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) size_t n; u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; - for (n = 0; n < count; n++) { + for (n = 0; n < count; n++, iov++) { if (iov->iov_len > (size_t) max) return -ENOMEM; max -= iov->iov_len; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e6d614d..829acee 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -76,6 +76,9 @@ struct fuse_inode { preserve the original mode */ mode_t orig_i_mode; + /** 64 bit inode number */ + u64 orig_ino; + /** Version of last attribute change */ u64 attr_version; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1a822ce..c95186c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -86,6 +86,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->nlookup = 0; fi->attr_version = 0; fi->writectr = 0; + fi->orig_ino = 0; INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); @@ -140,6 +141,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } +/* + * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down + * so that it will fit. 
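As a rough user-space model of the folding described here (illustrative names, not the kernel helper): the high 32 bits are XORed into the low 32 bits so that distinct 64-bit inode numbers are less likely to collide after truncation to a 32-bit ino_t.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t squash_ino(uint64_t ino64)
{
    uint32_t ino = (uint32_t)ino64;

    /* fold the upper half in, mirroring the shift-by-remaining-bits idea */
    ino ^= (uint32_t)(ino64 >> 32);
    return ino;
}

int main(void)
{
    /* 0x9abcdef0 ^ 0x12345678 == 0x88888888 */
    printf("%#" PRIx32 "\n", squash_ino(0x123456789abcdef0ULL));
    return 0;
}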
+ */ +static ino_t fuse_squash_ino(u64 ino64) +{ + ino_t ino = (ino_t) ino64; + if (sizeof(ino_t) < sizeof(u64)) + ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; + return ino; +} + void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -149,7 +162,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->attr_version = ++fc->attr_version; fi->i_time = attr_valid; - inode->i_ino = attr->ino; + inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); inode->i_nlink = attr->nlink; inode->i_uid = attr->uid; @@ -175,6 +188,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->orig_i_mode = inode->i_mode; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; + + fi->orig_ino = attr->ino; } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index f6874ac..a0786c6 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -329,6 +329,10 @@ int hfsplus_rename_cat(u32 cnid, err = hfs_brec_find(&src_fd); if (err) goto out; + if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { + err = -EIO; + goto out; + } hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 5f40236..f4300ff7 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -138,6 +138,11 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos++; /* fall through */ case 1: + if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { + err = -EIO; + goto out; + } + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { printk(KERN_ERR "hfs: bad catalog folder thread\n"); @@ -168,6 +173,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) err = -EIO; goto out; } + + if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { + err = -EIO; + goto out; + } + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); type = be16_to_cpu(entry.type); len = HFSPLUS_MAX_STRLEN; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 87a1258..2179de8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -601,9 +601,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) spin_lock(&sbinfo->stat_lock); /* If no limits set, just report 0 for max/free/used * blocks, like simple_statfs() */ - if (sbinfo->max_blocks >= 0) { - buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; + if (sbinfo->spool) { + long free_pages; + + spin_lock(&sbinfo->spool->lock); + buf->f_blocks = sbinfo->spool->max_hpages; + free_pages = sbinfo->spool->max_hpages + - sbinfo->spool->used_hpages; + buf->f_bavail = buf->f_bfree = free_pages; + spin_unlock(&sbinfo->spool->lock); buf->f_files = sbinfo->max_inodes; buf->f_ffree = sbinfo->free_inodes; } @@ -619,6 +625,10 @@ static void hugetlbfs_put_super(struct super_block *sb) if (sbi) { sb->s_fs_info = NULL; + + if (sbi->spool) + hugepage_put_subpool(sbi->spool); + kfree(sbi); } } @@ -842,10 +852,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbinfo; sbinfo->hstate = config.hstate; spin_lock_init(&sbinfo->stat_lock); - sbinfo->max_blocks = config.nr_blocks; - sbinfo->free_blocks = config.nr_blocks; sbinfo->max_inodes = 
config.nr_inodes; sbinfo->free_inodes = config.nr_inodes; + sbinfo->spool = NULL; + if (config.nr_blocks != -1) { + sbinfo->spool = hugepage_new_subpool(config.nr_blocks); + if (!sbinfo->spool) + goto out_free; + } sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = huge_page_size(config.hstate); sb->s_blocksize_bits = huge_page_shift(config.hstate); @@ -865,38 +879,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = root; return 0; out_free: + if (sbinfo->spool) + kfree(sbinfo->spool); kfree(sbinfo); return -ENOMEM; } -int hugetlb_get_quota(struct address_space *mapping, long delta) -{ - int ret = 0; - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks - delta >= 0) - sbinfo->free_blocks -= delta; - else - ret = -ENOMEM; - spin_unlock(&sbinfo->stat_lock); - } - - return ret; -} - -void hugetlb_put_quota(struct address_space *mapping, long delta) -{ - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_blocks += delta; - spin_unlock(&sbinfo->stat_lock); - } -} - static int hugetlbfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a051270..5c156ad 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1822,6 +1822,8 @@ zap_buffer_unlocked: clear_buffer_mapped(bh); clear_buffer_req(bh); clear_buffer_new(bh); + clear_buffer_delay(bh); + clear_buffer_unwritten(bh); bh->b_bdev = NULL; return may_free; } diff --git a/fs/locks.c b/fs/locks.c index a8794f2..fde92d1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -291,7 +291,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock, return 0; } -static int assign_type(struct file_lock *fl, int type) +static int assign_type(struct file_lock *fl, long type) { switch (type) { case F_RDLCK: @@ -444,7 +444,7 @@ static const struct lock_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, int type, struct file_lock *fl) +static int lease_init(struct file *filp, long type, struct file_lock *fl) { if (assign_type(fl, type) != 0) return -EINVAL; @@ -462,7 +462,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lock *lease_alloc(struct file *filp, int type) +static struct file_lock *lease_alloc(struct file *filp, long type) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3f8881d..59d9304 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -66,7 +66,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3c759df..21c7190 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1586,6 +1586,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, 
fmode_t fmode, in goto err_opendata_put; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + nfs_revalidate_inode(server, state->inode); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c346808..9a3f15b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2934,4 +2934,6 @@ out: return error; } +MODULE_ALIAS("nfs4"); + #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4a82a96..6d27757 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1955,7 +1955,7 @@ out_acl: if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(1); + WRITE32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { if ((buflen -= 4) < 0) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index ad391a8..149a8a1 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -478,6 +478,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, brelse(sbh[1]); sbh[1] = NULL; sbp[1] = NULL; + valid[1] = 0; swp = 0; } if (!valid[swp]) { diff --git a/fs/signalfd.c b/fs/signalfd.c index d98bea8..02c25d7 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -29,6 +29,21 @@ #include #include +void signalfd_cleanup(struct sighand_struct *sighand) +{ + wait_queue_head_t *wqh = &sighand->signalfd_wqh; + /* + * The lockless check can race with remove_wait_queue() in progress, + * but in this case its caller should run under rcu_read_lock() and + * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return. + */ + if (likely(!waitqueue_active(wqh))) + return; + + /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */ + wake_up_poll(wqh, POLLHUP | POLLFREE); +} + struct signalfd_ctx { sigset_t sigmask; }; diff --git a/fs/udf/file.c b/fs/udf/file.c index b80cbd7..78bdef3 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -40,20 +40,24 @@ #include "udf_i.h" #include "udf_sb.h" -static int udf_adinicb_readpage(struct file *file, struct page *page) +static void __udf_adinicb_readpage(struct page *page) { struct inode *inode = page->mapping->host; char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - BUG_ON(!PageLocked(page)); - kaddr = kmap(page); - memset(kaddr, 0, PAGE_CACHE_SIZE); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); + memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); +} + +static int udf_adinicb_readpage(struct file *file, struct page *page) +{ + BUG_ON(!PageLocked(page)); + __udf_adinicb_readpage(page); unlock_page(page); return 0; @@ -78,6 +82,25 @@ static int udf_adinicb_writepage(struct page *page, return 0; } +static int udf_adinicb_write_begin(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned len, unsigned flags, struct page **pagep, + void **fsdata) +{ + struct page *page; + + if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) + return -EIO; + page = grab_cache_page_write_begin(mapping, 0, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) + __udf_adinicb_readpage(page); + return 0; +} + static int udf_adinicb_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -100,8 +123,8 @@ const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .sync_page = block_sync_page, - .write_begin = simple_write_begin, - 
.write_end = udf_adinicb_write_end, + .write_begin = udf_adinicb_write_begin, + .write_end = udf_adinicb_write_end, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/fs/udf/super.c b/fs/udf/super.c index ee6b3af..0045ebc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "udf_sb.h" @@ -1239,16 +1240,65 @@ out_bh: return ret; } +static int udf_load_sparable_map(struct super_block *sb, + struct udf_part_map *map, + struct sparablePartitionMap *spm) +{ + uint32_t loc; + uint16_t ident; + struct sparingTable *st; + struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; + int i; + struct buffer_head *bh; + + map->s_partition_type = UDF_SPARABLE_MAP15; + sdata->s_packet_len = le16_to_cpu(spm->packetLength); + if (!is_power_of_2(sdata->s_packet_len)) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Invalid packet length %u\n", + (unsigned)sdata->s_packet_len); + return -EIO; + } + if (spm->numSparingTables > 4) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Too many sparing tables (%d)\n", + (int)spm->numSparingTables); + return -EIO; + } + + for (i = 0; i < spm->numSparingTables; i++) { + loc = le32_to_cpu(spm->locSparingTable[i]); + bh = udf_read_tagged(sb, loc, loc, &ident); + if (!bh) + continue; + + st = (struct sparingTable *)bh->b_data; + if (ident != 0 || + strncmp(st->sparingIdent.ident, UDF_ID_SPARING, + strlen(UDF_ID_SPARING)) || + sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > + sb->s_blocksize) { + brelse(bh); + continue; + } + + sdata->s_spar_map[i] = bh; + } + map->s_partition_func = udf_get_pblock_spar15; + return 0; +} + static int udf_load_logicalvol(struct super_block *sb, sector_t block, struct kernel_lb_addr *fileset) { struct logicalVolDesc *lvd; - int i, j, offset; + int i, offset; uint8_t type; struct udf_sb_info *sbi = UDF_SB(sb); struct genericPartitionMap *gpm; uint16_t ident; struct buffer_head *bh; + unsigned int table_len; int ret = 0; bh = udf_read_tagged(sb, block, block, &ident); @@ -1257,6 +1307,15 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; + table_len = le32_to_cpu(lvd->mapTableLength); + if (table_len > sb->s_blocksize - sizeof(*lvd)) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Partition table too long (%u > %lu)\n", table_len, + sb->s_blocksize - sizeof(*lvd)); + ret = 1; + goto out_bh; + } + i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); if (i != 0) { ret = i; @@ -1264,7 +1323,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, } for (i = 0, offset = 0; - i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); + i < sbi->s_partitions && offset < table_len; i++, offset += gpm->partitionMapLength) { struct udf_part_map *map = &sbi->s_partmaps[i]; gpm = (struct genericPartitionMap *) @@ -1299,38 +1358,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, } else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { - uint32_t loc; - struct sparingTable *st; - struct sparablePartitionMap *spm = - (struct sparablePartitionMap *)gpm; - - map->s_partition_type = UDF_SPARABLE_MAP15; - map->s_type_specific.s_sparing.s_packet_len = - le16_to_cpu(spm->packetLength); - for (j = 0; j < spm->numSparingTables; j++) { - struct buffer_head *bh2; - 
- loc = le32_to_cpu( - spm->locSparingTable[j]); - bh2 = udf_read_tagged(sb, loc, loc, - &ident); - map->s_type_specific.s_sparing. - s_spar_map[j] = bh2; - - if (bh2 == NULL) - continue; - - st = (struct sparingTable *)bh2->b_data; - if (ident != 0 || strncmp( - st->sparingIdent.ident, - UDF_ID_SPARING, - strlen(UDF_ID_SPARING))) { - brelse(bh2); - map->s_type_specific.s_sparing. - s_spar_map[j] = NULL; - } + if (udf_load_sparable_map(sb, map, + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; + goto out_bh; } - map->s_partition_func = udf_get_pblock_spar15; } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 844a99b..bae2c99 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3298,37 +3298,26 @@ xlog_recover_process_iunlinks( */ continue; } + /* + * Unlock the buffer so that it can be acquired in the normal + * course of the transaction to truncate and free each inode. + * Because we are not racing with anyone else here for the AGI + * buffer, we don't even need to hold it locked to read the + * initial unlinked bucket entries out of the buffer. We keep + * buffer reference though, so that it stays pinned in memory + * while we need the buffer. + */ agi = XFS_BUF_TO_AGI(agibp); + xfs_buf_unlock(agibp); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { - /* - * Release the agi buffer so that it can - * be acquired in the normal course of the - * transaction to truncate and free the inode. - */ - xfs_buf_relse(agibp); - agino = xlog_recover_process_one_iunlink(mp, agno, agino, bucket); - - /* - * Reacquire the agibuffer and continue around - * the loop. This should never fail as we know - * the buffer was good earlier on. - */ - error = xfs_read_agi(mp, NULL, agno, &agibp); - ASSERT(error == 0); - agi = XFS_BUF_TO_AGI(agibp); } } - - /* - * Release the buffer for the current agi so we can - * go on to the next one. 
- */ - xfs_buf_relse(agibp); + xfs_buf_rele(agibp); } mp->m_dmevmask = mp_dmevmask; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8f32f50..1a07e03 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -554,7 +554,7 @@ xfs_readlink( char *link) { xfs_mount_t *mp = ip->i_mount; - int pathlen; + xfs_fsize_t pathlen; int error = 0; xfs_itrace_entry(ip); @@ -564,13 +564,21 @@ xfs_readlink( xfs_ilock(ip, XFS_ILOCK_SHARED); - ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); - ASSERT(ip->i_d.di_size <= MAXPATHLEN); - pathlen = ip->i_d.di_size; if (!pathlen) goto out; + if (pathlen < 0 || pathlen > MAXPATHLEN) { + xfs_fs_cmn_err(CE_ALERT, mp, + "%s: inode (%llu) bad symlink length (%lld)", + __func__, (unsigned long long) ip->i_ino, + (long long) pathlen); + ASSERT(0); + error = XFS_ERROR(EFSCORRUPTED); + goto out; + } + + if (ip->i_df.if_flags & XFS_IFINLINE) { memcpy(link, ip->i_df.if_u1.if_data, pathlen); link[pathlen] = '\0'; diff --git a/include/asm-generic/poll.h b/include/asm-generic/poll.h index 44bce83..9ce7f44 100644 --- a/include/asm-generic/poll.h +++ b/include/asm-generic/poll.h @@ -28,6 +28,8 @@ #define POLLRDHUP 0x2000 #endif +#define POLLFREE 0x4000 /* currently only for epoll */ + struct pollfd { int fd; short events; diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index f6856a5..ca399c5 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -61,6 +61,7 @@ struct file; static inline void eventpoll_init_file(struct file *file) { INIT_LIST_HEAD(&file->f_ep_links); + INIT_LIST_HEAD(&file->f_tfile_llink); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 1b9a47a..860cb6d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -941,6 +941,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links; + struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; #ifdef CONFIG_DEBUG_WRITECOUNT diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 040b679..b4f0b3f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -159,6 +159,7 @@ struct hrtimer_clock_base { * and timers * @clock_base: array of clock bases for this cpu * @curr_timer: the timer which is executing a callback right now + * @clock_was_set: Indicates that clock was set from irq context. 
* @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -171,6 +172,7 @@ struct hrtimer_clock_base { struct hrtimer_cpu_base { spinlock_t lock; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + unsigned int clock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -280,6 +282,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC HIGH_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_HIGH_RES +extern void clock_was_set_delayed(void); + #else # define MONOTONIC_RES_NSEC LOW_RES_NSEC @@ -308,11 +312,14 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; } + +static inline void clock_was_set_delayed(void) { } + #endif extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); - +extern ktime_t ktime_get_update_offsets(ktime_t *offs_real); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 41a59af..6b3feef 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -12,6 +12,15 @@ struct user_struct; #include #include +struct hugepage_subpool { + spinlock_t lock; + long count; + long max_hpages, used_hpages; +}; + +struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); +void hugepage_put_subpool(struct hugepage_subpool *spool); + int PageHuge(struct page *page); static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) @@ -138,12 +147,11 @@ struct hugetlbfs_config { }; struct hugetlbfs_sb_info { - long max_blocks; /* blocks allowed */ - long free_blocks; /* blocks free */ long max_inodes; /* inodes allowed */ long free_inodes; /* inodes free */ spinlock_t stat_lock; struct hstate *hstate; + struct hugepage_subpool *spool; }; @@ -166,8 +174,6 @@ extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, int acct, struct user_struct **user, int creat_flags); -int hugetlb_get_quota(struct address_space *mapping, long delta); -void hugetlb_put_quota(struct address_space *mapping, long delta); static inline int is_file_hugepages(struct file *file) { diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index eb73632..19abfc1 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -94,14 +94,15 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) return NULL; } +struct task_struct; #ifdef CONFIG_BLOCK int put_io_context(struct io_context *ioc); -void exit_io_context(void); +void exit_io_context(struct task_struct *task); struct io_context *get_io_context(gfp_t gfp_flags, int node); struct io_context *alloc_io_context(gfp_t gfp_flags, int node); void copy_io_context(struct io_context **pdst, struct io_context **psrc); #else -static inline void exit_io_context(void) +static inline void exit_io_context(struct task_struct *task) { } diff --git a/include/linux/irq.h b/include/linux/irq.h index 9e5f45a..2333710 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -174,7 +174,6 @@ struct irq_2_iommu; */ struct irq_desc { unsigned int irq; - struct timer_rand_state *timer_rand_state; unsigned int *kstat_irqs; #ifdef CONFIG_INTR_REMAP struct irq_2_iommu *irq_2_iommu; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9acb92d..3526cd4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -55,6 +55,19 @@ extern const 
char linux_proc_banner[]; } \ ) +/* + * Multiplies an integer by a fraction, while avoiding unnecessary + * overflow or loss of precision. + */ +#define mult_frac(x, numer, denom)( \ +{ \ + typeof(x) quot = (x) / (denom); \ + typeof(x) rem = (x) % (denom); \ + (quot * (numer)) + ((rem * (numer)) / (denom)); \ +} \ +) + + #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index ce59832..ecdf64e 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -58,13 +58,6 @@ union ktime { typedef union ktime ktime_t; /* Kill this */ -#define KTIME_MAX ((s64)~((u64)1 << 63)) -#if (BITS_PER_LONG == 64) -# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) -#else -# define KTIME_SEC_MAX LONG_MAX -#endif - /* * ktime_t definitions when using the 64-bit scalar representation: */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c728a50..8bfed57 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -556,5 +556,12 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } + +bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); + +#else + +static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } + #endif #endif diff --git a/include/linux/random.h b/include/linux/random.h index 2948046..1864957 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -44,13 +44,13 @@ struct rand_pool_info { #ifdef __KERNEL__ -extern void rand_initialize_irq(int irq); - +extern void add_device_randomness(const void *, unsigned int); extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); -extern void add_interrupt_randomness(int irq); +extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); +extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); #ifndef MODULE @@ -63,6 +63,19 @@ unsigned long randomize_range(unsigned long start, unsigned long end, unsigned l u32 random32(void); void srandom32(u32 seed); +#ifdef CONFIG_ARCH_RANDOM +# include +#else +static inline int arch_get_random_long(unsigned long *v) +{ + return 0; +} +static inline int arch_get_random_int(unsigned int *v) +{ + return 0; +} +#endif + #endif /* __KERNEL___ */ #endif /* _LINUX_RANDOM_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index b363b91..ed9b65e 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -60,13 +60,16 @@ static inline void signalfd_notify(struct task_struct *tsk, int sig) wake_up(&tsk->sighand->signalfd_wqh); } +extern void signalfd_cleanup(struct sighand_struct *sighand); + #else /* CONFIG_SIGNALFD */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { } +static inline void signalfd_cleanup(struct sighand_struct *sighand) { } + #endif /* CONFIG_SIGNALFD */ #endif /* __KERNEL__ */ #endif /* _LINUX_SIGNALFD_H */ - diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bcdd660..4e647bb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1312,6 +1312,16 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_mac_header_rebuild(struct sk_buff *skb) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + 
skb_set_mac_header(skb, -skb->mac_len); + memmove(skb_mac_header(skb), old_mac, skb->mac_len); + } +} + static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; diff --git a/include/linux/time.h b/include/linux/time.h index 6e026e4..bc93987 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -91,11 +91,36 @@ static inline struct timespec timespec_sub(struct timespec lhs, return ts_delta; } +#define KTIME_MAX ((s64)~((u64)1 << 63)) +#if (BITS_PER_LONG == 64) +# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) +#else +# define KTIME_SEC_MAX LONG_MAX +#endif + /* * Returns true if the timespec is norm, false if denorm: */ -#define timespec_valid(ts) \ - (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) +static inline bool timespec_valid(const struct timespec *ts) +{ + /* Dates before 1970 are bogus */ + if (ts->tv_sec < 0) + return false; + /* Can't have more nanoseconds then a second */ + if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + return false; + return true; +} + +static inline bool timespec_valid_strict(const struct timespec *ts) +{ + if (!timespec_valid(ts)) + return false; + /* Disallow values that could overflow ktime_t */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return false; + return true; +} extern struct timespec xtime; extern struct timespec wall_to_monotonic; diff --git a/include/linux/timex.h b/include/linux/timex.h index e6967d1..3b587b4 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -271,7 +271,7 @@ static inline int ntp_synced(void) /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ extern u64 tick_length; -extern void second_overflow(void); +extern int second_overflow(unsigned long secs); extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); diff --git a/include/net/rose.h b/include/net/rose.h index 5ba9f02..555dd19 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -14,6 +14,12 @@ #define ROSE_MIN_LEN 3 +#define ROSE_CALL_REQ_ADDR_LEN_OFF 3 +#define ROSE_CALL_REQ_ADDR_LEN_VAL 0xAA /* each address is 10 digits */ +#define ROSE_CALL_REQ_DEST_ADDR_OFF 4 +#define ROSE_CALL_REQ_SRC_ADDR_OFF 9 +#define ROSE_CALL_REQ_FACILITIES_OFF 14 + #define ROSE_GFI 0x10 #define ROSE_Q_BIT 0x80 #define ROSE_D_BIT 0x40 @@ -214,7 +220,7 @@ extern void rose_requeue_frames(struct sock *); extern int rose_validate_nr(struct sock *, unsigned short); extern void rose_write_internal(struct sock *, int); extern int rose_decode(struct sk_buff *, int *, int *, int *, int *, int *); -extern int rose_parse_facilities(unsigned char *, struct rose_facilities_struct *); +extern int rose_parse_facilities(unsigned char *, unsigned int, struct rose_facilities_struct *); extern void rose_disconnect(struct sock *, int, int, int); /* rose_timer.c */ diff --git a/kernel/cred.c b/kernel/cred.c index 0b5b5fc..9c06d10 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -443,6 +443,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) mutex_init(&p->cred_guard_mutex); + p->replacement_session_keyring = NULL; + if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/exit.c b/kernel/exit.c index 0f8fae3..a2a1659 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1020,7 +1020,7 @@ NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITPIDONE; if (tsk->io_context) - exit_io_context(); + exit_io_context(tsk); if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); diff --git a/kernel/fork.c b/kernel/fork.c 
index 4bde56f..c28f804 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -815,8 +816,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_sighand(struct sighand_struct *sighand) { - if (atomic_dec_and_test(&sighand->count)) + if (atomic_dec_and_test(&sighand->count)) { + signalfd_cleanup(sighand); kmem_cache_free(sighand_cachep, sighand); + } } @@ -1299,7 +1302,8 @@ bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: - put_io_context(p->io_context); + if (p->io_context) + exit_io_context(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: diff --git a/kernel/futex.c b/kernel/futex.c index fb98c9f..9c5ffe1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -264,17 +264,29 @@ again: page = compound_head(page); lock_page(page); + + /* + * If page->mapping is NULL, then it cannot be a PageAnon + * page; but it might be the ZERO_PAGE or in the gate area or + * in a special mapping (all cases which we are happy to fail); + * or it may have been a good file page when get_user_pages_fast + * found it, but truncated or holepunched or subjected to + * invalidate_complete_page2 before we got the page lock (also + * cases which we are happy to fail). And we hold a reference, + * so refcount care in invalidate_complete_page's remove_mapping + * prevents drop_caches from setting mapping to NULL beneath us. + * + * The case we do have to guard against is when memory pressure made + * shmem_writepage move it from filecache to swapcache beneath us: + * an unlikely race, but we do need to retry for page->mapping. + */ if (!page->mapping) { + int shmem_swizzled = PageSwapCache(page); unlock_page(page); put_page(page); - /* - * ZERO_PAGE pages don't have a mapping. Avoid a busy loop - * trying to find one. RW mapping would have COW'd (and thus - * have a mapping) so this page is RO and won't ever change. - */ - if ((page == ZERO_PAGE(address))) - return -EFAULT; - goto again; + if (shmem_swizzled) + goto again; + return -EFAULT; } /* @@ -2192,11 +2204,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * @uaddr2: the pi futex we will take prior to returning to user-space * * The caller will wait on uaddr and will be requeued by futex_requeue() to - * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and - * complete the acquisition of the rt_mutex prior to returning to userspace. - * This ensures the rt_mutex maintains an owner when it has waiters; without - * one, the pi logic wouldn't know which task to boost/deboost, if there was a - * need to. + * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake + * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to + * userspace. This ensures the rt_mutex maintains an owner when it has waiters; + * without one, the pi logic would not know which task to boost/deboost, if + * there was a need to. * * We call schedule in futex_wait_queue_me() when we enqueue and return there * via the following: @@ -2233,6 +2245,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, struct futex_q q; int res, ret; + if (uaddr == uaddr2) + return -EINVAL; + if (!bitset) return -EINVAL; @@ -2306,7 +2321,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, * signal. futex_unlock_pi() will not destroy the lock_ptr nor * the pi_state. 
*/ - WARN_ON(!&q.pi_state); + WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); debug_rt_mutex_free_waiter(&rt_waiter); @@ -2333,7 +2348,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, * fault, unlock the rt_mutex and return the fault to userspace. */ if (ret == -EFAULT) { - if (rt_mutex_owner(pi_mutex) == current) + if (pi_mutex && rt_mutex_owner(pi_mutex) == current) rt_mutex_unlock(pi_mutex); } else if (ret == -EINTR) { /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index a6e9d00..2818422 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -603,6 +603,12 @@ static int hrtimer_reprogram(struct hrtimer *timer, return res; } +static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) +{ + ktime_t *offs_real = &base->clock_base[CLOCK_REALTIME].offset; + + return ktime_get_update_offsets(offs_real); +} /* * Retrigger next event is called after clock was set @@ -612,26 +618,15 @@ static int hrtimer_reprogram(struct hrtimer *timer, static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base; - struct timespec realtime_offset; - unsigned long seq; if (!hrtimer_hres_active()) return; - do { - seq = read_seqbegin(&xtime_lock); - set_normalized_timespec(&realtime_offset, - -wall_to_monotonic.tv_sec, - -wall_to_monotonic.tv_nsec); - } while (read_seqretry(&xtime_lock, seq)); - base = &__get_cpu_var(hrtimer_bases); /* Adjust CLOCK_REALTIME offset */ spin_lock(&base->lock); - base->clock_base[CLOCK_REALTIME].offset = - timespec_to_ktime(realtime_offset); - + hrtimer_update_base(base); hrtimer_force_reprogram(base, 0); spin_unlock(&base->lock); } @@ -731,13 +726,25 @@ static int hrtimer_switch_to_hres(void) base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); - /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); local_irq_restore(flags); return 1; } +/* + * Called from timekeeping code to reprogramm the hrtimer interrupt + * device. If called from the timer interrupt context we defer it to + * softirq context. + */ +void clock_was_set_delayed(void) +{ + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + cpu_base->clock_was_set = 1; + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -1250,11 +1257,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) cpu_base->nr_events++; dev->next_event.tv64 = KTIME_MAX; - entry_time = now = ktime_get(); + spin_lock(&cpu_base->lock); + entry_time = now = hrtimer_update_base(cpu_base); retry: expires_next.tv64 = KTIME_MAX; - - spin_lock(&cpu_base->lock); /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via @@ -1328,8 +1334,12 @@ retry: * We need to prevent that we loop forever in the hrtimer * interrupt routine. We give it 3 attempts to avoid * overreacting on some spurious event. + * + * Acquire base lock for updating the offsets and retrieving + * the current time. 
*/ - now = ktime_get(); + spin_lock(&cpu_base->lock); + now = hrtimer_update_base(cpu_base); cpu_base->nr_retries++; if (++retries < 3) goto retry; @@ -1341,6 +1351,7 @@ retry: */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; + spin_unlock(&cpu_base->lock); delta = ktime_sub(now, entry_time); if (delta.tv64 > cpu_base->max_hang_time.tv64) cpu_base->max_hang_time = delta; @@ -1393,6 +1404,13 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + if (cpu_base->clock_was_set) { + cpu_base->clock_was_set = 0; + clock_was_set(); + } + hrtimer_peek_ahead_timers(); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 17c71bb..27fd0a6 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -370,7 +370,7 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) { irqreturn_t ret, retval = IRQ_NONE; - unsigned int status = 0; + unsigned int flags = 0; if (!(action->flags & IRQF_DISABLED)) local_irq_enable_in_hardirq(); @@ -413,7 +413,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) /* Fall through to add to randomness */ case IRQ_HANDLED: - status |= action->flags; + flags |= action->flags; break; default: @@ -424,8 +424,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) action = action->next; } while (action); - if (status & IRQF_SAMPLE_RANDOM) - add_interrupt_randomness(irq); + add_interrupt_randomness(irq, flags); local_irq_disable(); return retval; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 315705c..5dd29f3 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -633,22 +633,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->chip == &no_irq_chip) return -ENOSYS; - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & IRQF_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. 
- */ - rand_initialize_irq(irq); - } /* Oneshot interrupts are not allowed with shared */ if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED)) @@ -1021,7 +1005,6 @@ EXPORT_SYMBOL(free_irq); * * IRQF_SHARED Interrupt is shared * IRQF_DISABLED Disable local interrupts while processing - * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy * IRQF_TRIGGER_* Specify active edge(s) or level * */ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index cd9a40b..fd6e5f2 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -862,6 +862,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) struct sched_entity *se = __pick_next_entity(cfs_rq); s64 delta = curr->vruntime - se->vruntime; + if (delta < 0) + return; + if (delta > ideal_runtime) resched_task(rq_of(cfs_rq)->curr); } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4800f93..264928c 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -28,8 +28,6 @@ unsigned long tick_nsec; u64 tick_length; static u64 tick_length_base; -static struct hrtimer leap_timer; - #define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) @@ -108,7 +106,7 @@ static inline s64 ntp_update_offset_fll(s64 offset64, long secs) { time_status &= ~STA_MODE; - if (secs < MINSEC) + if ((s32)secs < MINSEC) return 0; if (!(time_status & STA_FLL) && (secs <= MAXSEC)) @@ -180,60 +178,64 @@ void ntp_clear(void) } /* - * Leap second processing. If in leap-insert state at the end of the - * day, the system clock is set back one second; if in leap-delete - * state, the system clock is set ahead one second. + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + * + * Also handles leap second processing, and returns leap offset */ -static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) +int second_overflow(unsigned long secs) { - enum hrtimer_restart res = HRTIMER_NORESTART; - - write_seqlock(&xtime_lock); + int leap = 0; + s64 delta; + /* + * Leap second processing. If in leap-insert state at the end of the + * day, the system clock is set back one second; if in leap-delete + * state, the system clock is set ahead one second. 
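A quick way to sanity-check the end-of-day tests used below (secs % 86400 for an insert, (secs + 1) % 86400 for a delete) is to feed them a known midnight boundary; POSIX time_t ignores leap seconds, so every UTC day is exactly 86400 counts wide. A small illustrative check:

#include <stdio.h>
#include <time.h>

int main(void)
{
    /* 2012-07-01 00:00:00 UTC, the instant after the 2012 leap second */
    time_t secs = 1341100800;

    printf("insert check (secs %% 86400 == 0): %d\n", secs % 86400 == 0);
    printf("delete check ((secs + 1) %% 86400 == 0): %d\n",
           (secs + 1) % 86400 == 0);
    return 0;
}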
+ */ switch (time_state) { case TIME_OK: + if (time_status & STA_INS) + time_state = TIME_INS; + else if (time_status & STA_DEL) + time_state = TIME_DEL; break; case TIME_INS: - timekeeping_leap_insert(-1); - time_state = TIME_OOP; - printk(KERN_NOTICE - "Clock: inserting leap second 23:59:60 UTC\n"); - hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); - res = HRTIMER_RESTART; + if (!(time_status & STA_INS)) + time_state = TIME_OK; + else if (secs % 86400 == 0) { + leap = -1; + time_state = TIME_OOP; + time_tai++; + printk(KERN_NOTICE + "Clock: inserting leap second 23:59:60 UTC\n"); + } break; case TIME_DEL: - timekeeping_leap_insert(1); - time_tai--; - time_state = TIME_WAIT; - printk(KERN_NOTICE - "Clock: deleting leap second 23:59:59 UTC\n"); + if (!(time_status & STA_DEL)) + time_state = TIME_OK; + else if ((secs + 1) % 86400 == 0) { + leap = 1; + time_tai--; + time_state = TIME_WAIT; + printk(KERN_NOTICE + "Clock: deleting leap second 23:59:59 UTC\n"); + } break; case TIME_OOP: - time_tai++; time_state = TIME_WAIT; - /* fall through */ + break; + case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; break; } - write_sequnlock(&xtime_lock); - - return res; -} - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - */ -void second_overflow(void) -{ - s64 delta; /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; @@ -253,23 +255,25 @@ void second_overflow(void) tick_length += delta; if (!time_adjust) - return; + goto out; if (time_adjust > MAX_TICKADJ) { time_adjust -= MAX_TICKADJ; tick_length += MAX_TICKADJ_SCALED; - return; + goto out; } if (time_adjust < -MAX_TICKADJ) { time_adjust += MAX_TICKADJ; tick_length -= MAX_TICKADJ_SCALED; - return; + goto out; } tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; time_adjust = 0; +out: + return leap; } #ifdef CONFIG_GENERIC_CMOS_UPDATE @@ -331,27 +335,6 @@ static void notify_cmos_timer(void) static inline void notify_cmos_timer(void) { } #endif -/* - * Start the leap seconds timer: - */ -static inline void ntp_start_leap_timer(struct timespec *ts) -{ - long now = ts->tv_sec; - - if (time_status & STA_INS) { - time_state = TIME_INS; - now += 86400 - now % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - - return; - } - - if (time_status & STA_DEL) { - time_state = TIME_DEL; - now += 86400 - (now + 1) % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - } -} /* * Propagate a new txc->status value into the NTP state: @@ -374,22 +357,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) time_status &= STA_RONLY; time_status |= txc->status & ~STA_RONLY; - switch (time_state) { - case TIME_OK: - ntp_start_leap_timer(ts); - break; - case TIME_INS: - case TIME_DEL: - time_state = TIME_OK; - ntp_start_leap_timer(ts); - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - break; - case TIME_OOP: - hrtimer_restart(&leap_timer); - break; - } } /* * Called with the xtime lock held, so we can access and modify @@ -469,9 +436,6 @@ int do_adjtimex(struct timex *txc) (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) return -EINVAL; - - if (txc->modes & ADJ_STATUS && time_state != TIME_OK) - 
hrtimer_cancel(&leap_timer); } getnstimeofday(&ts); @@ -549,6 +513,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { ntp_clear(); - hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - leap_timer.function = ntp_leap_second; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4a71cff..3d35af3 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -161,11 +161,39 @@ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static struct timespec total_sleep_time; +/* Offset clock monotonic -> clock realtime */ +static ktime_t offs_real; + +/* Offset clock monotonic -> clock boottime */ +static ktime_t offs_boot; + /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ struct timespec raw_time; +/* must hold write on xtime_lock */ +static void update_rt_offset(void) +{ + struct timespec tmp, *wtm = &wall_to_monotonic; + + set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); + offs_real = timespec_to_ktime(tmp); +} + +/* must hold write on xtime_lock */ +static void timekeeping_update(bool clearntp) +{ + if (clearntp) { + timekeeper.ntp_error = 0; + ntp_clear(); + } + update_rt_offset(); + update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); +} + + + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -183,14 +211,6 @@ void update_xtime_cache(u64 nsec) ACCESS_ONCE(xtime_cache) = ts; } -/* must hold xtime_lock */ -void timekeeping_leap_insert(int leapsecond) -{ - xtime.tv_sec += leapsecond; - wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); -} - #ifdef CONFIG_GENERIC_TIME /** @@ -334,7 +354,7 @@ int do_settimeofday(struct timespec *tv) struct timespec ts_delta; unsigned long flags; - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) + if (!timespec_valid_strict(tv)) return -EINVAL; write_seqlock_irqsave(&xtime_lock, flags); @@ -349,10 +369,7 @@ int do_settimeofday(struct timespec *tv) update_xtime_cache(0); - timekeeper.ntp_error = 0; - ntp_clear(); - - update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); + timekeeping_update(true); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -553,7 +570,20 @@ void __init timekeeping_init(void) struct timespec now, boot; read_persistent_clock(&now); + if (!timespec_valid_strict(&now)) { + printk("WARNING: Persistent clock returned invalid value!\n" + " Check your CMOS/BIOS settings.\n"); + now.tv_sec = 0; + now.tv_nsec = 0; + } + read_boot_clock(&boot); + if (!timespec_valid_strict(&boot)) { + printk("WARNING: Boot clock returned invalid value!\n" + " Check your CMOS/BIOS settings.\n"); + boot.tv_sec = 0; + boot.tv_nsec = 0; + } write_seqlock_irqsave(&xtime_lock, flags); @@ -575,6 +605,7 @@ void __init timekeeping_init(void) set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); update_xtime_cache(0); + update_rt_offset(); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -583,6 +614,12 @@ void __init timekeeping_init(void) /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; +static void update_sleep_time(struct timespec t) +{ + total_sleep_time = t; + offs_boot = timespec_to_ktime(t); +} + /** * timekeeping_resume - Resumes the generic timekeeping subsystem. 
* @dev: unused @@ -606,13 +643,14 @@ static int timekeeping_resume(struct sys_device *dev) ts = timespec_sub(ts, timekeeping_suspend_time); xtime = timespec_add_safe(xtime, ts); wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); - total_sleep_time = timespec_add_safe(total_sleep_time, ts); + update_sleep_time(timespec_add_safe(total_sleep_time, ts)); } update_xtime_cache(0); /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; + timekeeping_update(false); write_sequnlock_irqrestore(&xtime_lock, flags); touch_softlockup_watchdog(); @@ -769,6 +807,10 @@ void update_wall_time(void) #else offset = timekeeper.cycle_interval; #endif + /* Check if there's really nothing to do */ + if (offset < timekeeper.cycle_interval) + return; + timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; /* normally this loop will run just once, however in the @@ -783,9 +825,14 @@ void update_wall_time(void) timekeeper.xtime_nsec += timekeeper.xtime_interval; if (timekeeper.xtime_nsec >= nsecps) { + int leap; timekeeper.xtime_nsec -= nsecps; xtime.tv_sec++; - second_overflow(); + leap = second_overflow(xtime.tv_sec); + xtime.tv_sec += leap; + wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); } raw_time.tv_nsec += timekeeper.raw_interval; @@ -837,8 +884,7 @@ void update_wall_time(void) nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); update_xtime_cache(nsecs); - /* check to see if there is a new clocksource to use */ - update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); + timekeeping_update(false); } /** @@ -915,3 +961,35 @@ struct timespec get_monotonic_coarse(void) now.tv_nsec + mono.tv_nsec); return now; } + +#ifdef CONFIG_HIGH_RES_TIMERS +/** + * ktime_get_update_offsets - hrtimer helper + * @real: pointer to storage for monotonic -> realtime offset + * + * Returns current monotonic time and updates the offsets + * Called from hrtimer_interupt() or retrigger_next_event() + */ +ktime_t ktime_get_update_offsets(ktime_t *real) +{ + ktime_t now; + unsigned int seq; + u64 secs, nsecs; + + do { + seq = read_seqbegin(&xtime_lock); + + secs = xtime.tv_sec; + nsecs = xtime.tv_nsec; + nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); + + *real = offs_real; + } while (read_seqretry(&xtime_lock, seq)); + + now = ktime_add_ns(ktime_set(secs, 0), nsecs); + now = ktime_sub(now, *real); + return now; +} +#endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 67e526b..b617e0c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -772,6 +772,7 @@ int current_is_keventd(void) return ret; } +EXPORT_SYMBOL_GPL(current_is_keventd); static struct cpu_workqueue_struct * init_cpu_workqueue(struct workqueue_struct *wq, int cpu) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5e1e508..20f9240 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -49,6 +49,84 @@ static unsigned long __initdata default_hstate_size; */ static DEFINE_SPINLOCK(hugetlb_lock); +static inline void unlock_or_release_subpool(struct hugepage_subpool *spool) +{ + bool free = (spool->count == 0) && (spool->used_hpages == 0); + + spin_unlock(&spool->lock); + + /* If no pages are used, and no other handles to the subpool + * remain, free the subpool the subpool remain */ + if (free) + kfree(spool); +} + +struct hugepage_subpool *hugepage_new_subpool(long nr_blocks) +{ + struct hugepage_subpool *spool; + + spool = 
kmalloc(sizeof(*spool), GFP_KERNEL); + if (!spool) + return NULL; + + spin_lock_init(&spool->lock); + spool->count = 1; + spool->max_hpages = nr_blocks; + spool->used_hpages = 0; + + return spool; +} + +void hugepage_put_subpool(struct hugepage_subpool *spool) +{ + spin_lock(&spool->lock); + BUG_ON(!spool->count); + spool->count--; + unlock_or_release_subpool(spool); +} + +static int hugepage_subpool_get_pages(struct hugepage_subpool *spool, + long delta) +{ + int ret = 0; + + if (!spool) + return 0; + + spin_lock(&spool->lock); + if ((spool->used_hpages + delta) <= spool->max_hpages) { + spool->used_hpages += delta; + } else { + ret = -ENOMEM; + } + spin_unlock(&spool->lock); + + return ret; +} + +static void hugepage_subpool_put_pages(struct hugepage_subpool *spool, + long delta) +{ + if (!spool) + return; + + spin_lock(&spool->lock); + spool->used_hpages -= delta; + /* If hugetlbfs_put_super couldn't free spool due to + * an outstanding quota reference, free it now. */ + unlock_or_release_subpool(spool); +} + +static inline struct hugepage_subpool *subpool_inode(struct inode *inode) +{ + return HUGETLBFS_SB(inode->i_sb)->spool; +} + +static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) +{ + return subpool_inode(vma->vm_file->f_dentry->d_inode); +} + /* * Region tracking -- allows tracking of reservations and instantiated pages * across the pages in a mapping. @@ -541,9 +619,9 @@ static void free_huge_page(struct page *page) */ struct hstate *h = page_hstate(page); int nid = page_to_nid(page); - struct address_space *mapping; + struct hugepage_subpool *spool = + (struct hugepage_subpool *)page_private(page); - mapping = (struct address_space *) page_private(page); set_page_private(page, 0); page->mapping = NULL; BUG_ON(page_count(page)); @@ -558,8 +636,7 @@ static void free_huge_page(struct page *page) enqueue_huge_page(h, page); } spin_unlock(&hugetlb_lock); - if (mapping) - hugetlb_put_quota(mapping, 1); + hugepage_subpool_put_pages(spool, 1); } static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) @@ -927,11 +1004,12 @@ static void return_unused_surplus_pages(struct hstate *h, /* * Determine if the huge page at addr within the vma has an associated * reservation. Where it does not we will need to logically increase - * reservation and actually increase quota before an allocation can occur. - * Where any new reservation would be required the reservation change is - * prepared, but not committed. Once the page has been quota'd allocated - * an instantiated the change should be committed via vma_commit_reservation. - * No action is required on failure. + * reservation and actually increase subpool usage before an allocation + * can occur. Where any new reservation would be required the + * reservation change is prepared, but not committed. Once the page + * has been allocated from the subpool and instantiated the change should + * be committed via vma_commit_reservation. No action is required on + * failure. 
*/ static long vma_needs_reservation(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) @@ -980,24 +1058,24 @@ static void vma_commit_reservation(struct hstate *h, static struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { + struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); struct page *page; - struct address_space *mapping = vma->vm_file->f_mapping; - struct inode *inode = mapping->host; long chg; /* - * Processes that did not create the mapping will have no reserves and - * will not have accounted against quota. Check that the quota can be - * made before satisfying the allocation - * MAP_NORESERVE mappings may also need pages and quota allocated - * if no reserve mapping overlaps. + * Processes that did not create the mapping will have no + * reserves and will not have accounted against subpool + * limit. Check that the subpool limit can be made before + * satisfying the allocation MAP_NORESERVE mappings may also + * need pages and subpool limit allocated allocated if no reserve + * mapping overlaps. */ chg = vma_needs_reservation(h, vma, addr); if (chg < 0) return ERR_PTR(-VM_FAULT_OOM); if (chg) - if (hugetlb_get_quota(inode->i_mapping, chg)) + if (hugepage_subpool_get_pages(spool, chg)) return ERR_PTR(-VM_FAULT_SIGBUS); spin_lock(&hugetlb_lock); @@ -1007,13 +1085,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, if (!page) { page = alloc_buddy_huge_page(h, vma, addr); if (!page) { - hugetlb_put_quota(inode->i_mapping, chg); + hugepage_subpool_put_pages(spool, chg); return ERR_PTR(-VM_FAULT_SIGBUS); } } set_page_refcounted(page); - set_page_private(page, (unsigned long) mapping); + set_page_private(page, (unsigned long)spool); vma_commit_reservation(h, vma, addr); @@ -1698,6 +1776,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) { struct hstate *h = hstate_vma(vma); struct resv_map *reservations = vma_resv_map(vma); + struct hugepage_subpool *spool = subpool_vma(vma); unsigned long reserve; unsigned long start; unsigned long end; @@ -1713,7 +1792,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) if (reserve) { hugetlb_acct_memory(h, -reserve); - hugetlb_put_quota(vma->vm_file->f_mapping, reserve); + hugepage_subpool_put_pages(spool, reserve); } } } @@ -1910,7 +1989,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, address = address & huge_page_mask(h); pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + (vma->vm_pgoff >> PAGE_SHIFT); - mapping = (struct address_space *)page_private(page); + mapping = vma->vm_file->f_dentry->d_inode->i_mapping; vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) { /* Do not unmap the current VMA */ @@ -2364,11 +2443,12 @@ int hugetlb_reserve_pages(struct inode *inode, { long ret, chg; struct hstate *h = hstate_inode(inode); + struct hugepage_subpool *spool = subpool_inode(inode); /* * Only apply hugepage reservation if asked. 
At fault time, an * attempt will be made for VM_NORESERVE to allocate a page - * and filesystem quota without using reserves + * without using reserves */ if (acctflag & VM_NORESERVE) return 0; @@ -2395,17 +2475,17 @@ int hugetlb_reserve_pages(struct inode *inode, if (chg < 0) return chg; - /* There must be enough filesystem quota for the mapping */ - if (hugetlb_get_quota(inode->i_mapping, chg)) + /* There must be enough pages in the subpool for the mapping */ + if (hugepage_subpool_get_pages(spool, chg)) return -ENOSPC; /* * Check enough hugepages are available for the reservation. - * Hand back the quota if there are not + * Hand the pages back to the subpool if there are not */ ret = hugetlb_acct_memory(h, chg); if (ret < 0) { - hugetlb_put_quota(inode->i_mapping, chg); + hugepage_subpool_put_pages(spool, chg); return ret; } @@ -2429,11 +2509,12 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) { struct hstate *h = hstate_inode(inode); long chg = region_truncate(&inode->i_mapping->private_list, offset); + struct hugepage_subpool *spool = subpool_inode(inode); spin_lock(&inode->i_lock); inode->i_blocks -= (blocks_per_huge_page(h) * freed); spin_unlock(&inode->i_lock); - hugetlb_put_quota(inode->i_mapping, (chg - freed)); + hugepage_subpool_put_pages(spool, (chg - freed)); hugetlb_acct_memory(h, -(chg - freed)); } diff --git a/mm/madvise.c b/mm/madvise.c index 35b1479..e405c5f 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -12,6 +12,7 @@ #include #include #include +#include /* * Any behaviour which results in changes to the vma->vm_flags needs to @@ -190,14 +191,16 @@ static long madvise_remove(struct vm_area_struct *vma, struct address_space *mapping; loff_t offset, endoff; int error; + struct file *f; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) return -EINVAL; - if (!vma->vm_file || !vma->vm_file->f_mapping - || !vma->vm_file->f_mapping->host) { + f = vma->vm_file; + + if (!f || !f->f_mapping || !f->f_mapping->host) { return -EINVAL; } @@ -211,9 +214,16 @@ static long madvise_remove(struct vm_area_struct *vma, endoff = (loff_t)(end - vma->vm_start - 1) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ + /* + * vmtruncate_range may need to take i_mutex and i_alloc_sem. + * We need to explicitly grab a reference because the vma (and + * hence the vma's reference to the file) can go away as soon as + * we drop mmap_sem. + */ + get_file(f); up_read(¤t->mm->mmap_sem); error = vmtruncate_range(mapping->host, offset, endoff); + fput(f); down_read(¤t->mm->mmap_sem); return error; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3c6e3e2..a6563fb 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2259,7 +2259,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) break; default: - BUG(); + return -EINVAL; } l = strlen(policy_types[mode]); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 7e33f2c..8aa875c 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -32,6 +32,24 @@ void __mmu_notifier_release(struct mm_struct *mm) { struct mmu_notifier *mn; + struct hlist_node *n; + + /* + * RCU here will block mmu_notifier_unregister until + * ->release returns. 
+ */ + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) + /* + * if ->release runs before mmu_notifier_unregister it + * must be handled as it's the only way for the driver + * to flush all existing sptes and stop the driver + * from establishing any more sptes before all the + * pages in the mm are freed. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); spin_lock(&mm->mmu_notifier_mm->lock); while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { @@ -45,23 +63,6 @@ void __mmu_notifier_release(struct mm_struct *mm) * mmu_notifier_unregister to return. */ hlist_del_init_rcu(&mn->hlist); - /* - * RCU here will block mmu_notifier_unregister until - * ->release returns. - */ - rcu_read_lock(); - spin_unlock(&mm->mmu_notifier_mm->lock); - /* - * if ->release runs before mmu_notifier_unregister it - * must be handled as it's the only way for the driver - * to flush all existing sptes and stop the driver - * from establishing any more sptes before all the - * pages in the mm are freed. - */ - if (mn->ops->release) - mn->ops->release(mn, mm); - rcu_read_unlock(); - spin_lock(&mm->mmu_notifier_mm->lock); } spin_unlock(&mm->mmu_notifier_mm->lock); @@ -263,16 +264,13 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) { BUG_ON(atomic_read(&mm->mm_count) <= 0); - spin_lock(&mm->mmu_notifier_mm->lock); if (!hlist_unhashed(&mn->hlist)) { - hlist_del_rcu(&mn->hlist); - /* * RCU here will force exit_mmap to wait ->release to finish * before freeing the pages. */ rcu_read_lock(); - spin_unlock(&mm->mmu_notifier_mm->lock); + /* * exit_mmap will block in mmu_notifier_release to * guarantee ->release is called before freeing the @@ -281,8 +279,11 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) if (mn->ops->release) mn->ops->release(mn, mm); rcu_read_unlock(); - } else + + spin_lock(&mm->mmu_notifier_mm->lock); + hlist_del_rcu(&mn->hlist); spin_unlock(&mm->mmu_notifier_mm->lock); + } /* * Wait any running method to finish, of course including diff --git a/net/core/dev.c b/net/core/dev.c index 84a0705..46e2a29 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1133,6 +1133,7 @@ int dev_open(struct net_device *dev) /* * ... and announce new interface. */ + add_device_randomness(dev->dev_addr, dev->addr_len); call_netdevice_notifiers(NETDEV_UP, dev); } @@ -4268,6 +4269,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + add_device_randomness(dev->dev_addr, dev->addr_len); return err; } EXPORT_SYMBOL(dev_set_mac_address); @@ -4871,6 +4873,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); dev_hold(dev); list_netdevice(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. 
*/ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d4fd895..9d70042 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -817,6 +817,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; send_addr_notify = 1; modified = 1; + add_device_randomness(dev->dev_addr, dev->addr_len); } if (tb[IFLA_MTU]) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 025f924..72ff527 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2989,6 +2989,8 @@ static void sock_rmem_free(struct sk_buff *skb) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { + int len = skb->len; + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) return -ENOMEM; @@ -3000,7 +3002,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk, len); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); diff --git a/net/core/sock.c b/net/core/sock.c index 6605e75..4538a34 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1391,6 +1391,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, gfp_t gfp_mask; long timeo; int err; + int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + + err = -EMSGSIZE; + if (npages > MAX_SKB_FRAGS) + goto failure; gfp_mask = sk->sk_allocation; if (gfp_mask & __GFP_WAIT) @@ -1409,14 +1414,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { skb = alloc_skb(header_len, gfp_mask); if (skb) { - int npages; int i; /* No pages, we're done... */ if (!data_len) break; - npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; skb->truesize += data_len; skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index facedd2..ab260b0 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -214,7 +214,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -225,7 +225,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 039cc1f..10f8f8d 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1726,8 +1726,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) case CIPSO_V4_TAG_LOCAL: /* This is a non-standard tag that we only allow for * local connections, so if the incoming interface is - * not the loopback device drop the packet. */ - if (!(skb->dev->flags & IFF_LOOPBACK)) { + * not the loopback device drop the packet. Further, + * there is no legitimate reason for setting this from + * userspace so reject it if skb is NULL. 
*/ + if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f095659..b9644d8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -838,8 +838,7 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ce1ce82..db755c4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5239,7 +5239,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len) { #ifdef CONFIG_NET_DMA - if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { + if (tp->ucopy.task == current && + sock_owned_by_user(sk) && + tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { copied_early = 1; eaten = 1; } @@ -5632,6 +5634,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if (th->syn) { + if (th->fin) + goto discard; if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6fc7961..6a4e832 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -406,6 +406,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) !icsk->icsk_backoff) break; + if (sock_owned_by_user(sk)) + break; + icsk->icsk_backoff--; inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << icsk->icsk_backoff; @@ -420,11 +423,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (remaining) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, TCP_RTO_MAX); - } else if (sock_owned_by_user(sk)) { - /* RTO revert clocked out retransmission, - * but socket is locked. Will defer. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - HZ/20, TCP_RTO_MAX); } else { /* RTO revert clocked out retransmission. 
* Will retransmit now */ diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 6341818..e3db3f9 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); - - memmove(skb->data - skb->mac_len, skb_mac_header(skb), - skb->mac_len); - skb_set_mac_header(skb, -skb->mac_len); + skb_mac_header_rebuild(skb); xfrm4_beet_make_header(skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 3444f3b..5d1d1fd 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -65,7 +65,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - const unsigned char *old_mac; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -83,10 +82,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index bbd48b1..6cc7a45 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -82,7 +82,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *ip6h; - const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); int err; @@ -92,10 +91,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) __skb_push(skb, size); skb_reset_network_header(skb); - - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); + skb_mac_header_rebuild(skb); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 3927832..672c0da 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -61,7 +61,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - const unsigned char *old_mac; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; @@ -78,10 +77,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(skb); - old_mac = skb_mac_header(skb); - skb_set_mac_header(skb, -skb->mac_len); - memmove(skb_mac_header(skb), old_mac, skb->mac_len); skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + err = 0; out: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5a7dcdf..fc91ff6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -821,12 +821,19 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, return 0; } -int netlink_sendskb(struct sock *sk, struct sk_buff *skb) +static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); + return len; +} + +int netlink_sendskb(struct sock *sk, struct sk_buff *skb) +{ + int len = __netlink_sendskb(sk, skb); + 
sock_put(sk); return len; } @@ -951,8 +958,7 @@ static inline int netlink_broadcast_deliver(struct sock *sk, if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !test_bit(0, &nlk->state)) { skb_set_owner_r(skb, sk); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf; } return -1; @@ -1665,10 +1671,8 @@ static int netlink_dump(struct sock *sk) if (sk_filter(sk, skb)) kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } + else + __netlink_sendskb(sk, skb); return 0; } @@ -1680,10 +1684,8 @@ static int netlink_dump(struct sock *sk) if (sk_filter(sk, skb)) kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } + else + __netlink_sendskb(sk, skb); if (cb->done) cb->done(cb); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 9cdd35e..7481d70 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -851,6 +851,9 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, int flags = msg->msg_flags; int err, done; + if (len > 65535) + return -EMSGSIZE; + if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR)) return -EOPNOTSUPP; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 7d188bc..523efbb 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -983,7 +983,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros struct sock *make; struct rose_sock *make_rose; struct rose_facilities_struct facilities; - int n, len; + int n; skb->sk = NULL; /* Initially we don't know who it's for */ @@ -992,9 +992,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros */ memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; - len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; - if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { + if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, + skb->len - ROSE_CALL_REQ_FACILITIES_OFF, + &facilities)) { rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); return 0; } diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 114df6e..37965b8 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -72,9 +72,20 @@ static void rose_loopback_timer(unsigned long param) unsigned int lci_i, lci_o; while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + if (skb->len < ROSE_MIN_LEN) { + kfree_skb(skb); + continue; + } lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); frametype = skb->data[2]; - dest = (rose_address *)(skb->data + 4); + if (frametype == ROSE_CALL_REQUEST && + (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || + skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != + ROSE_CALL_REQ_ADDR_LEN_VAL)) { + kfree_skb(skb); + continue; + } + dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); lci_o = 0xFFF - lci_i; skb_reset_transport_header(skb); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 08230fa..1646b25 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -852,7 +852,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) unsigned int lci, new_lci; unsigned char cause, diagnostic; struct net_device *dev; - int len, res = 0; + int res = 0; char buf[11]; #if 0 @@ -860,10 +860,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) return res; #endif + if (skb->len < 
ROSE_MIN_LEN) + return res; frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); - src_addr = (rose_address *)(skb->data + 9); - dest_addr = (rose_address *)(skb->data + 4); + if (frametype == ROSE_CALL_REQUEST && + (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || + skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != + ROSE_CALL_REQ_ADDR_LEN_VAL)) + return res; + src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF); + dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); spin_lock_bh(&rose_neigh_list_lock); spin_lock_bh(&rose_route_list_lock); @@ -1001,12 +1008,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) goto out; } - len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; - len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; - memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { + if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, + skb->len - ROSE_CALL_REQ_FACILITIES_OFF, + &facilities)) { rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); goto out; } diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 07bca7d..32e5c9f 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -141,7 +141,7 @@ void rose_write_internal(struct sock *sk, int frametype) *dptr++ = ROSE_GFI | lci1; *dptr++ = lci2; *dptr++ = frametype; - *dptr++ = 0xAA; + *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL; memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); @@ -245,12 +245,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * do { switch (*p & 0xC0) { case 0x00: + if (len < 2) + return -1; p += 2; n += 2; len -= 2; break; case 0x40: + if (len < 3) + return -1; if (*p == FAC_NATIONAL_RAND) facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF); p += 3; @@ -259,32 +263,48 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * break; case 0x80: + if (len < 4) + return -1; p += 4; n += 4; len -= 4; break; case 0xC0: + if (len < 2) + return -1; l = p[1]; + if (len < 2 + l) + return -1; if (*p == FAC_NATIONAL_DEST_DIGI) { if (!fac_national_digis_received) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN); facilities->source_ndigis = 1; } } else if (*p == FAC_NATIONAL_SRC_DIGI) { if (!fac_national_digis_received) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN); facilities->dest_ndigis = 1; } } else if (*p == FAC_NATIONAL_FAIL_CALL) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN); } else if (*p == FAC_NATIONAL_FAIL_ADD) { + if (l < 1 + ROSE_ADDR_LEN) + return -1; memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN); } else if (*p == FAC_NATIONAL_DIGIS) { + if (l % AX25_ADDR_LEN) + return -1; fac_national_digis_received = 1; facilities->source_ndigis = 0; facilities->dest_ndigis = 0; @@ -318,24 +338,32 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac do { switch (*p & 0xC0) { case 0x00: + if (len < 2) + return -1; p += 2; n += 2; len -= 2; break; case 0x40: + if (len < 3) + return -1; p += 3; n += 3; len -= 3; break; case 0x80: + if (len < 4) + return -1; p += 4; n += 4; len -= 4; break; case 0xC0: + if (len < 2) + return -1; l = p[1]; /* Prevent overflows*/ @@ -364,49 +392,44 @@ static int rose_parse_ccitt(unsigned char *p, 
struct rose_facilities_struct *fac return n; } -int rose_parse_facilities(unsigned char *p, +int rose_parse_facilities(unsigned char *p, unsigned packet_len, struct rose_facilities_struct *facilities) { int facilities_len, len; facilities_len = *p++; - if (facilities_len == 0) + if (facilities_len == 0 || (unsigned)facilities_len > packet_len) return 0; - while (facilities_len > 0) { - if (*p == 0x00) { - facilities_len--; - p++; - - switch (*p) { - case FAC_NATIONAL: /* National */ - len = rose_parse_national(p + 1, facilities, facilities_len - 1); - if (len < 0) - return 0; - facilities_len -= len + 1; - p += len + 1; - break; - - case FAC_CCITT: /* CCITT */ - len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); - if (len < 0) - return 0; - facilities_len -= len + 1; - p += len + 1; - break; - - default: - printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); - facilities_len--; - p++; - break; - } - } else - break; /* Error in facilities format */ + while (facilities_len >= 3 && *p == 0x00) { + facilities_len--; + p++; + + switch (*p) { + case FAC_NATIONAL: /* National */ + len = rose_parse_national(p + 1, facilities, facilities_len - 1); + break; + + case FAC_CCITT: /* CCITT */ + len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); + break; + + default: + printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); + len = 1; + break; + } + + if (len < 0) + return 0; + if (WARN_ON(len >= facilities_len)) + return 0; + facilities_len -= len + 1; + p += len + 1; } - return 1; + return facilities_len == 0; } static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 40408d5..bf98414 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -544,11 +544,8 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) opt.packets = q->packetsin; opt.bytesin = q->bytesin; - if (gred_wred_mode(table)) { - q->parms.qidlestart = - table->tab[table->def]->parms.qidlestart; - q->parms.qavg = table->tab[table->def]->parms.qavg; - } + if (gred_wred_mode(table)) + gred_load_wred_set(table, q); opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 2b88295..0ae345a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -199,12 +199,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it. 
*/ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { - if (!(skb = skb_unshare(skb, GFP_ATOMIC)) - || (skb->ip_summed == CHECKSUM_PARTIAL - && skb_checksum_help(skb))) { - sch->qstats.drops++; - return NET_XMIT_DROP; - } + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || + (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb))) + return qdisc_drop(skb, sch); skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } diff --git a/net/sctp/input.c b/net/sctp/input.c index 254afea..e8e73f1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -739,15 +739,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) epb = &ep->base; - if (hlist_unhashed(&epb->node)) - return; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } @@ -828,7 +825,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3a95fcb..1f9843e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1142,8 +1142,14 @@ out_free: SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" " kaddrs: %p err: %d\n", asoc, kaddrs, err); - if (asoc) + if (asoc) { + /* sctp_primitive_ASSOCIATE may have added this association + * To the hash table, try to unhash it, just in case, its a noop + * if it wasn't hashed so we're safe + */ + sctp_unhash_established(asoc); sctp_association_free(asoc); + } return err; } @@ -1851,8 +1857,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; out_free: - if (new_asoc) + if (new_asoc) { + sctp_unhash_established(asoc); sctp_association_free(asoc); + } out_unlock: sctp_release_sock(sk); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 25f7801..e3fea46 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -719,6 +719,8 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, { ssize_t ret; + if (count == 0) + return -EINVAL; if (copy_from_user(kaddr, buf, count)) return -EFAULT; kaddr[count] = '\0'; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ac94477..9b3941d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -485,14 +485,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); rpc_wake_up_task_queue_locked(queue, task); + } if (head == &queue->tasks[0]) break; head--; @@ -510,13 +514,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) { + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); task->tk_status = status; rpc_wake_up_task_queue_locked(queue, task); } diff --git 
a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 314320a..8d72660 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -304,7 +304,6 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_serv *serv = xprt->xpt_server; struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; @@ -381,8 +380,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp, rqstp->rq_xprt); rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); rqstp->rq_waking = 1; pool->sp_nwaking++; pool->sp_stats.threads_woken++; @@ -667,8 +664,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (xprt) { rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } else { /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); @@ -758,6 +753,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) } else len = xprt->xpt_ops->xpo_recvfrom(rqstp); dprintk("svc: got len=%d\n", len); + rqstp->rq_reserved = serv->sv_max_mesg; + atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } /* No data, incomplete (TCP) read, or accept() */ @@ -811,7 +808,8 @@ int svc_send(struct svc_rqst *rqstp) /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) + if (test_bit(XPT_DEAD, &xprt->xpt_flags) + || test_bit(XPT_CLOSE, &xprt->xpt_flags)) len = -ENOTCONN; else len = xprt->xpt_ops->xpo_sendto(rqstp); diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 258daa8..0d8380a 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -603,36 +603,31 @@ static int wanrouter_device_new_if(struct wan_device *wandev, * successfully, add it to the interface list. */ - if (dev->name == NULL) { - err = -EINVAL; - } else { +#ifdef WANDEBUG + printk(KERN_INFO "%s: registering interface %s...\n", + wanrouter_modname, dev->name); +#endif - #ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); - #endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; + err = register_netdev(dev); + if (!err) { + struct net_device *slave = NULL; + unsigned long smp_flags=0; + + lock_adapter_irq(&wandev->lock, &smp_flags); + + if (wandev->dev == NULL) { + wandev->dev = dev; + } else { + for (slave=wandev->dev; + DEV_TO_SLAVE(slave); + slave = DEV_TO_SLAVE(slave)) + DEV_TO_SLAVE(slave) = dev; } + ++wandev->ndev; + + unlock_adapter_irq(&wandev->lock, &smp_flags); + err = 0; /* done !!! 
*/ + goto out; } if (wandev->del_if) wandev->del_if(wandev, dev); diff --git a/security/commoncap.c b/security/commoncap.c index fe30751..ee9d623 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * If a non-root user executes a setuid-root binary in @@ -511,6 +512,11 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) } skip: + /* if we have fs caps, clear dangerous personality flags */ + if (!cap_issubset(new->cap_permitted, old->cap_permitted)) + bprm->per_clear |= PER_CLEAR_ON_SETID; + + /* Don't let someone trace a set[ug]id/setpcap binary with the revised * credentials unless they have the appropriate permit */ diff --git a/sound/drivers/mpu401/mpu401_uart.c b/sound/drivers/mpu401/mpu401_uart.c index 2af0999..74f5a3d 100644 --- a/sound/drivers/mpu401/mpu401_uart.c +++ b/sound/drivers/mpu401/mpu401_uart.c @@ -554,6 +554,7 @@ int snd_mpu401_uart_new(struct snd_card *card, int device, spin_lock_init(&mpu->output_lock); spin_lock_init(&mpu->timer_lock); mpu->hardware = hardware; + mpu->irq = -1; if (! (info_flags & MPU401_INFO_INTEGRATED)) { int res_size = hardware == MPU401_HW_PC98II ? 4 : 2; mpu->res = request_region(port, res_size, "MPU401 UART"); diff --git a/sound/pci/echoaudio/echoaudio_dsp.c b/sound/pci/echoaudio/echoaudio_dsp.c index 4df51ef..5d14b7a 100644 --- a/sound/pci/echoaudio/echoaudio_dsp.c +++ b/sound/pci/echoaudio/echoaudio_dsp.c @@ -474,7 +474,7 @@ static int load_firmware(struct echoaudio *chip) const struct firmware *fw; int box_type, err; - if (snd_BUG_ON(!chip->dsp_code_to_load || !chip->comm_page)) + if (snd_BUG_ON(!chip->comm_page)) return -EPERM; /* See if the ASIC is present and working - only if the DSP is already loaded */ diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c index 2b3d859..9294d40 100644 --- a/sound/pci/hda/hda_proc.c +++ b/sound/pci/hda/hda_proc.c @@ -340,7 +340,7 @@ static void print_digital_conv(struct snd_info_buffer *buffer, if (digi1 & AC_DIG1_EMPHASIS) snd_iprintf(buffer, " Preemphasis"); if (digi1 & AC_DIG1_COPYRIGHT) - snd_iprintf(buffer, " Copyright"); + snd_iprintf(buffer, " Non-Copyright"); if (digi1 & AC_DIG1_NONAUDIO) snd_iprintf(buffer, " Non-Audio"); if (digi1 & AC_DIG1_PROFESSIONAL) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4f3434f..82b6fdc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #include @@ -575,12 +577,76 @@ out: return r; } +/* + * We want to test whether the caller has been granted permissions to + * use this device. To be able to configure and control the device, + * the user needs access to PCI configuration space and BAR resources. + * These are accessed through PCI sysfs. PCI config space is often + * passed to the process calling this ioctl via file descriptor, so we + * can't rely on access to that file. We can check for permissions + * on each of the BAR resource files, which is a pretty clear + * indicator that the user has been granted access to the device. 
+ */ +static int probe_sysfs_permissions(struct pci_dev *dev) +{ +#ifdef CONFIG_SYSFS + int i; + bool bar_found = false; + + for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { + char *kpath, *syspath; + struct path path; + struct inode *inode; + int r; + + if (!pci_resource_len(dev, i)) + continue; + + kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); + if (!kpath) + return -ENOMEM; + + /* Per sysfs-rules, sysfs is always at /sys */ + syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); + kfree(kpath); + if (!syspath) + return -ENOMEM; + + r = kern_path(syspath, LOOKUP_FOLLOW, &path); + kfree(syspath); + if (r) + return r; + + inode = path.dentry->d_inode; + + r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); + path_put(&path); + if (r) + return r; + + bar_found = true; + } + + /* If no resources, probably something special */ + if (!bar_found) + return -EPERM; + + return 0; +#else + return -EINVAL; /* No way to control the device without sysfs */ +#endif +} + static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) { int r = 0; struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; + u8 header_type; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) + return -EINVAL; down_read(&kvm->slots_lock); mutex_lock(&kvm->lock); @@ -607,6 +673,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, r = -EINVAL; goto out_free; } + + /* Don't allow bridges to be assigned */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) { + r = -EPERM; + goto out_put; + } + + r = probe_sysfs_permissions(dev); + if (r) + goto out_put; + if (pci_enable_device(dev)) { printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); r = -EBUSY; @@ -635,16 +713,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(&match->list, &kvm->arch.assigned_dev_head); - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - if (!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match); + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); if (r) goto out_list_del; } + r = kvm_assign_device(kvm, match); + if (r) + goto out_list_del; out: mutex_unlock(&kvm->lock); @@ -683,8 +759,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) - kvm_deassign_device(kvm, match); + kvm_deassign_device(kvm, match); kvm_free_assigned_device(kvm, match); @@ -1782,6 +1857,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) return r; mutex_lock(&kvm->lock); + if (!kvm_vcpu_compatible(vcpu)) { + r = -EINVAL; + goto vcpu_destroy; + } if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { r = -EINVAL; goto vcpu_destroy;