[packages/kernel/LINUX_4_9] rt patch updated to 4.9.65-rt57
jajcus
jajcus at pld-linux.org
Fri Dec 15 13:10:15 CET 2017
commit 7323c6802138dc483e23b468239a67b3ca38cdac
Author: Jacek Konieczny <j.konieczny at eggsoft.pl>
Date: Fri Dec 15 13:08:25 2017 +0100
rt patch updated to 4.9.65-rt57
One chunk was dropped: it no longer applies and appears obsolete (it was
inside #ifdef-ed code, which is now gone).
This version of RT for the 4.9 kernel should be much more stable now.
kernel-rt.patch | 3335 ++++++++++++++++++++++++++++++++-----------------------
kernel.spec | 2 +-
2 files changed, 1966 insertions(+), 1371 deletions(-)
---
diff --git a/kernel.spec b/kernel.spec
index abdf3e46..4e057629 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -213,7 +213,7 @@ Patch146: kernel-aufs4+vserver.patch
Patch250: kernel-fix_256colors_menuconfig.patch
# https://rt.wiki.kernel.org/
-# https://www.kernel.org/pub/linux/kernel/projects/rt/4.9/patch-4.9.27-rt18.patch.xz
+# http://www.kernel.org/pub/linux/kernel/projects/rt/4.9/patch-4.9.65-rt57.patch.xz
Patch500: kernel-rt.patch
Patch2000: kernel-small_fixes.patch
diff --git a/kernel-rt.patch b/kernel-rt.patch
index 69fb3ec7..f620c485 100644
--- a/kernel-rt.patch
+++ b/kernel-rt.patch
@@ -378,7 +378,7 @@ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 9f157e7c51e7..468e224d76aa 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
-@@ -220,11 +220,18 @@ ENDPROC(__dabt_svc)
+@@ -220,11 +220,18 @@ __irq_svc:
#ifdef CONFIG_PREEMPT
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
@@ -399,7 +399,7 @@ index 9f157e7c51e7..468e224d76aa 100644
#endif
svc_exit r5, irq = 1 @ return from exception
-@@ -239,8 +246,14 @@ ENDPROC(__irq_svc)
+@@ -239,8 +246,14 @@ svc_preempt:
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
@@ -419,7 +419,7 @@ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 10c3283d6c19..8872937862cc 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
-@@ -36,7 +36,9 @@
+@@ -36,7 +36,9 @@ ret_fast_syscall:
UNWIND(.cantunwind )
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
@@ -430,7 +430,7 @@ index 10c3283d6c19..8872937862cc 100644
bne fast_work_pending
/* perform architecture specific actions before user return */
-@@ -62,8 +64,11 @@ ENDPROC(ret_fast_syscall)
+@@ -62,8 +64,11 @@ ret_fast_syscall:
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
@@ -983,10 +983,10 @@ index ea5a2277ee46..b988e081ac79 100644
return pen_release != -1 ? -ENOSYS : 0;
}
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
-index 0122ad1a6027..926b1be48043 100644
+index f7861dc83182..ce47dfe25fb0 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
-@@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+@@ -433,6 +433,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
@@ -996,7 +996,7 @@ index 0122ad1a6027..926b1be48043 100644
if (user_mode(regs))
goto bad_area;
-@@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+@@ -500,6 +503,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
@@ -1255,10 +1255,10 @@ index c58ddf8c4062..a8f2f7c1fe12 100644
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
-index 79b0fe24d5b7..f3c959ade308 100644
+index b4c7db434654..433d846f4f51 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
-@@ -428,11 +428,16 @@ ENDPROC(el1_sync)
+@@ -430,11 +430,16 @@ el1_irq:
#ifdef CONFIG_PREEMPT
ldr w24, [tsk, #TI_PREEMPT] // get preempt count
@@ -1278,7 +1278,7 @@ index 79b0fe24d5b7..f3c959ade308 100644
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
-@@ -446,6 +451,7 @@ ENDPROC(el1_irq)
+@@ -448,6 +453,7 @@ el1_preempt:
1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
@@ -1300,10 +1300,10 @@ index 404dd67080b9..639dc6d12e72 100644
} else {
local_irq_enable();
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
-index 5a4f2eb9d0d5..867eca2e7210 100644
+index 5e844f68e847..dc613cc10f54 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
-@@ -2515,7 +2515,7 @@ config MIPS_ASID_BITS_VARIABLE
+@@ -2516,7 +2516,7 @@ config MIPS_ASID_BITS_VARIABLE
#
config HIGHMEM
bool "High Memory Support"
@@ -1313,7 +1313,7 @@ index 5a4f2eb9d0d5..867eca2e7210 100644
config CPU_SUPPORTS_HIGHMEM
bool
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
-index 65fba4c34cd7..4b5ba68910e0 100644
+index 6eda5abbd719..601e27701a4a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT
@@ -1412,7 +1412,7 @@ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 3841d749a430..6dbaeff192b9 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
-@@ -835,7 +835,14 @@ user_exc_return: /* r10 contains MSR_KERNEL here */
+@@ -835,7 +835,14 @@ resume_kernel:
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
andi. r8,r8,_TIF_NEED_RESCHED
@@ -1427,7 +1427,7 @@ index 3841d749a430..6dbaeff192b9 100644
lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
beq restore /* don't schedule if so */
-@@ -846,11 +853,11 @@ user_exc_return: /* r10 contains MSR_KERNEL here */
+@@ -846,11 +853,11 @@ resume_kernel:
*/
bl trace_hardirqs_off
#endif
@@ -1442,7 +1442,7 @@ index 3841d749a430..6dbaeff192b9 100644
#ifdef CONFIG_TRACE_IRQFLAGS
/* And now, to properly rebalance the above, we tell lockdep they
* are being turned back on, which will happen when we return
-@@ -1171,7 +1178,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
+@@ -1171,7 +1178,7 @@ global_dbcr0:
#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
do_work: /* r10 contains MSR_KERNEL here */
@@ -1451,7 +1451,7 @@ index 3841d749a430..6dbaeff192b9 100644
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
-@@ -1192,7 +1199,7 @@ do_resched: /* r10 contains MSR_KERNEL here */
+@@ -1192,7 +1199,7 @@ recheck:
MTMSRD(r10) /* disable interrupts */
CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_FLAGS(r9)
@@ -1461,7 +1461,7 @@ index 3841d749a430..6dbaeff192b9 100644
andi. r0,r9,_TIF_USER_WORK_MASK
beq restore_user
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
-index 767ef6d68c9e..2cb4d5552319 100644
+index caa659671599..891080c4a41e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -656,7 +656,7 @@ _GLOBAL(ret_from_except_lite)
@@ -1473,29 +1473,27 @@ index 767ef6d68c9e..2cb4d5552319 100644
beq 2f
bl restore_interrupts
SCHEDULE_USER
-@@ -718,10 +718,18 @@ _GLOBAL(ret_from_except_lite)
+@@ -718,10 +718,18 @@ resume_kernel:
#ifdef CONFIG_PREEMPT
/* Check if we need to preempt */
-- andi. r0,r4,_TIF_NEED_RESCHED
-- beq+ restore
-- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
++ lwz r8,TI_PREEMPT(r9)
+ cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
+ bne restore
-+ andi. r0,r4,_TIF_NEED_RESCHED
+ andi. r0,r4,_TIF_NEED_RESCHED
+ bne+ check_count
+
+ andi. r0,r4,_TIF_NEED_RESCHED_LAZY
-+ beq+ restore
+ beq+ restore
+ lwz r8,TI_PREEMPT_LAZY(r9)
+
-+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+- lwz r8,TI_PREEMPT(r9)
+check_count:
cmpwi cr1,r8,0
ld r0,SOFTE(r1)
cmpdi r0,0
-@@ -738,7 +746,7 @@ _GLOBAL(ret_from_except_lite)
+@@ -738,7 +746,7 @@ resume_kernel:
/* Re-test flags and eventually loop */
CURRENT_THREAD_INFO(r9, r1)
ld r4,TI_FLAGS(r9)
@@ -1505,10 +1503,10 @@ index 767ef6d68c9e..2cb4d5552319 100644
/*
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
-index 3c05c311e35e..f83f6ac1274d 100644
+index 028a22bfa90c..a75e2dd3e71f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
-@@ -638,6 +638,7 @@ void irq_ctx_init(void)
+@@ -651,6 +651,7 @@ void irq_ctx_init(void)
}
}
@@ -1516,7 +1514,7 @@ index 3c05c311e35e..f83f6ac1274d 100644
void do_softirq_own_stack(void)
{
struct thread_info *curtp, *irqtp;
-@@ -655,6 +656,7 @@ void do_softirq_own_stack(void)
+@@ -668,6 +669,7 @@ void do_softirq_own_stack(void)
if (irqtp->flags)
set_bits(irqtp->flags, &curtp->flags);
}
@@ -1610,7 +1608,7 @@ index 6c0378c0b8b5..abd58b4dff97 100644
static inline void handle_one_irq(unsigned int irq)
{
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
-index 165ecdd24d22..b68a464a22be 100644
+index 8b4152f3a764..c5cca159692a 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -194,12 +194,10 @@ config NR_CPUS
@@ -1629,7 +1627,7 @@ index 165ecdd24d22..b68a464a22be 100644
config GENERIC_HWEIGHT
bool
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
-index 34a7930b76ef..773740521008 100644
+index 5cbf03c14981..6067d9379e5b 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
@@ -1649,7 +1647,7 @@ index 34a7930b76ef..773740521008 100644
#ifdef CONFIG_HOTPLUG_CPU
void fixup_irqs(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index bada636d1065..f8a995c90c01 100644
+index b9c546a305a4..e96c2975af4f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,6 +17,7 @@ config X86_64
@@ -2042,10 +2040,10 @@ index edba8606b99a..4a3389535fc6 100644
jz restore_all
call preempt_schedule_irq
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index ef766a358b37..28401f826ab1 100644
+index e7b0e7ff4c58..65916d49dbc9 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
-@@ -546,7 +546,23 @@ GLOBAL(retint_user)
+@@ -546,7 +546,23 @@ retint_kernel:
bt $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
0: cmpl $0, PER_CPU_VAR(__preempt_count)
@@ -2069,7 +2067,7 @@ index ef766a358b37..28401f826ab1 100644
call preempt_schedule_irq
jmp 0b
1:
-@@ -894,6 +910,7 @@ EXPORT_SYMBOL(native_load_gs_index)
+@@ -894,6 +910,7 @@ bad_gs:
jmp 2b
.previous
@@ -2283,7 +2281,7 @@ index 57ab86d94d64..35d25e27180f 100644
}
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
-index 931ced8ca345..167975ac8af7 100644
+index b89bef95f63b..c3c1ad2fce5c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -2297,7 +2295,7 @@ index 931ced8ca345..167975ac8af7 100644
/* --------------------------------------------------------------------------
Boot-time Configuration
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
-index d1e25564b3c1..67e585fa801f 100644
+index cf89928dbd46..18b5ec2a71df 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1712,7 +1712,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
@@ -2329,7 +2327,7 @@ index c62e015b126c..0cc71257fca6 100644
+ DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
-index 22cda29d654e..57c85e3af092 100644
+index 8ca5f8ad008e..edcbd18b3189 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -41,6 +41,8 @@
@@ -2341,7 +2339,7 @@ index 22cda29d654e..57c85e3af092 100644
#include <linux/jump_label.h>
#include <asm/processor.h>
-@@ -1307,7 +1309,7 @@ void mce_log_therm_throt_event(__u64 status)
+@@ -1306,7 +1308,7 @@ void mce_log_therm_throt_event(__u64 status)
static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
@@ -2350,7 +2348,7 @@ index 22cda29d654e..57c85e3af092 100644
static unsigned long mce_adjust_timer_default(unsigned long interval)
{
-@@ -1316,32 +1318,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
+@@ -1315,32 +1317,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
@@ -2389,7 +2387,7 @@ index 22cda29d654e..57c85e3af092 100644
iv = __this_cpu_read(mce_next_interval);
if (mce_available(this_cpu_ptr(&cpu_info))) {
-@@ -1364,7 +1352,7 @@ static void mce_timer_fn(unsigned long data)
+@@ -1363,7 +1351,7 @@ static void mce_timer_fn(unsigned long data)
done:
__this_cpu_write(mce_next_interval, iv);
@@ -2398,7 +2396,7 @@ index 22cda29d654e..57c85e3af092 100644
}
/*
-@@ -1372,7 +1360,7 @@ static void mce_timer_fn(unsigned long data)
+@@ -1371,7 +1359,7 @@ static void mce_timer_fn(unsigned long data)
*/
void mce_timer_kick(unsigned long interval)
{
@@ -2407,7 +2405,7 @@ index 22cda29d654e..57c85e3af092 100644
unsigned long iv = __this_cpu_read(mce_next_interval);
__restart_timer(t, interval);
-@@ -1387,7 +1375,7 @@ static void mce_timer_delete_all(void)
+@@ -1386,7 +1374,7 @@ static void mce_timer_delete_all(void)
int cpu;
for_each_online_cpu(cpu)
@@ -2416,7 +2414,7 @@ index 22cda29d654e..57c85e3af092 100644
}
static void mce_do_trigger(struct work_struct *work)
-@@ -1397,6 +1385,56 @@ static void mce_do_trigger(struct work_struct *work)
+@@ -1396,6 +1384,56 @@ static void mce_do_trigger(struct work_struct *work)
static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
@@ -2473,7 +2471,7 @@ index 22cda29d654e..57c85e3af092 100644
/*
* Notify the user(s) about new machine check events.
* Can be called from interrupt context, but not from machine check/NMI
-@@ -1404,19 +1442,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+@@ -1403,19 +1441,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
*/
int mce_notify_irq(void)
{
@@ -2494,7 +2492,7 @@ index 22cda29d654e..57c85e3af092 100644
return 1;
}
return 0;
-@@ -1722,7 +1749,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
+@@ -1721,7 +1748,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
}
}
@@ -2503,7 +2501,7 @@ index 22cda29d654e..57c85e3af092 100644
{
unsigned long iv = check_interval * HZ;
-@@ -1731,16 +1758,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+@@ -1730,16 +1757,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
per_cpu(mce_next_interval, cpu) = iv;
@@ -2525,7 +2523,7 @@ index 22cda29d654e..57c85e3af092 100644
mce_start_timer(cpu, t);
}
-@@ -2465,6 +2493,8 @@ static void mce_disable_cpu(void *h)
+@@ -2464,6 +2492,8 @@ static void mce_disable_cpu(void *h)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
@@ -2534,7 +2532,7 @@ index 22cda29d654e..57c85e3af092 100644
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
-@@ -2487,6 +2517,7 @@ static void mce_reenable_cpu(void *h)
+@@ -2486,6 +2516,7 @@ static void mce_reenable_cpu(void *h)
if (b->init)
wrmsrl(msr_ops.ctl(i), b->ctl);
}
@@ -2542,7 +2540,7 @@ index 22cda29d654e..57c85e3af092 100644
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-@@ -2494,7 +2525,6 @@ static int
+@@ -2493,7 +2524,6 @@ static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
@@ -2550,7 +2548,7 @@ index 22cda29d654e..57c85e3af092 100644
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
-@@ -2514,11 +2544,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+@@ -2513,11 +2543,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
@@ -2562,7 +2560,7 @@ index 22cda29d654e..57c85e3af092 100644
break;
}
-@@ -2557,6 +2585,10 @@ static __init int mcheck_init_device(void)
+@@ -2556,6 +2584,10 @@ static __init int mcheck_init_device(void)
goto err_out;
}
@@ -2663,10 +2661,10 @@ index 3f05c044720b..fe68afd37162 100644
/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index e5bc139d1ba7..fa0aa5931a4b 100644
+index 595f8149c0d9..31b15149f412 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
-@@ -5933,6 +5933,13 @@ int kvm_arch_init(void *opaque)
+@@ -5961,6 +5961,13 @@ int kvm_arch_init(void *opaque)
goto out;
}
@@ -2959,7 +2957,7 @@ index b333fc45f9ec..8b85916e6986 100644
/*
diff --git a/block/blk-core.c b/block/blk-core.c
-index d1f2801ce836..6f945bb0fa1a 100644
+index 95379fc83805..e531da0c9232 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
@@ -2976,8 +2974,8 @@ index d1f2801ce836..6f945bb0fa1a 100644
**/
void blk_start_queue(struct request_queue *q)
{
-- WARN_ON(!irqs_disabled());
-+ WARN_ON_NONRT(!irqs_disabled());
+- WARN_ON(!in_interrupt() && !irqs_disabled());
++ WARN_ON_NONRT(!in_interrupt() && !irqs_disabled());
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
__blk_run_queue(q);
@@ -3461,17 +3459,17 @@ index 051b6158d1b7..7ad293bef6ed 100644
buf = page_address(page);
consumed = ap->ops->sff_data_xfer(dev, buf + offset,
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
-index 4b5cd3a7b2b6..fa8329ad79fd 100644
+index 4b5cd3a7b2b6..8c93ee150ee8 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
-@@ -118,12 +118,19 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
+@@ -118,12 +118,20 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
{
- return *get_cpu_ptr(comp->stream);
+ struct zcomp_strm *zstrm;
+
-+ zstrm = *this_cpu_ptr(comp->stream);
++ zstrm = *get_local_ptr(comp->stream);
+ spin_lock(&zstrm->zcomp_lock);
+ return zstrm;
}
@@ -3483,10 +3481,11 @@ index 4b5cd3a7b2b6..fa8329ad79fd 100644
+
+ zstrm = *this_cpu_ptr(comp->stream);
+ spin_unlock(&zstrm->zcomp_lock);
++ put_local_ptr(zstrm);
}
int zcomp_compress(struct zcomp_strm *zstrm,
-@@ -174,6 +181,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp,
+@@ -174,6 +182,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp,
pr_err("Can't allocate a compression stream\n");
return NOTIFY_BAD;
}
@@ -3793,6 +3792,60 @@ index 08d1dd58c0d2..25ee319dc8e3 100644
return ret;
}
#endif
+diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
+index 8022bea27fed..247330efd310 100644
+--- a/drivers/char/tpm/tpm_tis.c
++++ b/drivers/char/tpm/tpm_tis.c
+@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da
+ return container_of(data, struct tpm_tis_tcg_phy, priv);
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++/*
++ * Flushes previous write operations to chip so that a subsequent
++ * ioread*()s won't stall a cpu.
++ */
++static inline void tpm_tis_flush(void __iomem *iobase)
++{
++ ioread8(iobase + TPM_ACCESS(0));
++}
++#else
++#define tpm_tis_flush(iobase) do { } while (0)
++#endif
++
++static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
++{
++ iowrite8(b, iobase + addr);
++ tpm_tis_flush(iobase);
++}
++
++static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
++{
++ iowrite32(b, iobase + addr);
++ tpm_tis_flush(iobase);
++}
++
+ static bool interrupts = true;
+ module_param(interrupts, bool, 0444);
+ MODULE_PARM_DESC(interrupts, "Enable interrupts");
+@@ -103,7 +128,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
+ struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
+
+ while (len--)
+- iowrite8(*value++, phy->iobase + addr);
++ tpm_tis_iowrite8(*value++, phy->iobase, addr);
+ return 0;
+ }
+
+@@ -127,7 +152,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
+ {
+ struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
+
+- iowrite32(value, phy->iobase + addr);
++ tpm_tis_iowrite32(value, phy->iobase, addr);
+ return 0;
+ }
+
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index 4da2af9694a2..5b6f57f500b8 100644
--- a/drivers/clocksource/tcb_clksrc.c
@@ -4191,10 +4244,10 @@ index 02908e37c228..05c0480576e1 100644
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
-index 5dc6082639db..c32458fb3be2 100644
+index ce32303b3013..c0a53bf2e952 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
-@@ -12131,7 +12131,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe)
+@@ -12138,7 +12138,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe)
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
struct intel_flip_work *work;
@@ -4204,7 +4257,7 @@ index 5dc6082639db..c32458fb3be2 100644
if (crtc == NULL)
return;
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
-index dbed12c484c9..5c540b78e8b5 100644
+index 64f4e2e18594..aebf1e9eabcb 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -35,6 +35,7 @@
@@ -4224,7 +4277,7 @@ index dbed12c484c9..5c540b78e8b5 100644
/**
* intel_pipe_update_start() - start update of a set of display registers
* @crtc: the crtc of which the registers are going to be updated
-@@ -95,7 +98,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
+@@ -98,7 +101,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100);
max = vblank_start - 1;
@@ -4233,7 +4286,7 @@ index dbed12c484c9..5c540b78e8b5 100644
if (min <= 0 || max <= 0)
return;
-@@ -125,11 +128,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
+@@ -128,11 +131,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
break;
}
@@ -4247,7 +4300,7 @@ index dbed12c484c9..5c540b78e8b5 100644
}
finish_wait(wq, &wait);
-@@ -181,7 +184,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work
+@@ -202,7 +205,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work
crtc->base.state->event = NULL;
}
@@ -4547,7 +4600,7 @@ index 4a2a9e370be7..e970d9afd179 100644
if (t2 - t1 < tx) tx = t2 - t1;
}
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
-index 11a13b5be73a..baaed0ac274b 100644
+index 1a0b110f12c0..ff5c2424eb9e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1923,10 +1923,10 @@ static int __attach_device(struct iommu_dev_data *dev_data,
@@ -4578,8 +4631,26 @@ index 11a13b5be73a..baaed0ac274b 100644
if (WARN_ON(!dev_data->domain))
return;
+@@ -2283,7 +2283,7 @@ static void queue_add(struct dma_ops_domain *dma_dom,
+ pages = __roundup_pow_of_two(pages);
+ address >>= PAGE_SHIFT;
+
+- queue = get_cpu_ptr(&flush_queue);
++ queue = raw_cpu_ptr(&flush_queue);
+ spin_lock_irqsave(&queue->lock, flags);
+
+ if (queue->next == FLUSH_QUEUE_SIZE)
+@@ -2300,8 +2300,6 @@ static void queue_add(struct dma_ops_domain *dma_dom,
+
+ if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
+ mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
+-
+- put_cpu_ptr(&flush_queue);
+ }
+
+
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
-index b9e50c10213b..fd3b4657723f 100644
+index 002f8a421efa..980f41f1a194 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -479,7 +479,7 @@ struct deferred_flush_data {
@@ -4591,7 +4662,7 @@ index b9e50c10213b..fd3b4657723f 100644
/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;
-@@ -3716,10 +3716,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
+@@ -3719,10 +3719,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
struct intel_iommu *iommu;
struct deferred_flush_entry *entry;
struct deferred_flush_data *flush_data;
@@ -4603,7 +4674,7 @@ index b9e50c10213b..fd3b4657723f 100644
/* Flush all CPUs' entries to avoid deferring too much. If
* this becomes a bottleneck, can just flush us, and rely on
-@@ -3752,8 +3750,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
+@@ -3755,8 +3753,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
}
flush_data->size++;
spin_unlock_irqrestore(&flush_data->lock, flags);
@@ -4695,7 +4766,7 @@ index 4d200883c505..98b64ed5cb81 100644
Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs.
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
-index 2c965424d383..2c8877f50626 100644
+index ba7c4c685db3..834ec328f217 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -842,7 +842,7 @@ static void dm_old_request_fn(struct request_queue *q)
@@ -4708,10 +4779,28 @@ index 2c965424d383..2c8877f50626 100644
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
-index cce6057b9aca..fa2c4de32a64 100644
+index 7aea0221530c..4dde911925dc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
-@@ -1928,8 +1928,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+@@ -429,7 +429,7 @@ void raid5_release_stripe(struct stripe_head *sh)
+ md_wakeup_thread(conf->mddev->thread);
+ return;
+ slow_path:
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
+ if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
+ INIT_LIST_HEAD(&list);
+@@ -438,7 +438,7 @@ void raid5_release_stripe(struct stripe_head *sh)
+ spin_unlock(&conf->device_lock);
+ release_inactive_stripe_list(conf, &list, hash);
+ }
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+
+ static inline void remove_hash(struct stripe_head *sh)
+@@ -1934,8 +1934,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
struct raid5_percpu *percpu;
unsigned long cpu;
@@ -4722,7 +4811,7 @@ index cce6057b9aca..fa2c4de32a64 100644
if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
ops_run_biofill(sh);
overlap_clear++;
-@@ -1985,7 +1986,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+@@ -1991,7 +1992,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
if (test_and_clear_bit(R5_Overlap, &dev->flags))
wake_up(&sh->raid_conf->wait_for_overlap);
}
@@ -4732,7 +4821,7 @@ index cce6057b9aca..fa2c4de32a64 100644
}
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
-@@ -6391,6 +6393,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
+@@ -6407,6 +6409,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
__func__, cpu);
return -ENOMEM;
}
@@ -4740,7 +4829,7 @@ index cce6057b9aca..fa2c4de32a64 100644
return 0;
}
-@@ -6401,7 +6404,6 @@ static int raid5_alloc_percpu(struct r5conf *conf)
+@@ -6417,7 +6420,6 @@ static int raid5_alloc_percpu(struct r5conf *conf)
conf->percpu = alloc_percpu(struct raid5_percpu);
if (!conf->percpu)
return -ENOMEM;
@@ -4884,19 +4973,6 @@ index bca6935a94db..d7a35ee34d03 100644
ctx->done.done);
}
break;
-diff --git a/drivers/pci/access.c b/drivers/pci/access.c
-index d11cdbb8fba3..223bbb9acb03 100644
---- a/drivers/pci/access.c
-+++ b/drivers/pci/access.c
-@@ -672,7 +672,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev)
- WARN_ON(!dev->block_cfg_access);
-
- dev->block_cfg_access = 0;
-- wake_up_all(&pci_cfg_wait);
-+ wake_up_all_locked(&pci_cfg_wait);
- raw_spin_unlock_irqrestore(&pci_lock, flags);
- }
- EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index bedce3453dd3..faf038978650 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -5245,10 +5321,10 @@ index edc48f3b8230..ee5c6f9dfb6f 100644
static inline uint8_t *
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
-index 068c4e47fac9..a2090f640397 100644
+index bddaabb288d4..8de0ec4222fe 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
-@@ -3125,7 +3125,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
+@@ -3129,7 +3129,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
* kref_put().
*/
kref_get(&qentry->irq_notify.kref);
@@ -5389,7 +5465,7 @@ index e8819aa20415..dd7f9bf45d6c 100644
#include <asm/serial.h>
/*
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
-index 080d5a59d0a7..eecc4f111473 100644
+index 1ef31e3ee4a1..ff9f4e50563c 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -35,6 +35,7 @@
@@ -5400,7 +5476,7 @@ index 080d5a59d0a7..eecc4f111473 100644
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/timer.h>
-@@ -3144,9 +3145,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
+@@ -3140,9 +3141,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
serial8250_rpm_get(up);
@@ -5450,7 +5526,7 @@ index e2c33b9528d8..53af53c43e8c 100644
clk_disable(uap->clk);
}
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
-index a2a529994ba5..0ee7c4c518df 100644
+index 472ba3c813c1..e654cb421fb7 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s,
@@ -5481,7 +5557,7 @@ index a2a529994ba5..0ee7c4c518df 100644
static int __init
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
-index f029aad67183..87c026876640 100644
+index fcc7aa248ce7..fb2c38d875f9 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1764,9 +1764,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
@@ -5497,7 +5573,7 @@ index f029aad67183..87c026876640 100644
usb_anchor_resume_wakeups(anchor);
atomic_dec(&urb->use_count);
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
-index 89081b834615..90b231b7ad0a 100644
+index 273320fa30ae..f2a125841653 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1593,7 +1593,7 @@ static void ffs_data_put(struct ffs_data *ffs)
@@ -5510,10 +5586,10 @@ index 89081b834615..90b231b7ad0a 100644
kfree(ffs);
}
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
-index 1468d8f085a3..6aae3ae25c18 100644
+index f69dbd4bcd18..3b7638322f9f 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
-@@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
+@@ -347,7 +347,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
spin_unlock_irq (&epdata->dev->lock);
if (likely (value == 0)) {
@@ -5522,7 +5598,7 @@ index 1468d8f085a3..6aae3ae25c18 100644
if (value != 0) {
spin_lock_irq (&epdata->dev->lock);
if (likely (epdata->ep != NULL)) {
-@@ -355,7 +355,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
+@@ -356,7 +356,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
usb_ep_dequeue (epdata->ep, epdata->req);
spin_unlock_irq (&epdata->dev->lock);
@@ -5532,7 +5608,7 @@ index 1468d8f085a3..6aae3ae25c18 100644
epdata->status = -EINTR;
} else {
diff --git a/fs/aio.c b/fs/aio.c
-index 428484f2f841..2b02e2eb2158 100644
+index 0fcb49ad67d4..211ebc21e4db 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,6 +40,7 @@
@@ -5636,7 +5712,7 @@ index d8e6d421c27f..2e689ab1306b 100644
}
spin_unlock(&p->d_lock);
diff --git a/fs/buffer.c b/fs/buffer.c
-index b205a629001d..5646afc022ba 100644
+index 5d8f496d624e..48074bd91ea3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
@@ -5720,7 +5796,7 @@ index a27fc8791551..791aecb7c1ac 100644
cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
diff --git a/fs/dcache.c b/fs/dcache.c
-index 4485a48f4091..691039a6a872 100644
+index 67957f5b325c..f0719b2f1be5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -19,6 +19,7 @@
@@ -5731,7 +5807,7 @@ index 4485a48f4091..691039a6a872 100644
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
-@@ -750,6 +751,8 @@ static inline bool fast_dput(struct dentry *dentry)
+@@ -777,6 +778,8 @@ static inline bool fast_dput(struct dentry *dentry)
*/
void dput(struct dentry *dentry)
{
@@ -5740,7 +5816,7 @@ index 4485a48f4091..691039a6a872 100644
if (unlikely(!dentry))
return;
-@@ -788,9 +791,18 @@ void dput(struct dentry *dentry)
+@@ -815,9 +818,18 @@ void dput(struct dentry *dentry)
return;
kill_it:
@@ -5762,7 +5838,7 @@ index 4485a48f4091..691039a6a872 100644
goto repeat;
}
}
-@@ -2324,7 +2336,7 @@ void d_delete(struct dentry * dentry)
+@@ -2352,7 +2364,7 @@ void d_delete(struct dentry * dentry)
if (dentry->d_lockref.count == 1) {
if (!spin_trylock(&inode->i_lock)) {
spin_unlock(&dentry->d_lock);
@@ -5771,7 +5847,27 @@ index 4485a48f4091..691039a6a872 100644
goto again;
}
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
-@@ -2384,21 +2396,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n)
+@@ -2397,9 +2409,10 @@ EXPORT_SYMBOL(d_rehash);
+ static inline unsigned start_dir_add(struct inode *dir)
+ {
+
++ preempt_disable_rt();
+ for (;;) {
+- unsigned n = dir->i_dir_seq;
+- if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
++ unsigned n = dir->__i_dir_seq;
++ if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n)
+ return n;
+ cpu_relax();
+ }
+@@ -2407,26 +2420,30 @@ static inline unsigned start_dir_add(struct inode *dir)
+
+ static inline void end_dir_add(struct inode *dir, unsigned n)
+ {
+- smp_store_release(&dir->i_dir_seq, n + 2);
++ smp_store_release(&dir->__i_dir_seq, n + 2);
++ preempt_enable_rt();
+ }
static void d_wait_lookup(struct dentry *dentry)
{
@@ -5807,7 +5903,25 @@ index 4485a48f4091..691039a6a872 100644
{
unsigned int hash = name->hash;
struct hlist_bl_head *b = in_lookup_hash(parent, hash);
-@@ -2507,7 +2522,7 @@ void __d_lookup_done(struct dentry *dentry)
+@@ -2440,7 +2457,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
+
+ retry:
+ rcu_read_lock();
+- seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
++ seq = smp_load_acquire(&parent->d_inode->__i_dir_seq) & ~1;
+ r_seq = read_seqbegin(&rename_lock);
+ dentry = __d_lookup_rcu(parent, name, &d_seq);
+ if (unlikely(dentry)) {
+@@ -2462,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
+ goto retry;
+ }
+ hlist_bl_lock(b);
+- if (unlikely(parent->d_inode->i_dir_seq != seq)) {
++ if (unlikely(parent->d_inode->__i_dir_seq != seq)) {
+ hlist_bl_unlock(b);
+ rcu_read_unlock();
+ goto retry;
+@@ -2535,7 +2552,7 @@ void __d_lookup_done(struct dentry *dentry)
hlist_bl_lock(b);
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
@@ -5816,7 +5930,7 @@ index 4485a48f4091..691039a6a872 100644
dentry->d_wait = NULL;
hlist_bl_unlock(b);
INIT_HLIST_NODE(&dentry->d_u.d_alias);
-@@ -3604,6 +3619,11 @@ EXPORT_SYMBOL(d_genocide);
+@@ -3632,6 +3649,11 @@ EXPORT_SYMBOL(d_genocide);
void __init vfs_caches_init_early(void)
{
@@ -5829,7 +5943,7 @@ index 4485a48f4091..691039a6a872 100644
inode_init_early();
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
-index 10db91218933..42af0a06f657 100644
+index 3cbc30413add..41a94f552aab 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
@@ -5848,10 +5962,10 @@ index 10db91218933..42af0a06f657 100644
static void ep_remove_wait_queue(struct eppoll_entry *pwq)
diff --git a/fs/exec.c b/fs/exec.c
-index 67e86571685a..fe14cdd84016 100644
+index b8c43be24751..71f4c6ec2bb8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
-@@ -1017,12 +1017,14 @@ static int exec_mmap(struct mm_struct *mm)
+@@ -1038,12 +1038,14 @@ static int exec_mmap(struct mm_struct *mm)
}
}
task_lock(tsk);
@@ -5866,8 +5980,32 @@ index 67e86571685a..fe14cdd84016 100644
task_unlock(tsk);
if (old_mm) {
up_read(&old_mm->mmap_sem);
+diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
+index 0094923e5ebf..37fa06ef5417 100644
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -95,8 +95,7 @@ static void ext4_finish_bio(struct bio *bio)
+ * We check all buffers in the page under BH_Uptodate_Lock
+ * to avoid races with other end io clearing async_write flags
+ */
+- local_irq_save(flags);
+- bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
++ flags = bh_uptodate_lock_irqsave(head);
+ do {
+ if (bh_offset(bh) < bio_start ||
+ bh_offset(bh) + bh->b_size > bio_end) {
+@@ -108,8 +107,7 @@ static void ext4_finish_bio(struct bio *bio)
+ if (bio->bi_error)
+ buffer_io_error(bh);
+ } while ((bh = bh->b_this_page) != head);
+- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+- local_irq_restore(flags);
++ bh_uptodate_unlock_irqrestore(head, flags);
+ if (!under_io) {
+ #ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (data_page)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
-index 642c57b8de7b..8494b9308333 100644
+index 4bbad745415a..5f91ca248ab0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1191,7 +1191,7 @@ static int fuse_direntplus_link(struct file *file,
@@ -5879,19 +6017,51 @@ index 642c57b8de7b..8494b9308333 100644
if (!o->nodeid) {
/*
-diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
-index 684996c8a3a4..6e18a06aaabe 100644
---- a/fs/jbd2/checkpoint.c
-+++ b/fs/jbd2/checkpoint.c
-@@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal)
- nblocks = jbd2_space_needed(journal);
- while (jbd2_log_space_left(journal) < nblocks) {
- write_unlock(&journal->j_state_lock);
-+ if (current->plug)
-+ io_schedule();
- mutex_lock(&journal->j_checkpoint_mutex);
+diff --git a/fs/inode.c b/fs/inode.c
+index 920aa0b1c6b0..3d6b5fd1bf06 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -153,7 +153,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
+ inode->i_bdev = NULL;
+ inode->i_cdev = NULL;
+ inode->i_link = NULL;
+- inode->i_dir_seq = 0;
++ inode->__i_dir_seq = 0;
+ inode->i_rdev = 0;
+ inode->dirtied_when = 0;
+
+diff --git a/fs/libfs.c b/fs/libfs.c
+index 48826d4da189..3ea54d1fc431 100644
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -89,7 +89,7 @@ static struct dentry *next_positive(struct dentry *parent,
+ struct list_head *from,
+ int count)
+ {
+- unsigned *seq = &parent->d_inode->i_dir_seq, n;
++ unsigned *seq = &parent->d_inode->__i_dir_seq, n;
+ struct dentry *res;
+ struct list_head *p;
+ bool skipped;
+@@ -122,8 +122,9 @@ static struct dentry *next_positive(struct dentry *parent,
+ static void move_cursor(struct dentry *cursor, struct list_head *after)
+ {
+ struct dentry *parent = cursor->d_parent;
+- unsigned n, *seq = &parent->d_inode->i_dir_seq;
++ unsigned n, *seq = &parent->d_inode->__i_dir_seq;
+ spin_lock(&parent->d_lock);
++ preempt_disable_rt();
+ for (;;) {
+ n = *seq;
+ if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
+@@ -136,6 +137,7 @@ static void move_cursor(struct dentry *cursor, struct list_head *after)
+ else
+ list_add_tail(&cursor->d_child, &parent->d_subdirs);
+ smp_store_release(seq, n + 2);
++ preempt_enable_rt();
+ spin_unlock(&parent->d_lock);
+ }
- /*
diff --git a/fs/locks.c b/fs/locks.c
index 22c5b4aa4961..269c6a44449a 100644
--- a/fs/locks.c
@@ -6037,7 +6207,7 @@ index 22c5b4aa4961..269c6a44449a 100644
locks_dispose_list(&dispose);
}
diff --git a/fs/namei.c b/fs/namei.c
-index d5e5140c1045..150fbdd8e04c 100644
+index e7d125c23aa6..072a2f724437 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1626,7 +1626,7 @@ static struct dentry *lookup_slow(const struct qstr *name,
@@ -6049,7 +6219,7 @@ index d5e5140c1045..150fbdd8e04c 100644
inode_lock_shared(inode);
/* Don't go there if it's already dead */
-@@ -3083,7 +3083,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
+@@ -3089,7 +3089,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
@@ -6059,7 +6229,7 @@ index d5e5140c1045..150fbdd8e04c 100644
if (unlikely(IS_DEADDIR(dir_inode)))
return -ENOENT;
diff --git a/fs/namespace.c b/fs/namespace.c
-index 5e35057f07ac..843d274ba167 100644
+index d7360f9897b4..da188c6966a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,6 +14,7 @@
@@ -6070,7 +6240,7 @@ index 5e35057f07ac..843d274ba167 100644
#include <linux/security.h>
#include <linux/idr.h>
#include <linux/init.h> /* init_rootfs */
-@@ -356,8 +357,11 @@ int __mnt_want_write(struct vfsmount *m)
+@@ -357,8 +358,11 @@ int __mnt_want_write(struct vfsmount *m)
* incremented count after it has set MNT_WRITE_HOLD.
*/
smp_mb();
@@ -6103,7 +6273,7 @@ index dff600ae0d74..d726d2e09353 100644
mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
-index 53e02b8bd9bd..a66e7d77cfbb 100644
+index d04ec3814779..ba90d41d3c34 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -485,7 +485,7 @@ static
@@ -6115,7 +6285,7 @@ index 53e02b8bd9bd..a66e7d77cfbb 100644
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = d_inode(parent);
-@@ -1487,7 +1487,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+@@ -1491,7 +1491,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
umode_t mode, int *opened)
{
@@ -6124,7 +6294,7 @@ index 53e02b8bd9bd..a66e7d77cfbb 100644
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
-@@ -1802,7 +1802,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+@@ -1806,7 +1806,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
trace_nfs_rmdir_enter(dir, dentry);
if (d_really_is_positive(dentry)) {
@@ -6136,7 +6306,7 @@ index 53e02b8bd9bd..a66e7d77cfbb 100644
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
/* Ensure the VFS deletes this inode */
switch (error) {
-@@ -1812,7 +1816,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+@@ -1816,7 +1820,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
case -ENOENT:
nfs_dentry_handle_enoent(dentry);
}
@@ -6149,7 +6319,7 @@ index 53e02b8bd9bd..a66e7d77cfbb 100644
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
trace_nfs_rmdir_exit(dir, dentry, error);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
-index bf4ec5ecc97e..36cd5fc9192c 100644
+index 76ae25661d3f..89159d298278 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1957,7 +1957,11 @@ static void init_once(void *foo)
@@ -6178,7 +6348,7 @@ index 1452177c822d..f43b01d54c59 100644
};
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
-index 4e894d301c88..3300a4b5c87c 100644
+index a53b8e0c896a..da0a483c5442 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2695,7 +2695,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
@@ -6200,7 +6370,7 @@ index 4e894d301c88..3300a4b5c87c 100644
}
out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
-index 0959c9661662..dabd834d7686 100644
+index 92671914067f..44038480c88c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
@@ -6212,7 +6382,7 @@ index 0959c9661662..dabd834d7686 100644
mutex_init(&sp->so_delegreturn_mutex);
return sp;
}
-@@ -1497,8 +1497,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
+@@ -1498,8 +1498,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
* recovering after a network partition or a reboot from a
* server that doesn't support a grace period.
*/
@@ -6226,7 +6396,7 @@ index 0959c9661662..dabd834d7686 100644
restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
-@@ -1567,14 +1571,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
+@@ -1568,14 +1572,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
spin_lock(&sp->so_lock);
goto restart;
}
@@ -6402,7 +6572,7 @@ index fe251f187ff8..e89da4fb14c2 100644
/**
diff --git a/fs/proc/base.c b/fs/proc/base.c
-index ca651ac00660..41d9dc789285 100644
+index e67fec3c9856..0edc16f95596 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1834,7 +1834,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
@@ -6443,6 +6613,30 @@ index ab8dd1538381..5580853f57dd 100644
}
/*
+diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
+index d31cd1ebd8e9..5ea3f933a52a 100644
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -112,8 +112,7 @@ xfs_finish_page_writeback(
+ ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
+ ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
+
+- local_irq_save(flags);
+- bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
++ flags = bh_uptodate_lock_irqsave(head);
+ do {
+ if (off >= bvec->bv_offset &&
+ off < bvec->bv_offset + bvec->bv_len) {
+@@ -136,8 +135,7 @@ xfs_finish_page_writeback(
+ }
+ off += bh->b_size;
+ } while ((bh = bh->b_this_page) != head);
+- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+- local_irq_restore(flags);
++ bh_uptodate_unlock_irqrestore(head, flags);
+
+ if (!busy)
+ end_page_writeback(bvec->bv_page);
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index e861a24f06f2..b5c97d3059c7 100644
--- a/include/acpi/platform/aclinux.h
@@ -6587,7 +6781,7 @@ index 8fdcb783197d..d07dbeec7bc1 100644
#endif /* _LINUX_BH_H */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
-index ebbacd14d450..be5e87f6360a 100644
+index 447a915db25d..e187a3356345 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -75,8 +75,50 @@ struct buffer_head {
@@ -6642,7 +6836,7 @@ index ebbacd14d450..be5e87f6360a 100644
* macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
* and buffer_foo() functions.
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
-index 5b17de62c962..56027cc01a56 100644
+index 6fb1c34cf805..ccd2a5addb56 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -16,6 +16,7 @@
@@ -6653,7 +6847,7 @@ index 5b17de62c962..56027cc01a56 100644
#ifdef CONFIG_CGROUPS
-@@ -137,6 +138,7 @@ struct cgroup_subsys_state {
+@@ -138,6 +139,7 @@ struct cgroup_subsys_state {
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
struct work_struct destroy_work;
@@ -6721,7 +6915,7 @@ index e571128ad99a..5e52d28c20c1 100644
#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
/* These aren't inline functions due to a GCC bug. */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
-index 5beed7b30561..61cab7ef458e 100644
+index ff295e166b2c..d532c60f3fb5 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -11,6 +11,7 @@
@@ -6765,6 +6959,19 @@ index a6ecb34cf547..37caab306336 100644
+#endif
+
#endif /* defined(_LINUX_DELAY_H) */
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index d705ae084edd..ab1946f4a729 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -688,7 +688,7 @@ struct inode {
+ struct block_device *i_bdev;
+ struct cdev *i_cdev;
+ char *i_link;
+- unsigned i_dir_seq;
++ unsigned __i_dir_seq;
+ };
+
+ __u32 i_generation;
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index bb3f3297062a..a117a33ef72c 100644
--- a/include/linux/highmem.h
@@ -7495,10 +7702,10 @@ index cb483305e1f5..4e5062316bb6 100644
static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
diff --git a/include/linux/locallock.h b/include/linux/locallock.h
new file mode 100644
-index 000000000000..845c77f1a5ca
+index 000000000000..280f884a05a3
--- /dev/null
+++ b/include/linux/locallock.h
-@@ -0,0 +1,278 @@
+@@ -0,0 +1,287 @@
+#ifndef _LINUX_LOCALLOCK_H
+#define _LINUX_LOCALLOCK_H
+
@@ -7578,6 +7785,9 @@ index 000000000000..845c77f1a5ca
+ lv->owner = current;
+ lv->nestcnt = 1;
+ return 1;
++ } else if (lv->owner == current) {
++ lv->nestcnt++;
++ return 1;
+ }
+ return 0;
+}
@@ -7751,6 +7961,12 @@ index 000000000000..845c77f1a5ca
+
+static inline void local_irq_lock_init(int lvar) { }
+
++#define local_trylock(lvar) \
++ ({ \
++ preempt_disable(); \
++ 1; \
++ })
++
+#define local_lock(lvar) preempt_disable()
+#define local_unlock(lvar) preempt_enable()
+#define local_lock_irq(lvar) local_irq_disable()
@@ -7778,7 +7994,7 @@ index 000000000000..845c77f1a5ca
+
+#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
-index 08d947fc4c59..705fb564a605 100644
+index e8471c2ca83a..08bde1a7a987 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -11,6 +11,7 @@
@@ -7789,8 +8005,8 @@ index 08d947fc4c59..705fb564a605 100644
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <asm/page.h>
-@@ -509,6 +510,9 @@ struct mm_struct {
- bool tlb_flush_pending;
+@@ -513,6 +514,9 @@ struct mm_struct {
+ bool tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT_BASE
@@ -7964,7 +8180,7 @@ index 000000000000..e0284edec655
+
+#endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
-index bb9b102c15cd..a5b12b8ad196 100644
+index 47c7f5b8f675..85fc72b8a92b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -396,7 +396,19 @@ typedef enum rx_handler_result rx_handler_result_t;
@@ -7987,7 +8203,7 @@ index bb9b102c15cd..a5b12b8ad196 100644
static inline bool napi_disable_pending(struct napi_struct *n)
{
-@@ -2463,14 +2475,53 @@ void netdev_freemem(struct net_device *dev);
+@@ -2464,14 +2476,53 @@ void netdev_freemem(struct net_device *dev);
void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev);
@@ -8042,7 +8258,7 @@ index bb9b102c15cd..a5b12b8ad196 100644
struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
-@@ -2855,6 +2906,7 @@ struct softnet_data {
+@@ -2856,6 +2907,7 @@ struct softnet_data {
unsigned int dropped;
struct sk_buff_head input_pkt_queue;
struct napi_struct backlog;
@@ -8106,7 +8322,7 @@ index 810124b33327..d54ca43d571f 100644
#if IS_ENABLED(CONFIG_NFS_V4)
struct nfs4_cached_acl *nfs4_acl;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
-index beb1e10f446e..ebaf2e7bfe29 100644
+index 3bf867a0c3b3..71c6bdd14c8a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1490,7 +1490,7 @@ struct nfs_unlinkdata {
@@ -8335,7 +8551,7 @@ index 56939d3f6e53..b988bf40ad3e 100644
#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
diff --git a/include/linux/pid.h b/include/linux/pid.h
-index 23705a53abba..2cc64b779f03 100644
+index 97b745ddece5..01a5460a0c85 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -2,6 +2,7 @@
@@ -8347,7 +8563,7 @@ index 23705a53abba..2cc64b779f03 100644
enum pid_type
{
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
-index 75e4e30677f1..1cfb1cb72354 100644
+index 7eeceac52dea..f97c54265904 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -50,7 +50,11 @@
@@ -8380,15 +8596,15 @@ index 75e4e30677f1..1cfb1cb72354 100644
/*
* Are we doing bottom half or hardware interrupt processing?
-@@ -72,7 +82,6 @@
+@@ -79,7 +89,6 @@
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
-#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
-
- /*
- * Are we in NMI context?
-@@ -91,7 +100,11 @@
+ #define in_nmi() (preempt_count() & NMI_MASK)
+ #define in_task() (!(preempt_count() & \
+ (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+@@ -96,7 +105,11 @@
/*
* The preempt_count offset after spin_lock()
*/
@@ -8400,7 +8616,7 @@ index 75e4e30677f1..1cfb1cb72354 100644
/*
* The preempt_count offset needed for things like:
-@@ -140,6 +153,20 @@ extern void preempt_count_sub(int val);
+@@ -145,6 +158,20 @@ extern void preempt_count_sub(int val);
#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)
@@ -8421,7 +8637,7 @@ index 75e4e30677f1..1cfb1cb72354 100644
#ifdef CONFIG_PREEMPT_COUNT
#define preempt_disable() \
-@@ -148,13 +175,25 @@ do { \
+@@ -153,13 +180,25 @@ do { \
barrier(); \
} while (0)
@@ -8448,7 +8664,7 @@ index 75e4e30677f1..1cfb1cb72354 100644
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
-@@ -179,6 +218,13 @@ do { \
+@@ -184,6 +223,13 @@ do { \
__preempt_schedule(); \
} while (0)
@@ -8462,7 +8678,7 @@ index 75e4e30677f1..1cfb1cb72354 100644
#else /* !CONFIG_PREEMPT */
#define preempt_enable() \
do { \
-@@ -224,6 +270,7 @@ do { \
+@@ -229,6 +275,7 @@ do { \
#define preempt_disable_notrace() barrier()
#define preempt_enable_no_resched_notrace() barrier()
#define preempt_enable_notrace() barrier()
@@ -8470,7 +8686,7 @@ index 75e4e30677f1..1cfb1cb72354 100644
#define preemptible() 0
#endif /* CONFIG_PREEMPT_COUNT */
-@@ -244,10 +291,31 @@ do { \
+@@ -249,10 +296,31 @@ do { \
} while (0)
#define preempt_fold_need_resched() \
do { \
@@ -9198,7 +9414,7 @@ index 000000000000..2ffbf093ae92
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
-index f425eb3318ab..e010fb4d640d 100644
+index a4d0afc009a7..e775696b480a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -26,6 +26,7 @@ struct sched_param {
@@ -9209,7 +9425,14 @@ index f425eb3318ab..e010fb4d640d 100644
#include <asm/page.h>
#include <asm/ptrace.h>
-@@ -243,10 +244,7 @@ extern char ___assert_task_state[1 - 2*!!(
+@@ -236,17 +237,13 @@ extern char ___assert_task_state[1 - 2*!!(
+
+ /* Convenience macros for the sake of wake_up */
+ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+-#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+
+ /* get_task_state() */
+ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
__TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
@@ -9220,7 +9443,7 @@ index f425eb3318ab..e010fb4d640d 100644
#define task_contributes_to_load(task) \
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
(task->flags & PF_FROZEN) == 0 && \
-@@ -312,6 +310,11 @@ extern char ___assert_task_state[1 - 2*!!(
+@@ -312,6 +309,11 @@ extern char ___assert_task_state[1 - 2*!!(
#endif
@@ -9232,13 +9455,27 @@ index f425eb3318ab..e010fb4d640d 100644
/* Task command name length */
#define TASK_COMM_LEN 16
-@@ -1013,8 +1016,18 @@ struct wake_q_head {
+@@ -1022,9 +1024,31 @@ struct wake_q_head {
+ #define WAKE_Q(name) \
struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
- extern void wake_q_add(struct wake_q_head *head,
+-extern void wake_q_add(struct wake_q_head *head,
- struct task_struct *task);
-extern void wake_up_q(struct wake_q_head *head);
-+ struct task_struct *task);
++extern void __wake_q_add(struct wake_q_head *head,
++ struct task_struct *task, bool sleeper);
++static inline void wake_q_add(struct wake_q_head *head,
++ struct task_struct *task)
++{
++ __wake_q_add(head, task, false);
++}
++
++static inline void wake_q_add_sleeper(struct wake_q_head *head,
++ struct task_struct *task)
++{
++ __wake_q_add(head, task, true);
++}
++
+extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
+
+static inline void wake_up_q(struct wake_q_head *head)
@@ -9253,7 +9490,7 @@ index f425eb3318ab..e010fb4d640d 100644
/*
* sched-domains (multiprocessor balancing) declarations:
-@@ -1481,6 +1494,7 @@ struct task_struct {
+@@ -1491,6 +1515,7 @@ struct task_struct {
struct thread_info thread_info;
#endif
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
@@ -9261,12 +9498,13 @@ index f425eb3318ab..e010fb4d640d 100644
void *stack;
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
-@@ -1520,6 +1534,12 @@ struct task_struct {
+@@ -1530,6 +1555,13 @@ struct task_struct {
#endif
unsigned int policy;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ int migrate_disable;
++ int migrate_disable_update;
+# ifdef CONFIG_SCHED_DEBUG
+ int migrate_disable_atomic;
+# endif
@@ -9274,7 +9512,7 @@ index f425eb3318ab..e010fb4d640d 100644
int nr_cpus_allowed;
cpumask_t cpus_allowed;
-@@ -1658,6 +1678,9 @@ struct task_struct {
+@@ -1668,6 +1700,9 @@ struct task_struct {
struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
@@ -9284,7 +9522,7 @@ index f425eb3318ab..e010fb4d640d 100644
/* process credentials */
const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
-@@ -1689,10 +1712,15 @@ struct task_struct {
+@@ -1699,10 +1734,15 @@ struct task_struct {
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
@@ -9300,7 +9538,13 @@ index f425eb3318ab..e010fb4d640d 100644
unsigned long sas_ss_sp;
size_t sas_ss_size;
-@@ -1723,6 +1751,8 @@ struct task_struct {
+@@ -1728,11 +1768,14 @@ struct task_struct {
+ raw_spinlock_t pi_lock;
+
+ struct wake_q_node wake_q;
++ struct wake_q_node wake_q_sleeper;
+
+ #ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct rb_root pi_waiters;
struct rb_node *pi_waiters_leftmost;
@@ -9309,7 +9553,7 @@ index f425eb3318ab..e010fb4d640d 100644
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif
-@@ -1921,6 +1951,12 @@ struct task_struct {
+@@ -1931,6 +1974,12 @@ struct task_struct {
/* bitmask and counter of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
@@ -9322,7 +9566,7 @@ index f425eb3318ab..e010fb4d640d 100644
#ifdef CONFIG_KCOV
/* Coverage collection mode enabled for this task (0 if disabled). */
enum kcov_mode kcov_mode;
-@@ -1946,9 +1982,23 @@ struct task_struct {
+@@ -1956,9 +2005,23 @@ struct task_struct {
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
@@ -9346,7 +9590,7 @@ index f425eb3318ab..e010fb4d640d 100644
int pagefault_disabled;
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
-@@ -1988,14 +2038,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+@@ -1998,14 +2061,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
}
#endif
@@ -9361,7 +9605,7 @@ index f425eb3318ab..e010fb4d640d 100644
#define TNF_MIGRATED 0x01
#define TNF_NO_GROUP 0x02
#define TNF_SHARED 0x04
-@@ -2211,6 +2253,15 @@ extern struct pid *cad_pid;
+@@ -2225,6 +2280,15 @@ extern struct pid *cad_pid;
extern void free_task(struct task_struct *tsk);
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
@@ -9377,7 +9621,7 @@ index f425eb3318ab..e010fb4d640d 100644
extern void __put_task_struct(struct task_struct *t);
static inline void put_task_struct(struct task_struct *t)
-@@ -2218,6 +2269,7 @@ static inline void put_task_struct(struct task_struct *t)
+@@ -2232,6 +2296,7 @@ static inline void put_task_struct(struct task_struct *t)
if (atomic_dec_and_test(&t->usage))
__put_task_struct(t);
}
@@ -9385,7 +9629,7 @@ index f425eb3318ab..e010fb4d640d 100644
struct task_struct *task_rcu_dereference(struct task_struct **ptask);
struct task_struct *try_get_task_struct(struct task_struct **ptask);
-@@ -2259,6 +2311,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
+@@ -2273,6 +2338,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
/*
* Per process flags
*/
@@ -9393,7 +9637,7 @@ index f425eb3318ab..e010fb4d640d 100644
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
-@@ -2427,6 +2480,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,
+@@ -2441,6 +2507,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,
extern int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask);
@@ -9404,7 +9648,7 @@ index f425eb3318ab..e010fb4d640d 100644
#else
static inline void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask)
-@@ -2439,6 +2496,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
+@@ -2453,6 +2523,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
return -EINVAL;
return 0;
}
@@ -9414,7 +9658,7 @@ index f425eb3318ab..e010fb4d640d 100644
#endif
#ifdef CONFIG_NO_HZ_COMMON
-@@ -2677,6 +2737,7 @@ extern void xtime_update(unsigned long ticks);
+@@ -2691,6 +2764,7 @@ extern void xtime_update(unsigned long ticks);
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
@@ -9422,7 +9666,7 @@ index f425eb3318ab..e010fb4d640d 100644
extern void wake_up_new_task(struct task_struct *tsk);
#ifdef CONFIG_SMP
extern void kick_process(struct task_struct *tsk);
-@@ -2885,6 +2946,17 @@ static inline void mmdrop(struct mm_struct *mm)
+@@ -2899,6 +2973,17 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
@@ -9440,7 +9684,7 @@ index f425eb3318ab..e010fb4d640d 100644
static inline void mmdrop_async_fn(struct work_struct *work)
{
struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
-@@ -3277,6 +3349,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
+@@ -3291,6 +3376,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
@@ -9484,7 +9728,7 @@ index f425eb3318ab..e010fb4d640d 100644
static inline int restart_syscall(void)
{
set_tsk_thread_flag(current, TIF_SIGPENDING);
-@@ -3308,6 +3417,51 @@ static inline int signal_pending_state(long state, struct task_struct *p)
+@@ -3322,6 +3444,51 @@ static inline int signal_pending_state(long state, struct task_struct *p)
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
@@ -9536,7 +9780,7 @@ index f425eb3318ab..e010fb4d640d 100644
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
-@@ -3333,12 +3487,16 @@ extern int __cond_resched_lock(spinlock_t *lock);
+@@ -3347,12 +3514,16 @@ extern int __cond_resched_lock(spinlock_t *lock);
__cond_resched_lock(lock); \
})
@@ -9553,7 +9797,7 @@ index f425eb3318ab..e010fb4d640d 100644
static inline void cond_resched_rcu(void)
{
-@@ -3513,6 +3671,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+@@ -3527,6 +3698,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
@@ -9780,7 +10024,7 @@ index b63f63eaa39c..295540fdfc72 100644
/* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
static inline int valid_signal(unsigned long sig)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
-index 32810f279f8e..0db6e31161f6 100644
+index 601dfa849d30..dca387a8fa6b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -284,6 +284,7 @@ struct sk_buff_head {
@@ -10071,18 +10315,10 @@ index 73548eb13a5d..10bac715ea96 100644
-#if defined(CONFIG_SMP)
-# include <asm/spinlock_types.h>
-+#include <linux/spinlock_types_raw.h>
-+
-+#ifndef CONFIG_PREEMPT_RT_FULL
-+# include <linux/spinlock_types_nort.h>
-+# include <linux/rwlock_types.h>
- #else
+-#else
-# include <linux/spinlock_types_up.h>
-+# include <linux/rtmutex.h>
-+# include <linux/spinlock_types_rt.h>
-+# include <linux/rwlock_types_rt.h>
- #endif
-
+-#endif
+-
-#include <linux/lockdep.h>
-
-typedef struct raw_spinlock {
@@ -10108,16 +10344,23 @@ index 73548eb13a5d..10bac715ea96 100644
-#else
-# define SPIN_DEP_MAP_INIT(lockname)
-#endif
--
++#include <linux/spinlock_types_raw.h>
+
-#ifdef CONFIG_DEBUG_SPINLOCK
-# define SPIN_DEBUG_INIT(lockname) \
- .magic = SPINLOCK_MAGIC, \
- .owner_cpu = -1, \
- .owner = SPINLOCK_OWNER_INIT,
--#else
++#ifndef CONFIG_PREEMPT_RT_FULL
++# include <linux/spinlock_types_nort.h>
++# include <linux/rwlock_types.h>
+ #else
-# define SPIN_DEBUG_INIT(lockname)
--#endif
--
++# include <linux/rtmutex.h>
++# include <linux/spinlock_types_rt.h>
++# include <linux/rwlock_types_rt.h>
+ #endif
+
-#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
- { \
- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
@@ -10461,7 +10704,7 @@ index 51d601f192d4..83cea629efe1 100644
#else
# define del_timer_sync(t) del_timer(t)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
-index be007610ceb0..15154b13a53b 100644
+index ba57266d9e80..5c36934ec2bc 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -56,6 +56,9 @@ struct trace_entry {
@@ -10543,10 +10786,10 @@ index 2408e8d5c05c..db50d6609195 100644
typedef struct __wait_queue wait_queue_t;
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
diff --git a/include/net/dst.h b/include/net/dst.h
-index 6835d224d47b..55a5a9698f14 100644
+index ddcff17615da..a1fc787b1a8c 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
-@@ -446,7 +446,7 @@ static inline void dst_confirm(struct dst_entry *dst)
+@@ -452,7 +452,7 @@ static inline void dst_confirm(struct dst_entry *dst)
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
struct sk_buff *skb)
{
@@ -10654,7 +10897,7 @@ index 7adf4386ac8f..d3fd5c357268 100644
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
-index e6aa0a249672..b57736f2a8a3 100644
+index f18fc1a0321f..5d2c9b89c168 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -10,6 +10,7 @@
@@ -10996,17 +11239,17 @@ diff --git a/init/Makefile b/init/Makefile
index c4fb45525d08..821190dfaa75 100644
--- a/init/Makefile
+++ b/init/Makefile
-@@ -35,4 +35,4 @@ $(obj)/version.o: include/generated/compile.h
+@@ -35,4 +35,4 @@ silent_chk_compile.h = :
include/generated/compile.h: FORCE
@$($(quiet)chk_compile.h)
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
- "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
+ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
diff --git a/init/main.c b/init/main.c
-index ae3996ae9bac..6470deef01c9 100644
+index 25bac88bc66e..a4a61e7d2248 100644
--- a/init/main.c
+++ b/init/main.c
-@@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void)
+@@ -506,6 +506,7 @@ asmlinkage __visible void __init start_kernel(void)
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
@@ -11131,7 +11374,7 @@ index 3f9c97419f02..11dbe26a8279 100644
config PREEMPT_COUNT
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
-index a3d2aad2443f..bb6b252648ff 100644
+index 4c233437ee1a..6c3c9f298f22 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5041,10 +5041,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
@@ -11158,7 +11401,7 @@ index a3d2aad2443f..bb6b252648ff 100644
}
static void init_and_link_css(struct cgroup_subsys_state *css,
-@@ -5740,6 +5740,7 @@ static int __init cgroup_wq_init(void)
+@@ -5749,6 +5749,7 @@ static int __init cgroup_wq_init(void)
*/
cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
BUG_ON(!cgroup_destroy_wq);
@@ -11167,7 +11410,7 @@ index a3d2aad2443f..bb6b252648ff 100644
/*
* Used to destroy pidlists and separate to serve as flush domain.
diff --git a/kernel/cpu.c b/kernel/cpu.c
-index 99c6c568bc55..f1c64e563970 100644
+index 26a4f74bff83..010db3c943cd 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -239,6 +239,289 @@ static struct {
@@ -11460,7 +11703,7 @@ index 99c6c568bc55..f1c64e563970 100644
void get_online_cpus(void)
{
-@@ -789,10 +1072,14 @@ static int takedown_cpu(unsigned int cpu)
+@@ -802,10 +1085,14 @@ static int takedown_cpu(unsigned int cpu)
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int err;
@@ -11475,7 +11718,7 @@ index 99c6c568bc55..f1c64e563970 100644
/*
* Prevent irq alloc/free while the dying cpu reorganizes the
* interrupt affinities.
-@@ -877,6 +1164,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+@@ -890,6 +1177,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int prev_state, ret = 0;
bool hasdied = false;
@@ -11485,7 +11728,7 @@ index 99c6c568bc55..f1c64e563970 100644
if (num_online_cpus() == 1)
return -EBUSY;
-@@ -884,7 +1174,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+@@ -897,7 +1187,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
if (!cpu_present(cpu))
return -EINVAL;
@@ -11520,7 +11763,7 @@ index 99c6c568bc55..f1c64e563970 100644
cpuhp_tasks_frozen = tasks_frozen;
-@@ -923,10 +1240,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+@@ -936,10 +1253,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
out:
@@ -11536,7 +11779,7 @@ index 99c6c568bc55..f1c64e563970 100644
return ret;
}
-@@ -1240,6 +1562,8 @@ core_initcall(cpu_hotplug_pm_sync_init);
+@@ -1242,6 +1564,8 @@ core_initcall(cpu_hotplug_pm_sync_init);
#endif /* CONFIG_PM_SLEEP_SMP */
@@ -11545,7 +11788,7 @@ index 99c6c568bc55..f1c64e563970 100644
#endif /* CONFIG_SMP */
/* Boot processor state steps */
-@@ -1924,6 +2248,10 @@ void __init boot_cpu_init(void)
+@@ -1926,6 +2250,10 @@ void __init boot_cpu_init(void)
set_cpu_active(cpu, true);
set_cpu_present(cpu, true);
set_cpu_possible(cpu, true);
@@ -11556,11 +11799,134 @@ index 99c6c568bc55..f1c64e563970 100644
}
/*
+diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
+index 009cc9a17d95..67b02e138a47 100644
+--- a/kernel/cpu_pm.c
++++ b/kernel/cpu_pm.c
+@@ -22,15 +22,21 @@
+ #include <linux/spinlock.h>
+ #include <linux/syscore_ops.h>
+
+-static DEFINE_RWLOCK(cpu_pm_notifier_lock);
+-static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain);
++static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+
+ static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
+ {
+ int ret;
+
+- ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
++ /*
++ * __atomic_notifier_call_chain has a RCU read critical section, which
++ * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
++ * RCU know this.
++ */
++ rcu_irq_enter_irqson();
++ ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
+ nr_to_call, nr_calls);
++ rcu_irq_exit_irqson();
+
+ return notifier_to_errno(ret);
+ }
+@@ -47,14 +53,7 @@ static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
+ */
+ int cpu_pm_register_notifier(struct notifier_block *nb)
+ {
+- unsigned long flags;
+- int ret;
+-
+- write_lock_irqsave(&cpu_pm_notifier_lock, flags);
+- ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+- write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
+-
+- return ret;
++ return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
+
+@@ -69,14 +68,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
+ */
+ int cpu_pm_unregister_notifier(struct notifier_block *nb)
+ {
+- unsigned long flags;
+- int ret;
+-
+- write_lock_irqsave(&cpu_pm_notifier_lock, flags);
+- ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+- write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
+-
+- return ret;
++ return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
+
+@@ -100,7 +92,6 @@ int cpu_pm_enter(void)
+ int nr_calls;
+ int ret = 0;
+
+- read_lock(&cpu_pm_notifier_lock);
+ ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
+ if (ret)
+ /*
+@@ -108,7 +99,6 @@ int cpu_pm_enter(void)
+ * PM entry who are notified earlier to prepare for it.
+ */
+ cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
+- read_unlock(&cpu_pm_notifier_lock);
+
+ return ret;
+ }
+@@ -128,13 +118,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
+ */
+ int cpu_pm_exit(void)
+ {
+- int ret;
+-
+- read_lock(&cpu_pm_notifier_lock);
+- ret = cpu_pm_notify(CPU_PM_EXIT, -1, NULL);
+- read_unlock(&cpu_pm_notifier_lock);
+-
+- return ret;
++ return cpu_pm_notify(CPU_PM_EXIT, -1, NULL);
+ }
+ EXPORT_SYMBOL_GPL(cpu_pm_exit);
+
+@@ -159,7 +143,6 @@ int cpu_cluster_pm_enter(void)
+ int nr_calls;
+ int ret = 0;
+
+- read_lock(&cpu_pm_notifier_lock);
+ ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
+ if (ret)
+ /*
+@@ -167,7 +150,6 @@ int cpu_cluster_pm_enter(void)
+ * PM entry who are notified earlier to prepare for it.
+ */
+ cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
+- read_unlock(&cpu_pm_notifier_lock);
+
+ return ret;
+ }
+@@ -190,13 +172,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
+ */
+ int cpu_cluster_pm_exit(void)
+ {
+- int ret;
+-
+- read_lock(&cpu_pm_notifier_lock);
+- ret = cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL);
+- read_unlock(&cpu_pm_notifier_lock);
+-
+- return ret;
++ return cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL);
+ }
+ EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit);
+
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
-index 29f815d2ef7e..341b17f24f95 100644
+index 511b1dd8ff09..1dd63833ecdc 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
-@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
+@@ -285,7 +285,7 @@ static struct cpuset top_cpuset = {
*/
static DEFINE_MUTEX(cpuset_mutex);
@@ -11569,7 +11935,7 @@ index 29f815d2ef7e..341b17f24f95 100644
static struct workqueue_struct *cpuset_migrate_mm_wq;
-@@ -907,9 +907,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
+@@ -908,9 +908,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
continue;
rcu_read_unlock();
@@ -11581,7 +11947,7 @@ index 29f815d2ef7e..341b17f24f95 100644
WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
-@@ -974,9 +974,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+@@ -975,9 +975,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (retval < 0)
return retval;
@@ -11593,7 +11959,7 @@ index 29f815d2ef7e..341b17f24f95 100644
/* use trialcs->cpus_allowed as a temp variable */
update_cpumasks_hier(cs, trialcs->cpus_allowed);
-@@ -1176,9 +1176,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
+@@ -1177,9 +1177,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
continue;
rcu_read_unlock();
@@ -11605,7 +11971,7 @@ index 29f815d2ef7e..341b17f24f95 100644
WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
!nodes_equal(cp->mems_allowed, cp->effective_mems));
-@@ -1246,9 +1246,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
+@@ -1247,9 +1247,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
if (retval < 0)
goto done;
@@ -11617,7 +11983,7 @@ index 29f815d2ef7e..341b17f24f95 100644
/* use trialcs->mems_allowed as a temp variable */
update_nodemasks_hier(cs, &trialcs->mems_allowed);
-@@ -1339,9 +1339,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+@@ -1340,9 +1340,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|| (is_spread_page(cs) != is_spread_page(trialcs)));
@@ -11629,7 +11995,7 @@ index 29f815d2ef7e..341b17f24f95 100644
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
rebuild_sched_domains_locked();
-@@ -1756,7 +1756,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
+@@ -1757,7 +1757,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
cpuset_filetype_t type = seq_cft(sf)->private;
int ret = 0;
@@ -11638,7 +12004,7 @@ index 29f815d2ef7e..341b17f24f95 100644
switch (type) {
case FILE_CPULIST:
-@@ -1775,7 +1775,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
+@@ -1776,7 +1776,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
ret = -EINVAL;
}
@@ -11647,7 +12013,7 @@ index 29f815d2ef7e..341b17f24f95 100644
return ret;
}
-@@ -1989,12 +1989,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
+@@ -1991,12 +1991,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
cpuset_inc();
@@ -11662,7 +12028,7 @@ index 29f815d2ef7e..341b17f24f95 100644
if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
goto out_unlock;
-@@ -2021,12 +2021,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
+@@ -2023,12 +2023,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
}
rcu_read_unlock();
@@ -11677,7 +12043,7 @@ index 29f815d2ef7e..341b17f24f95 100644
out_unlock:
mutex_unlock(&cpuset_mutex);
return 0;
-@@ -2065,7 +2065,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
+@@ -2067,7 +2067,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
static void cpuset_bind(struct cgroup_subsys_state *root_css)
{
mutex_lock(&cpuset_mutex);
@@ -11686,7 +12052,7 @@ index 29f815d2ef7e..341b17f24f95 100644
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
-@@ -2076,7 +2076,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+@@ -2078,7 +2078,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
top_cpuset.mems_allowed = top_cpuset.effective_mems;
}
@@ -11695,7 +12061,7 @@ index 29f815d2ef7e..341b17f24f95 100644
mutex_unlock(&cpuset_mutex);
}
-@@ -2177,12 +2177,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
+@@ -2179,12 +2179,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
{
bool is_empty;
@@ -11710,7 +12076,7 @@ index 29f815d2ef7e..341b17f24f95 100644
/*
* Don't call update_tasks_cpumask() if the cpuset becomes empty,
-@@ -2219,10 +2219,10 @@ hotplug_update_tasks(struct cpuset *cs,
+@@ -2221,10 +2221,10 @@ hotplug_update_tasks(struct cpuset *cs,
if (nodes_empty(*new_mems))
*new_mems = parent_cs(cs)->effective_mems;
@@ -11723,7 +12089,7 @@ index 29f815d2ef7e..341b17f24f95 100644
if (cpus_updated)
update_tasks_cpumask(cs);
-@@ -2308,21 +2308,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
+@@ -2317,21 +2317,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* synchronize cpus_allowed to cpu_active_mask */
if (cpus_updated) {
@@ -11749,7 +12115,7 @@ index 29f815d2ef7e..341b17f24f95 100644
update_tasks_nodemask(&top_cpuset);
}
-@@ -2420,11 +2420,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+@@ -2436,11 +2436,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
unsigned long flags;
@@ -11763,7 +12129,7 @@ index 29f815d2ef7e..341b17f24f95 100644
}
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
-@@ -2472,11 +2472,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
+@@ -2488,11 +2488,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
nodemask_t mask;
unsigned long flags;
@@ -11777,7 +12143,7 @@ index 29f815d2ef7e..341b17f24f95 100644
return mask;
}
-@@ -2568,14 +2568,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
+@@ -2584,14 +2584,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
return true;
/* Not hardwall and node outside mems_allowed: scan up cpusets */
@@ -11836,7 +12202,7 @@ index fc1ef736253c..83c666537a7a 100644
return r;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
-index 07c0dc806dfc..baf1a2867d74 100644
+index 36ff2d93f222..554aebf7e88b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1050,6 +1050,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
@@ -11847,7 +12213,7 @@ index 07c0dc806dfc..baf1a2867d74 100644
}
static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
-@@ -8363,6 +8364,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
+@@ -8390,6 +8391,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hwc->hrtimer.function = perf_swevent_hrtimer;
@@ -11869,7 +12235,7 @@ index 3076f3089919..fb2ebcf3ca7c 100644
spin_unlock(&sighand->siglock);
diff --git a/kernel/fork.c b/kernel/fork.c
-index ba8a01564985..416d91e4af97 100644
+index 9321b1ad3335..276acd8acf0a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -76,6 +76,7 @@
@@ -11925,7 +12291,15 @@ index ba8a01564985..416d91e4af97 100644
void __init __weak arch_task_cache_init(void) { }
-@@ -852,6 +875,19 @@ void __mmdrop(struct mm_struct *mm)
+@@ -535,6 +558,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ tsk->splice_pipe = NULL;
+ tsk->task_frag.page = NULL;
+ tsk->wake_q.next = NULL;
++ tsk->wake_q_sleeper.next = NULL;
+
+ account_kernel_stack(tsk, 1);
+
+@@ -861,6 +885,19 @@ void __mmdrop(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -11945,7 +12319,7 @@ index ba8a01564985..416d91e4af97 100644
static inline void __mmput(struct mm_struct *mm)
{
VM_BUG_ON(atomic_read(&mm->mm_users));
-@@ -1417,6 +1453,7 @@ static void rt_mutex_init_task(struct task_struct *p)
+@@ -1426,6 +1463,7 @@ static void rt_mutex_init_task(struct task_struct *p)
#ifdef CONFIG_RT_MUTEXES
p->pi_waiters = RB_ROOT;
p->pi_waiters_leftmost = NULL;
@@ -11953,7 +12327,7 @@ index ba8a01564985..416d91e4af97 100644
p->pi_blocked_on = NULL;
#endif
}
-@@ -1426,6 +1463,9 @@ static void rt_mutex_init_task(struct task_struct *p)
+@@ -1435,6 +1473,9 @@ static void rt_mutex_init_task(struct task_struct *p)
*/
static void posix_cpu_timers_init(struct task_struct *tsk)
{
@@ -11963,7 +12337,7 @@ index ba8a01564985..416d91e4af97 100644
tsk->cputime_expires.prof_exp = 0;
tsk->cputime_expires.virt_exp = 0;
tsk->cputime_expires.sched_exp = 0;
-@@ -1552,6 +1592,7 @@ static __latent_entropy struct task_struct *copy_process(
+@@ -1561,6 +1602,7 @@ static __latent_entropy struct task_struct *copy_process(
spin_lock_init(&p->alloc_lock);
init_sigpending(&p->pending);
@@ -11972,10 +12346,10 @@ index ba8a01564985..416d91e4af97 100644
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
diff --git a/kernel/futex.c b/kernel/futex.c
-index 4c6b6e697b73..d9bab63efccb 100644
+index 88bad86180ac..2e074d63e8fa 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
-@@ -800,7 +800,7 @@ static int refill_pi_state_cache(void)
+@@ -801,7 +801,7 @@ static int refill_pi_state_cache(void)
return 0;
}
@@ -11984,7 +12358,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
{
struct futex_pi_state *pi_state = current->pi_state_cache;
-@@ -810,6 +810,11 @@ static struct futex_pi_state * alloc_pi_state(void)
+@@ -811,6 +811,11 @@ static struct futex_pi_state * alloc_pi_state(void)
return pi_state;
}
@@ -11996,7 +12370,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/*
* Drops a reference to the pi_state object and frees or caches it
* when the last reference is gone.
-@@ -854,7 +859,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
+@@ -855,7 +860,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
* Look up the task based on what TID userspace gave us.
* We dont trust it.
*/
@@ -12005,7 +12379,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
{
struct task_struct *p;
-@@ -904,7 +909,9 @@ void exit_pi_state_list(struct task_struct *curr)
+@@ -905,7 +910,9 @@ void exit_pi_state_list(struct task_struct *curr)
* task still owns the PI-state:
*/
if (head->next != next) {
@@ -12015,7 +12389,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
continue;
}
-@@ -914,10 +921,12 @@ void exit_pi_state_list(struct task_struct *curr)
+@@ -915,10 +922,12 @@ void exit_pi_state_list(struct task_struct *curr)
pi_state->owner = NULL;
raw_spin_unlock_irq(&curr->pi_lock);
@@ -12030,7 +12404,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
raw_spin_lock_irq(&curr->pi_lock);
}
raw_spin_unlock_irq(&curr->pi_lock);
-@@ -971,6 +980,39 @@ void exit_pi_state_list(struct task_struct *curr)
+@@ -972,6 +981,39 @@ void exit_pi_state_list(struct task_struct *curr)
*
* [10] There is no transient state which leaves owner and user space
* TID out of sync.
@@ -12070,7 +12444,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
*/
/*
-@@ -978,10 +1020,13 @@ void exit_pi_state_list(struct task_struct *curr)
+@@ -979,10 +1021,13 @@ void exit_pi_state_list(struct task_struct *curr)
* the pi_state against the user space value. If correct, attach to
* it.
*/
@@ -12085,7 +12459,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/*
* Userspace might have messed up non-PI and PI futexes [3]
-@@ -989,9 +1034,39 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+@@ -990,9 +1035,39 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
if (unlikely(!pi_state))
return -EINVAL;
@@ -12125,7 +12499,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
* Handle the owner died case:
*/
if (uval & FUTEX_OWNER_DIED) {
-@@ -1006,11 +1081,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+@@ -1007,11 +1082,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* is not 0. Inconsistent state. [5]
*/
if (pid)
@@ -12139,7 +12513,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
/*
-@@ -1022,14 +1097,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+@@ -1023,14 +1098,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* Take a ref on the state and return success. [6]
*/
if (!pid)
@@ -12156,7 +12530,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
/*
-@@ -1038,11 +1113,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+@@ -1039,11 +1114,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* user space TID. [9/10]
*/
if (pid != task_pid_vnr(pi_state->owner))
@@ -12189,7 +12563,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
/*
-@@ -1093,6 +1186,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
+@@ -1094,6 +1187,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
/*
* No existing pi state. First waiter. [2]
@@ -12199,7 +12573,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
*/
pi_state = alloc_pi_state();
-@@ -1117,17 +1213,18 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
+@@ -1118,17 +1214,18 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
return 0;
}
@@ -12222,7 +12596,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/*
* We are the first waiter - try to look up the owner based on
-@@ -1146,7 +1243,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+@@ -1147,7 +1244,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
return -EFAULT;
@@ -12231,7 +12605,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
return curval != uval ? -EAGAIN : 0;
}
-@@ -1174,7 +1271,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+@@ -1175,7 +1272,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
struct task_struct *task, int set_waiters)
{
u32 uval, newval, vpid = task_pid_vnr(task);
@@ -12240,7 +12614,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
int ret;
/*
-@@ -1200,9 +1297,9 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+@@ -1201,9 +1298,9 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* Lookup existing state first. If it exists, try to attach to
* its pi_state.
*/
@@ -12253,7 +12627,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/*
* No waiter and user TID is 0. We are here because the
-@@ -1283,50 +1380,45 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
+@@ -1284,50 +1381,45 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
wake_q_add(wake_q, p);
__unqueue_futex(q);
/*
@@ -12332,7 +12706,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
*/
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
-@@ -1335,6 +1427,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+@@ -1336,6 +1428,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
ret = -EFAULT;
@@ -12340,7 +12714,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
} else if (curval != uval) {
/*
* If a unconditional UNLOCK_PI operation (user space did not
-@@ -1347,10 +1440,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+@@ -1348,10 +1441,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
else
ret = -EINVAL;
}
@@ -12359,7 +12733,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
raw_spin_lock(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
-@@ -1363,22 +1460,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+@@ -1364,22 +1461,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
pi_state->owner = new_owner;
raw_spin_unlock(&new_owner->pi_lock);
@@ -12388,7 +12762,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
/*
-@@ -1824,7 +1914,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+@@ -1825,7 +1915,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* If that call succeeds then we have pi_state and an
* initial refcount on it.
*/
@@ -12397,7 +12771,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
switch (ret) {
-@@ -1907,7 +1997,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+@@ -1908,7 +1998,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* refcount on the pi_state and store the pointer in
* the futex_q object of the waiter.
*/
@@ -12406,7 +12780,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
this->pi_state = pi_state;
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
this->rt_waiter,
-@@ -1924,6 +2014,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+@@ -1925,6 +2015,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
@@ -12423,7 +12797,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
} else if (ret) {
/*
* rt_mutex_start_proxy_lock() detected a
-@@ -2007,20 +2107,7 @@ queue_unlock(struct futex_hash_bucket *hb)
+@@ -2008,20 +2108,7 @@ queue_unlock(struct futex_hash_bucket *hb)
hb_waiters_dec(hb);
}
@@ -12445,7 +12819,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
{
int prio;
-@@ -2037,6 +2124,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+@@ -2038,6 +2125,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
plist_node_init(&q->list, prio);
plist_add(&q->list, &hb->chain);
q->task = current;
@@ -12470,7 +12844,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
spin_unlock(&hb->lock);
}
-@@ -2123,10 +2228,13 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+@@ -2124,10 +2229,13 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
{
u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
@@ -12485,7 +12859,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/* Owner died? */
if (!pi_state->owner)
newtid |= FUTEX_OWNER_DIED;
-@@ -2134,7 +2242,8 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+@@ -2135,7 +2243,8 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
/*
* We are here either because we stole the rtmutex from the
* previous highest priority waiter or we are the highest priority
@@ -12495,7 +12869,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
* We have to replace the newowner TID in the user space variable.
* This must be atomic as we have to preserve the owner died bit here.
*
-@@ -2142,17 +2251,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+@@ -2143,17 +2252,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* because we can fault here. Imagine swapped out pages or a fork
* that marked all the anonymous memory readonly for cow.
*
@@ -12518,7 +12892,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
newval = (uval & FUTEX_OWNER_DIED) | newtid;
if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
-@@ -2167,47 +2275,60 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+@@ -2168,47 +2276,60 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* itself.
*/
if (pi_state->owner != NULL) {
@@ -12594,7 +12968,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
static long futex_wait_restart(struct restart_block *restart);
-@@ -2229,13 +2350,16 @@ static long futex_wait_restart(struct restart_block *restart);
+@@ -2230,13 +2351,16 @@ static long futex_wait_restart(struct restart_block *restart);
*/
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
@@ -12612,7 +12986,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
*/
if (q->pi_state->owner != current)
ret = fixup_pi_state_owner(uaddr, q, current);
-@@ -2243,43 +2367,15 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
+@@ -2244,43 +2368,15 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
}
/*
@@ -12658,7 +13032,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
out:
return ret ? ret : locked;
-@@ -2503,6 +2599,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2504,6 +2600,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
@@ -12667,7 +13041,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
int res, ret;
-@@ -2555,25 +2653,77 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2556,25 +2654,77 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
}
}
@@ -12692,8 +13066,8 @@ index 4c6b6e697b73..d9bab63efccb 100644
/* Fixup the trylock return value: */
ret = ret ? 0 : -EWOULDBLOCK;
+ goto no_block;
- }
-
++ }
++
+ rt_mutex_init_waiter(&rt_waiter, false);
+
+ /*
@@ -12728,8 +13102,8 @@ index 4c6b6e697b73..d9bab63efccb 100644
+
+ spin_lock(q.lock_ptr);
+ goto no_block;
-+ }
-+
+ }
+
+
+ if (unlikely(to))
+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
@@ -12754,7 +13128,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
*/
-@@ -2589,12 +2739,19 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2590,12 +2740,19 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
* If fixup_owner() faulted and was unable to handle the fault, unlock
* it and return the fault to userspace.
*/
@@ -12776,7 +13150,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
goto out_put_key;
out_unlock_put_key:
-@@ -2603,8 +2760,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2604,8 +2761,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
out_put_key:
put_futex_key(&q.key);
out:
@@ -12788,7 +13162,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
return ret != -EINTR ? ret : -ERESTARTNOINTR;
uaddr_faulted:
-@@ -2631,7 +2790,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+@@ -2632,7 +2791,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
union futex_key key = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb;
@@ -12797,7 +13171,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
int ret;
retry:
-@@ -2655,12 +2814,48 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+@@ -2656,12 +2815,48 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* all and we at least want to know if user space fiddled
* with the futex value instead of blindly unlocking.
*/
@@ -12812,9 +13186,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
+ if (!pi_state)
+ goto out_unlock;
+
- /*
-- * In case of success wake_futex_pi dropped the hash
-- * bucket lock.
++ /*
+ * If current does not own the pi_state then the futex is
+ * inconsistent and user space fiddled with the futex value.
+ */
@@ -12846,12 +13218,14 @@ index 4c6b6e697b73..d9bab63efccb 100644
+
+ put_pi_state(pi_state);
+
-+ /*
+ /*
+- * In case of success wake_futex_pi dropped the hash
+- * bucket lock.
+ * Success, we're done! No tricky corner cases.
*/
if (!ret)
goto out_putkey;
-@@ -2675,7 +2870,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+@@ -2676,7 +2871,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* setting the FUTEX_WAITERS bit. Try again.
*/
if (ret == -EAGAIN) {
@@ -12859,7 +13233,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
put_futex_key(&key);
goto retry;
}
-@@ -2683,7 +2877,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+@@ -2684,7 +2878,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* wake_futex_pi has detected invalid state. Tell user
* space.
*/
@@ -12868,7 +13242,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
/*
-@@ -2693,8 +2887,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+@@ -2694,8 +2888,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* preserve the WAITERS bit not the OWNER_DIED one. We are the
* owner.
*/
@@ -12880,7 +13254,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/*
* If uval has changed, let user space handle it.
-@@ -2708,7 +2904,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+@@ -2709,7 +2905,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
return ret;
pi_faulted:
@@ -12888,7 +13262,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
put_futex_key(&key);
ret = fault_in_user_writeable(uaddr);
-@@ -2812,8 +3007,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2813,8 +3008,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
u32 __user *uaddr2)
{
struct hrtimer_sleeper timeout, *to = NULL;
@@ -12899,7 +13273,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
int res, ret;
-@@ -2838,10 +3034,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2839,10 +3035,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* The waiter is allocated on our stack, manipulated by the requeue
* code while we sleep on uaddr.
*/
@@ -12911,7 +13285,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
-@@ -2872,20 +3065,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2873,20 +3066,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
@@ -12978,7 +13352,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/* Check if the requeue code acquired the second futex for us. */
if (!q.rt_waiter) {
-@@ -2894,16 +3122,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2895,16 +3123,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* did a lock-steal - fix up the PI-state in that case.
*/
if (q.pi_state && (q.pi_state->owner != current)) {
@@ -13002,7 +13376,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
}
} else {
struct rt_mutex *pi_mutex;
-@@ -2915,10 +3146,14 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2916,10 +3147,14 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
WARN_ON(!q.pi_state);
pi_mutex = &q.pi_state->pi_mutex;
@@ -13020,7 +13394,7 @@ index 4c6b6e697b73..d9bab63efccb 100644
/*
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
-@@ -2936,13 +3171,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+@@ -2937,13 +3172,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* the fault, unlock the rt_mutex and return the fault to
* userspace.
*/
@@ -13066,7 +13440,7 @@ index d3f24905852c..f87aa8fdcc51 100644
if (!noirqdebug)
note_interrupt(desc, retval);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
-index 6b669593e7eb..e357bf6c59d5 100644
+index ea41820ab12e..5994867526f3 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -22,6 +22,7 @@
@@ -13189,7 +13563,7 @@ index 6b669593e7eb..e357bf6c59d5 100644
wake_threads_waitq(desc);
}
-@@ -1336,6 +1389,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
+@@ -1338,6 +1391,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
}
@@ -13199,7 +13573,7 @@ index 6b669593e7eb..e357bf6c59d5 100644
/* Set default affinity mask once everything is setup */
setup_affinity(desc, mask);
-@@ -2061,7 +2117,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
+@@ -2063,7 +2119,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
* This call sets the internal irqchip state of an interrupt,
* depending on the value of @which.
*
@@ -13463,7 +13837,7 @@ index 6f88e352cd4f..6ff9e8011dd0 100644
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
-index 4d7ffc0a0d00..3d157b3128eb 100644
+index 6599c7f3071d..79f8e00e802e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -658,6 +658,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
@@ -13533,7 +13907,7 @@ index 4d7ffc0a0d00..3d157b3128eb 100644
return NULL;
}
-@@ -3410,7 +3423,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
+@@ -3417,7 +3430,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
* Clearly if the lock hasn't been acquired _ever_, we're not
* holding it either, so report failure.
*/
@@ -13542,7 +13916,7 @@ index 4d7ffc0a0d00..3d157b3128eb 100644
return 0;
/*
-@@ -3689,6 +3702,7 @@ static void check_flags(unsigned long flags)
+@@ -3696,6 +3709,7 @@ static void check_flags(unsigned long flags)
}
}
@@ -13550,7 +13924,7 @@ index 4d7ffc0a0d00..3d157b3128eb 100644
/*
* We dont accurately track softirq state in e.g.
* hardirq contexts (such as on 4KSTACKS), so only
-@@ -3703,6 +3717,7 @@ static void check_flags(unsigned long flags)
+@@ -3710,6 +3724,7 @@ static void check_flags(unsigned long flags)
DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
}
}
@@ -13558,7 +13932,7 @@ index 4d7ffc0a0d00..3d157b3128eb 100644
if (!debug_locks)
print_irqtrace_events(current);
-@@ -4159,7 +4174,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
+@@ -4166,7 +4181,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
* If the class exists we look it up and zap it:
*/
class = look_up_lock_class(lock, j);
@@ -13568,7 +13942,7 @@ index 4d7ffc0a0d00..3d157b3128eb 100644
}
/*
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
-index f8c5af52a131..788068773e61 100644
+index d3de04b12f8c..0f49abeae337 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -26,7 +26,6 @@
@@ -13979,7 +14353,7 @@ index d0519c3432b6..b585af9a1b50 100644
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
-index 2c49d76f96c3..eec63f064b3f 100644
+index 2c49d76f96c3..3a8b5d44aaf8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -7,6 +7,11 @@
@@ -13994,15 +14368,16 @@ index 2c49d76f96c3..eec63f064b3f 100644
*
* See Documentation/locking/rt-mutex-design.txt for details.
*/
-@@ -16,6 +21,7 @@
+@@ -16,6 +21,8 @@
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
+#include <linux/ww_mutex.h>
++#include <linux/blkdev.h>
#include "rtmutex_common.h"
-@@ -133,6 +139,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
+@@ -133,6 +140,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}
@@ -14015,36 +14390,20 @@ index 2c49d76f96c3..eec63f064b3f 100644
/*
* We can speed up the acquire/release, if there's no debugging state to be
* set up.
-@@ -222,12 +234,25 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+@@ -222,6 +235,12 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#endif
-+#define STEAL_NORMAL 0
-+#define STEAL_LATERAL 1
+/*
+ * Only use with rt_mutex_waiter_{less,equal}()
+ */
-+#define task_to_waiter(p) \
-+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
++#define task_to_waiter(p) &(struct rt_mutex_waiter) \
++ { .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) }
+
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
-- struct rt_mutex_waiter *right)
-+ struct rt_mutex_waiter *right, int mode)
- {
-- if (left->prio < right->prio)
-- return 1;
-+ if (mode == STEAL_NORMAL) {
-+ if (left->prio < right->prio)
-+ return 1;
-+ } else {
-+ if (left->prio <= right->prio)
-+ return 1;
-+ }
-
- /*
- * If both waiters have dl_prio(), we check the deadlines of the
-@@ -236,12 +261,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+ struct rt_mutex_waiter *right)
+@@ -236,12 +255,51 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
@@ -14073,29 +14432,32 @@ index 2c49d76f96c3..eec63f064b3f 100644
+
+ return 1;
+}
++
++#define STEAL_NORMAL 0
++#define STEAL_LATERAL 1
++
++static inline int
++rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode)
++{
++ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
++
++ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter))
++ return 1;
++
++ /*
++ * Note that RT tasks are excluded from lateral-steals
++ * to prevent the introduction of an unbounded latency.
++ */
++ if (mode == STEAL_NORMAL || rt_task(waiter->task))
++ return 0;
++
++ return rt_mutex_waiter_equal(waiter, top_waiter);
++}
+
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
-@@ -253,7 +296,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
- while (*link) {
- parent = *link;
- entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
-- if (rt_mutex_waiter_less(waiter, entry)) {
-+ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
- link = &parent->rb_left;
- } else {
- link = &parent->rb_right;
-@@ -292,7 +335,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
- while (*link) {
- parent = *link;
- entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
-- if (rt_mutex_waiter_less(waiter, entry)) {
-+ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
- link = &parent->rb_left;
- } else {
- link = &parent->rb_right;
-@@ -320,72 +363,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
+@@ -320,72 +378,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
@@ -14106,24 +14468,19 @@ index 2c49d76f96c3..eec63f064b3f 100644
- * the waiter is not allowed to do priority boosting
- */
-int rt_mutex_getprio(struct task_struct *task)
-+static void rt_mutex_adjust_prio(struct task_struct *p)
- {
+-{
- if (likely(!task_has_pi_waiters(task)))
- return task->normal_prio;
-+ struct task_struct *pi_task = NULL;
-
+-
- return min(task_top_pi_waiter(task)->prio,
- task->normal_prio);
-}
-+ lockdep_assert_held(&p->pi_lock);
-
+-
-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
- if (likely(!task_has_pi_waiters(task)))
- return NULL;
-+ if (task_has_pi_waiters(p))
-+ pi_task = task_top_pi_waiter(p)->task;
-
+-
- return task_top_pi_waiter(task)->task;
-}
-
@@ -14147,13 +14504,16 @@ index 2c49d76f96c3..eec63f064b3f 100644
- * This can be both boosting and unboosting. task->pi_lock must be held.
- */
-static void __rt_mutex_adjust_prio(struct task_struct *task)
--{
++static void rt_mutex_adjust_prio(struct task_struct *p)
+ {
- int prio = rt_mutex_getprio(task);
--
++ struct task_struct *pi_task = NULL;
+
- if (task->prio != prio || dl_prio(prio))
- rt_mutex_setprio(task, prio);
-}
--
++ lockdep_assert_held(&p->pi_lock);
+
-/*
- * Adjust task priority (undo boosting). Called from the exit path of
- * rt_mutex_slowunlock() and rt_mutex_slowlock().
@@ -14166,7 +14526,9 @@ index 2c49d76f96c3..eec63f064b3f 100644
-void rt_mutex_adjust_prio(struct task_struct *task)
-{
- unsigned long flags;
--
++ if (task_has_pi_waiters(p))
++ pi_task = task_top_pi_waiter(p)->task;
+
- raw_spin_lock_irqsave(&task->pi_lock, flags);
- __rt_mutex_adjust_prio(task);
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
@@ -14174,7 +14536,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
}
/*
-@@ -414,6 +401,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
+@@ -414,6 +416,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}
@@ -14189,7 +14551,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
/*
* Max number of times we'll walk the boosting chain:
*/
-@@ -421,7 +416,8 @@ int max_lock_depth = 1024;
+@@ -421,7 +431,8 @@ int max_lock_depth = 1024;
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
@@ -14199,7 +14561,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
}
/*
-@@ -557,7 +553,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -557,7 +568,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* reached or the state of the chain has changed while we
* dropped the locks.
*/
@@ -14208,7 +14570,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
goto out_unlock_pi;
/*
-@@ -608,7 +604,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -608,7 +619,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* enabled we continue, but stop the requeueing in the chain
* walk.
*/
@@ -14217,7 +14579,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
if (!detect_deadlock)
goto out_unlock_pi;
else
-@@ -704,7 +700,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -704,7 +715,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* [7] Requeue the waiter in the lock waiter tree. */
rt_mutex_dequeue(lock, waiter);
@@ -14244,7 +14606,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
rt_mutex_enqueue(lock, waiter);
/* [8] Release the task */
-@@ -719,13 +734,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -719,13 +749,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* follow here. This is the end of the chain we are walking.
*/
if (!rt_mutex_owner(lock)) {
@@ -14263,7 +14625,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
-@@ -745,7 +763,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -745,7 +778,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
*/
rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
rt_mutex_enqueue_pi(task, waiter);
@@ -14272,7 +14634,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
} else if (prerequeue_top_waiter == waiter) {
/*
-@@ -761,7 +779,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -761,7 +794,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
rt_mutex_enqueue_pi(task, waiter);
@@ -14281,7 +14643,7 @@ index 2c49d76f96c3..eec63f064b3f 100644
} else {
/*
* Nothing changed. No need to do any priority
-@@ -818,6 +836,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -818,6 +851,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
return ret;
}
@@ -14289,9 +14651,11 @@ index 2c49d76f96c3..eec63f064b3f 100644
/*
* Try to take an rt-mutex
*
-@@ -828,9 +847,12 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+@@ -827,10 +861,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * @task: The task which wants to acquire the lock
* @waiter: The waiter that is queued to the lock's wait tree if the
* callsite called task_blocked_on_lock(), otherwise NULL
++ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL)
*/
-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
- struct rt_mutex_waiter *waiter)
@@ -14304,48 +14668,41 @@ index 2c49d76f96c3..eec63f064b3f 100644
/*
* Before testing whether we can acquire @lock, we set the
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
-@@ -866,8 +888,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
- * If waiter is not the highest priority waiter of
- * @lock, give up.
+@@ -863,12 +901,11 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ */
+ if (waiter) {
+ /*
+- * If waiter is not the highest priority waiter of
+- * @lock, give up.
++ * If waiter is not the highest priority waiter of @lock,
++ * or its peer when lateral steal is allowed, give up.
*/
- if (waiter != rt_mutex_top_waiter(lock))
-+ if (waiter != rt_mutex_top_waiter(lock)) {
-+ /* XXX rt_mutex_waiter_less() ? */
++ if (!rt_mutex_steal(lock, waiter, mode))
return 0;
-+ }
-
+-
/*
* We can acquire the lock. Remove the waiter from the
-@@ -885,14 +909,26 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
- * not need to be dequeued.
+ * lock waiters tree.
+@@ -886,13 +923,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
*/
if (rt_mutex_has_waiters(lock)) {
-+ struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
-+
-+ if (task != pown)
-+ return 0;
-+
-+ /*
-+ * Note that RT tasks are excluded from lateral-steals
-+ * to prevent the introduction of an unbounded latency.
-+ */
-+ if (rt_task(task))
-+ mode = STEAL_NORMAL;
/*
- * If @task->prio is greater than or equal to
- * the top waiter priority (kernel view),
- * @task lost.
+- * If @task->prio is greater than or equal to
+- * the top waiter priority (kernel view),
+- * @task lost.
++ * If @task->prio is greater than the top waiter
++ * priority (kernel view), or equal to it when a
++ * lateral steal is forbidden, @task lost.
*/
- if (task->prio >= rt_mutex_top_waiter(lock)->prio)
-+ if (!rt_mutex_waiter_less(task_to_waiter(task),
-+ rt_mutex_top_waiter(lock),
-+ mode))
++ if (!rt_mutex_steal(lock, task_to_waiter(task), mode))
return 0;
-
/*
* The current top waiter stays enqueued. We
* don't have to change anything in the lock
-@@ -936,11 +972,384 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+@@ -936,177 +972,589 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
*/
rt_mutex_set_owner(lock, task);
@@ -14355,25 +14712,58 @@ index 2c49d76f96c3..eec63f064b3f 100644
}
+#ifdef CONFIG_PREEMPT_RT_FULL
-+/*
+ /*
+- * Task blocks on lock.
+- *
+- * Prepare waiter and propagate pi chain
+- *
+- * This must be called with lock->wait_lock held and interrupts disabled
+ * preemptible spin_lock functions:
-+ */
+ */
+-static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+- struct rt_mutex_waiter *waiter,
+- struct task_struct *task,
+- enum rtmutex_chainwalk chwalk)
+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
+ void (*slowfn)(struct rt_mutex *lock,
+ bool mg_off),
+ bool do_mig_dis)
-+{
+ {
+- struct task_struct *owner = rt_mutex_owner(lock);
+- struct rt_mutex_waiter *top_waiter = waiter;
+- struct rt_mutex *next_lock;
+- int chain_walk = 0, res;
+ might_sleep_no_state_check();
-+
+
+- /*
+- * Early deadlock detection. We really don't want the task to
+- * enqueue on itself just to untangle the mess later. It's not
+- * only an optimization. We drop the locks, so another waiter
+- * can come in before the chain walk detects the deadlock. So
+- * the other will detect the deadlock and return -EDEADLOCK,
+- * which is wrong, as the other waiter is not in a deadlock
+- * situation.
+- */
+- if (owner == task)
+- return -EDEADLK;
+ if (do_mig_dis)
+ migrate_disable();
-+
+
+- raw_spin_lock(&task->pi_lock);
+- __rt_mutex_adjust_prio(task);
+- waiter->task = task;
+- waiter->lock = lock;
+- waiter->prio = task->prio;
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
+ return;
+ else
+ slowfn(lock, do_mig_dis);
+}
-+
+
+- /* Get the top priority waiter on the lock */
+- if (rt_mutex_has_waiters(lock))
+- top_waiter = rt_mutex_top_waiter(lock);
+- rt_mutex_enqueue(lock, waiter);
+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
+ void (*slowfn)(struct rt_mutex *lock))
+{
@@ -14391,7 +14781,8 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ struct task_struct *owner)
+{
+ int res = 0;
-+
+
+- task->pi_blocked_on = waiter;
+ rcu_read_lock();
+ for (;;) {
+ if (owner != rt_mutex_owner(lock))
@@ -14417,7 +14808,8 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ return 1;
+}
+#endif
-+
+
+- raw_spin_unlock(&task->pi_lock);
+static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task,
@@ -14436,44 +14828,85 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ struct rt_mutex_waiter waiter, *top_waiter;
+ unsigned long flags;
+ int ret;
-+
+
+- if (!owner)
+- return 0;
+ rt_mutex_init_waiter(&waiter, true);
-+
+
+- raw_spin_lock(&owner->pi_lock);
+- if (waiter == rt_mutex_top_waiter(lock)) {
+- rt_mutex_dequeue_pi(owner, top_waiter);
+- rt_mutex_enqueue_pi(owner, waiter);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
-+
+
+- __rt_mutex_adjust_prio(owner);
+- if (owner->pi_blocked_on)
+- chain_walk = 1;
+- } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
+- chain_walk = 1;
+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ return;
-+ }
-+
+ }
+
+- /* Store the lock on which owner is blocked or NULL */
+- next_lock = task_blocked_on_lock(owner);
+ BUG_ON(rt_mutex_owner(lock) == self);
-+
-+ /*
+
+- raw_spin_unlock(&owner->pi_lock);
+ /*
+- * Even if full deadlock detection is on, if the owner is not
+- * blocked itself, we can avoid finding this out in the chain
+- * walk.
+ * We save whatever state the task is in and we'll restore it
+ * after acquiring the lock taking real wakeups into account
+ * as well. We are serialized via pi_lock against wakeups. See
+ * try_to_wake_up().
-+ */
+ */
+- if (!chain_walk || !next_lock)
+- return 0;
+ raw_spin_lock(&self->pi_lock);
+ self->saved_state = self->state;
+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+ raw_spin_unlock(&self->pi_lock);
-+
+
+- /*
+- * The owner can't disappear while holding a lock,
+- * so the owner struct is protected by wait_lock.
+- * Gets dropped in rt_mutex_adjust_prio_chain()!
+- */
+- get_task_struct(owner);
+ ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
+ BUG_ON(ret);
-+
+
+- raw_spin_unlock_irq(&lock->wait_lock);
+ for (;;) {
+ /* Try to acquire the lock again. */
+ if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
+ break;
-+
+
+- res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
+- next_lock, waiter, task);
+ top_waiter = rt_mutex_top_waiter(lock);
+ lock_owner = rt_mutex_owner(lock);
-+
+
+- raw_spin_lock_irq(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-+
+
+- return res;
+-}
+ debug_rt_mutex_print_deadlock(&waiter);
-+
+
+-/*
+- * Remove the top waiter from the current tasks pi waiter tree and
+- * queue it up.
+- *
+- * Called with lock->wait_lock held and interrupts disabled.
+- */
+-static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+- struct rt_mutex *lock)
+-{
+- struct rt_mutex_waiter *waiter;
+ if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
+ if (mg_off)
+ migrate_enable();
@@ -14481,87 +14914,129 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ if (mg_off)
+ migrate_disable();
+ }
-+
+
+- raw_spin_lock(&current->pi_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
-+
+
+- waiter = rt_mutex_top_waiter(lock);
+ raw_spin_lock(&self->pi_lock);
+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+ raw_spin_unlock(&self->pi_lock);
+ }
-+
-+ /*
+
+ /*
+- * Remove it from current->pi_waiters. We do not adjust a
+- * possible priority boost right now. We execute wakeup in the
+- * boosted mode and go back to normal after releasing
+- * lock->wait_lock.
+ * Restore the task state to current->saved_state. We set it
+ * to the original state above and the try_to_wake_up() code
+ * has possibly updated it when a real (non-rtmutex) wakeup
+ * happened while we were blocked. Clear saved_state so
+ * try_to_wakeup() does not get confused.
-+ */
+ */
+- rt_mutex_dequeue_pi(current, waiter);
+ raw_spin_lock(&self->pi_lock);
+ __set_current_state_no_track(self->saved_state);
+ self->saved_state = TASK_RUNNING;
+ raw_spin_unlock(&self->pi_lock);
-+
-+ /*
+
+ /*
+- * As we are waking up the top waiter, and the waiter stays
+- * queued on the lock until it gets the lock, this lock
+- * obviously has waiters. Just set the bit here and this has
+- * the added benefit of forcing all new tasks into the
+- * slow path making sure no task of lower priority than
+- * the top waiter can steal this lock.
+ * try_to_take_rt_mutex() sets the waiter bit
+ * unconditionally. We might have to fix that up:
-+ */
+ */
+- lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
+ fixup_rt_mutex_waiters(lock);
-+
+
+- raw_spin_unlock(&current->pi_lock);
+ BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
+ BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
-+
+
+- wake_q_add(wake_q, waiter->task);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ debug_rt_mutex_free_waiter(&waiter);
-+}
-+
+ }
+
+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
+ struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper);
-+/*
+ /*
+- * Remove a waiter from a lock and give up
+- *
+- * Must be called with lock->wait_lock held and interrupts disabled. I must
+- * have just failed to try_to_take_rt_mutex().
+ * Slow path to release a rt_mutex spin_lock style
-+ */
+ */
+-static void remove_waiter(struct rt_mutex *lock,
+- struct rt_mutex_waiter *waiter)
+static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
-+{
+ {
+- bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
+- struct task_struct *owner = rt_mutex_owner(lock);
+- struct rt_mutex *next_lock;
+ unsigned long flags;
+ WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
+ bool postunlock;
-+
+
+- raw_spin_lock(&current->pi_lock);
+- rt_mutex_dequeue(lock, waiter);
+- current->pi_blocked_on = NULL;
+- raw_spin_unlock(&current->pi_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-+
+
+- /*
+- * Only update priority if the waiter was the highest priority
+- * waiter of the lock and there is an owner to update.
+- */
+- if (!owner || !is_top_waiter)
+- return;
+ if (postunlock)
+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+}
-+
+
+- raw_spin_lock(&owner->pi_lock);
+void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
+{
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+}
+EXPORT_SYMBOL(rt_spin_lock__no_mg);
-+
+
+- rt_mutex_dequeue_pi(owner, waiter);
+void __lockfunc rt_spin_lock(spinlock_t *lock)
+{
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+}
+EXPORT_SYMBOL(rt_spin_lock);
-+
+
+- if (rt_mutex_has_waiters(lock))
+- rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
+void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
+{
+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
+}
+EXPORT_SYMBOL(__rt_spin_lock);
-+
+
+- __rt_mutex_adjust_prio(owner);
+void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
+{
+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
+}
+EXPORT_SYMBOL(__rt_spin_lock__no_mg);
-+
+
+- /* Store the lock on which owner is blocked or NULL */
+- next_lock = task_blocked_on_lock(owner);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
+{
@@ -14570,7 +15045,8 @@ index 2c49d76f96c3..eec63f064b3f 100644
+}
+EXPORT_SYMBOL(rt_spin_lock_nested);
+#endif
-+
+
+- raw_spin_unlock(&owner->pi_lock);
+void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
+{
+ /* NOTE: we always pass in '1' for nested, for simplicity */
@@ -14578,7 +15054,8 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
+}
+EXPORT_SYMBOL(rt_spin_unlock__no_mg);
-+
+
+- /*
+void __lockfunc rt_spin_unlock(spinlock_t *lock)
+{
+ /* NOTE: we always pass in '1' for nested, for simplicity */
@@ -14729,23 +15206,38 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
+}
+
- /*
- * Task blocks on lock.
- *
-@@ -958,6 +1367,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
- struct rt_mutex *next_lock;
- int chain_walk = 0, res;
-
++/*
++ * Task blocks on lock.
++ *
++ * Prepare waiter and propagate pi chain
++ *
++ * This must be called with lock->wait_lock held and interrupts disabled
++ */
++static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
++ struct rt_mutex_waiter *waiter,
++ struct task_struct *task,
++ enum rtmutex_chainwalk chwalk)
++{
++ struct task_struct *owner = rt_mutex_owner(lock);
++ struct rt_mutex_waiter *top_waiter = waiter;
++ struct rt_mutex *next_lock;
++ int chain_walk = 0, res;
++
+ lockdep_assert_held(&lock->wait_lock);
+
- /*
- * Early deadlock detection. We really don't want the task to
- * enqueue on itself just to untangle the mess later. It's not
-@@ -971,10 +1382,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
- return -EDEADLK;
-
- raw_spin_lock(&task->pi_lock);
-- __rt_mutex_adjust_prio(task);
++ /*
++ * Early deadlock detection. We really don't want the task to
++ * enqueue on itself just to untangle the mess later. It's not
++ * only an optimization. We drop the locks, so another waiter
++ * can come in before the chain walk detects the deadlock. So
++ * the other will detect the deadlock and return -EDEADLOCK,
++ * which is wrong, as the other waiter is not in a deadlock
++ * situation.
++ */
++ if (owner == task)
++ return -EDEADLK;
++
++ raw_spin_lock(&task->pi_lock);
+
+ /*
+ * In the case of futex requeue PI, this will be a proxy
@@ -14764,55 +15256,100 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
+
+ rt_mutex_adjust_prio(task);
- waiter->task = task;
- waiter->lock = lock;
- waiter->prio = task->prio;
++ waiter->task = task;
++ waiter->lock = lock;
++ waiter->prio = task->prio;
+ waiter->deadline = task->dl.deadline;
-
- /* Get the top priority waiter on the lock */
- if (rt_mutex_has_waiters(lock))
-@@ -993,8 +1422,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
- rt_mutex_dequeue_pi(owner, top_waiter);
- rt_mutex_enqueue_pi(owner, waiter);
-
-- __rt_mutex_adjust_prio(owner);
-- if (owner->pi_blocked_on)
++
++ /* Get the top priority waiter on the lock */
++ if (rt_mutex_has_waiters(lock))
++ top_waiter = rt_mutex_top_waiter(lock);
++ rt_mutex_enqueue(lock, waiter);
++
++ task->pi_blocked_on = waiter;
++
++ raw_spin_unlock(&task->pi_lock);
++
++ if (!owner)
++ return 0;
++
++ raw_spin_lock(&owner->pi_lock);
++ if (waiter == rt_mutex_top_waiter(lock)) {
++ rt_mutex_dequeue_pi(owner, top_waiter);
++ rt_mutex_enqueue_pi(owner, waiter);
++
+ rt_mutex_adjust_prio(owner);
+ if (rt_mutex_real_waiter(owner->pi_blocked_on))
- chain_walk = 1;
- } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
- chain_walk = 1;
-@@ -1036,6 +1465,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
- * Called with lock->wait_lock held and interrupts disabled.
- */
- static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
++ chain_walk = 1;
++ } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
++ chain_walk = 1;
++ }
++
++ /* Store the lock on which owner is blocked or NULL */
++ next_lock = task_blocked_on_lock(owner);
++
++ raw_spin_unlock(&owner->pi_lock);
++ /*
++ * Even if full deadlock detection is on, if the owner is not
++ * blocked itself, we can avoid finding this out in the chain
++ * walk.
++ */
++ if (!chain_walk || !next_lock)
++ return 0;
++
++ /*
++ * The owner can't disappear while holding a lock,
++ * so the owner struct is protected by wait_lock.
++ * Gets dropped in rt_mutex_adjust_prio_chain()!
++ */
++ get_task_struct(owner);
++
++ raw_spin_unlock_irq(&lock->wait_lock);
++
++ res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
++ next_lock, waiter, task);
++
++ raw_spin_lock_irq(&lock->wait_lock);
++
++ return res;
++}
++
++/*
++ * Remove the top waiter from the current tasks pi waiter tree and
++ * queue it up.
++ *
++ * Called with lock->wait_lock held and interrupts disabled.
++ */
++static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+ struct wake_q_head *wake_sleeper_q,
- struct rt_mutex *lock)
- {
- struct rt_mutex_waiter *waiter;
-@@ -1045,12 +1475,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
- waiter = rt_mutex_top_waiter(lock);
-
- /*
-- * Remove it from current->pi_waiters. We do not adjust a
-- * possible priority boost right now. We execute wakeup in the
-- * boosted mode and go back to normal after releasing
-- * lock->wait_lock.
++ struct rt_mutex *lock)
++{
++ struct rt_mutex_waiter *waiter;
++
++ raw_spin_lock(&current->pi_lock);
++
++ waiter = rt_mutex_top_waiter(lock);
++
++ /*
+ * Remove it from current->pi_waiters and deboost.
+ *
+ * We must in fact deboost here in order to ensure we call
+ * rt_mutex_setprio() to update p->pi_top_task before the
+ * task unblocks.
- */
- rt_mutex_dequeue_pi(current, waiter);
++ */
++ rt_mutex_dequeue_pi(current, waiter);
+ rt_mutex_adjust_prio(current);
-
- /*
- * As we are waking up the top waiter, and the waiter stays
-@@ -1062,9 +1494,22 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
- */
- lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
-
++
++ /*
++ * As we are waking up the top waiter, and the waiter stays
++ * queued on the lock until it gets the lock, this lock
++ * obviously has waiters. Just set the bit here and this has
++ * the added benefit of forcing all new tasks into the
++ * slow path making sure no task of lower priority than
++ * the top waiter can steal this lock.
++ */
++ lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
++
+ /*
+ * We deboosted before waking the top waiter task such that we don't
+ * run two tasks with the 'same' priority (and ensure the
@@ -14825,40 +15362,58 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ */
+ preempt_disable();
+ if (waiter->savestate)
-+ wake_q_add(wake_sleeper_q, waiter->task);
++ wake_q_add_sleeper(wake_sleeper_q, waiter->task);
+ else
+ wake_q_add(wake_q, waiter->task);
- raw_spin_unlock(&current->pi_lock);
--
-- wake_q_add(wake_q, waiter->task);
- }
-
- /*
-@@ -1078,7 +1523,9 @@ static void remove_waiter(struct rt_mutex *lock,
- {
- bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
- struct task_struct *owner = rt_mutex_owner(lock);
-- struct rt_mutex *next_lock;
++ raw_spin_unlock(&current->pi_lock);
++}
++
++/*
++ * Remove a waiter from a lock and give up
++ *
++ * Must be called with lock->wait_lock held and interrupts disabled. I must
++ * have just failed to try_to_take_rt_mutex().
++ */
++static void remove_waiter(struct rt_mutex *lock,
++ struct rt_mutex_waiter *waiter)
++{
++ bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
++ struct task_struct *owner = rt_mutex_owner(lock);
+ struct rt_mutex *next_lock = NULL;
+
+ lockdep_assert_held(&lock->wait_lock);
-
++ raw_spin_lock(&current->pi_lock);
- rt_mutex_dequeue(lock, waiter);
-@@ -1099,10 +1546,11 @@ static void remove_waiter(struct rt_mutex *lock,
- if (rt_mutex_has_waiters(lock))
- rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
-
-- __rt_mutex_adjust_prio(owner);
++
++ raw_spin_lock(¤t->pi_lock);
++ rt_mutex_dequeue(lock, waiter);
++ current->pi_blocked_on = NULL;
++ raw_spin_unlock(¤t->pi_lock);
++
++ /*
++ * Only update priority if the waiter was the highest priority
++ * waiter of the lock and there is an owner to update.
++ */
++ if (!owner || !is_top_waiter)
++ return;
++
++ raw_spin_lock(&owner->pi_lock);
++
++ rt_mutex_dequeue_pi(owner, waiter);
++
++ if (rt_mutex_has_waiters(lock))
++ rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
++
+ rt_mutex_adjust_prio(owner);
-
- /* Store the lock on which owner is blocked or NULL */
-- next_lock = task_blocked_on_lock(owner);
++
++ /* Store the lock on which owner is blocked or NULL */
+ if (rt_mutex_real_waiter(owner->pi_blocked_on))
+ next_lock = task_blocked_on_lock(owner);
-
- raw_spin_unlock(&owner->pi_lock);
-
++
++ raw_spin_unlock(&owner->pi_lock);
++
++ /*
+ * Don't walk the chain, if the owner task is not blocked
+ * itself.
+ */
@@ -1138,21 +1586,30 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
@@ -14931,10 +15486,17 @@ index 2c49d76f96c3..eec63f064b3f 100644
if (ret)
break;
}
-@@ -1223,21 +1682,148 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
+@@ -1223,35 +1682,94 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
}
}
+-/*
+- * Slow path lock function:
+- */
+-static int __sched
+-rt_mutex_slowlock(struct rt_mutex *lock, int state,
+- struct hrtimer_sleeper *timeout,
+- enum rtmutex_chainwalk chwalk)
+static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
@@ -14978,21 +15540,34 @@ index 2c49d76f96c3..eec63f064b3f 100644
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void ww_mutex_account_lock(struct rt_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
-+{
+ {
+- struct rt_mutex_waiter waiter;
+- unsigned long flags;
+- int ret = 0;
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+ struct rt_mutex_waiter *waiter, *n;
-+
+
+- debug_rt_mutex_init_waiter(&waiter);
+- RB_CLEAR_NODE(&waiter.pi_tree_entry);
+- RB_CLEAR_NODE(&waiter.tree_entry);
+ /*
+ * This branch gets optimized out for the common case,
+ * and is only important for ww_mutex_lock.
+ */
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ ww->ctx = ww_ctx;
-+
-+ /*
+
+ /*
+- * Technically we could use raw_spin_[un]lock_irq() here, but this can
+- * be called in early boot if the cmpxchg() fast path is disabled
+- * (debug, no architecture support). In this case we will acquire the
+- * rtmutex with lock->wait_lock held. But we cannot unconditionally
+- * enable interrupts in that early boot case. So we need to use the
+- * irqsave/restore variants.
+ * Give any possible sleeping processes the chance to wake up,
+ * so they can recheck if they have to back off.
-+ */
+ */
+- raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
+ tree_entry) {
+ /* XXX debug rt mutex waiter wakeup */
@@ -15018,24 +15593,26 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ struct rt_mutex_waiter *waiter)
+{
+ int ret;
-+
-+ /* Try to acquire the lock again: */
-+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+
+ /* Try to acquire the lock again: */
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ if (ww_ctx)
+ ww_mutex_account_lock(lock, ww_ctx);
-+ return 0;
-+ }
-+
-+ set_current_state(state);
-+
-+ /* Setup the timer, when timeout != NULL */
-+ if (unlikely(timeout))
-+ hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
-+
+ return 0;
+ }
+
+@@ -1261,17 +1779,27 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ if (unlikely(timeout))
+ hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+
+- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
+ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
-+
+
+- if (likely(!ret))
+ if (likely(!ret)) {
-+ /* sleep on the mutex */
+ /* sleep on the mutex */
+- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
+ ret = __rt_mutex_slowlock(lock, state, timeout, waiter,
+ ww_ctx);
+ } else if (ww_ctx) {
@@ -15043,81 +15620,53 @@ index 2c49d76f96c3..eec63f064b3f 100644
+ ret = __mutex_lock_check_stamp(lock, ww_ctx);
+ BUG_ON(!ret);
+ }
-+
-+ if (unlikely(ret)) {
-+ __set_current_state(TASK_RUNNING);
-+ if (rt_mutex_has_waiters(lock))
+
+ if (unlikely(ret)) {
+ __set_current_state(TASK_RUNNING);
+ if (rt_mutex_has_waiters(lock))
+- remove_waiter(lock, &waiter);
+- rt_mutex_handle_deadlock(ret, chwalk, &waiter);
+ remove_waiter(lock, waiter);
+ /* ww_mutex want to report EDEADLK/EALREADY, let them */
+ if (!ww_ctx)
+ rt_mutex_handle_deadlock(ret, chwalk, waiter);
+ } else if (ww_ctx) {
+ ww_mutex_account_lock(lock, ww_ctx);
-+ }
-+
-+ /*
-+ * try_to_take_rt_mutex() sets the waiter bit
-+ * unconditionally. We might have to fix that up.
-+ */
-+ fixup_rt_mutex_waiters(lock);
+ }
+
+ /*
+@@ -1279,6 +1807,36 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+ * unconditionally. We might have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
+ return ret;
+}
+
- /*
- * Slow path lock function:
- */
- static int __sched
- rt_mutex_slowlock(struct rt_mutex *lock, int state,
- struct hrtimer_sleeper *timeout,
-- enum rtmutex_chainwalk chwalk)
++/*
++ * Slow path lock function:
++ */
++static int __sched
++rt_mutex_slowlock(struct rt_mutex *lock, int state,
++ struct hrtimer_sleeper *timeout,
+ enum rtmutex_chainwalk chwalk,
+ struct ww_acquire_ctx *ww_ctx)
- {
- struct rt_mutex_waiter waiter;
- unsigned long flags;
- int ret = 0;
-
-- debug_rt_mutex_init_waiter(&waiter);
-- RB_CLEAR_NODE(&waiter.pi_tree_entry);
-- RB_CLEAR_NODE(&waiter.tree_entry);
++{
++ struct rt_mutex_waiter waiter;
++ unsigned long flags;
++ int ret = 0;
++
+ rt_mutex_init_waiter(&waiter, false);
-
- /*
- * Technically we could use raw_spin_[un]lock_irq() here, but this can
-@@ -1249,36 +1835,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
- */
- raw_spin_lock_irqsave(&lock->wait_lock, flags);
-
-- /* Try to acquire the lock again: */
-- if (try_to_take_rt_mutex(lock, current, NULL)) {
-- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-- return 0;
-- }
--
-- set_current_state(state);
--
-- /* Setup the timer, when timeout != NULL */
-- if (unlikely(timeout))
-- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
--
-- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
<Skipped 3923 lines>
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/kernel.git/commitdiff/7323c6802138dc483e23b468239a67b3ca38cdac