• R/O
  • HTTP
  • SSH
  • HTTPS

BrynKernel-AOSP: 提交

https://bryn-lab.my.id/kernel.html


Commit MetaInfo

修订版: 0c56aa8589d7dc4342e0d425b21232a08badfd7d (tree)
时间: 2020-11-19 02:26:31
作者: Juergen Gross <jgross@suse...>
Committer: Greg Kroah-Hartman

Log Message

xen/events: defer eoi in case of excessive number of events

commit e99502f76271d6bc4e374fe368c50c67a1fd3070 upstream.

In case rogue guests are sending events at high frequency it might
happen that xen_evtchn_do_upcall() won't stop processing events in
dom0. As this is done in irq handling a crash might be the result.

In order to avoid that, delay further inter-domain events after some
time in xen_evtchn_do_upcall() by forcing eoi processing into a
worker on the same cpu, thus inhibiting new events coming in.

The time after which eoi processing is to be delayed is configurable
via a new module parameter "event_loop_timeout" which specifies the
maximum event loop time in jiffies (default: 2, the value was chosen
after some tests showing that a value of 2 was the lowest with an
only slight drop of dom0 network throughput while multiple guests
performed an event storm).

How long eoi processing will be delayed can be specified via another
parameter "event_eoi_delay" (again in jiffies, default 10, again the
value was chosen after testing with different delay values).

This is part of XSA-332.

Cc: stable@vger.kernel.org
Reported-by: Julien Grall <julien@xen.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Wei Liu <wl@xen.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

更改概述

差异

--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -5020,6 +5020,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
50205020 Disables the PV optimizations forcing the HVM guest to
50215021 run as generic HVM guest with no PV drivers.
50225022
5023+ xen.event_eoi_delay= [XEN]
5024+ How long to delay EOI handling in case of event
5025+ storms (jiffies). Default is 10.
5026+
5027+ xen.event_loop_timeout= [XEN]
5028+ After which time (jiffies) the event handling loop
5029+ should start to delay EOI handling. Default is 2.
5030+
50235031 xirc2ps_cs= [NET,PCMCIA]
50245032 Format:
50255033 <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -160,7 +160,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
160160 * a bitset of words which contain pending event bits. The second
161161 * level is a bitset of pending events themselves.
162162 */
163-static void evtchn_2l_handle_events(unsigned cpu)
163+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
164164 {
165165 int irq;
166166 xen_ulong_t pending_words;
@@ -241,10 +241,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
241241
242242 /* Process port. */
243243 port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
244- irq = get_evtchn_to_irq(port);
245-
246- if (irq != -1)
247- generic_handle_irq(irq);
244+ handle_irq_for_port(port, ctrl);
248245
249246 bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
250247
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -34,6 +34,8 @@
3434 #include <linux/pci.h>
3535 #include <linux/spinlock.h>
3636 #include <linux/cpuhotplug.h>
37+#include <linux/atomic.h>
38+#include <linux/ktime.h>
3739
3840 #ifdef CONFIG_X86
3941 #include <asm/desc.h>
@@ -64,6 +66,15 @@
6466
6567 #include "events_internal.h"
6668
69+#undef MODULE_PARAM_PREFIX
70+#define MODULE_PARAM_PREFIX "xen."
71+
72+static uint __read_mostly event_loop_timeout = 2;
73+module_param(event_loop_timeout, uint, 0644);
74+
75+static uint __read_mostly event_eoi_delay = 10;
76+module_param(event_eoi_delay, uint, 0644);
77+
6778 const struct evtchn_ops *evtchn_ops;
6879
6980 /*
@@ -87,6 +98,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
8798 * irq_mapping_update_lock
8899 * evtchn_rwlock
89100 * IRQ-desc lock
101+ * percpu eoi_list_lock
90102 */
91103
92104 static LIST_HEAD(xen_irq_list_head);
@@ -119,6 +131,8 @@ static struct irq_chip xen_pirq_chip;
119131 static void enable_dynirq(struct irq_data *data);
120132 static void disable_dynirq(struct irq_data *data);
121133
134+static DEFINE_PER_CPU(unsigned int, irq_epoch);
135+
122136 static void clear_evtchn_to_irq_row(unsigned row)
123137 {
124138 unsigned col;
@@ -406,17 +420,120 @@ void notify_remote_via_irq(int irq)
406420 }
407421 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
408422
423+struct lateeoi_work {
424+ struct delayed_work delayed;
425+ spinlock_t eoi_list_lock;
426+ struct list_head eoi_list;
427+};
428+
429+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
430+
431+static void lateeoi_list_del(struct irq_info *info)
432+{
433+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
434+ unsigned long flags;
435+
436+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
437+ list_del_init(&info->eoi_list);
438+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
439+}
440+
441+static void lateeoi_list_add(struct irq_info *info)
442+{
443+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
444+ struct irq_info *elem;
445+ u64 now = get_jiffies_64();
446+ unsigned long delay;
447+ unsigned long flags;
448+
449+ if (now < info->eoi_time)
450+ delay = info->eoi_time - now;
451+ else
452+ delay = 1;
453+
454+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
455+
456+ if (list_empty(&eoi->eoi_list)) {
457+ list_add(&info->eoi_list, &eoi->eoi_list);
458+ mod_delayed_work_on(info->eoi_cpu, system_wq,
459+ &eoi->delayed, delay);
460+ } else {
461+ list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
462+ if (elem->eoi_time <= info->eoi_time)
463+ break;
464+ }
465+ list_add(&info->eoi_list, &elem->eoi_list);
466+ }
467+
468+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
469+}
470+
409471 static void xen_irq_lateeoi_locked(struct irq_info *info)
410472 {
411473 evtchn_port_t evtchn;
474+ unsigned int cpu;
412475
413476 evtchn = info->evtchn;
414- if (!VALID_EVTCHN(evtchn))
477+ if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
415478 return;
416479
480+ cpu = info->eoi_cpu;
481+ if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) {
482+ lateeoi_list_add(info);
483+ return;
484+ }
485+
486+ info->eoi_time = 0;
417487 unmask_evtchn(evtchn);
418488 }
419489
490+static void xen_irq_lateeoi_worker(struct work_struct *work)
491+{
492+ struct lateeoi_work *eoi;
493+ struct irq_info *info;
494+ u64 now = get_jiffies_64();
495+ unsigned long flags;
496+
497+ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
498+
499+ read_lock_irqsave(&evtchn_rwlock, flags);
500+
501+ while (true) {
502+ spin_lock(&eoi->eoi_list_lock);
503+
504+ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
505+ eoi_list);
506+
507+ if (info == NULL || now < info->eoi_time) {
508+ spin_unlock(&eoi->eoi_list_lock);
509+ break;
510+ }
511+
512+ list_del_init(&info->eoi_list);
513+
514+ spin_unlock(&eoi->eoi_list_lock);
515+
516+ info->eoi_time = 0;
517+
518+ xen_irq_lateeoi_locked(info);
519+ }
520+
521+ if (info)
522+ mod_delayed_work_on(info->eoi_cpu, system_wq,
523+ &eoi->delayed, info->eoi_time - now);
524+
525+ read_unlock_irqrestore(&evtchn_rwlock, flags);
526+}
527+
528+static void xen_cpu_init_eoi(unsigned int cpu)
529+{
530+ struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
531+
532+ INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
533+ spin_lock_init(&eoi->eoi_list_lock);
534+ INIT_LIST_HEAD(&eoi->eoi_list);
535+}
536+
420537 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
421538 {
422539 struct irq_info *info;
@@ -436,6 +553,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
436553 static void xen_irq_init(unsigned irq)
437554 {
438555 struct irq_info *info;
556+
439557 #ifdef CONFIG_SMP
440558 /* By default all event channels notify CPU#0. */
441559 cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -450,6 +568,7 @@ static void xen_irq_init(unsigned irq)
450568
451569 set_info_for_irq(irq, info);
452570
571+ INIT_LIST_HEAD(&info->eoi_list);
453572 list_add_tail(&info->list, &xen_irq_list_head);
454573 }
455574
@@ -505,6 +624,9 @@ static void xen_free_irq(unsigned irq)
505624
506625 write_lock_irqsave(&evtchn_rwlock, flags);
507626
627+ if (!list_empty(&info->eoi_list))
628+ lateeoi_list_del(info);
629+
508630 list_del(&info->list);
509631
510632 set_info_for_irq(irq, NULL);
@@ -1364,6 +1486,54 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
13641486 notify_remote_via_irq(irq);
13651487 }
13661488
1489+struct evtchn_loop_ctrl {
1490+ ktime_t timeout;
1491+ unsigned count;
1492+ bool defer_eoi;
1493+};
1494+
1495+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1496+{
1497+ int irq;
1498+ struct irq_info *info;
1499+
1500+ irq = get_evtchn_to_irq(port);
1501+ if (irq == -1)
1502+ return;
1503+
1504+ /*
1505+ * Check for timeout every 256 events.
1506+ * We are setting the timeout value only after the first 256
1507+ * events in order to not hurt the common case of few loop
1508+ * iterations. The 256 is basically an arbitrary value.
1509+ *
1510+ * In case we are hitting the timeout we need to defer all further
1511+ * EOIs in order to ensure to leave the event handling loop rather
1512+ * sooner than later.
1513+ */
1514+ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1515+ ktime_t kt = ktime_get();
1516+
1517+ if (!ctrl->timeout.tv64) {
1518+ kt = ktime_add_ms(kt,
1519+ jiffies_to_msecs(event_loop_timeout));
1520+ ctrl->timeout = kt;
1521+ } else if (kt.tv64 > ctrl->timeout.tv64) {
1522+ ctrl->defer_eoi = true;
1523+ }
1524+ }
1525+
1526+ info = info_for_irq(irq);
1527+
1528+ if (ctrl->defer_eoi) {
1529+ info->eoi_cpu = smp_processor_id();
1530+ info->irq_epoch = __this_cpu_read(irq_epoch);
1531+ info->eoi_time = get_jiffies_64() + event_eoi_delay;
1532+ }
1533+
1534+ generic_handle_irq(irq);
1535+}
1536+
13671537 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
13681538
13691539 static void __xen_evtchn_do_upcall(void)
@@ -1371,6 +1541,7 @@ static void __xen_evtchn_do_upcall(void)
13711541 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
13721542 int cpu = get_cpu();
13731543 unsigned count;
1544+ struct evtchn_loop_ctrl ctrl = { 0 };
13741545
13751546 read_lock(&evtchn_rwlock);
13761547
@@ -1380,7 +1551,7 @@ static void __xen_evtchn_do_upcall(void)
13801551 if (__this_cpu_inc_return(xed_nesting_count) - 1)
13811552 goto out;
13821553
1383- xen_evtchn_handle_events(cpu);
1554+ xen_evtchn_handle_events(cpu, &ctrl);
13841555
13851556 BUG_ON(!irqs_disabled());
13861557
@@ -1391,6 +1562,13 @@ static void __xen_evtchn_do_upcall(void)
13911562 out:
13921563 read_unlock(&evtchn_rwlock);
13931564
1565+ /*
1566+ * Increment irq_epoch only now to defer EOIs only for
1567+ * xen_irq_lateeoi() invocations occurring from inside the loop
1568+ * above.
1569+ */
1570+ __this_cpu_inc(irq_epoch);
1571+
13941572 put_cpu();
13951573 }
13961574
@@ -1828,9 +2006,6 @@ void xen_callback_vector(void)
18282006 void xen_callback_vector(void) {}
18292007 #endif
18302008
1831-#undef MODULE_PARAM_PREFIX
1832-#define MODULE_PARAM_PREFIX "xen."
1833-
18342009 static bool fifo_events = true;
18352010 module_param(fifo_events, bool, 0);
18362011
@@ -1838,6 +2013,8 @@ static int xen_evtchn_cpu_prepare(unsigned int cpu)
18382013 {
18392014 int ret = 0;
18402015
2016+ xen_cpu_init_eoi(cpu);
2017+
18412018 if (evtchn_ops->percpu_init)
18422019 ret = evtchn_ops->percpu_init(cpu);
18432020
@@ -1863,6 +2040,8 @@ void __init xen_init_IRQ(void)
18632040 if (ret < 0)
18642041 xen_evtchn_2l_init();
18652042
2043+ xen_cpu_init_eoi(smp_processor_id());
2044+
18662045 cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
18672046 "CPUHP_XEN_EVTCHN_PREPARE",
18682047 xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -275,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word)
275275 return w & EVTCHN_FIFO_LINK_MASK;
276276 }
277277
278-static void handle_irq_for_port(unsigned port)
279-{
280- int irq;
281-
282- irq = get_evtchn_to_irq(port);
283- if (irq != -1)
284- generic_handle_irq(irq);
285-}
286-
287-static void consume_one_event(unsigned cpu,
278+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
288279 struct evtchn_fifo_control_block *control_block,
289- unsigned priority, unsigned long *ready,
290- bool drop)
280+ unsigned priority, unsigned long *ready)
291281 {
292282 struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
293283 uint32_t head;
@@ -320,16 +310,17 @@ static void consume_one_event(unsigned cpu,
320310 clear_bit(priority, ready);
321311
322312 if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
323- if (unlikely(drop))
313+ if (unlikely(!ctrl))
324314 pr_warn("Dropping pending event for port %u\n", port);
325315 else
326- handle_irq_for_port(port);
316+ handle_irq_for_port(port, ctrl);
327317 }
328318
329319 q->head[priority] = head;
330320 }
331321
332-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
322+static void __evtchn_fifo_handle_events(unsigned cpu,
323+ struct evtchn_loop_ctrl *ctrl)
333324 {
334325 struct evtchn_fifo_control_block *control_block;
335326 unsigned long ready;
@@ -341,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
341332
342333 while (ready) {
343334 q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
344- consume_one_event(cpu, control_block, q, &ready, drop);
335+ consume_one_event(cpu, ctrl, control_block, q, &ready);
345336 ready |= xchg(&control_block->ready, 0);
346337 }
347338 }
348339
349-static void evtchn_fifo_handle_events(unsigned cpu)
340+static void evtchn_fifo_handle_events(unsigned cpu,
341+ struct evtchn_loop_ctrl *ctrl)
350342 {
351- __evtchn_fifo_handle_events(cpu, false);
343+ __evtchn_fifo_handle_events(cpu, ctrl);
352344 }
353345
354346 static void evtchn_fifo_resume(void)
@@ -417,7 +409,7 @@ static int evtchn_fifo_percpu_init(unsigned int cpu)
417409
418410 static int evtchn_fifo_percpu_deinit(unsigned int cpu)
419411 {
420- __evtchn_fifo_handle_events(cpu, true);
412+ __evtchn_fifo_handle_events(cpu, NULL);
421413 return 0;
422414 }
423415
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -32,11 +32,15 @@ enum xen_irq_type {
3232 */
3333 struct irq_info {
3434 struct list_head list;
35+ struct list_head eoi_list;
3536 int refcnt;
3637 enum xen_irq_type type; /* type */
3738 unsigned irq;
3839 unsigned int evtchn; /* event channel */
3940 unsigned short cpu; /* cpu bound */
41+ unsigned short eoi_cpu; /* EOI must happen on this cpu */
42+ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
43+ u64 eoi_time; /* Time in jiffies when to EOI. */
4044
4145 union {
4246 unsigned short virq;
@@ -55,6 +59,8 @@ struct irq_info {
5559 #define PIRQ_SHAREABLE (1 << 1)
5660 #define PIRQ_MSI_GROUP (1 << 2)
5761
62+struct evtchn_loop_ctrl;
63+
5864 struct evtchn_ops {
5965 unsigned (*max_channels)(void);
6066 unsigned (*nr_channels)(void);
@@ -69,7 +75,7 @@ struct evtchn_ops {
6975 void (*mask)(unsigned port);
7076 void (*unmask)(unsigned port);
7177
72- void (*handle_events)(unsigned cpu);
78+ void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
7379 void (*resume)(void);
7480
7581 int (*percpu_init)(unsigned int cpu);
@@ -80,6 +86,7 @@ extern const struct evtchn_ops *evtchn_ops;
8086
8187 extern int **evtchn_to_irq;
8288 int get_evtchn_to_irq(unsigned int evtchn);
89+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
8390
8491 struct irq_info *info_for_irq(unsigned irq);
8592 unsigned cpu_from_irq(unsigned irq);
@@ -137,9 +144,10 @@ static inline void unmask_evtchn(unsigned port)
137144 return evtchn_ops->unmask(port);
138145 }
139146
140-static inline void xen_evtchn_handle_events(unsigned cpu)
147+static inline void xen_evtchn_handle_events(unsigned cpu,
148+ struct evtchn_loop_ctrl *ctrl)
141149 {
142- return evtchn_ops->handle_events(cpu);
150+ return evtchn_ops->handle_events(cpu, ctrl);
143151 }
144152
145153 static inline void xen_evtchn_resume(void)
Show on old repository browser