Print this page
6062 Workaround broken KVM handling of directed EOIs
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/i86pc/io/pcplusmp/apic_regops.c
+++ new/usr/src/uts/i86pc/io/pcplusmp/apic_regops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
|
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright 2014 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
27 + * Copyright (c) 2014 by Delphix. All rights reserved.
27 28 */
28 29
29 30 #include <sys/cpuvar.h>
30 31 #include <sys/psm.h>
31 32 #include <sys/archsystm.h>
32 33 #include <sys/apic.h>
33 34 #include <sys/sunddi.h>
34 35 #include <sys/ddi_impldefs.h>
35 36 #include <sys/mach_intr.h>
36 37 #include <sys/sysmacros.h>
37 38 #include <sys/trap.h>
38 39 #include <sys/x86_archext.h>
39 40 #include <sys/privregs.h>
40 41 #include <sys/psm_common.h>
41 42
42 43 /* Function prototypes of local apic and X2APIC */
43 44 static uint64_t local_apic_read(uint32_t reg);
44 45 static void local_apic_write(uint32_t reg, uint64_t value);
45 46 static int get_local_apic_pri(void);
46 47 static void local_apic_write_task_reg(uint64_t value);
47 48 static void local_apic_write_int_cmd(uint32_t cpu_id, uint32_t cmd1);
48 49 static uint64_t local_x2apic_read(uint32_t msr);
49 50 static void local_x2apic_write(uint32_t msr, uint64_t value);
50 51 static int get_local_x2apic_pri(void);
51 52 static void local_x2apic_write_task_reg(uint64_t value);
52 53 static void local_x2apic_write_int_cmd(uint32_t cpu_id, uint32_t cmd1);
53 54
54 55 /*
55 56 * According to the X2APIC specification:
56 57 *
57 58 * xAPIC global enable X2APIC enable Description
58 59 * (IA32_APIC_BASE[11]) (IA32_APIC_BASE[10])
|
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
59 60 * -----------------------------------------------------------
60 61 * 0 0 APIC is disabled
61 62 * 0 1 Invalid
62 63 * 1 0 APIC is enabled in xAPIC mode
63 64 * 1 1 APIC is enabled in X2APIC mode
64 65 * -----------------------------------------------------------
65 66 */
66 67 int x2apic_enable = 1;
67 68 apic_mode_t apic_mode = LOCAL_APIC; /* Default mode is Local APIC */
68 69
70 +/* See apic_directed_EOI_supported(). Currently 3-state variable. */
71 +volatile int apic_directed_eoi_state = 2;
72 +
69 73 /* Uses MMIO (Memory Mapped IO) */
70 74 static apic_reg_ops_t local_apic_regs_ops = {
71 75 local_apic_read,
72 76 local_apic_write,
73 77 get_local_apic_pri,
74 78 local_apic_write_task_reg,
75 79 local_apic_write_int_cmd,
76 80 apic_send_EOI,
77 81 };
78 82
79 83 /* X2APIC : Uses RDMSR/WRMSR instructions to access APIC registers */
80 84 static apic_reg_ops_t x2apic_regs_ops = {
81 85 local_x2apic_read,
82 86 local_x2apic_write,
83 87 get_local_x2apic_pri,
84 88 local_x2apic_write_task_reg,
85 89 local_x2apic_write_int_cmd,
86 90 apic_send_EOI,
87 91 };
88 92
89 93 int apic_have_32bit_cr8 = 0;
90 94
91 95 /* The default ops is local APIC (Memory Mapped IO) */
92 96 apic_reg_ops_t *apic_reg_ops = &local_apic_regs_ops;
93 97
94 98 /*
95 99 * APIC register ops related data structures and functions.
96 100 */
97 101 void apic_send_EOI();
98 102 void apic_send_directed_EOI(uint32_t irq);
99 103
100 104 #define X2APIC_ENABLE_BIT 10
101 105
102 106 /*
103 107 * Local APIC Implementation
104 108 */
105 109 static uint64_t
106 110 local_apic_read(uint32_t reg)
107 111 {
108 112 return ((uint32_t)apicadr[reg]);
109 113 }
110 114
111 115 static void
112 116 local_apic_write(uint32_t reg, uint64_t value)
113 117 {
114 118 apicadr[reg] = (uint32_t)value;
115 119 }
116 120
/*
 * Return the current priority of the local APIC.
 *
 * On amd64 builds the value always comes from getcr8().  On other builds
 * getcr8() is used only when apic_have_32bit_cr8 is set; otherwise the
 * APIC_TASK_REG entry of the memory-mapped window is read instead.
 */
static int
get_local_apic_pri(void)
{
#if !defined(__amd64)
	if (!apic_have_32bit_cr8)
		return (apicadr[APIC_TASK_REG]);
#endif
	return ((int)getcr8());
}
128 132
129 133 static void
130 134 local_apic_write_task_reg(uint64_t value)
131 135 {
132 136 #if defined(__amd64)
133 137 setcr8((ulong_t)(value >> APIC_IPL_SHIFT));
134 138 #else
135 139 if (apic_have_32bit_cr8)
136 140 setcr8((ulong_t)(value >> APIC_IPL_SHIFT));
137 141 else
138 142 apicadr[APIC_TASK_REG] = (uint32_t)value;
139 143 #endif
140 144 }
141 145
142 146 static void
143 147 local_apic_write_int_cmd(uint32_t cpu_id, uint32_t cmd1)
144 148 {
145 149 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
146 150 apicadr[APIC_INT_CMD1] = cmd1;
147 151 }
148 152
149 153 /*
150 154 * X2APIC Implementation.
151 155 */
152 156 static uint64_t
153 157 local_x2apic_read(uint32_t msr)
154 158 {
155 159 uint64_t i;
156 160
157 161 i = (uint64_t)(rdmsr(REG_X2APIC_BASE_MSR + (msr >> 2)) & 0xffffffff);
158 162 return (i);
159 163 }
160 164
161 165 static void
162 166 local_x2apic_write(uint32_t msr, uint64_t value)
163 167 {
164 168 uint64_t tmp;
165 169
166 170 if (msr != APIC_EOI_REG) {
167 171 tmp = rdmsr(REG_X2APIC_BASE_MSR + (msr >> 2));
168 172 tmp = (tmp & 0xffffffff00000000) | value;
169 173 } else {
170 174 tmp = 0;
171 175 }
172 176
173 177 wrmsr((REG_X2APIC_BASE_MSR + (msr >> 2)), tmp);
174 178 }
175 179
176 180 static int
177 181 get_local_x2apic_pri(void)
178 182 {
179 183 return (rdmsr(REG_X2APIC_BASE_MSR + (APIC_TASK_REG >> 2)));
180 184 }
181 185
182 186 static void
183 187 local_x2apic_write_task_reg(uint64_t value)
184 188 {
185 189 X2APIC_WRITE(APIC_TASK_REG, value);
186 190 }
187 191
188 192 static void
189 193 local_x2apic_write_int_cmd(uint32_t cpu_id, uint32_t cmd1)
190 194 {
191 195 wrmsr((REG_X2APIC_BASE_MSR + (APIC_INT_CMD1 >> 2)),
192 196 (((uint64_t)cpu_id << 32) | cmd1));
193 197 }
194 198
195 199 /*ARGSUSED*/
196 200 void
197 201 apic_send_EOI(uint32_t irq)
198 202 {
199 203 apic_reg_ops->apic_write(APIC_EOI_REG, 0);
200 204 }
201 205
202 206 /*
203 207 * Support for Directed EOI capability is available in both the xAPIC
204 208 * and x2APIC mode.
205 209 */
206 210 void
207 211 apic_send_directed_EOI(uint32_t irq)
208 212 {
209 213 uchar_t ioapicindex;
210 214 uchar_t vector;
211 215 apic_irq_t *apic_irq;
212 216 short intr_index;
213 217
214 218 /*
215 219 * Following the EOI to the local APIC unit, perform a directed
216 220 * EOI to the IOxAPIC generating the interrupt by writing to its
217 221 * EOI register.
218 222 *
219 223 * A broadcast EOI is not generated.
220 224 */
221 225 apic_reg_ops->apic_write(APIC_EOI_REG, 0);
222 226
223 227 apic_irq = apic_irq_table[irq];
224 228 while (apic_irq) {
225 229 intr_index = apic_irq->airq_mps_intr_index;
226 230 if (intr_index == ACPI_INDEX || intr_index >= 0) {
227 231 ioapicindex = apic_irq->airq_ioapicindex;
228 232 vector = apic_irq->airq_vector;
229 233 ioapic_write_eoi(ioapicindex, vector);
230 234 }
231 235 apic_irq = apic_irq->airq_next;
232 236 }
233 237 }
234 238
235 239 int
236 240 apic_detect_x2apic(void)
237 241 {
238 242 if (x2apic_enable == 0)
239 243 return (0);
240 244
241 245 return (is_x86_feature(x86_featureset, X86FSET_X2APIC));
242 246 }
243 247
244 248 void
245 249 apic_enable_x2apic(void)
246 250 {
247 251 uint64_t apic_base_msr;
248 252
249 253 if (apic_local_mode() == LOCAL_X2APIC) {
250 254 /* BIOS apparently has enabled X2APIC */
251 255 if (apic_mode != LOCAL_X2APIC)
252 256 x2apic_update_psm();
253 257 return;
254 258 }
255 259
256 260 /*
257 261 * This is the first time we are enabling X2APIC on this CPU
258 262 */
259 263 apic_base_msr = rdmsr(REG_APIC_BASE_MSR);
260 264 apic_base_msr = apic_base_msr | (0x1 << X2APIC_ENABLE_BIT);
261 265 wrmsr(REG_APIC_BASE_MSR, apic_base_msr);
262 266
263 267 if (apic_mode != LOCAL_X2APIC)
264 268 x2apic_update_psm();
265 269 }
266 270
267 271 /*
268 272 * Determine which mode the current CPU is in. See the table above.
269 273 * (IA32_APIC_BASE[11]) (IA32_APIC_BASE[10])
270 274 */
271 275 int
272 276 apic_local_mode(void)
273 277 {
274 278 uint64_t apic_base_msr;
275 279 int bit = ((0x1 << (X2APIC_ENABLE_BIT + 1)) |
276 280 (0x1 << X2APIC_ENABLE_BIT));
277 281
278 282 apic_base_msr = rdmsr(REG_APIC_BASE_MSR);
279 283
280 284 if ((apic_base_msr & bit) == bit)
281 285 return (LOCAL_X2APIC);
282 286 else
283 287 return (LOCAL_APIC);
284 288 }
285 289
286 290 void
|
↓ open down ↓ |
208 lines elided |
↑ open up ↑ |
287 291 apic_set_directed_EOI_handler()
288 292 {
289 293 apic_reg_ops->apic_send_eoi = apic_send_directed_EOI;
290 294 }
291 295
292 296 int
293 297 apic_directed_EOI_supported()
294 298 {
295 299 uint32_t ver;
296 300
301 + /*
302 + * There are some known issues with some versions of Linux KVM and QEMU
303 + * where by directed EOIs do not properly function and instead get
304 + * coalesced at the hypervisor, causing the host not to see interrupts.
305 + * Thus, when the platform is KVM, we would like to disable it by
306 + * default, but keep it available otherwise.
307 + *
308 + * We use a three-state variable (apic_directed_eoi_state) to determine
309 + * how we handle directed EOI.
310 + *
311 + * 0 --> Don't do directed EOI at all.
312 + * 1 --> Do directed EOI if available, no matter the HW environment.
313 + * 2 --> Don't do directed EOI on KVM, but do it otherwise if available.
314 + *
315 + * If some grinning weirdo put something else in there, treat it as '2'
316 + * (i.e. the current default).
317 + *
318 + * Note, at this time illumos KVM does not identify as KVM. If it does,
319 + * we'll need to do some work to determine if it should be caught by
320 + * this or if it should show up as its own value of platform_type.
321 + */
322 + switch (apic_directed_eoi_state) {
323 + case 0:
324 + /* Don't do it at all. */
325 + return (0);
326 + case 1:
327 + break;
328 + case 2:
329 + default:
330 + /* Only do it if we aren't on KVM. */
331 + if (get_hwenv() == HW_KVM)
332 + return (0);
333 + /* FALLTHRU */
334 + }
335 +
297 336 ver = apic_reg_ops->apic_read(APIC_VERS_REG);
298 337 if (ver & APIC_DIRECTED_EOI_BIT)
299 338 return (1);
300 339
301 340 return (0);
302 341 }
303 342
304 343 /*
305 344 * Change apic_reg_ops depending upon the apic_mode.
306 345 */
307 346 void
308 347 apic_change_ops()
309 348 {
310 349 if (apic_mode == LOCAL_APIC)
311 350 apic_reg_ops = &local_apic_regs_ops;
312 351 else if (apic_mode == LOCAL_X2APIC)
313 352 apic_reg_ops = &x2apic_regs_ops;
314 353 }
315 354
316 355 /*
317 356 * Generates an interprocessor interrupt to another CPU when X2APIC mode is
318 357 * enabled.
319 358 */
320 359 void
321 360 x2apic_send_ipi(int cpun, int ipl)
322 361 {
323 362 int vector;
324 363 ulong_t flag;
325 364
326 365 ASSERT(apic_mode == LOCAL_X2APIC);
327 366
328 367 /*
329 368 * With X2APIC, Intel relaxed the semantics of the
330 369 * WRMSR instruction such that references to the X2APIC
331 370 * MSR registers are no longer serializing instructions.
332 371 * The code that initiates IPIs assumes that some sort
333 372 * of memory serialization occurs. The old APIC code
334 373 * did a write to uncachable memory mapped registers.
335 374 * Any reference to uncached memory is a serializing
336 375 * operation. To mimic those semantics here, we do an
337 376 * atomic operation, which translates to a LOCK OR instruction,
338 377 * which is serializing.
339 378 */
340 379 atomic_or_ulong(&flag, 1);
341 380
342 381 vector = apic_resv_vector[ipl];
343 382
344 383 flag = intr_clear();
345 384
346 385 /*
347 386 * According to X2APIC specification in section '2.3.5.1' of
348 387 * Interrupt Command Register Semantics, the semantics of
349 388 * programming Interrupt Command Register to dispatch an interrupt
350 389 * is simplified. A single MSR write to the 64-bit ICR is required
351 390 * for dispatching an interrupt. Specifically with the 64-bit MSR
352 391 * interface to ICR, system software is not required to check the
353 392 * status of the delivery status bit prior to writing to the ICR
354 393 * to send an IPI. With the removal of the Delivery Status bit,
355 394 * system software no longer has a reason to read the ICR. It remains
356 395 * readable only to aid in debugging.
357 396 */
358 397 #ifdef DEBUG
359 398 APIC_AV_PENDING_SET();
360 399 #endif /* DEBUG */
361 400
362 401 if ((cpun == psm_get_cpu_id())) {
363 402 X2APIC_WRITE(X2APIC_SELF_IPI, vector);
364 403 } else {
365 404 apic_reg_ops->apic_write_int_cmd(
366 405 apic_cpus[cpun].aci_local_id, vector);
367 406 }
368 407
369 408 intr_restore(flag);
370 409 }
371 410
372 411 /*
373 412 * Generates IPI to another CPU depending on the local APIC mode.
374 413 * apic_send_ipi() and x2apic_send_ipi() depends on the configured
375 414 * mode of the local APIC, but that may not match the actual mode
376 415 * early in CPU startup.
377 416 *
378 417 * Any changes made to this routine must be accompanied by similar
379 418 * changes to apic_send_ipi().
380 419 */
381 420 void
382 421 apic_common_send_ipi(int cpun, int ipl)
383 422 {
384 423 int vector;
385 424 ulong_t flag;
386 425 int mode = apic_local_mode();
387 426
388 427 if (mode == LOCAL_X2APIC) {
389 428 x2apic_send_ipi(cpun, ipl);
390 429 return;
391 430 }
392 431
393 432 ASSERT(mode == LOCAL_APIC);
394 433
395 434 vector = apic_resv_vector[ipl];
396 435 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
397 436 flag = intr_clear();
398 437 while (local_apic_regs_ops.apic_read(APIC_INT_CMD1) & AV_PENDING)
399 438 apic_ret();
400 439 local_apic_regs_ops.apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
401 440 vector);
402 441 intr_restore(flag);
403 442 }
|
↓ open down ↓ |
97 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX