11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 */
25
26 #include <sys/asm_linkage.h>
27 #include <sys/asm_misc.h>
28 #include <sys/regset.h>
29 #include <sys/privregs.h>
30 #include <sys/psw.h>
31 #include <sys/machbrand.h>
32
33 #if defined(__lint)
34
35 #include <sys/types.h>
36 #include <sys/thread.h>
37 #include <sys/systm.h>
38
39 #else /* __lint */
40
41 #include <sys/segments.h>
42 #include <sys/pcb.h>
43 #include <sys/trap.h>
44 #include <sys/ftrace.h>
45 #include <sys/traptrace.h>
46 #include <sys/clock.h>
47 #include <sys/model.h>
48 #include <sys/panic.h>
49
50 #if defined(__xpv)
51 #include <sys/hypervisor.h>
52 #endif
53
54 #include "assym.h"
55
56 #endif /* __lint */
57
58 /*
59 * We implement five flavours of system call entry points
60 *
514 movb $LWP_SYS, LWP_STATE(%r14)
515 incq LWP_RU_SYSC(%r14)
516 movb $NORMALRETURN, LWP_EOSYS(%r14)
517
518 incq %gs:CPU_STATS_SYS_SYSCALL
519
520 /*
521 * If our LWP has an alternate system call handler, run that instead of
522 * the regular system call path.
523 */
524 movq LWP_BRAND_SYSCALL(%r14), %rdi
525 testq %rdi, %rdi
526 jz _syscall_no_brand
527
528 pushq %rax
529 subq $8, %rsp /* align stack for call to C */
530 call *%rdi
531 addq $8, %rsp
532
533 /*
534 * If the alternate handler returns non-zero, the normal system call
535 * processing is resumed.
536 */
537 testl %eax, %eax
538 popq %rax
539 jnz _syscall_no_brand
540
541 /*
542 * For branded syscalls which were handled in-kernel, shuffle the
543 * register state as would be done by the native handler before jumping
544 * to the post-syscall logic.
545 */
546 movq REGOFF_RAX(%rsp), %r12
547 movq REGOFF_RDX(%rsp), %r13
548 jmp _syscall_after_brand
549
550 _syscall_no_brand:
551 movw %ax, T_SYSNUM(%r15)
552 movzbl T_PRE_SYS(%r15), %ebx
553 ORL_SYSCALLTRACE(%ebx)
554 testl %ebx, %ebx
555 jne _syscall_pre
556
557 _syscall_invoke:
558 movq REGOFF_RDI(%rbp), %rdi
559 movq REGOFF_RSI(%rbp), %rsi
560 movq REGOFF_RDX(%rbp), %rdx
561 movq REGOFF_RCX(%rbp), %rcx
562 movq REGOFF_R8(%rbp), %r8
563 movq REGOFF_R9(%rbp), %r9
564
565 cmpl $NSYSCALL, %eax
566 jae _syscall_ill
567 shll $SYSENT_SIZE_SHIFT, %eax
568 leaq sysent(%rax), %rbx
569
823
824 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
825 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
826
827 ASSERT_LWPTOREGS(%r14, %rsp)
828
829 incq %gs:CPU_STATS_SYS_SYSCALL
830
831 /*
832 * If our lwp has an alternate system call handler, run that instead
833 * of the regular system call path.
834 */
835 movq LWP_BRAND_SYSCALL(%r14), %rax
836 testq %rax, %rax
837 jz _syscall32_no_brand
838
839 movb $LWP_SYS, LWP_STATE(%r14)
840 call *%rax
841
842 /*
843 * If the alternate handler returns non-zero, the normal system call
844 * processing is resumed.
845 */
846 testl %eax, %eax
847 jnz _syscall32_no_brand
848
849 /*
850 * For branded syscalls which were handled in-kernel, shuffle the
851 * register state as would be done by the native handler before jumping
852 * to the post-syscall logic.
853 */
854 movl REGOFF_RAX(%rsp), %r12d
855 movl REGOFF_RDX(%rsp), %r13d
856 jmp _syscall32_after_brand
857
858 _syscall32_no_brand:
859 /*
860 * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
861 * into 64-bit (long) arg slots, maintaining 16 byte alignment. Or
862 * more succinctly:
863 *
864 * SA(MAXSYSARGS * sizeof (long)) == 64
865 *
866 * Note, this space is used both to copy in the arguments from user
867 * land and as part of the old UNIX style syscall_ap() method.
868 * syscall_entry expects that we do not change the values of this space
869 * that we give it. However, this means that when we end up in the more
870 * recent model of passing the arguments based on the calling
871 * conventions, we'll need to save an additional 16 bytes of stack.
872 */
873 #define SYS_DROP 64 /* drop for args */
874 subq $SYS_DROP, %rsp
875 movb $LWP_SYS, LWP_STATE(%r14)
876 movq %r15, %rdi
877 movq %rsp, %rsi
1238 SET_SIZE(_sys_sysenter_post_swapgs)
1239 SET_SIZE(brand_sys_sysenter)
1240
1241 #endif /* __lint */
1242
1243 #if defined(__lint)
1244 /*
1245 * System call via an int80. This entry point is only used by the Linux
1246 * application environment. Unlike the other entry points, there is no
1247 * default action to take if no callback is registered for this process.
1248 * This empty C body exists only for lint; the real entry points are below.
1249 */
1249 void
1250 sys_int80()
1251 {}
1252
1253 #else /* __lint */
1254
1255 ENTRY_NP(brand_sys_int80)
1256 SWAPGS /* kernel gsbase */
1257 XPV_TRAP_POP /* on xpv, pops %rcx and %r11 (see sys_int80 below) */
1258 call smap_enable /* NOTE(review): presumably re-asserts SMAP on entry - confirm */
1259
1260 /*
1261 * We first attempt to call the "b_int80" handler from the "struct
1262 * brand_mach_ops" for this brand. If no handler function is installed
1263 * for this brand, the BRAND_CALLBACK() macro returns here and we
1264 * check the lwp for a "lwp_brand_syscall" handler.
1265 */
1266 BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
1267
1268 /*
1269 * Check to see if this lwp provides "lwp_brand_syscall". If so, we
1270 * will route this int80 through the regular system call handling path.
1271 * %r15 is borrowed as the only scratch register and is restored before
1272 * the branch, so every other register still holds its value from entry.
1271 */
1272 movq %r15, %gs:CPU_RTMP_R15 /* stash %r15 so it can serve as scratch */
1273 movq %gs:CPU_THREAD, %r15 /* %r15 = current thread (CPU_THREAD) */
1274 movq T_LWP(%r15), %r15 /* %r15 = that thread's lwp */
1275 movq LWP_BRAND_SYSCALL(%r15), %r15 /* %r15 = lwp_brand_syscall handler */
1276 testq %r15, %r15 /* ZF set iff no handler is installed */
1277 movq %gs:CPU_RTMP_R15, %r15 /* restore %r15; movq leaves the flags alone */
1278 jnz nopop_syscall_int /* handler present: take the syscall path */
1279
1280 /*
1281 * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
1282 * function, and has thus opted out of handling this trap.
1283 */
1284 SWAPGS /* user gsbase */
1285 jmp nopop_int80 /* XPV_TRAP_POP and smap_enable were already done above */
1286
1287 ENTRY_NP(sys_int80)
1288 /*
1289 * We hit an int80, but this process isn't of a brand with an int80
1290 * handler. Bad process! Make it look as if the INT failed.
1291 * Modify %rip to point before the INT, push the expected error
1292 * code and fake a GP fault. Note that on the 64-bit hypervisor we need
1293 * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
1294 * because gptrap will pop them again with its own XPV_TRAP_POP.
1295 */
1296 XPV_TRAP_POP
1297 call smap_enable /* NOTE(review): presumably re-asserts SMAP on entry - confirm */
1298 nopop_int80:
1299 subq $2, (%rsp) /* int insn 2-bytes */
1300 pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) /* error code handed to gptrap */
1301 #if defined(__xpv)
1302 push %r11 /* re-push what XPV_TRAP_POP popped ... */
1303 push %rcx /* ... so gptrap's own XPV_TRAP_POP can pop them again */
1304 #endif
1305 jmp gptrap / GP fault
1306 SET_SIZE(sys_int80)
1307 SET_SIZE(brand_sys_int80)
1308 #endif /* __lint */
1309
1310
1311 /*
1312 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1313 * the generic i386 libc to do system calls. We do a small amount of setup
1314 * before jumping into the existing sys_syscall32 path.
1315 */
1316 #if defined(__lint)
1317
|
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 */
25
26 #include <sys/asm_linkage.h>
27 #include <sys/asm_misc.h>
28 #include <sys/regset.h>
29 #include <sys/privregs.h>
30 #include <sys/psw.h>
31
32 #if defined(__lint)
33
34 #include <sys/types.h>
35 #include <sys/thread.h>
36 #include <sys/systm.h>
37
38 #else /* __lint */
39
40 #include <sys/machbrand.h>
41 #include <sys/segments.h>
42 #include <sys/pcb.h>
43 #include <sys/trap.h>
44 #include <sys/ftrace.h>
45 #include <sys/traptrace.h>
46 #include <sys/clock.h>
47 #include <sys/model.h>
48 #include <sys/panic.h>
49
50 #if defined(__xpv)
51 #include <sys/hypervisor.h>
52 #endif
53
54 #include "assym.h"
55
56 #endif /* __lint */
57
58 /*
59 * We implement five flavours of system call entry points
60 *
514 movb $LWP_SYS, LWP_STATE(%r14)
515 incq LWP_RU_SYSC(%r14)
516 movb $NORMALRETURN, LWP_EOSYS(%r14)
517
518 incq %gs:CPU_STATS_SYS_SYSCALL
519
520 /*
521 * If our LWP has an alternate system call handler, run that instead of
522 * the regular system call path.
523 */
524 movq LWP_BRAND_SYSCALL(%r14), %rdi
525 testq %rdi, %rdi
526 jz _syscall_no_brand
527
528 pushq %rax
529 subq $8, %rsp /* align stack for call to C */
530 call *%rdi
531 addq $8, %rsp
532
533 /*
534 * If the alternate handler returns 0, we skip straight to the return to
535 * usermode. Otherwise, we resume regular system call processing.
536 */
537 testl %eax, %eax
538 popq %rax
539 jz _syscall_after_brand
540
541 _syscall_no_brand:
542 movw %ax, T_SYSNUM(%r15)
543 movzbl T_PRE_SYS(%r15), %ebx
544 ORL_SYSCALLTRACE(%ebx)
545 testl %ebx, %ebx
546 jne _syscall_pre
547
548 _syscall_invoke:
549 movq REGOFF_RDI(%rbp), %rdi
550 movq REGOFF_RSI(%rbp), %rsi
551 movq REGOFF_RDX(%rbp), %rdx
552 movq REGOFF_RCX(%rbp), %rcx
553 movq REGOFF_R8(%rbp), %r8
554 movq REGOFF_R9(%rbp), %r9
555
556 cmpl $NSYSCALL, %eax
557 jae _syscall_ill
558 shll $SYSENT_SIZE_SHIFT, %eax
559 leaq sysent(%rax), %rbx
560
814
815 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
816 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
817
818 ASSERT_LWPTOREGS(%r14, %rsp)
819
820 incq %gs:CPU_STATS_SYS_SYSCALL
821
822 /*
823 * If our lwp has an alternate system call handler, run that instead
824 * of the regular system call path.
825 */
826 movq LWP_BRAND_SYSCALL(%r14), %rax
827 testq %rax, %rax
828 jz _syscall32_no_brand
829
830 movb $LWP_SYS, LWP_STATE(%r14)
831 call *%rax
832
833 /*
834 * If the alternate handler returns 0, we skip straight to the return
835 * to usermode. Otherwise, we resume regular system call processing.
836 */
837 testl %eax, %eax
838 jz _syscall32_after_brand
839
840 _syscall32_no_brand:
841 /*
842 * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
843 * into 64-bit (long) arg slots, maintaining 16 byte alignment. Or
844 * more succinctly:
845 *
846 * SA(MAXSYSARGS * sizeof (long)) == 64
847 *
848 * Note, this space is used both to copy in the arguments from user
849 * land and as part of the old UNIX style syscall_ap() method.
850 * syscall_entry expects that we do not change the values of this space
851 * that we give it. However, this means that when we end up in the more
852 * recent model of passing the arguments based on the calling
853 * conventions, we'll need to save an additional 16 bytes of stack.
854 */
855 #define SYS_DROP 64 /* drop for args */
856 subq $SYS_DROP, %rsp
857 movb $LWP_SYS, LWP_STATE(%r14)
858 movq %r15, %rdi
859 movq %rsp, %rsi
1220 SET_SIZE(_sys_sysenter_post_swapgs)
1221 SET_SIZE(brand_sys_sysenter)
1222
1223 #endif /* __lint */
1224
1225 #if defined(__lint)
1226 /*
1227 * System call via an int80. This entry point is only used by the Linux
1228 * application environment. Unlike the other entry points, there is no
1229 * default action to take if no callback is registered for this process.
1230 * This empty C body exists only for lint; the real entry points are below.
1230 */
1231 void
1232 sys_int80()
1233 {}
1234
1235 #else /* __lint */
1236
1237 ENTRY_NP(brand_sys_int80)
1238 SWAPGS /* kernel gsbase */
1239 XPV_TRAP_POP /* on xpv, pops %rcx and %r11 (see sys_int80 below) */
1240
1241 /*
1242 * We first attempt to call the "b_int80" handler from the "struct
1243 * brand_mach_ops" for this brand. If no handler function is installed
1244 * for this brand, the BRAND_CALLBACK() macro returns here and we
1245 * check the lwp for a "lwp_brand_syscall" handler.
1246 */
1247 BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
1248
1249 /*
1250 * Check to see if this lwp provides "lwp_brand_syscall". If so, we
1251 * will route this int80 through the regular system call handling path.
1252 * %r15 is borrowed as the only scratch register and is restored before
1252 * the branch, so every other register still holds its value from entry.
1252 */
1253 movq %r15, %gs:CPU_RTMP_R15 /* stash %r15 so it can serve as scratch */
1254 movq %gs:CPU_THREAD, %r15 /* %r15 = current thread (CPU_THREAD) */
1255 movq T_LWP(%r15), %r15 /* %r15 = that thread's lwp */
1256 movq LWP_BRAND_SYSCALL(%r15), %r15 /* %r15 = lwp_brand_syscall handler */
1257 testq %r15, %r15 /* ZF set iff no handler is installed */
1258 movq %gs:CPU_RTMP_R15, %r15 /* restore %r15; movq leaves the flags alone */
1259 jnz nopop_syscall_int /* handler present: take the syscall path */
1260
1261 /*
1262 * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
1263 * function, and has thus opted out of handling this trap.
1264 */
1265 SWAPGS /* user gsbase */
1266 jmp nopop_int80 /* XPV_TRAP_POP was already done above */
1267
1268 ENTRY_NP(sys_int80)
1269 /*
1270 * We hit an int80, but this process isn't of a brand with an int80
1271 * handler. Bad process! Make it look as if the INT failed.
1272 * Modify %rip to point before the INT, push the expected error
1273 * code and fake a GP fault. Note that on the 64-bit hypervisor we need
1274 * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
1275 * because gptrap will pop them again with its own XPV_TRAP_POP.
1276 */
1277 XPV_TRAP_POP
1278 nopop_int80:
1279 subq $2, (%rsp) /* int insn 2-bytes */
1280 pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) /* error code handed to gptrap */
1281 #if defined(__xpv)
1282 push %r11 /* re-push what XPV_TRAP_POP popped ... */
1283 push %rcx /* ... so gptrap's own XPV_TRAP_POP can pop them again */
1284 #endif
1285 jmp gptrap / GP fault
1286 SET_SIZE(sys_int80)
1287 SET_SIZE(brand_sys_int80)
1288 #endif /* __lint */
1289
1290
1291 /*
1292 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1293 * the generic i386 libc to do system calls. We do a small amount of setup
1294 * before jumping into the existing sys_syscall32 path.
1295 */
1296 #if defined(__lint)
1297
|