3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 Joyent, Inc.
24 */
25
26 #include <sys/asm_linkage.h>
27 #include <sys/asm_misc.h>
28 #include <sys/regset.h>
29 #include <sys/privregs.h>
30 #include <sys/psw.h>
31 #include <sys/machbrand.h>
32
33 #if defined(__lint)
34
35 #include <sys/types.h>
36 #include <sys/thread.h>
37 #include <sys/systm.h>
38
39 #else /* __lint */
40
41 #include <sys/segments.h>
42 #include <sys/pcb.h>
43 #include <sys/trap.h>
44 #include <sys/ftrace.h>
45 #include <sys/traptrace.h>
46 #include <sys/clock.h>
47 #include <sys/model.h>
48 #include <sys/panic.h>
49
50 #if defined(__xpv)
51 #include <sys/hypervisor.h>
52 #endif
53
54 #include "assym.h"
55
56 #endif /* __lint */
57
58 /*
59 * We implement five flavours of system call entry points
60 *
486 movq %rbx, REGOFF_ES(%rsp)
487 movw %fs, %bx
488 movq %rbx, REGOFF_FS(%rsp)
489 movw %gs, %bx
490 movq %rbx, REGOFF_GS(%rsp)
491
492 /*
493 * Machine state saved in the regs structure on the stack
494 * First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9
495 * %eax is the syscall number
496 * %rsp is the thread's stack, %r15 is curthread
497 * REG_RSP(%rsp) is the user's stack
498 */
499
500 SYSCALL_TRAPTRACE($TT_SYSC64)
501
502 movq %rsp, %rbp
503
504 movq T_LWP(%r15), %r14
505 ASSERT_NO_RUPDATE_PENDING(%r14)
506 ENABLE_INTR_FLAGS
507
508 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
509 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
510
511 ASSERT_LWPTOREGS(%r14, %rsp)
512
513 movb $LWP_SYS, LWP_STATE(%r14)
514 incq LWP_RU_SYSC(%r14)
515 movb $NORMALRETURN, LWP_EOSYS(%r14)
516
517 incq %gs:CPU_STATS_SYS_SYSCALL
518
519 movw %ax, T_SYSNUM(%r15)
520 movzbl T_PRE_SYS(%r15), %ebx
521 ORL_SYSCALLTRACE(%ebx)
522 testl %ebx, %ebx
523 jne _syscall_pre
524
525 _syscall_invoke:
526 movq REGOFF_RDI(%rbp), %rdi
527 movq REGOFF_RSI(%rbp), %rsi
528 movq REGOFF_RDX(%rbp), %rdx
529 movq REGOFF_RCX(%rbp), %rcx
530 movq REGOFF_R8(%rbp), %r8
531 movq REGOFF_R9(%rbp), %r9
532
533 	cmpl	$NSYSCALL, %eax			/* range-check the syscall number */
534 	jae	_syscall_ill			/* out of range: illegal syscall */
535 	shll	$SYSENT_SIZE_SHIFT, %eax	/* scale to a sysent table offset */
536 	leaq	sysent(%rax), %rbx		/* %rbx = &sysent[syscall number] */
537
538 call *SY_CALLC(%rbx)
539
540 movq %rax, %r12
541 movq %rdx, %r13
542
543 /*
544 * If the handler returns two ints, then we need to split the
545 * 64-bit return value into two 32-bit values.
546 */
547 testw $SE_32RVAL2, SY_FLAGS(%rbx)
548 je 5f
549 movq %r12, %r13
550 shrq $32, %r13 /* upper 32-bits into %edx */
551 movl %r12d, %r12d /* lower 32-bits into %eax */
552 5:
553 /*
554 * Optimistically assume that there's no post-syscall
555 * work to do. (This is to avoid having to call syscall_mstate()
556 * with interrupts disabled)
557 */
558 MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
559
560 /*
561 * We must protect ourselves from being descheduled here;
562 * If we were, and we ended up on another cpu, or another
563 * lwp got in ahead of us, it could change the segment
564 * registers without us noticing before we return to userland.
565 */
566 CLI(%r14)
567 CHECK_POSTSYS_NE(%r15, %r14, %ebx)
568 jne _syscall_post
569
570 /*
571 * We need to protect ourselves against non-canonical return values
572 * because Intel doesn't check for them on sysret (AMD does). Canonical
778 * REG_RSP(%rsp) is the user's stack
779 */
780
781 SYSCALL_TRAPTRACE32($TT_SYSC)
782
783 movq %rsp, %rbp
784
785 movq T_LWP(%r15), %r14
786 ASSERT_NO_RUPDATE_PENDING(%r14)
787
788 ENABLE_INTR_FLAGS
789
790 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
791 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
792
793 ASSERT_LWPTOREGS(%r14, %rsp)
794
795 incq %gs:CPU_STATS_SYS_SYSCALL
796
797 /*
798 * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
799 * into 64-bit (long) arg slots, maintaining 16 byte alignment. Or
800 * more succinctly:
801 *
802 * SA(MAXSYSARGS * sizeof (long)) == 64
803 */
804 #define SYS_DROP 64 /* drop for args */
805 subq $SYS_DROP, %rsp
806 movb $LWP_SYS, LWP_STATE(%r14)
807 movq %r15, %rdi
808 movq %rsp, %rsi
809 call syscall_entry
810
811 /*
812 * Fetch the arguments copied onto the kernel stack and put
813 * them in the right registers to invoke a C-style syscall handler.
814 * %rax contains the handler address.
815 *
816 * Ideas for making all this go faster of course include simply
817 * forcibly fetching 6 arguments from the user stack under lofault
818 * protection, reverting to copyin_args only when watchpoints
819 * are in effect.
820 *
821 * (If we do this, make sure that exec and libthread leave
822 * enough space at the top of the stack to ensure that we'll
823 * never do a fetch from an invalid page.)
824 *
825 * Lots of ideas here, but they won't really help with bringup B-)
826 * Correctness can't wait, performance can wait a little longer ..
827 */
828
829 movq %rax, %rbx
830 movl 0(%rsp), %edi
831 movl 8(%rsp), %esi
832 movl 0x10(%rsp), %edx
833 movl 0x18(%rsp), %ecx
834 movl 0x20(%rsp), %r8d
835 movl 0x28(%rsp), %r9d
836
837 call *SY_CALLC(%rbx)
838
839 movq %rbp, %rsp /* pop the args */
840
841 /*
842 * amd64 syscall handlers -always- return a 64-bit value in %rax.
843 * On the 32-bit kernel, they always return that value in %eax:%edx
844 * as required by the 32-bit ABI.
845 *
846 * Simulate the same behaviour by unconditionally splitting the
847 * return value in the same way.
848 */
849 movq %rax, %r13
850 shrq $32, %r13 /* upper 32-bits into %edx */
851 movl %eax, %r12d /* lower 32-bits into %eax */
852
853 /*
854 * Optimistically assume that there's no post-syscall
855 * work to do. (This is to avoid having to call syscall_mstate()
856 * with interrupts disabled)
857 */
858 MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
859
860 /*
861 * We must protect ourselves from being descheduled here;
862 * If we were, and we ended up on another cpu, or another
863 * lwp got in ahead of us, it could change the segment
864 * registers without us noticing before we return to userland.
865 */
866 CLI(%r14)
867 CHECK_POSTSYS_NE(%r15, %r14, %ebx)
868 jne _full_syscall_postsys32
869 SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
870
871 /*
872 * To get back to userland, we need to put the return %rip in %rcx and
1062 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate calls) */
1063
1064 ASSERT_LWPTOREGS(%r14, %rsp)
1065
1066 incq %gs:CPU_STATS_SYS_SYSCALL
1067
1068 /*
1069 * Make some space for MAXSYSARGS (currently 8) 32-bit args
1070 * placed into 64-bit (long) arg slots, plus one 64-bit
1071 * (long) arg count, maintaining 16 byte alignment.
1072 */
1073 subq $SYS_DROP, %rsp
1074 movb $LWP_SYS, LWP_STATE(%r14)
1075 movq %r15, %rdi
1076 movq %rsp, %rsi
1077 call syscall_entry
1078
1079 /*
1080 * Fetch the arguments copied onto the kernel stack and put
1081 * them in the right registers to invoke a C-style syscall handler.
1082 * %rax contains the handler address.
1083 */
1084 movq %rax, %rbx
1085 movl 0(%rsp), %edi
1086 movl 8(%rsp), %esi
1087 movl 0x10(%rsp), %edx
1088 movl 0x18(%rsp), %ecx
1089 movl 0x20(%rsp), %r8d
1090 movl 0x28(%rsp), %r9d
1091
1092 call *SY_CALLC(%rbx)
1093
1094 movq %rbp, %rsp /* pop the args */
1095
1096 /*
1097 * amd64 syscall handlers -always- return a 64-bit value in %rax.
1098 * On the 32-bit kernel, they always return that value in %eax:%edx
1099 * as required by the 32-bit ABI.
1100 *
1101 * Simulate the same behaviour by unconditionally splitting the
1102 * return value in the same way.
1103 */
1104 movq %rax, %r13
1105 shrq $32, %r13 /* upper 32-bits into %edx */
1106 movl %eax, %r12d /* lower 32-bits into %eax */
1107
1108 /*
1109 * Optimistically assume that there's no post-syscall
1110 * work to do. (This is to avoid having to call syscall_mstate()
1143 movl REGOFF_RBX(%rsp), %ebx
1144 movl REGOFF_RBP(%rsp), %ebp
1145 movl REGOFF_RSI(%rsp), %esi
1146 movl REGOFF_RDI(%rsp), %edi
1147
1148 movl REGOFF_RIP(%rsp), %edx /* sysexit: %edx -> %eip */
1149 pushq REGOFF_RFL(%rsp)
1150 popfq
1151 movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */
1152 ALTENTRY(sys_sysenter_swapgs_sysexit)
1153 swapgs
1154 sti
1155 sysexit
1156 SET_SIZE(sys_sysenter_swapgs_sysexit)
1157 SET_SIZE(sys_sysenter)
1158 SET_SIZE(_sys_sysenter_post_swapgs)
1159 SET_SIZE(brand_sys_sysenter)
1160
1161 #endif /* __lint */
1162
1163 /*
1164 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1165 * the generic i386 libc to do system calls. We do a small amount of setup
1166 * before jumping into the existing sys_syscall32 path.
1167 */
1168 #if defined(__lint)
1169
1170 /*ARGSUSED*/
1171 void
1172 sys_syscall_int()
1173 {}
1174
1175 #else /* __lint */
1176
1177 ENTRY_NP(brand_sys_syscall_int)
1178 SWAPGS /* kernel gsbase */
1179 XPV_TRAP_POP
1180 call smap_enable
1181 BRAND_CALLBACK(BRAND_CB_INT91, BRAND_URET_FROM_INTR_STACK())
1182 jmp nopop_syscall_int
1183
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 */
25
26 #include <sys/asm_linkage.h>
27 #include <sys/asm_misc.h>
28 #include <sys/regset.h>
29 #include <sys/privregs.h>
30 #include <sys/psw.h>
31
32 #if defined(__lint)
33
34 #include <sys/types.h>
35 #include <sys/thread.h>
36 #include <sys/systm.h>
37
38 #else /* __lint */
39
40 #include <sys/machbrand.h>
41 #include <sys/segments.h>
42 #include <sys/pcb.h>
43 #include <sys/trap.h>
44 #include <sys/ftrace.h>
45 #include <sys/traptrace.h>
46 #include <sys/clock.h>
47 #include <sys/model.h>
48 #include <sys/panic.h>
49
50 #if defined(__xpv)
51 #include <sys/hypervisor.h>
52 #endif
53
54 #include "assym.h"
55
56 #endif /* __lint */
57
58 /*
59 * We implement five flavours of system call entry points
60 *
486 movq %rbx, REGOFF_ES(%rsp)
487 movw %fs, %bx
488 movq %rbx, REGOFF_FS(%rsp)
489 movw %gs, %bx
490 movq %rbx, REGOFF_GS(%rsp)
491
492 /*
493 * Machine state saved in the regs structure on the stack
494 * First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9
495 * %eax is the syscall number
496 * %rsp is the thread's stack, %r15 is curthread
497 * REG_RSP(%rsp) is the user's stack
498 */
499
500 SYSCALL_TRAPTRACE($TT_SYSC64)
501
502 movq %rsp, %rbp
503
504 movq T_LWP(%r15), %r14
505 ASSERT_NO_RUPDATE_PENDING(%r14)
506
507 ENABLE_INTR_FLAGS
508
509 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
510 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
511
512 ASSERT_LWPTOREGS(%r14, %rsp)
513
514 movb $LWP_SYS, LWP_STATE(%r14)
515 incq LWP_RU_SYSC(%r14)
516 movb $NORMALRETURN, LWP_EOSYS(%r14)
517
518 incq %gs:CPU_STATS_SYS_SYSCALL
519
520 /*
521 * If our LWP has an alternate system call handler, run that instead of
522 * the regular system call path.
523 */
524 movq LWP_BRAND_SYSCALL(%r14), %rdi
525 testq %rdi, %rdi
526 jz _syscall_no_brand
527
528 	pushq	%rax				/* preserve the syscall number */
529 	subq	$8, %rsp			/* align stack for call to C */
530 	call	*%rdi				/* call the brand's handler */
531 	addq	$8, %rsp
532
533 /*
534 * If the alternate handler returns 0, we skip straight to the return to
535 * usermode. Otherwise, we resume regular system call processing.
536 */
537 	testl	%eax, %eax			/* handler returned 0? */
538 	popq	%rax				/* restore syscall number; pop leaves flags intact */
539 	jz	_syscall_after_brand
540
541 _syscall_no_brand:
542 movw %ax, T_SYSNUM(%r15)
543 movzbl T_PRE_SYS(%r15), %ebx
544 ORL_SYSCALLTRACE(%ebx)
545 testl %ebx, %ebx
546 jne _syscall_pre
547
548 _syscall_invoke:
549 movq REGOFF_RDI(%rbp), %rdi
550 movq REGOFF_RSI(%rbp), %rsi
551 movq REGOFF_RDX(%rbp), %rdx
552 movq REGOFF_RCX(%rbp), %rcx
553 movq REGOFF_R8(%rbp), %r8
554 movq REGOFF_R9(%rbp), %r9
555
556 	cmpl	$NSYSCALL, %eax			/* range-check the syscall number */
557 	jae	_syscall_ill			/* out of range: illegal syscall */
558 	shll	$SYSENT_SIZE_SHIFT, %eax	/* scale to a sysent table offset */
559 	leaq	sysent(%rax), %rbx		/* %rbx = &sysent[syscall number] */
560
561 call *SY_CALLC(%rbx)
562
563 movq %rax, %r12
564 movq %rdx, %r13
565
566 /*
567 * If the handler returns two ints, then we need to split the
568 * 64-bit return value into two 32-bit values.
569 */
570 testw $SE_32RVAL2, SY_FLAGS(%rbx)
571 je 5f
572 movq %r12, %r13
573 shrq $32, %r13 /* upper 32-bits into %edx */
574 movl %r12d, %r12d /* lower 32-bits into %eax */
575 5:
576
577 _syscall_after_brand:
578 /*
579 * Optimistically assume that there's no post-syscall
580 * work to do. (This is to avoid having to call syscall_mstate()
581 * with interrupts disabled)
582 */
583 MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
584
585 /*
586 * We must protect ourselves from being descheduled here;
587 * If we were, and we ended up on another cpu, or another
588 * lwp got in ahead of us, it could change the segment
589 * registers without us noticing before we return to userland.
590 */
591 CLI(%r14)
592 CHECK_POSTSYS_NE(%r15, %r14, %ebx)
593 jne _syscall_post
594
595 /*
596 * We need to protect ourselves against non-canonical return values
597 * because Intel doesn't check for them on sysret (AMD does). Canonical
803 * REG_RSP(%rsp) is the user's stack
804 */
805
806 SYSCALL_TRAPTRACE32($TT_SYSC)
807
808 movq %rsp, %rbp
809
810 movq T_LWP(%r15), %r14
811 ASSERT_NO_RUPDATE_PENDING(%r14)
812
813 ENABLE_INTR_FLAGS
814
815 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
816 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
817
818 ASSERT_LWPTOREGS(%r14, %rsp)
819
820 incq %gs:CPU_STATS_SYS_SYSCALL
821
822 /*
823 * If our lwp has an alternate system call handler, run that instead
824 * of the regular system call path.
825 */
826 movq LWP_BRAND_SYSCALL(%r14), %rax
827 testq %rax, %rax
828 jz _syscall32_no_brand
829
830 movb $LWP_SYS, LWP_STATE(%r14)
831 	call	*%rax				/* call the brand's handler */
832
833 /*
834 * If the alternate handler returns 0, we skip straight to the return
835 * to usermode. Otherwise, we resume regular system call processing.
836 */
837 testl %eax, %eax
838 jz _syscall32_after_brand
839
840 _syscall32_no_brand:
841 /*
842 * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
843 * into 64-bit (long) arg slots, maintaining 16 byte alignment. Or
844 * more succinctly:
845 *
846 * SA(MAXSYSARGS * sizeof (long)) == 64
847 *
848 * Note that this space is used both to copy in the arguments from
849 * userland and as part of the old UNIX-style syscall_ap() method.
850 * syscall_entry expects that we do not change the values in this
851 * space once we hand it over. However, this means that when we pass
852 * the arguments via the normal calling convention below, we'll need
853 * to save an additional 16 bytes of stack (for arg6 and arg7).
854 */
855 #define SYS_DROP 64 /* drop for args */
856 subq $SYS_DROP, %rsp
857 movb $LWP_SYS, LWP_STATE(%r14)
858 movq %r15, %rdi
859 movq %rsp, %rsi
860 call syscall_entry
861
862 /*
863 * Fetch the arguments copied onto the kernel stack and put
864 * them in the right registers to invoke a C-style syscall handler.
865 * %rax contains the handler address.
866 *
867 * Ideas for making all this go faster of course include simply
868 * forcibly fetching 6 arguments from the user stack under lofault
869 * protection, reverting to copyin_args only when watchpoints
870 * are in effect.
871 *
872 * (If we do this, make sure that exec and libthread leave
873 * enough space at the top of the stack to ensure that we'll
874 * never do a fetch from an invalid page.)
875 *
876 * Lots of ideas here, but they won't really help with bringup B-)
877 * Correctness can't wait, performance can wait a little longer ..
878 */
879
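	/*
	 * Note that each pushq below moves %rsp down by 8, so the stack
	 * offsets of the remaining copied-in args shift after the first
	 * push; that is why 0x38(%rsp) is loaded twice (arg7 before the
	 * push, arg6 after it).
	 */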
880 movq %rax, %rbx
881 movl 0x0(%rsp), %edi /* arg0 */
882 movl 0x8(%rsp), %esi /* arg1 */
883 movl 0x10(%rsp), %edx /* arg2 */
884 movl 0x38(%rsp), %eax /* arg7 load */
885 movl 0x18(%rsp), %ecx /* arg3 */
886 pushq %rax /* arg7 saved to stack */
887 movl 0x28(%rsp), %r8d /* arg4 */
888 movl 0x38(%rsp), %eax /* arg6 load */
889 movl 0x30(%rsp), %r9d /* arg5 */
890 pushq %rax /* arg6 saved to stack */
891
892 call *SY_CALLC(%rbx)
893
894 movq %rbp, %rsp /* pop the args */
895
896 /*
897 * amd64 syscall handlers -always- return a 64-bit value in %rax.
898 * On the 32-bit kernel, they always return that value in %eax:%edx
899 * as required by the 32-bit ABI.
900 *
901 * Simulate the same behaviour by unconditionally splitting the
902 * return value in the same way.
903 */
904 movq %rax, %r13
905 shrq $32, %r13 /* upper 32-bits into %edx */
906 movl %eax, %r12d /* lower 32-bits into %eax */
907
908 _syscall32_after_brand:
909
910 /*
911 * Optimistically assume that there's no post-syscall
912 * work to do. (This is to avoid having to call syscall_mstate()
913 * with interrupts disabled)
914 */
915 MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
916
917 /*
918 * We must protect ourselves from being descheduled here;
919 * If we were, and we ended up on another cpu, or another
920 * lwp got in ahead of us, it could change the segment
921 * registers without us noticing before we return to userland.
922 */
923 CLI(%r14)
924 CHECK_POSTSYS_NE(%r15, %r14, %ebx)
925 jne _full_syscall_postsys32
926 SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
927
928 /*
929 * To get back to userland, we need to put the return %rip in %rcx and
1119 movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate calls) */
1120
1121 ASSERT_LWPTOREGS(%r14, %rsp)
1122
1123 incq %gs:CPU_STATS_SYS_SYSCALL
1124
1125 /*
1126 * Make some space for MAXSYSARGS (currently 8) 32-bit args
1127 * placed into 64-bit (long) arg slots, plus one 64-bit
1128 * (long) arg count, maintaining 16 byte alignment.
1129 */
1130 subq $SYS_DROP, %rsp
1131 movb $LWP_SYS, LWP_STATE(%r14)
1132 movq %r15, %rdi
1133 movq %rsp, %rsi
1134 call syscall_entry
1135
1136 /*
1137 * Fetch the arguments copied onto the kernel stack and put
1138 * them in the right registers to invoke a C-style syscall handler.
1139 * %rax contains the handler address. For the last two arguments, we
1140 * push them onto the stack -- we can't clobber the old arguments.
1141 */
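	/*
	 * Note that each pushq below moves %rsp down by 8, so the stack
	 * offsets of the remaining copied-in args shift after the first
	 * push; that is why 0x38(%rsp) is loaded twice (arg7 before the
	 * push, arg6 after it).
	 */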
1142 movq %rax, %rbx
1143 movl 0x0(%rsp), %edi /* arg0 */
1144 movl 0x8(%rsp), %esi /* arg1 */
1145 movl 0x10(%rsp), %edx /* arg2 */
1146 movl 0x38(%rsp), %eax /* arg7 load */
1147 movl 0x18(%rsp), %ecx /* arg3 */
1148 pushq %rax /* arg7 saved to stack */
1149 movl 0x28(%rsp), %r8d /* arg4 */
1150 movl 0x38(%rsp), %eax /* arg6 load */
1151 movl 0x30(%rsp), %r9d /* arg5 */
1152 pushq %rax /* arg6 saved to stack */
1153
1154 call *SY_CALLC(%rbx)
1155
1156 movq %rbp, %rsp /* pop the args */
1157
1158 /*
1159 * amd64 syscall handlers -always- return a 64-bit value in %rax.
1160 * On the 32-bit kernel, they always return that value in %eax:%edx
1161 * as required by the 32-bit ABI.
1162 *
1163 * Simulate the same behaviour by unconditionally splitting the
1164 * return value in the same way.
1165 */
1166 movq %rax, %r13
1167 shrq $32, %r13 /* upper 32-bits into %edx */
1168 movl %eax, %r12d /* lower 32-bits into %eax */
1169
1170 /*
1171 * Optimistically assume that there's no post-syscall
1172 * work to do. (This is to avoid having to call syscall_mstate()
1205 movl REGOFF_RBX(%rsp), %ebx
1206 movl REGOFF_RBP(%rsp), %ebp
1207 movl REGOFF_RSI(%rsp), %esi
1208 movl REGOFF_RDI(%rsp), %edi
1209
1210 movl REGOFF_RIP(%rsp), %edx /* sysexit: %edx -> %eip */
1211 pushq REGOFF_RFL(%rsp)
1212 popfq
1213 movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */
1214 ALTENTRY(sys_sysenter_swapgs_sysexit)
1215 swapgs
1216 sti
1217 sysexit
1218 SET_SIZE(sys_sysenter_swapgs_sysexit)
1219 SET_SIZE(sys_sysenter)
1220 SET_SIZE(_sys_sysenter_post_swapgs)
1221 SET_SIZE(brand_sys_sysenter)
1222
1223 #endif /* __lint */
1224
1225 #if defined(__lint)
1226 /*
1227 * System call via an int80. This entry point is only used by the Linux
1228 * application environment. Unlike the other entry points, there is no
1229 * default action to take if no callback is registered for this process.
1230 */
1231 void
1232 sys_int80()
1233 {}
1234
1235 #else /* __lint */
1236
1237 ENTRY_NP(brand_sys_int80)
1238 SWAPGS /* kernel gsbase */
1239 XPV_TRAP_POP
1240
1241 /*
1242 * We first attempt to call the "b_int80" handler from the "struct
1243 * brand_mach_ops" for this brand. If no handler function is installed
1244 * for this brand, the BRAND_CALLBACK() macro returns here and we
1245 * check the lwp for a "lwp_brand_syscall" handler.
1246 */
1247 BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
1248
1249 /*
1250 * Check to see if this lwp provides "lwp_brand_syscall". If so, we
1251 * will route this int80 through the regular system call handling path.
1252 */
1253 	movq	%r15, %gs:CPU_RTMP_R15		/* stash %r15 in per-CPU scratch */
1254 	movq	%gs:CPU_THREAD, %r15		/* %r15 = curthread */
1255 	movq	T_LWP(%r15), %r15		/* %r15 = curthread's lwp */
1256 	movq	LWP_BRAND_SYSCALL(%r15), %r15	/* %r15 = lwp_brand_syscall */
1257 	testq	%r15, %r15			/* handler installed? */
1258 	movq	%gs:CPU_RTMP_R15, %r15		/* restore %r15; flags unchanged */
1259 	jnz	nopop_syscall_int
1260
1261 /*
1262 * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
1263 * function, and has thus opted out of handling this trap.
1264 */
1265 SWAPGS /* user gsbase */
1266 jmp nopop_int80
1267
1268 ENTRY_NP(sys_int80)
1269 /*
1270 * We hit an int80, but this process isn't of a brand with an int80
1271 * handler. Bad process! Make it look as if the INT failed.
1272 * Modify %rip to point before the INT, push the expected error
1273 * code and fake a GP fault. Note on 64-bit hypervisor we need
1274 * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
1275 * because gptrap will pop them again with its own XPV_TRAP_POP.
1276 */
1277 XPV_TRAP_POP
1278 nopop_int80:
1279 subq $2, (%rsp) /* int insn 2-bytes */
1280 pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
1281 #if defined(__xpv)
1282 push %r11
1283 push %rcx
1284 #endif
1285 jmp gptrap / GP fault
1286 SET_SIZE(sys_int80)
1287 SET_SIZE(brand_sys_int80)
1288 #endif /* __lint */
1289
1290
1291 /*
1292 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1293 * the generic i386 libc to do system calls. We do a small amount of setup
1294 * before jumping into the existing sys_syscall32 path.
1295 */
1296 #if defined(__lint)
1297
1298 /*ARGSUSED*/
1299 void
1300 sys_syscall_int()
1301 {}
1302
1303 #else /* __lint */
1304
1305 ENTRY_NP(brand_sys_syscall_int)
1306 SWAPGS /* kernel gsbase */
1307 XPV_TRAP_POP
1308 call smap_enable
1309 BRAND_CALLBACK(BRAND_CB_INT91, BRAND_URET_FROM_INTR_STACK())
1310 jmp nopop_syscall_int
1311