 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/machbrand.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>

#else   /* __lint */

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/model.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

#endif  /* __lint */

/*
 * We implement five flavours of system call entry points
 *
 
 
        movq    %rbx, REGOFF_ES(%rsp)
        movw    %fs, %bx
        movq    %rbx, REGOFF_FS(%rsp)
        movw    %gs, %bx
        movq    %rbx, REGOFF_GS(%rsp)

        /*
         * Machine state saved in the regs structure on the stack
         * First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9
         * %eax is the syscall number
         * %rsp is the thread's stack, %r15 is curthread
         * REG_RSP(%rsp) is the user's stack
         */

        SYSCALL_TRAPTRACE($TT_SYSC64)

        movq    %rsp, %rbp

        movq    T_LWP(%r15), %r14
        ASSERT_NO_RUPDATE_PENDING(%r14)
        ENABLE_INTR_FLAGS

        MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
        movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */

        ASSERT_LWPTOREGS(%r14, %rsp)

        movb    $LWP_SYS, LWP_STATE(%r14)
        incq    LWP_RU_SYSC(%r14)
        movb    $NORMALRETURN, LWP_EOSYS(%r14)

        incq    %gs:CPU_STATS_SYS_SYSCALL

        movw    %ax, T_SYSNUM(%r15)
        movzbl  T_PRE_SYS(%r15), %ebx
        ORL_SYSCALLTRACE(%ebx)
        testl   %ebx, %ebx
        jne     _syscall_pre

_syscall_invoke:
        movq    REGOFF_RDI(%rbp), %rdi
        movq    REGOFF_RSI(%rbp), %rsi
        movq    REGOFF_RDX(%rbp), %rdx
        movq    REGOFF_RCX(%rbp), %rcx
        movq    REGOFF_R8(%rbp), %r8
        movq    REGOFF_R9(%rbp), %r9

        cmpl    $NSYSCALL, %eax
        jae     _syscall_ill
        shll    $SYSENT_SIZE_SHIFT, %eax
        leaq    sysent(%rax), %rbx

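        /*
         * %rbx now points at the sysent[] entry for this system call;
         * dispatch to its C handler.
         */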
        call    *SY_CALLC(%rbx)

        movq    %rax, %r12
        movq    %rdx, %r13

        /*
         * If the handler returns two ints, then we need to split the
         * 64-bit return value into two 32-bit values.
         */
        testw   $SE_32RVAL2, SY_FLAGS(%rbx)
        je      5f
        movq    %r12, %r13
        shrq    $32, %r13       /* upper 32-bits into %edx */
        movl    %r12d, %r12d    /* lower 32-bits into %eax */
5:
        /*
         * Optimistically assume that there's no post-syscall
         * work to do.  (This is to avoid having to call syscall_mstate()
         * with interrupts disabled)
         */
        MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

        /*
         * We must protect ourselves from being descheduled here;
         * If we were, and we ended up on another cpu, or another
         * lwp got in ahead of us, it could change the segment
         * registers without us noticing before we return to userland.
         */
        CLI(%r14)
        CHECK_POSTSYS_NE(%r15, %r14, %ebx)
        jne     _syscall_post

        /*
         * We need to protect ourselves against non-canonical return values
         * because Intel doesn't check for them on sysret (AMD does).  Canonical
 
 
         * REG_RSP(%rsp) is the user's stack
         */

        SYSCALL_TRAPTRACE32($TT_SYSC)

        movq    %rsp, %rbp

        movq    T_LWP(%r15), %r14
        ASSERT_NO_RUPDATE_PENDING(%r14)

        ENABLE_INTR_FLAGS

        MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
        movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */

        ASSERT_LWPTOREGS(%r14, %rsp)

        incq    %gs:CPU_STATS_SYS_SYSCALL

        /*
         * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
         * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
         * more succinctly:
         *
         *      SA(MAXSYSARGS * sizeof (long)) == 64
         */
#define SYS_DROP        64                      /* drop for args */
        subq    $SYS_DROP, %rsp
        movb    $LWP_SYS, LWP_STATE(%r14)
        movq    %r15, %rdi
        movq    %rsp, %rsi
        call    syscall_entry

        /*
         * Fetch the arguments copied onto the kernel stack and put
         * them in the right registers to invoke a C-style syscall handler.
         * %rax contains the handler address.
         *
         * Ideas for making all this go faster of course include simply
         * forcibly fetching 6 arguments from the user stack under lofault
         * protection, reverting to copyin_args only when watchpoints
         * are in effect.
         *
         * (If we do this, make sure that exec and libthread leave
         * enough space at the top of the stack to ensure that we'll
         * never do a fetch from an invalid page.)
         *
         * Lots of ideas here, but they won't really help with bringup B-)
         * Correctness can't wait, performance can wait a little longer ..
         */

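        /*
         * syscall_entry copied each 32-bit argument into its own 64-bit
         * (long) slot, hence the 8-byte stride between the fetches below.
         */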
        movq    %rax, %rbx
        movl    0(%rsp), %edi
        movl    8(%rsp), %esi
        movl    0x10(%rsp), %edx
        movl    0x18(%rsp), %ecx
        movl    0x20(%rsp), %r8d
        movl    0x28(%rsp), %r9d

        call    *SY_CALLC(%rbx)

        movq    %rbp, %rsp      /* pop the args */

        /*
         * amd64 syscall handlers -always- return a 64-bit value in %rax.
         * On the 32-bit kernel, they always return that value in %eax:%edx
         * as required by the 32-bit ABI.
         *
         * Simulate the same behaviour by unconditionally splitting the
         * return value in the same way.
         */
        movq    %rax, %r13
        shrq    $32, %r13       /* upper 32-bits into %edx */
        movl    %eax, %r12d     /* lower 32-bits into %eax */

        /*
         * Optimistically assume that there's no post-syscall
         * work to do.  (This is to avoid having to call syscall_mstate()
         * with interrupts disabled)
         */
        MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

        /*
         * We must protect ourselves from being descheduled here;
         * If we were, and we ended up on another cpu, or another
         * lwp got in ahead of us, it could change the segment
         * registers without us noticing before we return to userland.
         */
        CLI(%r14)
        CHECK_POSTSYS_NE(%r15, %r14, %ebx)
        jne     _full_syscall_postsys32
        SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)

        /*
         * To get back to userland, we need to put the return %rip in %rcx and
 
 
        movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate calls) */

        ASSERT_LWPTOREGS(%r14, %rsp)

        incq    %gs:CPU_STATS_SYS_SYSCALL

        /*
         * Make some space for MAXSYSARGS (currently 8) 32-bit args
         * placed into 64-bit (long) arg slots, plus one 64-bit
         * (long) arg count, maintaining 16 byte alignment.
         */
        subq    $SYS_DROP, %rsp
        movb    $LWP_SYS, LWP_STATE(%r14)
        movq    %r15, %rdi
        movq    %rsp, %rsi
        call    syscall_entry

        /*
         * Fetch the arguments copied onto the kernel stack and put
         * them in the right registers to invoke a C-style syscall handler.
         * %rax contains the handler address.
         */
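        /*
         * As above, each 32-bit argument occupies its own 64-bit slot on
         * the kernel stack, hence the 8-byte stride between the fetches.
         */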
        movq    %rax, %rbx
        movl    0(%rsp), %edi
        movl    8(%rsp), %esi
        movl    0x10(%rsp), %edx
        movl    0x18(%rsp), %ecx
        movl    0x20(%rsp), %r8d
        movl    0x28(%rsp), %r9d

        call    *SY_CALLC(%rbx)

        movq    %rbp, %rsp      /* pop the args */

        /*
         * amd64 syscall handlers -always- return a 64-bit value in %rax.
         * On the 32-bit kernel, they always return that value in %eax:%edx
         * as required by the 32-bit ABI.
         *
         * Simulate the same behaviour by unconditionally splitting the
         * return value in the same way.
         */
        movq    %rax, %r13
        shrq    $32, %r13       /* upper 32-bits into %edx */
        movl    %eax, %r12d     /* lower 32-bits into %eax */

        /*
         * Optimistically assume that there's no post-syscall
         * work to do.  (This is to avoid having to call syscall_mstate()
 
 
        movl    REGOFF_RBX(%rsp), %ebx
        movl    REGOFF_RBP(%rsp), %ebp
        movl    REGOFF_RSI(%rsp), %esi
        movl    REGOFF_RDI(%rsp), %edi

        movl    REGOFF_RIP(%rsp), %edx  /* sysexit: %edx -> %eip */
        pushq   REGOFF_RFL(%rsp)
        popfq
        movl    REGOFF_RSP(%rsp), %ecx  /* sysexit: %ecx -> %esp */
        ALTENTRY(sys_sysenter_swapgs_sysexit)
        swapgs
        sti
        sysexit
        SET_SIZE(sys_sysenter_swapgs_sysexit)
        SET_SIZE(sys_sysenter)
        SET_SIZE(_sys_sysenter_post_swapgs)
        SET_SIZE(brand_sys_sysenter)

#endif  /* __lint */

/*
 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
 * the generic i386 libc to do system calls. We do a small amount of setup
 * before jumping into the existing sys_syscall32 path.
 */
#if defined(__lint)

/*ARGSUSED*/
void
sys_syscall_int()
{}

#else   /* __lint */

        ENTRY_NP(brand_sys_syscall_int)
        SWAPGS                          /* kernel gsbase */
        XPV_TRAP_POP
        call    smap_enable
        BRAND_CALLBACK(BRAND_CB_INT91, BRAND_URET_FROM_INTR_STACK())
        jmp     nopop_syscall_int

 
 
 
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2016 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>

#else   /* __lint */

#include <sys/machbrand.h>
#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/model.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

#endif  /* __lint */

/*
 * We implement five flavours of system call entry points
 *
 
 
        movq    %rbx, REGOFF_ES(%rsp)
        movw    %fs, %bx
        movq    %rbx, REGOFF_FS(%rsp)
        movw    %gs, %bx
        movq    %rbx, REGOFF_GS(%rsp)

        /*
         * Machine state saved in the regs structure on the stack
         * First six args in %rdi, %rsi, %rdx, %rcx, %r8, %r9
         * %eax is the syscall number
         * %rsp is the thread's stack, %r15 is curthread
         * REG_RSP(%rsp) is the user's stack
         */

        SYSCALL_TRAPTRACE($TT_SYSC64)

        movq    %rsp, %rbp

        movq    T_LWP(%r15), %r14
        ASSERT_NO_RUPDATE_PENDING(%r14)

        ENABLE_INTR_FLAGS

        MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
        movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */

        ASSERT_LWPTOREGS(%r14, %rsp)

        movb    $LWP_SYS, LWP_STATE(%r14)
        incq    LWP_RU_SYSC(%r14)
        movb    $NORMALRETURN, LWP_EOSYS(%r14)

        incq    %gs:CPU_STATS_SYS_SYSCALL

        /*
         * If our LWP has an alternate system call handler, run that instead of
         * the regular system call path.
         */
        movq    LWP_BRAND_SYSCALL(%r14), %rdi
        testq   %rdi, %rdi
        jz      _syscall_no_brand

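        /*
         * Preserve the syscall number in %rax across the brand handler
         * call; it is popped back below once the handler's return value
         * has been tested.
         */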
        pushq   %rax
        subq    $8, %rsp        /* align stack for call to C */
        call    *%rdi
        addq    $8, %rsp

        /*
         * If the alternate handler returns 0, we skip straight to the return to
         * usermode.  Otherwise, we resume regular system call processing.
         */
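        /*
         * Note that testl sets the flags before popq restores %rax, and
         * popq does not modify the flags, so the jz still acts on the
         * handler's return value.
         */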
        testl   %eax, %eax
        popq    %rax
        jz      _syscall_after_brand

_syscall_no_brand:
        movw    %ax, T_SYSNUM(%r15)
        movzbl  T_PRE_SYS(%r15), %ebx
        ORL_SYSCALLTRACE(%ebx)
        testl   %ebx, %ebx
        jne     _syscall_pre

_syscall_invoke:
        movq    REGOFF_RDI(%rbp), %rdi
        movq    REGOFF_RSI(%rbp), %rsi
        movq    REGOFF_RDX(%rbp), %rdx
        movq    REGOFF_RCX(%rbp), %rcx
        movq    REGOFF_R8(%rbp), %r8
        movq    REGOFF_R9(%rbp), %r9

        cmpl    $NSYSCALL, %eax
        jae     _syscall_ill
        shll    $SYSENT_SIZE_SHIFT, %eax
        leaq    sysent(%rax), %rbx

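        /*
         * %rbx now points at the sysent[] entry for this system call;
         * dispatch to its C handler.
         */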
        call    *SY_CALLC(%rbx)

        movq    %rax, %r12
        movq    %rdx, %r13

        /*
         * If the handler returns two ints, then we need to split the
         * 64-bit return value into two 32-bit values.
         */
        testw   $SE_32RVAL2, SY_FLAGS(%rbx)
        je      5f
        movq    %r12, %r13
        shrq    $32, %r13       /* upper 32-bits into %edx */
        movl    %r12d, %r12d    /* lower 32-bits into %eax */
5:

_syscall_after_brand:
        /*
         * Optimistically assume that there's no post-syscall
         * work to do.  (This is to avoid having to call syscall_mstate()
         * with interrupts disabled)
         */
        MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

        /*
         * We must protect ourselves from being descheduled here;
         * If we were, and we ended up on another cpu, or another
         * lwp got in ahead of us, it could change the segment
         * registers without us noticing before we return to userland.
         */
        CLI(%r14)
        CHECK_POSTSYS_NE(%r15, %r14, %ebx)
        jne     _syscall_post

        /*
         * We need to protect ourselves against non-canonical return values
         * because Intel doesn't check for them on sysret (AMD does).  Canonical
 
 
         * REG_RSP(%rsp) is the user's stack
         */

        SYSCALL_TRAPTRACE32($TT_SYSC)

        movq    %rsp, %rbp

        movq    T_LWP(%r15), %r14
        ASSERT_NO_RUPDATE_PENDING(%r14)

        ENABLE_INTR_FLAGS

        MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
        movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */

        ASSERT_LWPTOREGS(%r14, %rsp)

        incq    %gs:CPU_STATS_SYS_SYSCALL

        /*
         * If our lwp has an alternate system call handler, run that instead
         * of the regular system call path.
         */
        movq    LWP_BRAND_SYSCALL(%r14), %rax
        testq   %rax, %rax
        jz      _syscall32_no_brand

        movb    $LWP_SYS, LWP_STATE(%r14)
        call    *%rax

        /*
         * If the alternate handler returns 0, we skip straight to the return
         * to usermode.  Otherwise, we resume regular system call processing.
         */
        testl   %eax, %eax
        jz      _syscall32_after_brand

_syscall32_no_brand:
        /*
         * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
         * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
         * more succinctly:
         *
         *      SA(MAXSYSARGS * sizeof (long)) == 64
         *
         * Note: this space is used both to copy in the arguments from
         * userland and as part of the old UNIX-style syscall_ap() method.
         * syscall_entry expects that we do not change the values in the
         * space we hand it.  However, this means that when we pass the
         * arguments according to the normal calling convention, we need
         * an additional 16 bytes of stack for the last two arguments.
         */
#define SYS_DROP        64                      /* drop for args */
        subq    $SYS_DROP, %rsp
        movb    $LWP_SYS, LWP_STATE(%r14)
        movq    %r15, %rdi
        movq    %rsp, %rsi
        call    syscall_entry

        /*
         * Fetch the arguments copied onto the kernel stack and put
         * them in the right registers to invoke a C-style syscall handler.
         * %rax contains the handler address.
         *
         * Ideas for making all this go faster of course include simply
         * forcibly fetching 6 arguments from the user stack under lofault
         * protection, reverting to copyin_args only when watchpoints
         * are in effect.
         *
         * (If we do this, make sure that exec and libthread leave
         * enough space at the top of the stack to ensure that we'll
         * never do a fetch from an invalid page.)
         *
         * Lots of ideas here, but they won't really help with bringup B-)
         * Correctness can't wait, performance can wait a little longer ..
         */

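        /*
         * Note that the first pushq below moves %rsp down by 8 bytes, so
         * arg4, arg5 and arg6 are fetched at their original slot offsets
         * plus 8.
         */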
        movq    %rax, %rbx
        movl    0x0(%rsp), %edi         /* arg0 */
        movl    0x8(%rsp), %esi         /* arg1 */
        movl    0x10(%rsp), %edx        /* arg2 */
        movl    0x38(%rsp), %eax        /* arg7 load */
        movl    0x18(%rsp), %ecx        /* arg3 */
        pushq   %rax                    /* arg7 saved to stack */
        movl    0x28(%rsp), %r8d        /* arg4 */
        movl    0x38(%rsp), %eax        /* arg6 load */
        movl    0x30(%rsp), %r9d        /* arg5 */
        pushq   %rax                    /* arg6 saved to stack */

        call    *SY_CALLC(%rbx)

        movq    %rbp, %rsp      /* pop the args */

        /*
         * amd64 syscall handlers -always- return a 64-bit value in %rax.
         * On the 32-bit kernel, they always return that value in %eax:%edx
         * as required by the 32-bit ABI.
         *
         * Simulate the same behaviour by unconditionally splitting the
         * return value in the same way.
         */
        movq    %rax, %r13
        shrq    $32, %r13       /* upper 32-bits into %edx */
        movl    %eax, %r12d     /* lower 32-bits into %eax */

_syscall32_after_brand:

        /*
         * Optimistically assume that there's no post-syscall
         * work to do.  (This is to avoid having to call syscall_mstate()
         * with interrupts disabled)
         */
        MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

        /*
         * We must protect ourselves from being descheduled here;
         * If we were, and we ended up on another cpu, or another
         * lwp got in ahead of us, it could change the segment
         * registers without us noticing before we return to userland.
         */
        CLI(%r14)
        CHECK_POSTSYS_NE(%r15, %r14, %ebx)
        jne     _full_syscall_postsys32
        SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)

        /*
         * To get back to userland, we need to put the return %rip in %rcx and
 
 
        movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate calls) */

        ASSERT_LWPTOREGS(%r14, %rsp)

        incq    %gs:CPU_STATS_SYS_SYSCALL

        /*
         * Make some space for MAXSYSARGS (currently 8) 32-bit args
         * placed into 64-bit (long) arg slots, plus one 64-bit
         * (long) arg count, maintaining 16 byte alignment.
         */
        subq    $SYS_DROP, %rsp
        movb    $LWP_SYS, LWP_STATE(%r14)
        movq    %r15, %rdi
        movq    %rsp, %rsi
        call    syscall_entry

        /*
         * Fetch the arguments copied onto the kernel stack and put
         * them in the right registers to invoke a C-style syscall handler.
         * %rax contains the handler address. For the last two arguments, we
         * push them onto the stack -- we can't clobber the old arguments.
         */
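        /*
         * As in the sys_syscall32 path above, the first pushq below moves
         * %rsp down by 8 bytes, so arg4-arg6 are fetched at their original
         * slot offsets plus 8.
         */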
        movq    %rax, %rbx
        movl    0x0(%rsp), %edi         /* arg0 */
        movl    0x8(%rsp), %esi         /* arg1 */
        movl    0x10(%rsp), %edx        /* arg2 */
        movl    0x38(%rsp), %eax        /* arg7 load */
        movl    0x18(%rsp), %ecx        /* arg3 */
        pushq   %rax                    /* arg7 saved to stack */
        movl    0x28(%rsp), %r8d        /* arg4 */
        movl    0x38(%rsp), %eax        /* arg6 load */
        movl    0x30(%rsp), %r9d        /* arg5 */
        pushq   %rax                    /* arg6 saved to stack */

        call    *SY_CALLC(%rbx)

        movq    %rbp, %rsp      /* pop the args */

        /*
         * amd64 syscall handlers -always- return a 64-bit value in %rax.
         * On the 32-bit kernel, they always return that value in %eax:%edx
         * as required by the 32-bit ABI.
         *
         * Simulate the same behaviour by unconditionally splitting the
         * return value in the same way.
         */
        movq    %rax, %r13
        shrq    $32, %r13       /* upper 32-bits into %edx */
        movl    %eax, %r12d     /* lower 32-bits into %eax */

        /*
         * Optimistically assume that there's no post-syscall
         * work to do.  (This is to avoid having to call syscall_mstate()
 
 
        movl    REGOFF_RBX(%rsp), %ebx
        movl    REGOFF_RBP(%rsp), %ebp
        movl    REGOFF_RSI(%rsp), %esi
        movl    REGOFF_RDI(%rsp), %edi

        movl    REGOFF_RIP(%rsp), %edx  /* sysexit: %edx -> %eip */
        pushq   REGOFF_RFL(%rsp)
        popfq
        movl    REGOFF_RSP(%rsp), %ecx  /* sysexit: %ecx -> %esp */
        ALTENTRY(sys_sysenter_swapgs_sysexit)
        swapgs
        sti
        sysexit
        SET_SIZE(sys_sysenter_swapgs_sysexit)
        SET_SIZE(sys_sysenter)
        SET_SIZE(_sys_sysenter_post_swapgs)
        SET_SIZE(brand_sys_sysenter)

#endif  /* __lint */

#if defined(__lint)
/*
 * System call via an int80.  This entry point is only used by the Linux
 * application environment.  Unlike the other entry points, there is no
 * default action to take if no callback is registered for this process.
 */
void
sys_int80()
{}

#else   /* __lint */

        ENTRY_NP(brand_sys_int80)
        SWAPGS                          /* kernel gsbase */
        XPV_TRAP_POP

        /*
         * We first attempt to call the "b_int80" handler from the "struct
         * brand_mach_ops" for this brand.  If no handler function is installed
         * for this brand, the BRAND_CALLBACK() macro returns here and we
         * check the lwp for a "lwp_brand_syscall" handler.
         */
        BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())

        /*
         * Check to see if this lwp provides "lwp_brand_syscall".  If so, we
         * will route this int80 through the regular system call handling path.
         */
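        /*
         * %r15 is stashed in a per-CPU scratch slot so it can be used to
         * walk curthread -> lwp -> lwp_brand_syscall.  The movq restoring
         * %r15 does not change the flags, so the jnz still reflects the
         * testq above it.
         */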
        movq    %r15, %gs:CPU_RTMP_R15
        movq    %gs:CPU_THREAD, %r15
        movq    T_LWP(%r15), %r15
        movq    LWP_BRAND_SYSCALL(%r15), %r15
        testq   %r15, %r15
        movq    %gs:CPU_RTMP_R15, %r15
        jnz     nopop_syscall_int

        /*
         * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
         * function, and has thus opted out of handling this trap.
         */
        SWAPGS                          /* user gsbase */
        jmp     nopop_int80

        ENTRY_NP(sys_int80)
        /*
         * We hit an int80, but this process isn't of a brand with an int80
         * handler.  Bad process!  Make it look as if the INT failed.
         * Modify %rip to point before the INT, push the expected error
         * code and fake a GP fault. Note on 64-bit hypervisor we need
         * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
         * because gptrap will pop them again with its own XPV_TRAP_POP.
         */
        XPV_TRAP_POP
nopop_int80:
        subq    $2, (%rsp)      /* int insn 2-bytes */
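        /*
         * The pushed value stands in for the error code of a #GP that
         * references the T_INT80 IDT entry; the "+ 2" sets the IDT bit
         * of the error code.
         */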
        pushq   $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
#if defined(__xpv)
        push    %r11
        push    %rcx
#endif
        jmp     gptrap                  / GP fault
        SET_SIZE(sys_int80)
        SET_SIZE(brand_sys_int80)
#endif  /* __lint */


/*
 * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
 * the generic i386 libc to do system calls. We do a small amount of setup
 * before jumping into the existing sys_syscall32 path.
 */
#if defined(__lint)

/*ARGSUSED*/
void
sys_syscall_int()
{}

#else   /* __lint */

        ENTRY_NP(brand_sys_syscall_int)
        SWAPGS                          /* kernel gsbase */
        XPV_TRAP_POP
        call    smap_enable
        BRAND_CALLBACK(BRAND_CB_INT91, BRAND_URET_FROM_INTR_STACK())
        jmp     nopop_syscall_int

 