Print this page




  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 
  26 #include <sys/asm_linkage.h>
  27 #include <sys/asm_misc.h>
  28 #include <sys/regset.h>
  29 #include <sys/privregs.h>
  30 #include <sys/psw.h>
  31 #include <sys/machbrand.h>
  32 
  33 #if defined(__lint)
  34 
  35 #include <sys/types.h>
  36 #include <sys/thread.h>
  37 #include <sys/systm.h>
  38 
  39 #else   /* __lint */
  40 

  41 #include <sys/segments.h>
  42 #include <sys/pcb.h>
  43 #include <sys/trap.h>
  44 #include <sys/ftrace.h>
  45 #include <sys/traptrace.h>
  46 #include <sys/clock.h>
  47 #include <sys/model.h>
  48 #include <sys/panic.h>
  49 
  50 #if defined(__xpv)
  51 #include <sys/hypervisor.h>
  52 #endif
  53 
  54 #include "assym.h"
  55 
  56 #endif  /* __lint */
  57 
  58 /*
  59  * We implement five flavours of system call entry points
  60  *


 514         movb    $LWP_SYS, LWP_STATE(%r14)
 515         incq    LWP_RU_SYSC(%r14)
 516         movb    $NORMALRETURN, LWP_EOSYS(%r14)
 517 
 518         incq    %gs:CPU_STATS_SYS_SYSCALL
 519 
 520         /*
 521          * If our LWP has an alternate system call handler, run that instead of
 522          * the regular system call path.
 523          */
 524         movq    LWP_BRAND_SYSCALL(%r14), %rdi
 525         testq   %rdi, %rdi
 526         jz      _syscall_no_brand
 527 
 528         pushq   %rax
 529         subq    $8, %rsp        /* align stack for call to C */
 530         call    *%rdi
 531         addq    $8, %rsp
 532 
 533         /*
 534          * If the alternate handler returns non-zero, the normal system call
 535          * processing is resumed.
 536          */
 537         testl   %eax, %eax
 538         popq    %rax
 539         jnz     _syscall_no_brand
 540 
 541         /*
 542          * For branded syscalls which were handled in-kernel, shuffle the
 543          * register state as would be done by the native handler before jumping
 544          * to the post-syscall logic.
 545          */
 546         movq    REGOFF_RAX(%rsp), %r12
 547         movq    REGOFF_RDX(%rsp), %r13
 548         jmp     _syscall_after_brand
 549 
 550 _syscall_no_brand:
 551         movw    %ax, T_SYSNUM(%r15)
 552         movzbl  T_PRE_SYS(%r15), %ebx
 553         ORL_SYSCALLTRACE(%ebx)
 554         testl   %ebx, %ebx
 555         jne     _syscall_pre
 556 
 557 _syscall_invoke:
 558         movq    REGOFF_RDI(%rbp), %rdi
 559         movq    REGOFF_RSI(%rbp), %rsi
 560         movq    REGOFF_RDX(%rbp), %rdx
 561         movq    REGOFF_RCX(%rbp), %rcx
 562         movq    REGOFF_R8(%rbp), %r8
 563         movq    REGOFF_R9(%rbp), %r9
 564 
 565         cmpl    $NSYSCALL, %eax
 566         jae     _syscall_ill    
 567         shll    $SYSENT_SIZE_SHIFT, %eax
 568         leaq    sysent(%rax), %rbx
 569 


 823 
 824         MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
 825         movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */
 826 
 827         ASSERT_LWPTOREGS(%r14, %rsp)
 828 
 829         incq     %gs:CPU_STATS_SYS_SYSCALL
 830 
 831         /*
 832          * If our lwp has an alternate system call handler, run that instead
 833          * of the regular system call path.
 834          */
 835         movq    LWP_BRAND_SYSCALL(%r14), %rax
 836         testq   %rax, %rax
 837         jz      _syscall32_no_brand
 838 
 839         movb    $LWP_SYS, LWP_STATE(%r14)
 840         call    *%rax
 841 
 842         /*
 843          * If the alternate handler returns non-zero, the normal system call
 844          * processing is resumed.
 845          */
 846         testl   %eax, %eax
 847         jnz     _syscall32_no_brand
 848 
 849         /*
 850          * For branded syscalls which were handled in-kernel, shuffle the
 851          * register state as would be done by the native handler before jumping
 852          * to the post-syscall logic.
 853          */
 854         movl    REGOFF_RAX(%rsp), %r12d
 855         movl    REGOFF_RDX(%rsp), %r13d
 856         jmp     _syscall32_after_brand
 857 
 858 _syscall32_no_brand:
 859         /*
 860          * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
 861          * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
 862          * more succinctly:
 863          *
 864          *      SA(MAXSYSARGS * sizeof (long)) == 64
 865          *
 866          * Note, this space is used both to copy in the arguments from user
 867          * land and as part of the old UNIX style syscall_ap() method.
 868          * syscall_entry expects that we do not change the values of this space
 869          * that we give it. However, this means that when we end up in the more
 870          * recent model of passing the arguments based on the calling
 871          * conventions, we'll need to save an additional 16 bytes of stack.
 872          */
 873 #define SYS_DROP        64                      /* drop for args */
 874         subq    $SYS_DROP, %rsp
 875         movb    $LWP_SYS, LWP_STATE(%r14)
 876         movq    %r15, %rdi
 877         movq    %rsp, %rsi


1238         SET_SIZE(_sys_sysenter_post_swapgs)
1239         SET_SIZE(brand_sys_sysenter)
1240 
1241 #endif  /* __lint */
1242  
1243 #if defined(__lint)
1244 /*
1245  * System call via an int80.  This entry point is only used by the Linux
1246  * application environment.  Unlike the other entry points, there is no
1247  * default action to take if no callback is registered for this process.
      *
      * This is the lint-only C stub; the real entry points are the assembly
      * routines in the non-lint branch below.
1248  */
1249 void
1250 sys_int80()
1251 {}
1252 
1253 #else   /* __lint */
1254 
1255         ENTRY_NP(brand_sys_int80)
1256         SWAPGS                          /* kernel gsbase */
1257         XPV_TRAP_POP
1258         call    smap_enable             /* NOTE(review): defined outside this view; presumably re-asserts SMAP -- confirm */
1259 
1260         /*
1261          * We first attempt to call the "b_int80" handler from the "struct
1262          * brand_mach_ops" for this brand.  If no handler function is installed
1263          * for this brand, the BRAND_CALLBACK() macro returns here and we
1264          * check the lwp for a "lwp_brand_syscall" handler.
1265          */
1266         BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
1267 
1268         /*
1269          * Check to see if this lwp provides "lwp_brand_syscall".  If so, we
1270          * will route this int80 through the regular system call handling path.
              *
              * %r15 is borrowed as scratch: it is parked in the %gs-relative
              * CPU_RTMP_R15 slot, used to chase thread -> lwp -> handler pointer,
              * and restored before the branch.  The restoring movq does not
              * modify the flags, so the jnz still acts on the testq result.
1271          */
1272         movq    %r15, %gs:CPU_RTMP_R15
1273         movq    %gs:CPU_THREAD, %r15
1274         movq    T_LWP(%r15), %r15
1275         movq    LWP_BRAND_SYSCALL(%r15), %r15
1276         testq   %r15, %r15
1277         movq    %gs:CPU_RTMP_R15, %r15
1278         jnz     nopop_syscall_int       /* handler present: take the syscall path */
1279 
1280         /*
1281          * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
1282          * function, and has thus opted out of handling this trap.
              *
              * Undo the SWAPGS done at entry and join sys_int80 at nopop_int80,
              * skipping its XPV_TRAP_POP and smap_enable call since both were
              * already performed at the top of this routine.
1283          */
1284         SWAPGS                          /* user gsbase */
1285         jmp     nopop_int80
1286 
1287         ENTRY_NP(sys_int80)
1288         /*
1289          * We hit an int80, but this process isn't of a brand with an int80
1290          * handler.  Bad process!  Make it look as if the INT failed.
1291          * Modify %rip to point before the INT, push the expected error
1292          * code and fake a GP fault. Note on 64-bit hypervisor we need
1293          * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
1294          * because gptrap will pop them again with its own XPV_TRAP_POP.
1295          */
1296         XPV_TRAP_POP
1297         call    smap_enable
1298 nopop_int80:
1299         subq    $2, (%rsp)      /* back the saved %rip up over the 2-byte int insn */
1300         pushq   $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)     /* expected error code for the faked #GP */
1301 #if defined(__xpv)
1302         push    %r11            /* re-push what XPV_TRAP_POP removed; */
1303         push    %rcx            /* gptrap pops them again */
1304 #endif
1305         jmp     gptrap                  / GP fault
1306         SET_SIZE(sys_int80)
1307         SET_SIZE(brand_sys_int80)
1308 #endif  /* __lint */
1309 
1310 
1311 /*
1312  * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1313  * the generic i386 libc to do system calls. We do a small amount of setup
1314  * before jumping into the existing sys_syscall32 path.
1315  */
1316 #if defined(__lint)
1317 




  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 
  26 #include <sys/asm_linkage.h>
  27 #include <sys/asm_misc.h>
  28 #include <sys/regset.h>
  29 #include <sys/privregs.h>
  30 #include <sys/psw.h>

  31 
  32 #if defined(__lint)
  33 
  34 #include <sys/types.h>
  35 #include <sys/thread.h>
  36 #include <sys/systm.h>
  37 
  38 #else   /* __lint */
  39 
  40 #include <sys/machbrand.h>
  41 #include <sys/segments.h>
  42 #include <sys/pcb.h>
  43 #include <sys/trap.h>
  44 #include <sys/ftrace.h>
  45 #include <sys/traptrace.h>
  46 #include <sys/clock.h>
  47 #include <sys/model.h>
  48 #include <sys/panic.h>
  49 
  50 #if defined(__xpv)
  51 #include <sys/hypervisor.h>
  52 #endif
  53 
  54 #include "assym.h"
  55 
  56 #endif  /* __lint */
  57 
  58 /*
  59  * We implement five flavours of system call entry points
  60  *


 514         movb    $LWP_SYS, LWP_STATE(%r14)
 515         incq    LWP_RU_SYSC(%r14)
 516         movb    $NORMALRETURN, LWP_EOSYS(%r14)
 517 
 518         incq    %gs:CPU_STATS_SYS_SYSCALL
 519 
 520         /*
 521          * If our LWP has an alternate system call handler, run that instead of
 522          * the regular system call path.
 523          */
 524         movq    LWP_BRAND_SYSCALL(%r14), %rdi
 525         testq   %rdi, %rdi
 526         jz      _syscall_no_brand
 527 
 528         pushq   %rax
 529         subq    $8, %rsp        /* align stack for call to C */
 530         call    *%rdi
 531         addq    $8, %rsp
 532 
 533         /*
 534          * If the alternate handler returns 0, we skip straight to the return to
 535          * usermode.  Otherwise, we resume regular system call processing.
 536          */
 537         testl   %eax, %eax
 538         popq    %rax
 539         jz      _syscall_after_brand
 540 









 541 _syscall_no_brand:
 542         movw    %ax, T_SYSNUM(%r15)
 543         movzbl  T_PRE_SYS(%r15), %ebx
 544         ORL_SYSCALLTRACE(%ebx)
 545         testl   %ebx, %ebx
 546         jne     _syscall_pre
 547 
 548 _syscall_invoke:
 549         movq    REGOFF_RDI(%rbp), %rdi
 550         movq    REGOFF_RSI(%rbp), %rsi
 551         movq    REGOFF_RDX(%rbp), %rdx
 552         movq    REGOFF_RCX(%rbp), %rcx
 553         movq    REGOFF_R8(%rbp), %r8
 554         movq    REGOFF_R9(%rbp), %r9
 555 
 556         cmpl    $NSYSCALL, %eax
 557         jae     _syscall_ill    
 558         shll    $SYSENT_SIZE_SHIFT, %eax
 559         leaq    sysent(%rax), %rbx
 560 


 814 
 815         MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
 816         movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */
 817 
 818         ASSERT_LWPTOREGS(%r14, %rsp)
 819 
 820         incq     %gs:CPU_STATS_SYS_SYSCALL
 821 
 822         /*
 823          * If our lwp has an alternate system call handler, run that instead
 824          * of the regular system call path.
 825          */
 826         movq    LWP_BRAND_SYSCALL(%r14), %rax
 827         testq   %rax, %rax
 828         jz      _syscall32_no_brand
 829 
 830         movb    $LWP_SYS, LWP_STATE(%r14)
 831         call    *%rax
 832 
 833         /*
 834          * If the alternate handler returns 0, we skip straight to the return
 835          * to usermode.  Otherwise, we resume regular system call processing.
 836          */
 837         testl   %eax, %eax
 838         jz      _syscall32_after_brand
 839 









 840 _syscall32_no_brand:
 841         /*
 842          * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
 843          * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
 844          * more succinctly:
 845          *
 846          *      SA(MAXSYSARGS * sizeof (long)) == 64
 847          *
 848          * Note, this space is used both to copy in the arguments from user
 849          * land and as part of the old UNIX style syscall_ap() method.
 850          * syscall_entry expects that we do not change the values of this space
 851          * that we give it. However, this means that when we end up in the more
 852          * recent model of passing the arguments based on the calling
 853          * conventions, we'll need to save an additional 16 bytes of stack.
 854          */
 855 #define SYS_DROP        64                      /* drop for args */
 856         subq    $SYS_DROP, %rsp
 857         movb    $LWP_SYS, LWP_STATE(%r14)
 858         movq    %r15, %rdi
 859         movq    %rsp, %rsi


1220         SET_SIZE(_sys_sysenter_post_swapgs)
1221         SET_SIZE(brand_sys_sysenter)
1222 
1223 #endif  /* __lint */
1224  
1225 #if defined(__lint)
1226 /*
1227  * System call via an int80.  This entry point is only used by the Linux
1228  * application environment.  Unlike the other entry points, there is no
1229  * default action to take if no callback is registered for this process.
      *
      * This is the lint-only C stub; the real entry points are the assembly
      * routines in the non-lint branch below.
1230  */
1231 void
1232 sys_int80()
1233 {}
1234 
1235 #else   /* __lint */
1236 
1237         ENTRY_NP(brand_sys_int80)
1238         SWAPGS                          /* kernel gsbase */
1239         XPV_TRAP_POP

1240 
1241         /*
1242          * We first attempt to call the "b_int80" handler from the "struct
1243          * brand_mach_ops" for this brand.  If no handler function is installed
1244          * for this brand, the BRAND_CALLBACK() macro returns here and we
1245          * check the lwp for a "lwp_brand_syscall" handler.
1246          */
1247         BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
1248 
1249         /*
1250          * Check to see if this lwp provides "lwp_brand_syscall".  If so, we
1251          * will route this int80 through the regular system call handling path.
              *
              * %r15 is borrowed as scratch: it is parked in the %gs-relative
              * CPU_RTMP_R15 slot, used to chase thread -> lwp -> handler pointer,
              * and restored before the branch.  The restoring movq does not
              * modify the flags, so the jnz still acts on the testq result.
1252          */
1253         movq    %r15, %gs:CPU_RTMP_R15
1254         movq    %gs:CPU_THREAD, %r15
1255         movq    T_LWP(%r15), %r15
1256         movq    LWP_BRAND_SYSCALL(%r15), %r15
1257         testq   %r15, %r15
1258         movq    %gs:CPU_RTMP_R15, %r15
1259         jnz     nopop_syscall_int       /* handler present: take the syscall path */
1260 
1261         /*
1262          * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
1263          * function, and has thus opted out of handling this trap.
              *
              * Undo the SWAPGS done at entry and join sys_int80 at nopop_int80,
              * skipping its XPV_TRAP_POP since that was already performed at
              * the top of this routine.
1264          */
1265         SWAPGS                          /* user gsbase */
1266         jmp     nopop_int80
1267 
1268         ENTRY_NP(sys_int80)
1269         /*
1270          * We hit an int80, but this process isn't of a brand with an int80
1271          * handler.  Bad process!  Make it look as if the INT failed.
1272          * Modify %rip to point before the INT, push the expected error
1273          * code and fake a GP fault. Note on 64-bit hypervisor we need
1274          * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
1275          * because gptrap will pop them again with its own XPV_TRAP_POP.
1276          */
1277         XPV_TRAP_POP

1278 nopop_int80:
1279         subq    $2, (%rsp)      /* back the saved %rip up over the 2-byte int insn */
1280         pushq   $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)     /* expected error code for the faked #GP */
1281 #if defined(__xpv)
1282         push    %r11            /* re-push what XPV_TRAP_POP removed; */
1283         push    %rcx            /* gptrap pops them again */
1284 #endif
1285         jmp     gptrap                  / GP fault
1286         SET_SIZE(sys_int80)
1287         SET_SIZE(brand_sys_int80)
1288 #endif  /* __lint */
1289 
1290 
1291 /*
1292  * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1293  * the generic i386 libc to do system calls. We do a small amount of setup
1294  * before jumping into the existing sys_syscall32 path.
1295  */
1296 #if defined(__lint)
1297