Print this page
OS-5510 remove lwp_brand_syscall_fast handler
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Lint-clean syscall_asm_amd64.s
OS-4961 lxbrand want fasttrap-like brand hook
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3937 lxbrand incorrect stack alignment for lx_syscall_enter()
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3561 lxbrand emulation library should execute on alternate stack
OS-3558 lxbrand add support for full in-kernel syscall handling
OS-3545 lx_syscall_regs should not walk stack
OS-3868 many LTP testcases now hang
OS-3901 lxbrand lx_recvmsg fails to translate control messages when 64-bit
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3215 32bit syscalls with more than 6 parameters re-use arg0/arg1 as arg6/arg7
OS-3223 Passing arg6 and arg7 can't clobber the stack for ap-style calls
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Keith M Wesolowski <wesolows@foobazco.org>
back out OS-3215: causes OS-3223
OS-3215 32bit syscalls with more than 6 parameters re-use arg0/arg1 as arg6/arg7
OS-2834 ship lx brand
        
*** 18,45 ****
   *
   * CDDL HEADER END
   */
  /*
   * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
!  * Copyright 2015 Joyent, Inc.
   */
  
  #include <sys/asm_linkage.h>
  #include <sys/asm_misc.h>
  #include <sys/regset.h>
  #include <sys/privregs.h>
  #include <sys/psw.h>
- #include <sys/machbrand.h>
  
  #if defined(__lint)
  
  #include <sys/types.h>
  #include <sys/thread.h>
  #include <sys/systm.h>
  
  #else   /* __lint */
  
  #include <sys/segments.h>
  #include <sys/pcb.h>
  #include <sys/trap.h>
  #include <sys/ftrace.h>
  #include <sys/traptrace.h>
--- 18,45 ----
   *
   * CDDL HEADER END
   */
  /*
   * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
!  * Copyright 2016 Joyent, Inc.
   */
  
  #include <sys/asm_linkage.h>
  #include <sys/asm_misc.h>
  #include <sys/regset.h>
  #include <sys/privregs.h>
  #include <sys/psw.h>
  
  #if defined(__lint)
  
  #include <sys/types.h>
  #include <sys/thread.h>
  #include <sys/systm.h>
  
  #else   /* __lint */
  
+ #include <sys/machbrand.h>
  #include <sys/segments.h>
  #include <sys/pcb.h>
  #include <sys/trap.h>
  #include <sys/ftrace.h>
  #include <sys/traptrace.h>
*** 501,510 ****
--- 501,511 ----
  
          movq    %rsp, %rbp
          
          movq    T_LWP(%r15), %r14
          ASSERT_NO_RUPDATE_PENDING(%r14)
+ 
          ENABLE_INTR_FLAGS
  
          MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
          movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */
  
*** 514,523 ****
--- 515,546 ----
          incq    LWP_RU_SYSC(%r14)
          movb    $NORMALRETURN, LWP_EOSYS(%r14)
  
          incq    %gs:CPU_STATS_SYS_SYSCALL
  
+         /*
+          * If our LWP has an alternate system call handler, run that instead of
+          * the regular system call path.
+          */
+         movq    LWP_BRAND_SYSCALL(%r14), %rdi
+         testq   %rdi, %rdi
+         jz      _syscall_no_brand
+ 
+         pushq   %rax
+         subq    $8, %rsp        /* align stack for call to C */
+         call    *%rdi
+         addq    $8, %rsp
+ 
+         /*
+          * If the alternate handler returns 0, we skip straight to the return to
+          * usermode.  Otherwise, we resume regular system call processing.
+          */
+         testl   %eax, %eax
+         popq    %rax
+         jz      _syscall_after_brand
+ 
+ _syscall_no_brand:
          movw    %ax, T_SYSNUM(%r15)
          movzbl  T_PRE_SYS(%r15), %ebx
          ORL_SYSCALLTRACE(%ebx)
          testl   %ebx, %ebx
          jne     _syscall_pre
*** 548,557 ****
--- 571,582 ----
          je      5f
          movq    %r12, %r13
          shrq    $32, %r13       /* upper 32-bits into %edx */
          movl    %r12d, %r12d    /* lower 32-bits into %eax */
  5:
+ 
+ _syscall_after_brand:
          /*
           * Optimistically assume that there's no post-syscall
           * work to do.  (This is to avoid having to call syscall_mstate()
           * with interrupts disabled)
           */
*** 793,807 ****
--- 818,858 ----
          ASSERT_LWPTOREGS(%r14, %rsp)
  
          incq     %gs:CPU_STATS_SYS_SYSCALL
  
          /*
+          * If our lwp has an alternate system call handler, run that instead
+          * of the regular system call path.
+          */
+         movq    LWP_BRAND_SYSCALL(%r14), %rax
+         testq   %rax, %rax
+         jz      _syscall32_no_brand
+ 
+         movb    $LWP_SYS, LWP_STATE(%r14)
+         call    *%rax
+ 
+         /*
+          * If the alternate handler returns 0, we skip straight to the return
+          * to usermode.  Otherwise, we resume regular system call processing.
+          */
+         testl   %eax, %eax
+         jz      _syscall32_after_brand
+ 
+ _syscall32_no_brand:
+         /*
           * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
           * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
           * more succinctly:
           *
           *      SA(MAXSYSARGS * sizeof (long)) == 64
+          *
+          * Note, this space is used both to copy in the arguments from user
+          * land and as part of the old UNIX style syscall_ap() method.
+          * syscall_entry expects that we do not change the values of this space
+          * that we give it. However, this means that when we end up in the more
+          * recent model of passing the arguments based on the calling
+          * conventions, we'll need to save an additional 16 bytes of stack.
           */
  #define SYS_DROP        64                      /* drop for args */
          subq    $SYS_DROP, %rsp
          movb    $LWP_SYS, LWP_STATE(%r14)
          movq    %r15, %rdi
*** 825,840 ****
           * Lots of ideas here, but they won't really help with bringup B-)
           * Correctness can't wait, performance can wait a little longer ..
           */
  
          movq    %rax, %rbx
!         movl    0(%rsp), %edi
!         movl    8(%rsp), %esi
!         movl    0x10(%rsp), %edx
!         movl    0x18(%rsp), %ecx
!         movl    0x20(%rsp), %r8d
!         movl    0x28(%rsp), %r9d
  
          call    *SY_CALLC(%rbx)
  
          movq    %rbp, %rsp      /* pop the args */
  
--- 876,895 ----
           * Lots of ideas here, but they won't really help with bringup B-)
           * Correctness can't wait, performance can wait a little longer ..
           */
  
          movq    %rax, %rbx
!         movl    0x0(%rsp), %edi         /* arg0 */
!         movl    0x8(%rsp), %esi         /* arg1 */
!         movl    0x10(%rsp), %edx        /* arg2 */
!         movl    0x38(%rsp), %eax        /* arg7 load */
!         movl    0x18(%rsp), %ecx        /* arg3 */
!         pushq   %rax                    /* arg7 saved to stack */
!         movl    0x28(%rsp), %r8d        /* arg4 */
!         movl    0x38(%rsp), %eax        /* arg6 load */
!         movl    0x30(%rsp), %r9d        /* arg5 */
!         pushq   %rax                    /* arg6 saved to stack */
  
          call    *SY_CALLC(%rbx)
  
          movq    %rbp, %rsp      /* pop the args */
  
*** 848,857 ****
--- 903,914 ----
           */
          movq    %rax, %r13
          shrq    $32, %r13       /* upper 32-bits into %edx */
          movl    %eax, %r12d     /* lower 32-bits into %eax */
  
+ _syscall32_after_brand:
+ 
          /*
           * Optimistically assume that there's no post-syscall
           * work to do.  (This is to avoid having to call syscall_mstate()
           * with interrupts disabled)
           */
*** 1077,1095 ****
          call    syscall_entry
  
          /*
           * Fetch the arguments copied onto the kernel stack and put
           * them in the right registers to invoke a C-style syscall handler.
!          * %rax contains the handler address.
           */
          movq    %rax, %rbx
!         movl    0(%rsp), %edi
!         movl    8(%rsp), %esi
!         movl    0x10(%rsp), %edx
!         movl    0x18(%rsp), %ecx
!         movl    0x20(%rsp), %r8d
!         movl    0x28(%rsp), %r9d
  
          call    *SY_CALLC(%rbx)
  
          movq    %rbp, %rsp      /* pop the args */
  
--- 1134,1157 ----
          call    syscall_entry
  
          /*
           * Fetch the arguments copied onto the kernel stack and put
           * them in the right registers to invoke a C-style syscall handler.
!          * %rax contains the handler address. For the last two arguments, we
!          * push them onto the stack -- we can't clobber the old arguments.
           */
          movq    %rax, %rbx
!         movl    0x0(%rsp), %edi         /* arg0 */
!         movl    0x8(%rsp), %esi         /* arg1 */
!         movl    0x10(%rsp), %edx        /* arg2 */
!         movl    0x38(%rsp), %eax        /* arg7 load */
!         movl    0x18(%rsp), %ecx        /* arg3 */
!         pushq   %rax                    /* arg7 saved to stack */
!         movl    0x28(%rsp), %r8d        /* arg4 */
!         movl    0x38(%rsp), %eax        /* arg6 load */
!         movl    0x30(%rsp), %r9d        /* arg5 */
!         pushq   %rax                    /* arg6 saved to stack */
  
          call    *SY_CALLC(%rbx)
  
          movq    %rbp, %rsp      /* pop the args */
  
*** 1158,1168 ****
--- 1220,1296 ----
          SET_SIZE(_sys_sysenter_post_swapgs)
          SET_SIZE(brand_sys_sysenter)
  
  #endif  /* __lint */
   
+ #if defined(__lint)
  /*
+  * System call via an int80.  This entry point is only used by the Linux
+  * application environment.  Unlike the other entry points, there is no
+  * default action to take if no callback is registered for this process.
+  */
+ void
+ sys_int80()
+ {}
+ 
+ #else   /* __lint */
+ 
+         ENTRY_NP(brand_sys_int80)
+         SWAPGS                          /* kernel gsbase */
+         XPV_TRAP_POP
+ 
+         /*
+          * We first attempt to call the "b_int80" handler from the "struct
+          * brand_mach_ops" for this brand.  If no handler function is installed
+          * for this brand, the BRAND_CALLBACK() macro returns here and we
+          * check the lwp for a "lwp_brand_syscall" handler.
+          */
+         BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
+ 
+         /*
+          * Check to see if this lwp provides "lwp_brand_syscall".  If so, we
+          * will route this int80 through the regular system call handling path.
+          */
+         movq    %r15, %gs:CPU_RTMP_R15  /* save %r15; used as a temporary */
+         movq    %gs:CPU_THREAD, %r15    /* %r15 = this CPU's thread */
+         movq    T_LWP(%r15), %r15       /* %r15 = that thread's lwp */
+         movq    LWP_BRAND_SYSCALL(%r15), %r15 /* %r15 = handler, or 0 */
+         testq   %r15, %r15              /* ZF set iff no handler */
+         movq    %gs:CPU_RTMP_R15, %r15  /* restore; movq keeps testq flags */
+         jnz     nopop_syscall_int       /* handler present */
+ 
+         /*
+          * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
+          * function, and has thus opted out of handling this trap.
+          */
+         SWAPGS                          /* user gsbase */
+         jmp     nopop_int80             /* XPV_TRAP_POP was already done */
+ 
+         ENTRY_NP(sys_int80)
+         /*
+          * We hit an int80, but this process isn't of a brand with an int80
+          * handler.  Bad process!  Make it look as if the INT failed.
+          * Modify %rip to point before the INT, push the expected error
+          * code and fake a GP fault. Note on 64-bit hypervisor we need
+          * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
+          * because gptrap will pop them again with its own XPV_TRAP_POP.
+          */
+         XPV_TRAP_POP
+ nopop_int80:
+         subq    $2, (%rsp)      /* int insn 2-bytes */
+         pushq   $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+ #if defined(__xpv)
+         push    %r11
+         push    %rcx
+ #endif
+         jmp     gptrap                  / GP fault
+         SET_SIZE(sys_int80)
+         SET_SIZE(brand_sys_int80)
+ #endif  /* __lint */
+ 
+ 
+ /*
   * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
   * the generic i386 libc to do system calls. We do a small amount of setup
   * before jumping into the existing sys_syscall32 path.
   */
  #if defined(__lint)