Print this page
OS-5510 remove lwp_brand_syscall_fast handler
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Lint-clean syscall_asm_amd64.s
OS-4961 lxbrand want fasttrap-like brand hook
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3937 lxbrand incorrect stack alignment for lx_syscall_enter()
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3561 lxbrand emulation library should execute on alternate stack
OS-3558 lxbrand add support for full in-kernel syscall handling
OS-3545 lx_syscall_regs should not walk stack
OS-3868 many LTP testcases now hang
OS-3901 lxbrand lx_recvmsg fails to translate control messages when 64-bit
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3215 32bit syscalls with more than 6 parameters re-use arg0/arg1 as arg6/arg7
OS-3223 Passing arg6 and arg7 can't clobber the stack for ap-style calls
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Keith M Wesolowski <wesolows@foobazco.org>
back out OS-3215: causes OS-3223
OS-3215 32bit syscalls with more than 6 parameters re-use arg0/arg1 as arg6/arg7
OS-2834 ship lx brand

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
          +++ new/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright 2015 Joyent, Inc.
       23 + * Copyright 2016 Joyent, Inc.
  24   24   */
  25   25  
  26   26  #include <sys/asm_linkage.h>
  27   27  #include <sys/asm_misc.h>
  28   28  #include <sys/regset.h>
  29   29  #include <sys/privregs.h>
  30   30  #include <sys/psw.h>
  31      -#include <sys/machbrand.h>
  32   31  
  33   32  #if defined(__lint)
  34   33  
  35   34  #include <sys/types.h>
  36   35  #include <sys/thread.h>
  37   36  #include <sys/systm.h>
  38   37  
  39   38  #else   /* __lint */
  40   39  
       40 +#include <sys/machbrand.h>
  41   41  #include <sys/segments.h>
  42   42  #include <sys/pcb.h>
  43   43  #include <sys/trap.h>
  44   44  #include <sys/ftrace.h>
  45   45  #include <sys/traptrace.h>
  46   46  #include <sys/clock.h>
  47   47  #include <sys/model.h>
  48   48  #include <sys/panic.h>
  49   49  
  50   50  #if defined(__xpv)
↓ open down ↓ 445 lines elided ↑ open up ↑
 496  496           * %rsp is the thread's stack, %r15 is curthread
 497  497           * REG_RSP(%rsp) is the user's stack
 498  498           */
 499  499  
 500  500          SYSCALL_TRAPTRACE($TT_SYSC64)
 501  501  
 502  502          movq    %rsp, %rbp
 503  503          
 504  504          movq    T_LWP(%r15), %r14
 505  505          ASSERT_NO_RUPDATE_PENDING(%r14)
      506 +
 506  507          ENABLE_INTR_FLAGS
 507  508  
 508  509          MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
 509  510          movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */
 510  511  
 511  512          ASSERT_LWPTOREGS(%r14, %rsp)
 512  513  
 513  514          movb    $LWP_SYS, LWP_STATE(%r14)
 514  515          incq    LWP_RU_SYSC(%r14)
 515  516          movb    $NORMALRETURN, LWP_EOSYS(%r14)
 516  517  
 517  518          incq    %gs:CPU_STATS_SYS_SYSCALL
 518  519  
      520 +        /*
      521 +         * If our LWP has an alternate system call handler, run that instead of
      522 +         * the regular system call path.
      523 +         */
      524 +        movq    LWP_BRAND_SYSCALL(%r14), %rdi
      525 +        testq   %rdi, %rdi
      526 +        jz      _syscall_no_brand
      527 +
      528 +        pushq   %rax
      529 +        subq    $8, %rsp        /* align stack for call to C */
      530 +        call    *%rdi
      531 +        addq    $8, %rsp
      532 +
      533 +        /*
      534 +         * If the alternate handler returns 0, we skip straight to the return to
      535 +         * usermode.  Otherwise, we resume regular system call processing.
      536 +         */
      537 +        testl   %eax, %eax
      538 +        popq    %rax
      539 +        jz      _syscall_after_brand
      540 +
      541 +_syscall_no_brand:
 519  542          movw    %ax, T_SYSNUM(%r15)
 520  543          movzbl  T_PRE_SYS(%r15), %ebx
 521  544          ORL_SYSCALLTRACE(%ebx)
 522  545          testl   %ebx, %ebx
 523  546          jne     _syscall_pre
 524  547  
 525  548  _syscall_invoke:
 526  549          movq    REGOFF_RDI(%rbp), %rdi
 527  550          movq    REGOFF_RSI(%rbp), %rsi
 528  551          movq    REGOFF_RDX(%rbp), %rdx
↓ open down ↓ 14 lines elided ↑ open up ↑
 543  566          /*
 544  567           * If the handler returns two ints, then we need to split the
 545  568           * 64-bit return value into two 32-bit values.
 546  569           */
 547  570          testw   $SE_32RVAL2, SY_FLAGS(%rbx)
 548  571          je      5f
 549  572          movq    %r12, %r13
 550  573          shrq    $32, %r13       /* upper 32-bits into %edx */
 551  574          movl    %r12d, %r12d    /* lower 32-bits into %eax */
 552  575  5:
      576 +
      577 +_syscall_after_brand:
 553  578          /*
 554  579           * Optimistically assume that there's no post-syscall
 555  580           * work to do.  (This is to avoid having to call syscall_mstate()
 556  581           * with interrupts disabled)
 557  582           */
 558  583          MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
 559  584  
 560  585          /*
 561  586           * We must protect ourselves from being descheduled here;
 562  587           * If we were, and we ended up on another cpu, or another
↓ open down ↓ 225 lines elided ↑ open up ↑
 788  813          ENABLE_INTR_FLAGS
 789  814  
 790  815          MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
 791  816          movl    REGOFF_RAX(%rsp), %eax  /* (%rax damaged by mstate call) */
 792  817  
 793  818          ASSERT_LWPTOREGS(%r14, %rsp)
 794  819  
 795  820          incq     %gs:CPU_STATS_SYS_SYSCALL
 796  821  
 797  822          /*
      823 +         * If our lwp has an alternate system call handler, run that instead
      824 +         * of the regular system call path.
      825 +         */
      826 +        movq    LWP_BRAND_SYSCALL(%r14), %rax
      827 +        testq   %rax, %rax
      828 +        jz      _syscall32_no_brand
      829 +
      830 +        movb    $LWP_SYS, LWP_STATE(%r14)
      831 +        call    *%rax
      832 +
      833 +        /*
      834 +         * If the alternate handler returns 0, we skip straight to the return
      835 +         * to usermode.  Otherwise, we resume regular system call processing.
      836 +         */
      837 +        testl   %eax, %eax
      838 +        jz      _syscall32_after_brand
      839 +
      840 +_syscall32_no_brand:
      841 +        /*
 798  842           * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
 799  843           * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
 800  844           * more succinctly:
 801  845           *
 802  846           *      SA(MAXSYSARGS * sizeof (long)) == 64
      847 +         *
      848 +         * Note, this space is used both to copy in the arguments from user
       849 +         * land, but also as part of the old UNIX style syscall_ap() method.
      850 +         * syscall_entry expects that we do not change the values of this space
      851 +         * that we give it. However, this means that when we end up in the more
      852 +         * recent model of passing the arguments based on the calling
      853 +         * conventions, we'll need to save an additional 16 bytes of stack.
 803  854           */
 804  855  #define SYS_DROP        64                      /* drop for args */
 805  856          subq    $SYS_DROP, %rsp
 806  857          movb    $LWP_SYS, LWP_STATE(%r14)
 807  858          movq    %r15, %rdi
 808  859          movq    %rsp, %rsi
 809  860          call    syscall_entry
 810  861  
 811  862          /*
 812  863           * Fetch the arguments copied onto the kernel stack and put
↓ open down ↓ 7 lines elided ↑ open up ↑
 820  871           *
 821  872           * (If we do this, make sure that exec and libthread leave
 822  873           * enough space at the top of the stack to ensure that we'll
 823  874           * never do a fetch from an invalid page.)
 824  875           *
 825  876           * Lots of ideas here, but they won't really help with bringup B-)
 826  877           * Correctness can't wait, performance can wait a little longer ..
 827  878           */
 828  879  
 829  880          movq    %rax, %rbx
 830      -        movl    0(%rsp), %edi
 831      -        movl    8(%rsp), %esi
 832      -        movl    0x10(%rsp), %edx
 833      -        movl    0x18(%rsp), %ecx
 834      -        movl    0x20(%rsp), %r8d
 835      -        movl    0x28(%rsp), %r9d
      881 +        movl    0x0(%rsp), %edi         /* arg0 */
      882 +        movl    0x8(%rsp), %esi         /* arg1 */
      883 +        movl    0x10(%rsp), %edx        /* arg2 */
      884 +        movl    0x38(%rsp), %eax        /* arg7 load */
      885 +        movl    0x18(%rsp), %ecx        /* arg3 */
      886 +        pushq   %rax                    /* arg7 saved to stack */
      887 +        movl    0x28(%rsp), %r8d        /* arg4 */
      888 +        movl    0x38(%rsp), %eax        /* arg6 load */
      889 +        movl    0x30(%rsp), %r9d        /* arg5 */
      890 +        pushq   %rax                    /* arg6 saved to stack */
 836  891  
 837  892          call    *SY_CALLC(%rbx)
 838  893  
 839  894          movq    %rbp, %rsp      /* pop the args */
 840  895  
 841  896          /*
 842  897           * amd64 syscall handlers -always- return a 64-bit value in %rax.
 843  898           * On the 32-bit kernel, they always return that value in %eax:%edx
 844  899           * as required by the 32-bit ABI.
 845  900           *
 846  901           * Simulate the same behaviour by unconditionally splitting the
 847  902           * return value in the same way.
 848  903           */
 849  904          movq    %rax, %r13
 850  905          shrq    $32, %r13       /* upper 32-bits into %edx */
 851  906          movl    %eax, %r12d     /* lower 32-bits into %eax */
 852  907  
      908 +_syscall32_after_brand:
      909 +
 853  910          /*
 854  911           * Optimistically assume that there's no post-syscall
 855  912           * work to do.  (This is to avoid having to call syscall_mstate()
 856  913           * with interrupts disabled)
 857  914           */
 858  915          MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
 859  916  
 860  917          /*
 861  918           * We must protect ourselves from being descheduled here;
 862  919           * If we were, and we ended up on another cpu, or another
↓ open down ↓ 209 lines elided ↑ open up ↑
1072 1129           */
1073 1130          subq    $SYS_DROP, %rsp
1074 1131          movb    $LWP_SYS, LWP_STATE(%r14)
1075 1132          movq    %r15, %rdi
1076 1133          movq    %rsp, %rsi
1077 1134          call    syscall_entry
1078 1135  
1079 1136          /*
1080 1137           * Fetch the arguments copied onto the kernel stack and put
1081 1138           * them in the right registers to invoke a C-style syscall handler.
1082      -         * %rax contains the handler address.
     1139 +         * %rax contains the handler address. For the last two arguments, we
     1140 +         * push them onto the stack -- we can't clobber the old arguments.
1083 1141           */
1084 1142          movq    %rax, %rbx
1085      -        movl    0(%rsp), %edi
1086      -        movl    8(%rsp), %esi
1087      -        movl    0x10(%rsp), %edx
1088      -        movl    0x18(%rsp), %ecx
1089      -        movl    0x20(%rsp), %r8d
1090      -        movl    0x28(%rsp), %r9d
     1143 +        movl    0x0(%rsp), %edi         /* arg0 */
     1144 +        movl    0x8(%rsp), %esi         /* arg1 */
     1145 +        movl    0x10(%rsp), %edx        /* arg2 */
     1146 +        movl    0x38(%rsp), %eax        /* arg7 load */
     1147 +        movl    0x18(%rsp), %ecx        /* arg3 */
     1148 +        pushq   %rax                    /* arg7 saved to stack */
     1149 +        movl    0x28(%rsp), %r8d        /* arg4 */
     1150 +        movl    0x38(%rsp), %eax        /* arg6 load */
     1151 +        movl    0x30(%rsp), %r9d        /* arg5 */
     1152 +        pushq   %rax                    /* arg6 saved to stack */
1091 1153  
1092 1154          call    *SY_CALLC(%rbx)
1093 1155  
1094 1156          movq    %rbp, %rsp      /* pop the args */
1095 1157  
1096 1158          /*
1097 1159           * amd64 syscall handlers -always- return a 64-bit value in %rax.
 1098 1160           * On the 32-bit kernel, they always return that value in %eax:%edx
1099 1161           * as required by the 32-bit ABI.
1100 1162           *
↓ open down ↓ 51 lines elided ↑ open up ↑
1152 1214          ALTENTRY(sys_sysenter_swapgs_sysexit)
1153 1215          swapgs
1154 1216          sti
1155 1217          sysexit
1156 1218          SET_SIZE(sys_sysenter_swapgs_sysexit)
1157 1219          SET_SIZE(sys_sysenter)
1158 1220          SET_SIZE(_sys_sysenter_post_swapgs)
1159 1221          SET_SIZE(brand_sys_sysenter)
1160 1222  
1161 1223  #endif  /* __lint */
     1224 + 
     1225 +#if defined(__lint)
     1226 +/*
     1227 + * System call via an int80.  This entry point is only used by the Linux
     1228 + * application environment.  Unlike the other entry points, there is no
     1229 + * default action to take if no callback is registered for this process.
     1230 + */
     1231 +void
     1232 +sys_int80()
     1233 +{}
1162 1234  
     1235 +#else   /* __lint */
     1236 +
     1237 +        ENTRY_NP(brand_sys_int80)
     1238 +        SWAPGS                          /* kernel gsbase */
     1239 +        XPV_TRAP_POP
     1240 +
     1241 +        /*
     1242 +         * We first attempt to call the "b_int80" handler from the "struct
     1243 +         * brand_mach_ops" for this brand.  If no handler function is installed
     1244 +         * for this brand, the BRAND_CALLBACK() macro returns here and we
     1245 +         * check the lwp for a "lwp_brand_syscall" handler.
     1246 +         */
     1247 +        BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
     1248 +
     1249 +        /*
     1250 +         * Check to see if this lwp provides "lwp_brand_syscall".  If so, we
     1251 +         * will route this int80 through the regular system call handling path.
     1252 +         */
     1253 +        movq    %r15, %gs:CPU_RTMP_R15
     1254 +        movq    %gs:CPU_THREAD, %r15
     1255 +        movq    T_LWP(%r15), %r15
     1256 +        movq    LWP_BRAND_SYSCALL(%r15), %r15
     1257 +        testq   %r15, %r15
     1258 +        movq    %gs:CPU_RTMP_R15, %r15
     1259 +        jnz     nopop_syscall_int
     1260 +
     1261 +        /*
     1262 +         * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
     1263 +         * function, and has thus opted out of handling this trap.
     1264 +         */
     1265 +        SWAPGS                          /* user gsbase */
     1266 +        jmp     nopop_int80
     1267 +
     1268 +        ENTRY_NP(sys_int80)
     1269 +        /*
     1270 +         * We hit an int80, but this process isn't of a brand with an int80
     1271 +         * handler.  Bad process!  Make it look as if the INT failed.
     1272 +         * Modify %rip to point before the INT, push the expected error
     1273 +         * code and fake a GP fault. Note on 64-bit hypervisor we need
     1274 +         * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
     1275 +         * because gptrap will pop them again with its own XPV_TRAP_POP.
     1276 +         */
     1277 +        XPV_TRAP_POP
     1278 +nopop_int80:
     1279 +        subq    $2, (%rsp)      /* int insn 2-bytes */
     1280 +        pushq   $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
     1281 +#if defined(__xpv)
     1282 +        push    %r11
     1283 +        push    %rcx
     1284 +#endif
     1285 +        jmp     gptrap                  / GP fault
     1286 +        SET_SIZE(sys_int80)
     1287 +        SET_SIZE(brand_sys_int80)
     1288 +#endif  /* __lint */
     1289 +
     1290 +
1163 1291  /*
1164 1292   * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1165 1293   * the generic i386 libc to do system calls. We do a small amount of setup
1166 1294   * before jumping into the existing sys_syscall32 path.
1167 1295   */
1168 1296  #if defined(__lint)
1169 1297  
1170 1298  /*ARGSUSED*/
1171 1299  void
1172 1300  sys_syscall_int()
↓ open down ↓ 138 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX