OS-2834 ship lx brand — webrev diff view
    
          --- old/usr/src/uts/intel/ia32/os/desctbls.c
          +++ new/usr/src/uts/intel/ia32/os/desctbls.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   * Copyright 2011 Joyent, Inc. All rights reserved.
  28   28   */
  29   29  
  30   30  /*
  31   31   * Copyright (c) 1992 Terrence R. Lambert.
  32   32   * Copyright (c) 1990 The Regents of the University of California.
  33   33   * All rights reserved.
  34   34   *
  35   35   * This code is derived from software contributed to Berkeley by
  36   36   * William Jolitz.
  37   37   *
  38   38   * Redistribution and use in source and binary forms, with or without
  39   39   * modification, are permitted provided that the following conditions
  40   40   * are met:
  41   41   * 1. Redistributions of source code must retain the above copyright
  42   42   *    notice, this list of conditions and the following disclaimer.
  43   43   * 2. Redistributions in binary form must reproduce the above copyright
  44   44   *    notice, this list of conditions and the following disclaimer in the
  45   45   *    documentation and/or other materials provided with the distribution.
  46   46   * 3. All advertising materials mentioning features or use of this software
  47   47   *    must display the following acknowledgement:
  48   48   *      This product includes software developed by the University of
  49   49   *      California, Berkeley and its contributors.
  50   50   * 4. Neither the name of the University nor the names of its contributors
  51   51   *    may be used to endorse or promote products derived from this software
  52   52   *    without specific prior written permission.
  53   53   *
  54   54   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55   55   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56   56   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57   57   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58   58   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59   59   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60   60   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61   61   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62   62   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63   63   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64   64   * SUCH DAMAGE.
  65   65   *
  66   66   *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
  67   67   */
  68   68  
  69   69  #include <sys/types.h>
  70   70  #include <sys/sysmacros.h>
  71   71  #include <sys/tss.h>
  72   72  #include <sys/segments.h>
  73   73  #include <sys/trap.h>
  74   74  #include <sys/cpuvar.h>
  75   75  #include <sys/bootconf.h>
  76   76  #include <sys/x86_archext.h>
  77   77  #include <sys/controlregs.h>
  78   78  #include <sys/archsystm.h>
  79   79  #include <sys/machsystm.h>
  80   80  #include <sys/kobj.h>
  81   81  #include <sys/cmn_err.h>
  82   82  #include <sys/reboot.h>
  83   83  #include <sys/kdi.h>
  84   84  #include <sys/mach_mmu.h>
  85   85  #include <sys/systm.h>
  86   86  
  87   87  #ifdef __xpv
  88   88  #include <sys/hypervisor.h>
  89   89  #include <vm/as.h>
  90   90  #endif
  91   91  
  92   92  #include <sys/promif.h>
  93   93  #include <sys/bootinfo.h>
  94   94  #include <vm/kboot_mmu.h>
  95   95  #include <vm/hat_pte.h>
  96   96  
  97   97  /*
  98   98   * cpu0 and default tables and structures.
  99   99   */
user_desc_t	*gdt0;			/* cpu0 global descriptor table */
#if !defined(__xpv)
desctbr_t	gdt0_default_r;		/* describes gdt0 in GDTR format */
#endif

gate_desc_t	*idt0;		/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
#endif

tss_t		*ktss0;			/* kernel task state structure */

#if defined(__i386)
tss_t		*dftss0;		/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
user_desc_t	null_udesc;		/* null user descriptor */
system_desc_t	null_sdesc;		/* null system descriptor */

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

#if defined(__amd64)
/*
 * Present/not-present pairs of user code descriptors; the *_off copies
 * have usd_p clear to force a #NP fault.  See init_gdt_common().
 */
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;
#endif	/* __amd64 */

/* 16-byte-aligned stack used for the double-fault handler. */
#pragma	align	16(dblfault_stack0)
char		dblfault_stack0[DEFAULTSTKSZ];
 133  133  
/* Fast-trap entry points; see the T_F* assignments in fasttable below. */
extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t	getlgrp(void);

/*
 * Dispatch table for the fast system-call traps, indexed by fast trap
 * number.  Slots not (yet) implemented point at fast_null.
 */
void (*(fasttable[]))(void) = {
	fast_null,			/* T_FNULL routine */
	fast_null,			/* T_FGETFP routine (initially null) */
	fast_null,			/* T_FSETFP routine (initially null) */
	(void (*)())get_hrtime,		/* T_GETHRTIME */
	(void (*)())gethrvtime,		/* T_GETHRVTIME */
	(void (*)())get_hrestime,	/* T_GETHRESTIME */
	(void (*)())getlgrp		/* T_GETLGRP */
};
 149  149  
 150  150  /*
 151  151   * Structure containing pre-computed descriptors to allow us to temporarily
 152  152   * interpose on a standard handler.
 153  153   */
  
    [... 153 lines elided in this view ...]
struct interposing_handler {
	int ih_inum;			/* IDT vector being interposed on */
	gate_desc_t ih_interp_desc;	/* replacement (interposing) gate */
	gate_desc_t ih_default_desc;	/* saved default gate, for restore */
};
 159  159  
 160  160  /*
 161  161   * The brand infrastructure interposes on two handlers, and we use one as a
 162  162   * NULL signpost.
 163  163   */
 164      -static struct interposing_handler brand_tbl[2];
      164 +static struct interposing_handler brand_tbl[3];
 165  165  
 166  166  /*
 167  167   * software prototypes for default local descriptor table
 168  168   */
 169  169  
 170  170  /*
 171  171   * Routines for loading segment descriptors in format the hardware
 172  172   * can understand.
 173  173   */
 174  174  
 175  175  #if defined(__amd64)
 176  176  
 177  177  /*
 178  178   * In long mode we have the new L or long mode attribute bit
 179  179   * for code segments. Only the conforming bit in type is used along
 180  180   * with descriptor priority and present bits. Default operand size must
 181  181   * be zero when in long mode. In 32-bit compatibility mode all fields
 182  182   * are treated as in legacy mode. For data segments while in long mode
 183  183   * only the present bit is loaded.
 184  184   */
 185  185  void
 186  186  set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
 187  187      uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
 188  188  {
 189  189          ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
 190  190  
 191  191          /*
 192  192           * 64-bit long mode.
 193  193           */
 194  194          if (lmode == SDP_LONG)
 195  195                  dp->usd_def32 = 0;              /* 32-bit operands only */
 196  196          else
 197  197                  /*
 198  198                   * 32-bit compatibility mode.
 199  199                   */
 200  200                  dp->usd_def32 = defopsz;        /* 0 = 16, 1 = 32-bit ops */
 201  201  
 202  202          dp->usd_long = lmode;   /* 64-bit mode */
 203  203          dp->usd_type = type;
 204  204          dp->usd_dpl = dpl;
 205  205          dp->usd_p = 1;
 206  206          dp->usd_gran = gran;            /* 0 = bytes, 1 = pages */
 207  207  
 208  208          dp->usd_lobase = (uintptr_t)base;
 209  209          dp->usd_midbase = (uintptr_t)base >> 16;
 210  210          dp->usd_hibase = (uintptr_t)base >> (16 + 8);
 211  211          dp->usd_lolimit = size;
 212  212          dp->usd_hilimit = (uintptr_t)size >> 16;
 213  213  }
 214  214  
 215  215  #elif defined(__i386)
 216  216  
 217  217  /*
 218  218   * Install user segment descriptor for code and data.
 219  219   */
 220  220  void
 221  221  set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
 222  222      uint_t dpl, uint_t gran, uint_t defopsz)
 223  223  {
 224  224          dp->usd_lolimit = size;
 225  225          dp->usd_hilimit = (uintptr_t)size >> 16;
 226  226  
 227  227          dp->usd_lobase = (uintptr_t)base;
 228  228          dp->usd_midbase = (uintptr_t)base >> 16;
 229  229          dp->usd_hibase = (uintptr_t)base >> (16 + 8);
 230  230  
 231  231          dp->usd_type = type;
 232  232          dp->usd_dpl = dpl;
 233  233          dp->usd_p = 1;
 234  234          dp->usd_def32 = defopsz;        /* 0 = 16, 1 = 32 bit operands */
 235  235          dp->usd_gran = gran;            /* 0 = bytes, 1 = pages */
 236  236  }
 237  237  
 238  238  #endif  /* __i386 */
 239  239  
 240  240  /*
 241  241   * Install system segment descriptor for LDT and TSS segments.
 242  242   */
 243  243  
 244  244  #if defined(__amd64)
 245  245  
 246  246  void
 247  247  set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
 248  248      uint_t dpl)
 249  249  {
 250  250          dp->ssd_lolimit = size;
 251  251          dp->ssd_hilimit = (uintptr_t)size >> 16;
 252  252  
 253  253          dp->ssd_lobase = (uintptr_t)base;
 254  254          dp->ssd_midbase = (uintptr_t)base >> 16;
 255  255          dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
 256  256          dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
 257  257  
 258  258          dp->ssd_type = type;
 259  259          dp->ssd_zero1 = 0;      /* must be zero */
 260  260          dp->ssd_zero2 = 0;
 261  261          dp->ssd_dpl = dpl;
 262  262          dp->ssd_p = 1;
 263  263          dp->ssd_gran = 0;       /* force byte units */
 264  264  }
 265  265  
 266  266  void *
 267  267  get_ssd_base(system_desc_t *dp)
 268  268  {
 269  269          uintptr_t       base;
 270  270  
 271  271          base = (uintptr_t)dp->ssd_lobase |
 272  272              (uintptr_t)dp->ssd_midbase << 16 |
 273  273              (uintptr_t)dp->ssd_hibase << (16 + 8) |
 274  274              (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
 275  275          return ((void *)base);
 276  276  }
 277  277  
 278  278  #elif defined(__i386)
 279  279  
 280  280  void
 281  281  set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
 282  282      uint_t dpl)
 283  283  {
 284  284          dp->ssd_lolimit = size;
 285  285          dp->ssd_hilimit = (uintptr_t)size >> 16;
 286  286  
 287  287          dp->ssd_lobase = (uintptr_t)base;
 288  288          dp->ssd_midbase = (uintptr_t)base >> 16;
 289  289          dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
 290  290  
 291  291          dp->ssd_type = type;
 292  292          dp->ssd_zero = 0;       /* must be zero */
 293  293          dp->ssd_dpl = dpl;
 294  294          dp->ssd_p = 1;
 295  295          dp->ssd_gran = 0;       /* force byte units */
 296  296  }
 297  297  
 298  298  void *
 299  299  get_ssd_base(system_desc_t *dp)
 300  300  {
 301  301          uintptr_t       base;
 302  302  
 303  303          base = (uintptr_t)dp->ssd_lobase |
 304  304              (uintptr_t)dp->ssd_midbase << 16 |
 305  305              (uintptr_t)dp->ssd_hibase << (16 + 8);
 306  306          return ((void *)base);
 307  307  }
 308  308  
 309  309  #endif  /* __i386 */
 310  310  
 311  311  /*
 312  312   * Install gate segment descriptor for interrupt, trap, call and task gates.
 313  313   */
 314  314  
 315  315  #if defined(__amd64)
 316  316  
 317  317  /*ARGSUSED*/
 318  318  void
 319  319  set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 320  320      uint_t type, uint_t dpl, uint_t vector)
 321  321  {
 322  322          dp->sgd_looffset = (uintptr_t)func;
 323  323          dp->sgd_hioffset = (uintptr_t)func >> 16;
 324  324          dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
 325  325  
 326  326          dp->sgd_selector =  (uint16_t)sel;
 327  327  
 328  328          /*
 329  329           * For 64 bit native we use the IST stack mechanism
 330  330           * for double faults. All other traps use the CPL = 0
 331  331           * (tss_rsp0) stack.
 332  332           */
 333  333  #if !defined(__xpv)
 334  334          if (vector == T_DBLFLT)
 335  335                  dp->sgd_ist = 1;
 336  336          else
 337  337  #endif
 338  338                  dp->sgd_ist = 0;
 339  339  
 340  340          dp->sgd_type = type;
 341  341          dp->sgd_dpl = dpl;
 342  342          dp->sgd_p = 1;
 343  343  }
 344  344  
 345  345  #elif defined(__i386)
 346  346  
 347  347  /*ARGSUSED*/
 348  348  void
 349  349  set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 350  350      uint_t type, uint_t dpl, uint_t unused)
 351  351  {
 352  352          dp->sgd_looffset = (uintptr_t)func;
 353  353          dp->sgd_hioffset = (uintptr_t)func >> 16;
 354  354  
 355  355          dp->sgd_selector =  (uint16_t)sel;
 356  356          dp->sgd_stkcpy = 0;     /* always zero bytes */
 357  357          dp->sgd_type = type;
 358  358          dp->sgd_dpl = dpl;
 359  359          dp->sgd_p = 1;
 360  360  }
 361  361  
 362  362  #endif  /* __i386 */
 363  363  
 364  364  /*
 365  365   * Updates a single user descriptor in the the GDT of the current cpu.
 366  366   * Caller is responsible for preventing cpu migration.
 367  367   */
 368  368  
 369  369  void
 370  370  gdt_update_usegd(uint_t sidx, user_desc_t *udp)
 371  371  {
 372  372  #if defined(__xpv)
 373  373  
 374  374          uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
 375  375  
 376  376          if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
 377  377                  panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
 378  378  
 379  379  #else   /* __xpv */
 380  380  
 381  381          CPU->cpu_gdt[sidx] = *udp;
 382  382  
 383  383  #endif  /* __xpv */
 384  384  }
 385  385  
 386  386  /*
 387  387   * Writes single descriptor pointed to by udp into a processes
 388  388   * LDT entry pointed to by ldp.
 389  389   */
 390  390  int
 391  391  ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
 392  392  {
 393  393  #if defined(__xpv)
 394  394  
 395  395          uint64_t dpa;
 396  396  
 397  397          dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
 398  398              ((uintptr_t)ldp & PAGEOFFSET);
 399  399  
 400  400          /*
 401  401           * The hypervisor is a little more restrictive about what it
 402  402           * supports in the LDT.
 403  403           */
 404  404          if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
 405  405                  return (EINVAL);
 406  406  
 407  407  #else   /* __xpv */
 408  408  
 409  409          *ldp = *udp;
 410  410  
 411  411  #endif  /* __xpv */
 412  412          return (0);
 413  413  }
 414  414  
 415  415  #if defined(__xpv)
 416  416  
 417  417  /*
 418  418   * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
 419  419   * Returns true if a valid entry was written.
 420  420   */
 421  421  int
 422  422  xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
 423  423  {
 424  424          trap_info_t *ti = ti_arg;       /* XXPV Aargh - segments.h comment */
 425  425  
 426  426          /*
 427  427           * skip holes in the IDT
 428  428           */
 429  429          if (GATESEG_GETOFFSET(sgd) == 0)
 430  430                  return (0);
 431  431  
 432  432          ASSERT(sgd->sgd_type == SDT_SYSIGT);
 433  433          ti->vector = vec;
 434  434          TI_SET_DPL(ti, sgd->sgd_dpl);
 435  435  
 436  436          /*
 437  437           * Is this an interrupt gate?
 438  438           */
 439  439          if (sgd->sgd_type == SDT_SYSIGT) {
 440  440                  /* LINTED */
 441  441                  TI_SET_IF(ti, 1);
 442  442          }
 443  443          ti->cs = sgd->sgd_selector;
 444  444  #if defined(__amd64)
 445  445          ti->cs |= SEL_KPL;      /* force into ring 3. see KCS_SEL  */
 446  446  #endif
 447  447          ti->address = GATESEG_GETOFFSET(sgd);
 448  448          return (1);
 449  449  }
 450  450  
 451  451  /*
 452  452   * Convert a single hw format gate descriptor and write it into our virtual IDT.
 453  453   */
 454  454  void
 455  455  xen_idt_write(gate_desc_t *sgd, uint_t vec)
 456  456  {
 457  457          trap_info_t trapinfo[2];
 458  458  
 459  459          bzero(trapinfo, sizeof (trapinfo));
 460  460          if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
 461  461                  return;
 462  462          if (xen_set_trap_table(trapinfo) != 0)
 463  463                  panic("xen_idt_write: xen_set_trap_table() failed");
 464  464  }
 465  465  
 466  466  #endif  /* __xpv */
 467  467  
 468  468  #if defined(__amd64)
 469  469  
 470  470  /*
 471  471   * Build kernel GDT.
 472  472   */
 473  473  
 474  474  static void
 475  475  init_gdt_common(user_desc_t *gdt)
 476  476  {
 477  477          int i;
 478  478  
 479  479          /*
 480  480           * 64-bit kernel code segment.
 481  481           */
 482  482          set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
 483  483              SDP_PAGES, SDP_OP32);
 484  484  
 485  485          /*
 486  486           * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
 487  487           * mode, but we set it here to 0xFFFF so that we can use the SYSRET
 488  488           * instruction to return from system calls back to 32-bit applications.
 489  489           * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
 490  490           * descriptors. We therefore must ensure that the kernel uses something,
 491  491           * though it will be ignored by hardware, that is compatible with 32-bit
 492  492           * apps. For the same reason we must set the default op size of this
 493  493           * descriptor to 32-bit operands.
 494  494           */
 495  495          set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
 496  496              SEL_KPL, SDP_PAGES, SDP_OP32);
 497  497          gdt[GDT_KDATA].usd_def32 = 1;
 498  498  
 499  499          /*
 500  500           * 64-bit user code segment.
 501  501           */
 502  502          set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
 503  503              SDP_PAGES, SDP_OP32);
 504  504  
 505  505          /*
 506  506           * 32-bit user code segment.
 507  507           */
 508  508          set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
 509  509              SEL_UPL, SDP_PAGES, SDP_OP32);
 510  510  
 511  511          /*
 512  512           * See gdt_ucode32() and gdt_ucode_native().
 513  513           */
 514  514          ucs_on = ucs_off = gdt[GDT_UCODE];
 515  515          ucs_off.usd_p = 0;      /* forces #np fault */
 516  516  
 517  517          ucs32_on = ucs32_off = gdt[GDT_U32CODE];
 518  518          ucs32_off.usd_p = 0;    /* forces #np fault */
 519  519  
 520  520          /*
 521  521           * 32 and 64 bit data segments can actually share the same descriptor.
 522  522           * In long mode only the present bit is checked but all other fields
 523  523           * are loaded. But in compatibility mode all fields are interpreted
 524  524           * as in legacy mode so they must be set correctly for a 32-bit data
 525  525           * segment.
 526  526           */
 527  527          set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
 528  528              SDP_PAGES, SDP_OP32);
 529  529  
 530  530  #if !defined(__xpv)
 531  531  
 532  532          /*
 533  533           * The 64-bit kernel has no default LDT. By default, the LDT descriptor
 534  534           * in the GDT is 0.
 535  535           */
 536  536  
 537  537          /*
 538  538           * Kernel TSS
 539  539           */
 540  540          set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
 541  541              sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 542  542  
 543  543  #endif  /* !__xpv */
 544  544  
 545  545          /*
 546  546           * Initialize fs and gs descriptors for 32 bit processes.
 547  547           * Only attributes and limits are initialized, the effective
 548  548           * base address is programmed via fsbase/gsbase.
 549  549           */
 550  550          set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
 551  551              SEL_UPL, SDP_PAGES, SDP_OP32);
 552  552          set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
 553  553              SEL_UPL, SDP_PAGES, SDP_OP32);
 554  554  
 555  555          /*
 556  556           * Initialize the descriptors set aside for brand usage.
 557  557           * Only attributes and limits are initialized.
 558  558           */
 559  559          for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
 560  560                  set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
 561  561                      SEL_UPL, SDP_PAGES, SDP_OP32);
 562  562  
 563  563          /*
 564  564           * Initialize convenient zero base user descriptors for clearing
 565  565           * lwp private %fs and %gs descriptors in GDT. See setregs() for
 566  566           * an example.
 567  567           */
 568  568          set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
 569  569              SDP_BYTES, SDP_OP32);
 570  570          set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
 571  571              SDP_PAGES, SDP_OP32);
 572  572  }
 573  573  
 574  574  #if defined(__xpv)
 575  575  
/*
 * Allocate and install cpu0's GDT when running under the Xen
 * hypervisor.  The page must be made read-only before the hypervisor
 * will accept it, and segment registers/bases are programmed via
 * hypercalls rather than direct MSR writes.
 */
static user_desc_t *
init_gdt(void)
{
	uint64_t gdtpa;
	ulong_t ma[1];		/* XXPV should be a memory_t */
	ulong_t addr;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * XXX Since we never invoke kmdb until after the kernel takes
	 * over the descriptor tables why not have it use the kernel's
	 * selectors?
	 */
	if (boothowto & RB_DEBUG) {
		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
		    SEL_KPL, SDP_PAGES, SDP_OP32);
	}

	/*
	 * Clear write permission for page containing the gdt and install it.
	 * (The hypervisor requires the GDT page to be read-only.)
	 */
	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
	kbm_read_only((uintptr_t)gdt0, gdtpa);
	xen_set_gdt(ma, NGDT);

	/*
	 * Reload the segment registers to use the new GDT.
	 * On 64-bit, fixup KCS_SEL to be in ring 3.
	 * See KCS_SEL in segments.h.
	 */
	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 *  setup %gs for kernel
	 */
	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	addr = 0x200000000ul;
	xen_set_segment_base(SEGBASE_FS, addr);
	xen_set_segment_base(SEGBASE_GS_USER, addr);
	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

	return (gdt0);
}
 644  644  
 645  645  #else   /* __xpv */
 646  646  
/*
 * Allocate and install cpu0's GDT on bare metal.  Boot's B* descriptor
 * entries are copied forward so boot services keep working, then the
 * new table is loaded and the segment registers and FS/GS base MSRs
 * are reprogrammed.  Ordering matters: wr_gdtr before the segment
 * register reload, which precedes the GSBASE write.
 */
static user_desc_t *
init_gdt(void)
{
	desctbr_t	r_bgdt, r_gdt;
	user_desc_t	*bgdt;

#if !defined(__lint)
	/*
	 * Our gdt is never larger than a single page.
	 */
	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(gdt0, PAGESIZE);

	init_gdt_common(gdt0);

	/*
	 * Copy in from boot's gdt to our gdt.
	 * Entry 0 is the null descriptor by definition.
	 */
	rd_gdtr(&r_bgdt);
	bgdt = (user_desc_t *)r_bgdt.dtr_base;
	if (bgdt == NULL)
		panic("null boot gdt");

	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

	/*
	 * Install our new GDT
	 */
	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
	r_gdt.dtr_base = (uintptr_t)gdt0;
	wr_gdtr(&r_gdt);

	/*
	 * Reload the segment registers to use the new GDT
	 */
	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

	/*
	 *  setup %gs for kernel (points at cpu0's cpu_t)
	 */
	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

	/*
	 * XX64 We should never dereference off "other gsbase" or
	 * "fsbase".  So, we should arrange to point FSBASE and
	 * KGSBASE somewhere truly awful e.g. point it at the last
	 * valid address below the hole so that any attempts to index
	 * off them cause an exception.
	 *
	 * For now, point it at 8G -- at least it should be unmapped
	 * until some 64-bit processes run.
	 */
	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
	return (gdt0);
}
 711  711  
 712  712  #endif  /* __xpv */
 713  713  
 714  714  #elif defined(__i386)
 715  715  
 716  716  static void
 717  717  init_gdt_common(user_desc_t *gdt)
 718  718  {
 719  719          int i;
 720  720  
 721  721          /*
 722  722           * Text and data for both kernel and user span entire 32 bit
 723  723           * address space.
 724  724           */
 725  725  
 726  726          /*
 727  727           * kernel code segment.
 728  728           */
 729  729          set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
 730  730              SDP_OP32);
 731  731  
 732  732          /*
 733  733           * kernel data segment.
 734  734           */
 735  735          set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
 736  736              SDP_OP32);
 737  737  
 738  738          /*
 739  739           * user code segment.
 740  740           */
 741  741          set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
 742  742              SDP_OP32);
 743  743  
 744  744          /*
 745  745           * user data segment.
 746  746           */
 747  747          set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
 748  748              SDP_OP32);
 749  749  
 750  750  #if !defined(__xpv)
 751  751  
 752  752          /*
 753  753           * TSS for T_DBLFLT (double fault) handler
 754  754           */
 755  755          set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
 756  756              sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
 757  757  
 758  758          /*
 759  759           * TSS for kernel
 760  760           */
 761  761          set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
 762  762              sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 763  763  
 764  764  #endif  /* !__xpv */
 765  765  
 766  766          /*
 767  767           * %gs selector for kernel
 768  768           */
 769  769          set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
 770  770              SEL_KPL, SDP_BYTES, SDP_OP32);
 771  771  
 772  772          /*
 773  773           * Initialize lwp private descriptors.
 774  774           * Only attributes and limits are initialized, the effective
 775  775           * base address is programmed via fsbase/gsbase.
 776  776           */
 777  777          set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
 778  778              SDP_PAGES, SDP_OP32);
 779  779          set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
 780  780              SDP_PAGES, SDP_OP32);
 781  781  
 782  782          /*
 783  783           * Initialize the descriptors set aside for brand usage.
 784  784           * Only attributes and limits are initialized.
 785  785           */
 786  786          for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
 787  787                  set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
 788  788                      SDP_PAGES, SDP_OP32);
 789  789          /*
 790  790           * Initialize convenient zero base user descriptor for clearing
 791  791           * lwp  private %fs and %gs descriptors in GDT. See setregs() for
 792  792           * an example.
 793  793           */
 794  794          set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
 795  795              SDP_BYTES, SDP_OP32);
 796  796  }
 797  797  
 798  798  #if defined(__xpv)
 799  799  
 800  800  static user_desc_t *
 801  801  init_gdt(void)
 802  802  {
 803  803          uint64_t gdtpa;
 804  804          ulong_t ma[1];          /* XXPV should be a memory_t */
 805  805  
 806  806  #if !defined(__lint)
 807  807          /*
 808  808           * Our gdt is never larger than a single page.
 809  809           */
 810  810          ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
 811  811  #endif
 812  812          gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
 813  813              PAGESIZE, PAGESIZE);
 814  814          bzero(gdt0, PAGESIZE);
 815  815  
 816  816          init_gdt_common(gdt0);
 817  817          gdtpa = pfn_to_pa(va_to_pfn(gdt0));
 818  818  
 819  819          /*
 820  820           * XXX Since we never invoke kmdb until after the kernel takes
 821  821           * over the descriptor tables why not have it use the kernel's
 822  822           * selectors?
 823  823           */
 824  824          if (boothowto & RB_DEBUG) {
 825  825                  set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
 826  826                      SDP_PAGES, SDP_OP32);
 827  827                  set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
 828  828                      SDP_PAGES, SDP_OP32);
 829  829          }
 830  830  
 831  831          /*
 832  832           * Clear write permission for page containing the gdt and install it.
 833  833           */
 834  834          ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
 835  835          kbm_read_only((uintptr_t)gdt0, gdtpa);
 836  836          xen_set_gdt(ma, NGDT);
 837  837  
 838  838          /*
 839  839           * Reload the segment registers to use the new GDT
 840  840           */
 841  841          load_segment_registers(
 842  842              KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
 843  843  
 844  844          return (gdt0);
 845  845  }
 846  846  
 847  847  #else   /* __xpv */
 848  848  
 849  849  static user_desc_t *
 850  850  init_gdt(void)
 851  851  {
 852  852          desctbr_t       r_bgdt, r_gdt;
 853  853          user_desc_t     *bgdt;
 854  854  
 855  855  #if !defined(__lint)
 856  856          /*
 857  857           * Our gdt is never larger than a single page.
 858  858           */
 859  859          ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
 860  860  #endif
 861  861          /*
 862  862           * XXX this allocation belongs in our caller, not here.
 863  863           */
 864  864          gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
 865  865              PAGESIZE, PAGESIZE);
 866  866          bzero(gdt0, PAGESIZE);
 867  867  
 868  868          init_gdt_common(gdt0);
 869  869  
 870  870          /*
 871  871           * Copy in from boot's gdt to our gdt entries.
 872  872           * Entry 0 is null descriptor by definition.
 873  873           */
 874  874          rd_gdtr(&r_bgdt);
 875  875          bgdt = (user_desc_t *)r_bgdt.dtr_base;
 876  876          if (bgdt == NULL)
 877  877                  panic("null boot gdt");
 878  878  
 879  879          gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
 880  880          gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
 881  881          gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
 882  882          gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
 883  883  
 884  884          /*
 885  885           * Install our new GDT
 886  886           */
 887  887          r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
 888  888          r_gdt.dtr_base = (uintptr_t)gdt0;
 889  889          wr_gdtr(&r_gdt);
 890  890  
 891  891          /*
 892  892           * Reload the segment registers to use the new GDT
 893  893           */
 894  894          load_segment_registers(
 895  895              KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
 896  896  
 897  897          return (gdt0);
 898  898  }
 899  899  
 900  900  #endif  /* __xpv */
 901  901  #endif  /* __i386 */
 902  902  
 903  903  /*
 904  904   * Build kernel IDT.
 905  905   *
 906  906   * Note that for amd64 we pretty much require every gate to be an interrupt
 907  907   * gate which blocks interrupts atomically on entry; that's because of our
 908  908   * dependency on using 'swapgs' every time we come into the kernel to find
 909  909   * the cpu structure. If we get interrupted just before doing that, %cs could
 910  910   * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 911  911   * %gsbase is really still pointing at something in userland. Bad things will
 912  912   * ensue. We also use interrupt gates for i386 as well even though this is not
 913  913   * required for some traps.
 914  914   *
 915  915   * Perhaps they should have invented a trap gate that does an atomic swapgs?
 916  916   */
 917  917  static void
 918  918  init_idt_common(gate_desc_t *idt)
 919  919  {
 920  920          set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 921  921              0);
 922  922          set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 923  923              0);
 924  924          set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 925  925              0);
 926  926          set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 927  927              0);
 928  928          set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 929  929              0);
 930  930          set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
 931  931              TRP_KPL, 0);
 932  932          set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 933  933              0);
 934  934          set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL,
 935  935              0);
 936  936  
 937  937          /*
 938  938           * double fault handler.
 939  939           *
 940  940           * Note that on the hypervisor a guest does not receive #df faults.
 941  941           * Instead a failsafe event is injected into the guest if its selectors
 942  942           * and/or stack is in a broken state. See xen_failsafe_callback.
 943  943           */
 944  944  #if !defined(__xpv)
 945  945  #if defined(__amd64)
 946  946  
 947  947          set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 948  948              T_DBLFLT);
 949  949  
 950  950  #elif defined(__i386)
 951  951  
 952  952          /*
 953  953           * task gate required.
 954  954           */
 955  955          set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
 956  956              0);
 957  957  
 958  958  #endif  /* __i386 */
 959  959  #endif  /* !__xpv */
 960  960  
 961  961          /*
 962  962           * T_EXTOVRFLT coprocessor-segment-overrun not supported.
 963  963           */
 964  964  
 965  965          set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 966  966              0);
 967  967          set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 968  968              0);
  
    | 
      ↓ open down ↓ | 
    794 lines elided | 
    
      ↑ open up ↑ | 
  
 969  969          set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 970  970          set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 971  971          set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 972  972          set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
 973  973              0);
 974  974          set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
 975  975              TRP_KPL, 0);
 976  976          set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 977  977          set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
 978  978  
      979 +        /*
      980 +         * install "int80" handler at, well, 0x80.
      981 +         */
      982 +        set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
      983 +            0);
      984 +
 979  985          /*
 980  986           * install fast trap handler at 210.
 981  987           */
 982  988          set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
 983  989              0);
 984  990  
 985  991          /*
 986  992           * System call handler.
 987  993           */
 988  994  #if defined(__amd64)
 989  995          set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
 990  996              TRP_UPL, 0);
 991  997  
 992  998  #elif defined(__i386)
 993  999          set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
  
    | 
      ↓ open down ↓ | 
    5 lines elided | 
    
      ↑ open up ↑ | 
  
 994 1000              TRP_UPL, 0);
 995 1001  #endif  /* __i386 */
 996 1002  
 997 1003          /*
 998 1004           * Install the DTrace interrupt handler for the pid provider.
 999 1005           */
1000 1006          set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001 1007              SDT_SYSIGT, TRP_UPL, 0);
1002 1008  
1003 1009          /*
1004      -         * Prepare interposing descriptor for the syscall handler
1005      -         * and cache copy of the default descriptor.
     1010 +         * Prepare interposing descriptors for the branded "int80"
     1011 +         * and syscall handlers and cache copies of the default
     1012 +         * descriptors.
1006 1013           */
1007      -        brand_tbl[0].ih_inum = T_SYSCALLINT;
1008      -        brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
     1014 +        brand_tbl[0].ih_inum = T_INT80;
     1015 +        brand_tbl[0].ih_default_desc = idt0[T_INT80];
     1016 +        set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
     1017 +            SDT_SYSIGT, TRP_UPL, 0);
1009 1018  
     1019 +        brand_tbl[1].ih_inum = T_SYSCALLINT;
     1020 +        brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
     1021 +
1010 1022  #if defined(__amd64)
1011      -        set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
     1023 +        set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
1012 1024              KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1013 1025  #elif defined(__i386)
1014      -        set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
     1026 +        set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
1015 1027              KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1016 1028  #endif  /* __i386 */
1017 1029  
1018      -        brand_tbl[1].ih_inum = 0;
     1030 +        brand_tbl[2].ih_inum = 0;
1019 1031  }
1020 1032  
1021 1033  #if defined(__xpv)
1022 1034  
1023 1035  static void
1024 1036  init_idt(gate_desc_t *idt)
1025 1037  {
1026 1038          init_idt_common(idt);
1027 1039  }
1028 1040  
1029 1041  #else   /* __xpv */
1030 1042  
1031 1043  static void
1032 1044  init_idt(gate_desc_t *idt)
1033 1045  {
1034 1046          char    ivctname[80];
1035 1047          void    (*ivctptr)(void);
1036 1048          int     i;
1037 1049  
1038 1050          /*
1039 1051           * Initialize entire table with 'reserved' trap and then overwrite
1040 1052           * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1041 1053           * since it can only be generated on a 386 processor. 15 is also
1042 1054           * unsupported and reserved.
1043 1055           */
1044 1056          for (i = 0; i < NIDT; i++)
1045 1057                  set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1046 1058                      0);
1047 1059  
1048 1060          /*
1049 1061           * 20-31 reserved
1050 1062           */
1051 1063          for (i = 20; i < 32; i++)
1052 1064                  set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1053 1065                      0);
1054 1066  
1055 1067          /*
1056 1068           * interrupts 32 - 255
1057 1069           */
1058 1070          for (i = 32; i < 256; i++) {
1059 1071                  (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1060 1072                  ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1061 1073                  if (ivctptr == NULL)
1062 1074                          panic("kobj_getsymvalue(%s) failed", ivctname);
1063 1075  
1064 1076                  set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
1065 1077          }
1066 1078  
1067 1079          /*
1068 1080           * Now install the common ones. Note that it will overlay some
1069 1081           * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1070 1082           */
1071 1083          init_idt_common(idt);
1072 1084  }
1073 1085  
1074 1086  #endif  /* __xpv */
1075 1087  
1076 1088  /*
1077 1089   * The kernel does not deal with LDTs unless a user explicitly creates
1078 1090   * one. Under normal circumstances, the LDTR contains 0. Any process attempting
1079 1091   * to reference the LDT will therefore cause a #gp. System calls made via the
1080 1092   * obsolete lcall mechanism are emulated by the #gp fault handler.
1081 1093   */
1082 1094  static void
1083 1095  init_ldt(void)
1084 1096  {
1085 1097  #if defined(__xpv)
1086 1098          xen_set_ldt(NULL, 0);
1087 1099  #else
1088 1100          wr_ldtr(0);
1089 1101  #endif
1090 1102  }
1091 1103  
1092 1104  #if !defined(__xpv)
1093 1105  #if defined(__amd64)
1094 1106  
1095 1107  static void
1096 1108  init_tss(void)
1097 1109  {
1098 1110          /*
1099 1111           * tss_rsp0 is dynamically filled in by resume() on each context switch.
1100 1112           * All exceptions but #DF will run on the thread stack.
1101 1113           * Set up the double fault stack here.
1102 1114           */
1103 1115          ktss0->tss_ist1 =
1104 1116              (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1105 1117  
1106 1118          /*
1107 1119           * Set I/O bit map offset equal to size of TSS segment limit
1108 1120           * for no I/O permission map. This will force all user I/O
1109 1121           * instructions to generate #gp fault.
1110 1122           */
1111 1123          ktss0->tss_bitmapbase = sizeof (*ktss0);
1112 1124  
1113 1125          /*
1114 1126           * Point %tr to descriptor for ktss0 in gdt.
1115 1127           */
1116 1128          wr_tsr(KTSS_SEL);
1117 1129  }
1118 1130  
1119 1131  #elif defined(__i386)
1120 1132  
1121 1133  static void
1122 1134  init_tss(void)
1123 1135  {
1124 1136          /*
1125 1137           * ktss0->tss_esp dynamically filled in by resume() on each
1126 1138           * context switch.
1127 1139           */
1128 1140          ktss0->tss_ss0  = KDS_SEL;
1129 1141          ktss0->tss_eip  = (uint32_t)_start;
1130 1142          ktss0->tss_ds   = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
1131 1143          ktss0->tss_cs   = KCS_SEL;
1132 1144          ktss0->tss_fs   = KFS_SEL;
1133 1145          ktss0->tss_gs   = KGS_SEL;
1134 1146          ktss0->tss_ldt  = ULDT_SEL;
1135 1147  
1136 1148          /*
1137 1149           * Initialize double fault tss.
1138 1150           */
1139 1151          dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1140 1152          dftss0->tss_ss0 = KDS_SEL;
1141 1153  
1142 1154          /*
1143 1155           * tss_cr3 will get initialized in hat_kern_setup() once our page
1144 1156           * tables have been setup.
1145 1157           */
1146 1158          dftss0->tss_eip = (uint32_t)syserrtrap;
1147 1159          dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1148 1160          dftss0->tss_cs  = KCS_SEL;
1149 1161          dftss0->tss_ds  = KDS_SEL;
1150 1162          dftss0->tss_es  = KDS_SEL;
1151 1163          dftss0->tss_ss  = KDS_SEL;
1152 1164          dftss0->tss_fs  = KFS_SEL;
1153 1165          dftss0->tss_gs  = KGS_SEL;
1154 1166  
1155 1167          /*
1156 1168           * Set I/O bit map offset equal to size of TSS segment limit
1157 1169           * for no I/O permission map. This will force all user I/O
1158 1170           * instructions to generate #gp fault.
1159 1171           */
1160 1172          ktss0->tss_bitmapbase = sizeof (*ktss0);
1161 1173  
1162 1174          /*
1163 1175           * Point %tr to descriptor for ktss0 in gdt.
1164 1176           */
1165 1177          wr_tsr(KTSS_SEL);
1166 1178  }
1167 1179  
1168 1180  #endif  /* __i386 */
1169 1181  #endif  /* !__xpv */
1170 1182  
1171 1183  #if defined(__xpv)
1172 1184  
1173 1185  void
1174 1186  init_desctbls(void)
1175 1187  {
1176 1188          uint_t vec;
1177 1189          user_desc_t *gdt;
1178 1190  
1179 1191          /*
1180 1192           * Setup and install our GDT.
1181 1193           */
1182 1194          gdt = init_gdt();
1183 1195  
1184 1196          /*
1185 1197           * Store static pa of gdt to speed up pa_to_ma() translations
1186 1198           * on lwp context switches.
1187 1199           */
1188 1200          ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1189 1201          CPU->cpu_gdt = gdt;
1190 1202          CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1191 1203  
1192 1204          /*
1193 1205           * Setup and install our IDT.
1194 1206           */
1195 1207  #if !defined(__lint)
1196 1208          ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1197 1209  #endif
1198 1210          idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1199 1211              PAGESIZE, PAGESIZE);
1200 1212          bzero(idt0, PAGESIZE);
1201 1213          init_idt(idt0);
1202 1214          for (vec = 0; vec < NIDT; vec++)
1203 1215                  xen_idt_write(&idt0[vec], vec);
1204 1216  
1205 1217          CPU->cpu_idt = idt0;
1206 1218  
1207 1219          /*
1208 1220           * set default kernel stack
1209 1221           */
1210 1222          xen_stack_switch(KDS_SEL,
1211 1223              (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1212 1224  
1213 1225          xen_init_callbacks();
1214 1226  
1215 1227          init_ldt();
1216 1228  }
1217 1229  
1218 1230  #else   /* __xpv */
1219 1231  
1220 1232  void
1221 1233  init_desctbls(void)
1222 1234  {
1223 1235          user_desc_t *gdt;
1224 1236          desctbr_t idtr;
1225 1237  
1226 1238          /*
1227 1239           * Allocate IDT and TSS structures on unique pages for better
1228 1240           * performance in virtual machines.
1229 1241           */
1230 1242  #if !defined(__lint)
1231 1243          ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1232 1244  #endif
1233 1245          idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1234 1246              PAGESIZE, PAGESIZE);
1235 1247          bzero(idt0, PAGESIZE);
1236 1248  #if !defined(__lint)
1237 1249          ASSERT(sizeof (*ktss0) <= PAGESIZE);
1238 1250  #endif
1239 1251          ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1240 1252              PAGESIZE, PAGESIZE);
1241 1253          bzero(ktss0, PAGESIZE);
1242 1254  
1243 1255  #if defined(__i386)
1244 1256  #if !defined(__lint)
1245 1257          ASSERT(sizeof (*dftss0) <= PAGESIZE);
1246 1258  #endif
1247 1259          dftss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
1248 1260              PAGESIZE, PAGESIZE);
1249 1261          bzero(dftss0, PAGESIZE);
1250 1262  #endif
1251 1263  
1252 1264          /*
1253 1265           * Setup and install our GDT.
1254 1266           */
1255 1267          gdt = init_gdt();
1256 1268          ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1257 1269          CPU->cpu_gdt = gdt;
1258 1270  
1259 1271          /*
1260 1272           * Setup and install our IDT.
1261 1273           */
1262 1274          init_idt(idt0);
1263 1275  
1264 1276          idtr.dtr_base = (uintptr_t)idt0;
1265 1277          idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1266 1278          wr_idtr(&idtr);
1267 1279          CPU->cpu_idt = idt0;
1268 1280  
1269 1281  #if defined(__i386)
1270 1282          /*
1271 1283           * We maintain a description of idt0 in convenient IDTR format
1272 1284           * for #pf's on some older pentium processors. See pentium_pftrap().
1273 1285           */
1274 1286          idt0_default_r = idtr;
1275 1287  #endif  /* __i386 */
1276 1288  
1277 1289          init_tss();
1278 1290          CPU->cpu_tss = ktss0;
1279 1291          init_ldt();
1280 1292  }
1281 1293  
1282 1294  #endif  /* __xpv */
1283 1295  
1284 1296  /*
1285 1297   * In the early kernel, we need to set up a simple GDT to run on.
1286 1298   *
1287 1299   * XXPV Can dboot use this too?  See dboot_gdt.s
1288 1300   */
1289 1301  void
1290 1302  init_boot_gdt(user_desc_t *bgdt)
1291 1303  {
1292 1304  #if defined(__amd64)
1293 1305          set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1294 1306              SDP_PAGES, SDP_OP32);
1295 1307          set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1296 1308              SDP_PAGES, SDP_OP32);
1297 1309  #elif defined(__i386)
1298 1310          set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1299 1311              SDP_PAGES, SDP_OP32);
1300 1312          set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1301 1313              SDP_PAGES, SDP_OP32);
1302 1314  #endif  /* __i386 */
1303 1315  }
1304 1316  
1305 1317  /*
1306 1318   * Enable interpositioning on the system call path by rewriting the
1307 1319   * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1308 1320   * the branded entry points.
1309 1321   */
1310 1322  void
1311 1323  brand_interpositioning_enable(void)
1312 1324  {
1313 1325          gate_desc_t     *idt = CPU->cpu_idt;
1314 1326          int             i;
1315 1327  
1316 1328          ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1317 1329  
1318 1330          for (i = 0; brand_tbl[i].ih_inum; i++) {
1319 1331                  idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1320 1332  #if defined(__xpv)
1321 1333                  xen_idt_write(&idt[brand_tbl[i].ih_inum],
1322 1334                      brand_tbl[i].ih_inum);
1323 1335  #endif
1324 1336          }
1325 1337  
1326 1338  #if defined(__amd64)
1327 1339  #if defined(__xpv)
1328 1340  
1329 1341          /*
1330 1342           * Currently the hypervisor only supports 64-bit syscalls via
1331 1343           * syscall instruction. The 32-bit syscalls are handled by
1332 1344           * interrupt gate above.
1333 1345           */
1334 1346          xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1335 1347              CALLBACKF_mask_events);
1336 1348  
1337 1349  #else
1338 1350  
1339 1351          if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1340 1352                  wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1341 1353                  wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1342 1354          }
1343 1355  
1344 1356  #endif
1345 1357  #endif  /* __amd64 */
1346 1358  
1347 1359          if (is_x86_feature(x86_featureset, X86FSET_SEP))
1348 1360                  wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1349 1361  }
1350 1362  
1351 1363  /*
1352 1364   * Disable interpositioning on the system call path by rewriting the
1353 1365   * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1354 1366   * the standard entry points, which bypass the interpositioning hooks.
1355 1367   */
1356 1368  void
1357 1369  brand_interpositioning_disable(void)
1358 1370  {
1359 1371          gate_desc_t     *idt = CPU->cpu_idt;
1360 1372          int i;
1361 1373  
1362 1374          ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1363 1375  
1364 1376          for (i = 0; brand_tbl[i].ih_inum; i++) {
1365 1377                  idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1366 1378  #if defined(__xpv)
1367 1379                  xen_idt_write(&idt[brand_tbl[i].ih_inum],
1368 1380                      brand_tbl[i].ih_inum);
1369 1381  #endif
1370 1382          }
1371 1383  
1372 1384  #if defined(__amd64)
1373 1385  #if defined(__xpv)
1374 1386  
1375 1387          /*
1376 1388           * See comment above in brand_interpositioning_enable.
1377 1389           */
1378 1390          xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1379 1391              CALLBACKF_mask_events);
1380 1392  
1381 1393  #else
1382 1394  
1383 1395          if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1384 1396                  wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1385 1397                  wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1386 1398          }
1387 1399  
1388 1400  #endif
1389 1401  #endif  /* __amd64 */
1390 1402  
1391 1403          if (is_x86_feature(x86_featureset, X86FSET_SEP))
1392 1404                  wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1393 1405  }
  
    | 
      ↓ open down ↓ | 
    365 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX