Print this page
    
NEX-13644 File access audit logging
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-15035 Allow user ACE in ACL to match SID in token extra SIDs (cleanup)
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-15035 Allow user ACE in ACL to match SID in token extra SIDs (cleanup)
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-15035 Allow user ACE in ACL to match SID in token extra SIDs (part 2)
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15035 Allow user ACE in ACL to match SID in token extra SIDs (part 2)
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15035 Allow user ACE in ACL to match SID in token extra SIDs
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-15035 Allow user ACE in ACL to match SID in token extra SIDs
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-10069 ZFS_READONLY is a little too strict
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
OS-158 zfs_zaccess_delete() comments do not accurately reflect delete permissions for ACLs
OS-40 zfs issues with inheritance flags during chmod(2) with aclmode=passthrough
OS-139 POSIX write should imply DELETE_CHILD on directories - and some additional considerations (fix lint)
OS-123 aclinherit=restricted masks inherited permissions by group perms (groupmask)
OS-139 POSIX write should imply DELETE_CHILD on directories - and some additional considerations
Fixup merge results
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
re #6815 rb1758 need WORM in nza-kernel (4.0)
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/zfs_acl.c
          +++ new/usr/src/uts/common/fs/zfs/zfs_acl.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2013 by Delphix. All rights reserved.
  24      - * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
       24 + * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/time.h>
  30   30  #include <sys/systm.h>
  31   31  #include <sys/sysmacros.h>
  32   32  #include <sys/resource.h>
  33   33  #include <sys/vfs.h>
  34   34  #include <sys/vnode.h>
  35   35  #include <sys/sid.h>
  36   36  #include <sys/file.h>
  37   37  #include <sys/stat.h>
  38   38  #include <sys/kmem.h>
  39   39  #include <sys/cmn_err.h>
  40   40  #include <sys/errno.h>
  41   41  #include <sys/unistd.h>
  42   42  #include <sys/sdt.h>
  43   43  #include <sys/fs/zfs.h>
  44   44  #include <sys/mode.h>
  45   45  #include <sys/policy.h>
  46   46  #include <sys/zfs_znode.h>
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  47   47  #include <sys/zfs_fuid.h>
  48   48  #include <sys/zfs_acl.h>
  49   49  #include <sys/zfs_dir.h>
  50   50  #include <sys/zfs_vfsops.h>
  51   51  #include <sys/dmu.h>
  52   52  #include <sys/dnode.h>
  53   53  #include <sys/zap.h>
  54   54  #include <sys/sa.h>
  55   55  #include "fs/fs_subr.h"
  56   56  #include <acl/acl_common.h>
       57 +#include <c2/audit.h>
       58 +#include <c2/audit_kernel.h>
  57   59  
  58   60  #define ALLOW   ACE_ACCESS_ALLOWED_ACE_TYPE
  59   61  #define DENY    ACE_ACCESS_DENIED_ACE_TYPE
  60   62  #define MAX_ACE_TYPE    ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
  61   63  #define MIN_ACE_TYPE    ALLOW
  62   64  
  63   65  #define OWNING_GROUP            (ACE_GROUP|ACE_IDENTIFIER_GROUP)
  64   66  #define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
  65   67      ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
  66   68  #define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
  67   69      ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
  68   70  #define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
  69   71      ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
  70   72  
  71   73  #define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
  72   74      ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
  73   75      ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
  74   76      ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)
  75   77  
  76   78  #define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
  77   79  #define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
  78   80      ACE_DELETE|ACE_DELETE_CHILD)
  79   81  #define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)
  80   82  
  81   83  #define OGE_CLEAR       (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
  82   84      ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
  83   85  
  84   86  #define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
  85   87      ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
  86   88  
  87   89  #define ALL_INHERIT     (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
  88   90      ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)
  89   91  
  90   92  #define RESTRICTED_CLEAR        (ACE_WRITE_ACL|ACE_WRITE_OWNER)
  91   93  
  92   94  #define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
  93   95      ZFS_ACL_PROTECTED)
  94   96  
  95   97  #define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
  96   98      ZFS_ACL_OBJ_ACE)
  97   99  
  98  100  #define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
  99  101  
 100  102  static uint16_t
 101  103  zfs_ace_v0_get_type(void *acep)
 102  104  {
 103  105          return (((zfs_oldace_t *)acep)->z_type);
 104  106  }
 105  107  
 106  108  static uint16_t
 107  109  zfs_ace_v0_get_flags(void *acep)
 108  110  {
 109  111          return (((zfs_oldace_t *)acep)->z_flags);
 110  112  }
 111  113  
 112  114  static uint32_t
 113  115  zfs_ace_v0_get_mask(void *acep)
 114  116  {
 115  117          return (((zfs_oldace_t *)acep)->z_access_mask);
 116  118  }
 117  119  
 118  120  static uint64_t
 119  121  zfs_ace_v0_get_who(void *acep)
 120  122  {
 121  123          return (((zfs_oldace_t *)acep)->z_fuid);
 122  124  }
 123  125  
 124  126  static void
 125  127  zfs_ace_v0_set_type(void *acep, uint16_t type)
 126  128  {
 127  129          ((zfs_oldace_t *)acep)->z_type = type;
 128  130  }
 129  131  
 130  132  static void
 131  133  zfs_ace_v0_set_flags(void *acep, uint16_t flags)
 132  134  {
 133  135          ((zfs_oldace_t *)acep)->z_flags = flags;
 134  136  }
 135  137  
 136  138  static void
 137  139  zfs_ace_v0_set_mask(void *acep, uint32_t mask)
 138  140  {
 139  141          ((zfs_oldace_t *)acep)->z_access_mask = mask;
 140  142  }
 141  143  
 142  144  static void
 143  145  zfs_ace_v0_set_who(void *acep, uint64_t who)
 144  146  {
 145  147          ((zfs_oldace_t *)acep)->z_fuid = who;
 146  148  }
 147  149  
 148  150  /*ARGSUSED*/
 149  151  static size_t
 150  152  zfs_ace_v0_size(void *acep)
 151  153  {
 152  154          return (sizeof (zfs_oldace_t));
 153  155  }
 154  156  
 155  157  static size_t
 156  158  zfs_ace_v0_abstract_size(void)
 157  159  {
 158  160          return (sizeof (zfs_oldace_t));
 159  161  }
 160  162  
 161  163  static int
 162  164  zfs_ace_v0_mask_off(void)
 163  165  {
 164  166          return (offsetof(zfs_oldace_t, z_access_mask));
 165  167  }
 166  168  
 167  169  /*ARGSUSED*/
 168  170  static int
 169  171  zfs_ace_v0_data(void *acep, void **datap)
 170  172  {
 171  173          *datap = NULL;
 172  174          return (0);
 173  175  }
 174  176  
 175  177  static acl_ops_t zfs_acl_v0_ops = {
 176  178          zfs_ace_v0_get_mask,
 177  179          zfs_ace_v0_set_mask,
 178  180          zfs_ace_v0_get_flags,
 179  181          zfs_ace_v0_set_flags,
 180  182          zfs_ace_v0_get_type,
 181  183          zfs_ace_v0_set_type,
 182  184          zfs_ace_v0_get_who,
 183  185          zfs_ace_v0_set_who,
 184  186          zfs_ace_v0_size,
 185  187          zfs_ace_v0_abstract_size,
 186  188          zfs_ace_v0_mask_off,
 187  189          zfs_ace_v0_data
 188  190  };
 189  191  
 190  192  static uint16_t
 191  193  zfs_ace_fuid_get_type(void *acep)
 192  194  {
 193  195          return (((zfs_ace_hdr_t *)acep)->z_type);
 194  196  }
 195  197  
 196  198  static uint16_t
 197  199  zfs_ace_fuid_get_flags(void *acep)
 198  200  {
 199  201          return (((zfs_ace_hdr_t *)acep)->z_flags);
 200  202  }
 201  203  
 202  204  static uint32_t
 203  205  zfs_ace_fuid_get_mask(void *acep)
 204  206  {
 205  207          return (((zfs_ace_hdr_t *)acep)->z_access_mask);
 206  208  }
 207  209  
 208  210  static uint64_t
 209  211  zfs_ace_fuid_get_who(void *args)
 210  212  {
 211  213          uint16_t entry_type;
 212  214          zfs_ace_t *acep = args;
 213  215  
 214  216          entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
 215  217  
 216  218          if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
 217  219              entry_type == ACE_EVERYONE)
 218  220                  return (-1);
 219  221          return (((zfs_ace_t *)acep)->z_fuid);
 220  222  }
 221  223  
 222  224  static void
 223  225  zfs_ace_fuid_set_type(void *acep, uint16_t type)
 224  226  {
 225  227          ((zfs_ace_hdr_t *)acep)->z_type = type;
 226  228  }
 227  229  
 228  230  static void
 229  231  zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
 230  232  {
 231  233          ((zfs_ace_hdr_t *)acep)->z_flags = flags;
 232  234  }
 233  235  
 234  236  static void
 235  237  zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
 236  238  {
 237  239          ((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
 238  240  }
 239  241  
 240  242  static void
 241  243  zfs_ace_fuid_set_who(void *arg, uint64_t who)
 242  244  {
 243  245          zfs_ace_t *acep = arg;
 244  246  
 245  247          uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
 246  248  
 247  249          if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
 248  250              entry_type == ACE_EVERYONE)
 249  251                  return;
 250  252          acep->z_fuid = who;
 251  253  }
 252  254  
 253  255  static size_t
 254  256  zfs_ace_fuid_size(void *acep)
 255  257  {
 256  258          zfs_ace_hdr_t *zacep = acep;
 257  259          uint16_t entry_type;
 258  260  
 259  261          switch (zacep->z_type) {
 260  262          case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 261  263          case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 262  264          case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 263  265          case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 264  266                  return (sizeof (zfs_object_ace_t));
 265  267          case ALLOW:
 266  268          case DENY:
 267  269                  entry_type =
 268  270                      (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
 269  271                  if (entry_type == ACE_OWNER ||
 270  272                      entry_type == OWNING_GROUP ||
 271  273                      entry_type == ACE_EVERYONE)
 272  274                          return (sizeof (zfs_ace_hdr_t));
 273  275                  /*FALLTHROUGH*/
 274  276          default:
 275  277                  return (sizeof (zfs_ace_t));
 276  278          }
 277  279  }
 278  280  
 279  281  static size_t
 280  282  zfs_ace_fuid_abstract_size(void)
 281  283  {
 282  284          return (sizeof (zfs_ace_hdr_t));
 283  285  }
 284  286  
 285  287  static int
 286  288  zfs_ace_fuid_mask_off(void)
 287  289  {
 288  290          return (offsetof(zfs_ace_hdr_t, z_access_mask));
 289  291  }
 290  292  
 291  293  static int
 292  294  zfs_ace_fuid_data(void *acep, void **datap)
 293  295  {
 294  296          zfs_ace_t *zacep = acep;
 295  297          zfs_object_ace_t *zobjp;
 296  298  
 297  299          switch (zacep->z_hdr.z_type) {
 298  300          case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 299  301          case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 300  302          case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 301  303          case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 302  304                  zobjp = acep;
 303  305                  *datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
 304  306                  return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
 305  307          default:
 306  308                  *datap = NULL;
 307  309                  return (0);
 308  310          }
 309  311  }
 310  312  
 311  313  static acl_ops_t zfs_acl_fuid_ops = {
 312  314          zfs_ace_fuid_get_mask,
 313  315          zfs_ace_fuid_set_mask,
 314  316          zfs_ace_fuid_get_flags,
 315  317          zfs_ace_fuid_set_flags,
 316  318          zfs_ace_fuid_get_type,
 317  319          zfs_ace_fuid_set_type,
 318  320          zfs_ace_fuid_get_who,
 319  321          zfs_ace_fuid_set_who,
 320  322          zfs_ace_fuid_size,
 321  323          zfs_ace_fuid_abstract_size,
 322  324          zfs_ace_fuid_mask_off,
 323  325          zfs_ace_fuid_data
 324  326  };
 325  327  
 326  328  /*
 327  329   * The following three functions are provided for compatibility with
 328  330   * older ZPL version in order to determine if the file use to have
 329  331   * an external ACL and what version of ACL previously existed on the
 330  332   * file.  Would really be nice to not need this, sigh.
 331  333   */
 332  334  uint64_t
 333  335  zfs_external_acl(znode_t *zp)
 334  336  {
 335  337          zfs_acl_phys_t acl_phys;
 336  338          int error;
 337  339  
 338  340          if (zp->z_is_sa)
 339  341                  return (0);
 340  342  
 341  343          /*
 342  344           * Need to deal with a potential
 343  345           * race where zfs_sa_upgrade could cause
 344  346           * z_isa_sa to change.
 345  347           *
 346  348           * If the lookup fails then the state of z_is_sa should have
 347  349           * changed.
 348  350           */
 349  351  
 350  352          if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
 351  353              &acl_phys, sizeof (acl_phys))) == 0)
 352  354                  return (acl_phys.z_acl_extern_obj);
 353  355          else {
 354  356                  /*
 355  357                   * after upgrade the SA_ZPL_ZNODE_ACL should have been
 356  358                   * removed
 357  359                   */
 358  360                  VERIFY(zp->z_is_sa && error == ENOENT);
 359  361                  return (0);
 360  362          }
 361  363  }
 362  364  
 363  365  /*
 364  366   * Determine size of ACL in bytes
 365  367   *
 366  368   * This is more complicated than it should be since we have to deal
 367  369   * with old external ACLs.
 368  370   */
 369  371  static int
 370  372  zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
 371  373      zfs_acl_phys_t *aclphys)
 372  374  {
 373  375          zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 374  376          uint64_t acl_count;
 375  377          int size;
 376  378          int error;
 377  379  
 378  380          ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 379  381          if (zp->z_is_sa) {
 380  382                  if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
 381  383                      &size)) != 0)
 382  384                          return (error);
 383  385                  *aclsize = size;
 384  386                  if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
 385  387                      &acl_count, sizeof (acl_count))) != 0)
 386  388                          return (error);
 387  389                  *aclcount = acl_count;
 388  390          } else {
 389  391                  if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
 390  392                      aclphys, sizeof (*aclphys))) != 0)
 391  393                          return (error);
 392  394  
 393  395                  if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
 394  396                          *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
 395  397                          *aclcount = aclphys->z_acl_size;
 396  398                  } else {
 397  399                          *aclsize = aclphys->z_acl_size;
 398  400                          *aclcount = aclphys->z_acl_count;
 399  401                  }
 400  402          }
 401  403          return (0);
 402  404  }
 403  405  
 404  406  int
 405  407  zfs_znode_acl_version(znode_t *zp)
 406  408  {
 407  409          zfs_acl_phys_t acl_phys;
 408  410  
 409  411          if (zp->z_is_sa)
 410  412                  return (ZFS_ACL_VERSION_FUID);
 411  413          else {
 412  414                  int error;
 413  415  
 414  416                  /*
 415  417                   * Need to deal with a potential
 416  418                   * race where zfs_sa_upgrade could cause
 417  419                   * z_isa_sa to change.
 418  420                   *
 419  421                   * If the lookup fails then the state of z_is_sa should have
 420  422                   * changed.
 421  423                   */
 422  424                  if ((error = sa_lookup(zp->z_sa_hdl,
 423  425                      SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
 424  426                      &acl_phys, sizeof (acl_phys))) == 0)
 425  427                          return (acl_phys.z_acl_version);
 426  428                  else {
 427  429                          /*
 428  430                           * After upgrade SA_ZPL_ZNODE_ACL should have
 429  431                           * been removed.
 430  432                           */
 431  433                          VERIFY(zp->z_is_sa && error == ENOENT);
 432  434                          return (ZFS_ACL_VERSION_FUID);
 433  435                  }
 434  436          }
 435  437  }
 436  438  
 437  439  static int
 438  440  zfs_acl_version(int version)
 439  441  {
 440  442          if (version < ZPL_VERSION_FUID)
 441  443                  return (ZFS_ACL_VERSION_INITIAL);
 442  444          else
 443  445                  return (ZFS_ACL_VERSION_FUID);
 444  446  }
 445  447  
 446  448  static int
 447  449  zfs_acl_version_zp(znode_t *zp)
 448  450  {
 449  451          return (zfs_acl_version(zp->z_zfsvfs->z_version));
 450  452  }
 451  453  
 452  454  zfs_acl_t *
 453  455  zfs_acl_alloc(int vers)
 454  456  {
 455  457          zfs_acl_t *aclp;
 456  458  
 457  459          aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
 458  460          list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
 459  461              offsetof(zfs_acl_node_t, z_next));
 460  462          aclp->z_version = vers;
 461  463          if (vers == ZFS_ACL_VERSION_FUID)
 462  464                  aclp->z_ops = zfs_acl_fuid_ops;
 463  465          else
 464  466                  aclp->z_ops = zfs_acl_v0_ops;
 465  467          return (aclp);
 466  468  }
 467  469  
 468  470  zfs_acl_node_t *
 469  471  zfs_acl_node_alloc(size_t bytes)
 470  472  {
 471  473          zfs_acl_node_t *aclnode;
 472  474  
 473  475          aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
 474  476          if (bytes) {
 475  477                  aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP);
 476  478                  aclnode->z_allocdata = aclnode->z_acldata;
 477  479                  aclnode->z_allocsize = bytes;
 478  480                  aclnode->z_size = bytes;
 479  481          }
 480  482  
 481  483          return (aclnode);
 482  484  }
 483  485  
 484  486  static void
 485  487  zfs_acl_node_free(zfs_acl_node_t *aclnode)
 486  488  {
 487  489          if (aclnode->z_allocsize)
 488  490                  kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
 489  491          kmem_free(aclnode, sizeof (zfs_acl_node_t));
 490  492  }
 491  493  
 492  494  static void
 493  495  zfs_acl_release_nodes(zfs_acl_t *aclp)
 494  496  {
 495  497          zfs_acl_node_t *aclnode;
 496  498  
 497  499          while (aclnode = list_head(&aclp->z_acl)) {
 498  500                  list_remove(&aclp->z_acl, aclnode);
 499  501                  zfs_acl_node_free(aclnode);
 500  502          }
 501  503          aclp->z_acl_count = 0;
 502  504          aclp->z_acl_bytes = 0;
 503  505  }
 504  506  
 505  507  void
 506  508  zfs_acl_free(zfs_acl_t *aclp)
 507  509  {
 508  510          zfs_acl_release_nodes(aclp);
 509  511          list_destroy(&aclp->z_acl);
 510  512          kmem_free(aclp, sizeof (zfs_acl_t));
 511  513  }
 512  514  
 513  515  static boolean_t
 514  516  zfs_acl_valid_ace_type(uint_t type, uint_t flags)
 515  517  {
 516  518          uint16_t entry_type;
 517  519  
 518  520          switch (type) {
 519  521          case ALLOW:
 520  522          case DENY:
 521  523          case ACE_SYSTEM_AUDIT_ACE_TYPE:
 522  524          case ACE_SYSTEM_ALARM_ACE_TYPE:
 523  525                  entry_type = flags & ACE_TYPE_FLAGS;
 524  526                  return (entry_type == ACE_OWNER ||
 525  527                      entry_type == OWNING_GROUP ||
 526  528                      entry_type == ACE_EVERYONE || entry_type == 0 ||
 527  529                      entry_type == ACE_IDENTIFIER_GROUP);
 528  530          default:
 529  531                  if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
 530  532                          return (B_TRUE);
 531  533          }
 532  534          return (B_FALSE);
 533  535  }
 534  536  
 535  537  static boolean_t
 536  538  zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
 537  539  {
 538  540          /*
 539  541           * first check type of entry
 540  542           */
 541  543  
 542  544          if (!zfs_acl_valid_ace_type(type, iflags))
 543  545                  return (B_FALSE);
 544  546  
 545  547          switch (type) {
 546  548          case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 547  549          case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 548  550          case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 549  551          case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 550  552                  if (aclp->z_version < ZFS_ACL_VERSION_FUID)
 551  553                          return (B_FALSE);
 552  554                  aclp->z_hints |= ZFS_ACL_OBJ_ACE;
 553  555          }
 554  556  
 555  557          /*
 556  558           * next check inheritance level flags
 557  559           */
 558  560  
 559  561          if (obj_type == VDIR &&
 560  562              (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
 561  563                  aclp->z_hints |= ZFS_INHERIT_ACE;
 562  564  
 563  565          if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
 564  566                  if ((iflags & (ACE_FILE_INHERIT_ACE|
 565  567                      ACE_DIRECTORY_INHERIT_ACE)) == 0) {
 566  568                          return (B_FALSE);
 567  569                  }
 568  570          }
 569  571  
 570  572          return (B_TRUE);
 571  573  }
 572  574  
 573  575  static void *
 574  576  zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
 575  577      uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
 576  578  {
 577  579          zfs_acl_node_t *aclnode;
 578  580  
 579  581          ASSERT(aclp);
 580  582  
 581  583          if (start == NULL) {
 582  584                  aclnode = list_head(&aclp->z_acl);
 583  585                  if (aclnode == NULL)
 584  586                          return (NULL);
 585  587  
 586  588                  aclp->z_next_ace = aclnode->z_acldata;
 587  589                  aclp->z_curr_node = aclnode;
 588  590                  aclnode->z_ace_idx = 0;
 589  591          }
 590  592  
 591  593          aclnode = aclp->z_curr_node;
 592  594  
 593  595          if (aclnode == NULL)
 594  596                  return (NULL);
 595  597  
 596  598          if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
 597  599                  aclnode = list_next(&aclp->z_acl, aclnode);
 598  600                  if (aclnode == NULL)
 599  601                          return (NULL);
 600  602                  else {
 601  603                          aclp->z_curr_node = aclnode;
 602  604                          aclnode->z_ace_idx = 0;
 603  605                          aclp->z_next_ace = aclnode->z_acldata;
 604  606                  }
 605  607          }
 606  608  
 607  609          if (aclnode->z_ace_idx < aclnode->z_ace_count) {
 608  610                  void *acep = aclp->z_next_ace;
 609  611                  size_t ace_size;
 610  612  
 611  613                  /*
 612  614                   * Make sure we don't overstep our bounds
 613  615                   */
 614  616                  ace_size = aclp->z_ops.ace_size(acep);
 615  617  
 616  618                  if (((caddr_t)acep + ace_size) >
 617  619                      ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
 618  620                          return (NULL);
 619  621                  }
 620  622  
 621  623                  *iflags = aclp->z_ops.ace_flags_get(acep);
 622  624                  *type = aclp->z_ops.ace_type_get(acep);
 623  625                  *access_mask = aclp->z_ops.ace_mask_get(acep);
 624  626                  *who = aclp->z_ops.ace_who_get(acep);
 625  627                  aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
 626  628                  aclnode->z_ace_idx++;
 627  629  
 628  630                  return ((void *)acep);
 629  631          }
 630  632          return (NULL);
 631  633  }
 632  634  
 633  635  /*ARGSUSED*/
 634  636  static uint64_t
 635  637  zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
 636  638      uint16_t *flags, uint16_t *type, uint32_t *mask)
 637  639  {
 638  640          zfs_acl_t *aclp = datap;
 639  641          zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
 640  642          uint64_t who;
 641  643  
 642  644          acep = zfs_acl_next_ace(aclp, acep, &who, mask,
 643  645              flags, type);
 644  646          return ((uint64_t)(uintptr_t)acep);
 645  647  }
 646  648  
 647  649  static zfs_acl_node_t *
 648  650  zfs_acl_curr_node(zfs_acl_t *aclp)
 649  651  {
 650  652          ASSERT(aclp->z_curr_node);
 651  653          return (aclp->z_curr_node);
 652  654  }
 653  655  
 654  656  /*
 655  657   * Copy ACE to internal ZFS format.
 656  658   * While processing the ACL each ACE will be validated for correctness.
 657  659   * ACE FUIDs will be created later.
 658  660   */
 659  661  int
 660  662  zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
 661  663      void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
 662  664      zfs_fuid_info_t **fuidp, cred_t *cr)
 663  665  {
 664  666          int i;
 665  667          uint16_t entry_type;
 666  668          zfs_ace_t *aceptr = z_acl;
 667  669          ace_t *acep = datap;
 668  670          zfs_object_ace_t *zobjacep;
 669  671          ace_object_t *aceobjp;
 670  672  
 671  673          for (i = 0; i != aclcnt; i++) {
 672  674                  aceptr->z_hdr.z_access_mask = acep->a_access_mask;
 673  675                  aceptr->z_hdr.z_flags = acep->a_flags;
 674  676                  aceptr->z_hdr.z_type = acep->a_type;
 675  677                  entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
 676  678                  if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
 677  679                      entry_type != ACE_EVERYONE) {
 678  680                          aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
 679  681                              cr, (entry_type == 0) ?
 680  682                              ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
 681  683                  }
 682  684  
 683  685                  /*
 684  686                   * Make sure ACE is valid
 685  687                   */
 686  688                  if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
 687  689                      aceptr->z_hdr.z_flags) != B_TRUE)
 688  690                          return (SET_ERROR(EINVAL));
 689  691  
 690  692                  switch (acep->a_type) {
 691  693                  case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 692  694                  case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 693  695                  case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 694  696                  case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 695  697                          zobjacep = (zfs_object_ace_t *)aceptr;
 696  698                          aceobjp = (ace_object_t *)acep;
 697  699  
 698  700                          bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
 699  701                              sizeof (aceobjp->a_obj_type));
 700  702                          bcopy(aceobjp->a_inherit_obj_type,
 701  703                              zobjacep->z_inherit_type,
 702  704                              sizeof (aceobjp->a_inherit_obj_type));
 703  705                          acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
 704  706                          break;
 705  707                  default:
 706  708                          acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
 707  709                  }
 708  710  
 709  711                  aceptr = (zfs_ace_t *)((caddr_t)aceptr +
 710  712                      aclp->z_ops.ace_size(aceptr));
 711  713          }
 712  714  
 713  715          *size = (caddr_t)aceptr - (caddr_t)z_acl;
 714  716  
 715  717          return (0);
 716  718  }
 717  719  
 718  720  /*
 719  721   * Copy ZFS ACEs to fixed size ace_t layout
 720  722   */
 721  723  static void
 722  724  zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
 723  725      void *datap, int filter)
 724  726  {
 725  727          uint64_t who;
 726  728          uint32_t access_mask;
 727  729          uint16_t iflags, type;
 728  730          zfs_ace_hdr_t *zacep = NULL;
 729  731          ace_t *acep = datap;
 730  732          ace_object_t *objacep;
 731  733          zfs_object_ace_t *zobjacep;
 732  734          size_t ace_size;
 733  735          uint16_t entry_type;
 734  736  
 735  737          while (zacep = zfs_acl_next_ace(aclp, zacep,
 736  738              &who, &access_mask, &iflags, &type)) {
 737  739  
 738  740                  switch (type) {
 739  741                  case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 740  742                  case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 741  743                  case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 742  744                  case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 743  745                          if (filter) {
 744  746                                  continue;
 745  747                          }
 746  748                          zobjacep = (zfs_object_ace_t *)zacep;
 747  749                          objacep = (ace_object_t *)acep;
 748  750                          bcopy(zobjacep->z_object_type,
 749  751                              objacep->a_obj_type,
 750  752                              sizeof (zobjacep->z_object_type));
 751  753                          bcopy(zobjacep->z_inherit_type,
 752  754                              objacep->a_inherit_obj_type,
 753  755                              sizeof (zobjacep->z_inherit_type));
 754  756                          ace_size = sizeof (ace_object_t);
 755  757                          break;
 756  758                  default:
 757  759                          ace_size = sizeof (ace_t);
 758  760                          break;
 759  761                  }
 760  762  
 761  763                  entry_type = (iflags & ACE_TYPE_FLAGS);
 762  764                  if ((entry_type != ACE_OWNER &&
 763  765                      entry_type != OWNING_GROUP &&
 764  766                      entry_type != ACE_EVERYONE)) {
 765  767                          acep->a_who = zfs_fuid_map_id(zfsvfs, who,
 766  768                              cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
 767  769                              ZFS_ACE_GROUP : ZFS_ACE_USER);
 768  770                  } else {
 769  771                          acep->a_who = (uid_t)(int64_t)who;
 770  772                  }
 771  773                  acep->a_access_mask = access_mask;
 772  774                  acep->a_flags = iflags;
 773  775                  acep->a_type = type;
 774  776                  acep = (ace_t *)((caddr_t)acep + ace_size);
 775  777          }
 776  778  }
 777  779  
 778  780  static int
 779  781  zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
 780  782      zfs_oldace_t *z_acl, int aclcnt, size_t *size)
 781  783  {
 782  784          int i;
 783  785          zfs_oldace_t *aceptr = z_acl;
 784  786  
 785  787          for (i = 0; i != aclcnt; i++, aceptr++) {
 786  788                  aceptr->z_access_mask = acep[i].a_access_mask;
 787  789                  aceptr->z_type = acep[i].a_type;
 788  790                  aceptr->z_flags = acep[i].a_flags;
 789  791                  aceptr->z_fuid = acep[i].a_who;
 790  792                  /*
 791  793                   * Make sure ACE is valid
 792  794                   */
 793  795                  if (zfs_ace_valid(obj_type, aclp, aceptr->z_type,
 794  796                      aceptr->z_flags) != B_TRUE)
 795  797                          return (SET_ERROR(EINVAL));
 796  798          }
 797  799          *size = (caddr_t)aceptr - (caddr_t)z_acl;
 798  800          return (0);
 799  801  }
 800  802  
 801  803  /*
 802  804   * convert old ACL format to new
 803  805   */
 804  806  void
 805  807  zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
 806  808  {
 807  809          zfs_oldace_t *oldaclp;
 808  810          int i;
 809  811          uint16_t type, iflags;
 810  812          uint32_t access_mask;
 811  813          uint64_t who;
 812  814          void *cookie = NULL;
 813  815          zfs_acl_node_t *newaclnode;
 814  816  
 815  817          ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
 816  818          /*
 817  819           * First create the ACE in a contiguous piece of memory
 818  820           * for zfs_copy_ace_2_fuid().
 819  821           *
 820  822           * We only convert an ACL once, so this won't happen
 821  823           * everytime.
 822  824           */
 823  825          oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
 824  826              KM_SLEEP);
 825  827          i = 0;
 826  828          while (cookie = zfs_acl_next_ace(aclp, cookie, &who,
 827  829              &access_mask, &iflags, &type)) {
 828  830                  oldaclp[i].z_flags = iflags;
 829  831                  oldaclp[i].z_type = type;
 830  832                  oldaclp[i].z_fuid = who;
 831  833                  oldaclp[i++].z_access_mask = access_mask;
 832  834          }
 833  835  
 834  836          newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
 835  837              sizeof (zfs_object_ace_t));
 836  838          aclp->z_ops = zfs_acl_fuid_ops;
 837  839          VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
 838  840              oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
 839  841              &newaclnode->z_size, NULL, cr) == 0);
 840  842          newaclnode->z_ace_count = aclp->z_acl_count;
 841  843          aclp->z_version = ZFS_ACL_VERSION;
 842  844          kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
 843  845  
 844  846          /*
 845  847           * Release all previous ACL nodes
 846  848           */
 847  849  
 848  850          zfs_acl_release_nodes(aclp);
 849  851  
 850  852          list_insert_head(&aclp->z_acl, newaclnode);
 851  853  
 852  854          aclp->z_acl_bytes = newaclnode->z_size;
 853  855          aclp->z_acl_count = newaclnode->z_ace_count;
 854  856  
 855  857  }
 856  858  
 857  859  /*
 858  860   * Convert unix access mask to v4 access mask
 859  861   */
 860  862  static uint32_t
 861  863  zfs_unix_to_v4(uint32_t access_mask)
 862  864  {
 863  865          uint32_t new_mask = 0;
 864  866  
 865  867          if (access_mask & S_IXOTH)
 866  868                  new_mask |= ACE_EXECUTE;
 867  869          if (access_mask & S_IWOTH)
 868  870                  new_mask |= ACE_WRITE_DATA;
 869  871          if (access_mask & S_IROTH)
 870  872                  new_mask |= ACE_READ_DATA;
 871  873          return (new_mask);
 872  874  }
 873  875  
 874  876  static void
 875  877  zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
 876  878      uint16_t access_type, uint64_t fuid, uint16_t entry_type)
 877  879  {
 878  880          uint16_t type = entry_type & ACE_TYPE_FLAGS;
 879  881  
 880  882          aclp->z_ops.ace_mask_set(acep, access_mask);
 881  883          aclp->z_ops.ace_type_set(acep, access_type);
 882  884          aclp->z_ops.ace_flags_set(acep, entry_type);
 883  885          if ((type != ACE_OWNER && type != OWNING_GROUP &&
 884  886              type != ACE_EVERYONE))
 885  887                  aclp->z_ops.ace_who_set(acep, fuid);
 886  888  }
 887  889  
 888  890  /*
 889  891   * Determine mode of file based on ACL.
 890  892   */
 891  893  uint64_t
 892  894  zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
 893  895      uint64_t *pflags, uint64_t fuid, uint64_t fgid)
 894  896  {
 895  897          int             entry_type;
 896  898          mode_t          mode;
 897  899          mode_t          seen = 0;
 898  900          zfs_ace_hdr_t   *acep = NULL;
 899  901          uint64_t        who;
 900  902          uint16_t        iflags, type;
 901  903          uint32_t        access_mask;
 902  904          boolean_t       an_exec_denied = B_FALSE;
 903  905  
 904  906          mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
 905  907  
 906  908          while (acep = zfs_acl_next_ace(aclp, acep, &who,
 907  909              &access_mask, &iflags, &type)) {
 908  910  
 909  911                  if (!zfs_acl_valid_ace_type(type, iflags))
 910  912                          continue;
 911  913  
 912  914                  entry_type = (iflags & ACE_TYPE_FLAGS);
 913  915  
 914  916                  /*
 915  917                   * Skip over any inherit_only ACEs
 916  918                   */
 917  919                  if (iflags & ACE_INHERIT_ONLY_ACE)
 918  920                          continue;
 919  921  
 920  922                  if (entry_type == ACE_OWNER || (entry_type == 0 &&
 921  923                      who == fuid)) {
 922  924                          if ((access_mask & ACE_READ_DATA) &&
 923  925                              (!(seen & S_IRUSR))) {
 924  926                                  seen |= S_IRUSR;
 925  927                                  if (type == ALLOW) {
 926  928                                          mode |= S_IRUSR;
 927  929                                  }
 928  930                          }
 929  931                          if ((access_mask & ACE_WRITE_DATA) &&
 930  932                              (!(seen & S_IWUSR))) {
 931  933                                  seen |= S_IWUSR;
 932  934                                  if (type == ALLOW) {
 933  935                                          mode |= S_IWUSR;
 934  936                                  }
 935  937                          }
 936  938                          if ((access_mask & ACE_EXECUTE) &&
 937  939                              (!(seen & S_IXUSR))) {
 938  940                                  seen |= S_IXUSR;
 939  941                                  if (type == ALLOW) {
 940  942                                          mode |= S_IXUSR;
 941  943                                  }
 942  944                          }
 943  945                  } else if (entry_type == OWNING_GROUP ||
 944  946                      (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
 945  947                          if ((access_mask & ACE_READ_DATA) &&
 946  948                              (!(seen & S_IRGRP))) {
 947  949                                  seen |= S_IRGRP;
 948  950                                  if (type == ALLOW) {
 949  951                                          mode |= S_IRGRP;
 950  952                                  }
 951  953                          }
 952  954                          if ((access_mask & ACE_WRITE_DATA) &&
 953  955                              (!(seen & S_IWGRP))) {
 954  956                                  seen |= S_IWGRP;
 955  957                                  if (type == ALLOW) {
 956  958                                          mode |= S_IWGRP;
 957  959                                  }
 958  960                          }
 959  961                          if ((access_mask & ACE_EXECUTE) &&
 960  962                              (!(seen & S_IXGRP))) {
 961  963                                  seen |= S_IXGRP;
 962  964                                  if (type == ALLOW) {
 963  965                                          mode |= S_IXGRP;
 964  966                                  }
 965  967                          }
 966  968                  } else if (entry_type == ACE_EVERYONE) {
 967  969                          if ((access_mask & ACE_READ_DATA)) {
 968  970                                  if (!(seen & S_IRUSR)) {
 969  971                                          seen |= S_IRUSR;
 970  972                                          if (type == ALLOW) {
 971  973                                                  mode |= S_IRUSR;
 972  974                                          }
 973  975                                  }
 974  976                                  if (!(seen & S_IRGRP)) {
 975  977                                          seen |= S_IRGRP;
 976  978                                          if (type == ALLOW) {
 977  979                                                  mode |= S_IRGRP;
 978  980                                          }
 979  981                                  }
 980  982                                  if (!(seen & S_IROTH)) {
 981  983                                          seen |= S_IROTH;
 982  984                                          if (type == ALLOW) {
 983  985                                                  mode |= S_IROTH;
 984  986                                          }
 985  987                                  }
 986  988                          }
 987  989                          if ((access_mask & ACE_WRITE_DATA)) {
 988  990                                  if (!(seen & S_IWUSR)) {
 989  991                                          seen |= S_IWUSR;
 990  992                                          if (type == ALLOW) {
 991  993                                                  mode |= S_IWUSR;
 992  994                                          }
 993  995                                  }
 994  996                                  if (!(seen & S_IWGRP)) {
 995  997                                          seen |= S_IWGRP;
 996  998                                          if (type == ALLOW) {
 997  999                                                  mode |= S_IWGRP;
 998 1000                                          }
 999 1001                                  }
1000 1002                                  if (!(seen & S_IWOTH)) {
1001 1003                                          seen |= S_IWOTH;
1002 1004                                          if (type == ALLOW) {
1003 1005                                                  mode |= S_IWOTH;
1004 1006                                          }
1005 1007                                  }
1006 1008                          }
1007 1009                          if ((access_mask & ACE_EXECUTE)) {
1008 1010                                  if (!(seen & S_IXUSR)) {
1009 1011                                          seen |= S_IXUSR;
1010 1012                                          if (type == ALLOW) {
1011 1013                                                  mode |= S_IXUSR;
1012 1014                                          }
1013 1015                                  }
1014 1016                                  if (!(seen & S_IXGRP)) {
1015 1017                                          seen |= S_IXGRP;
1016 1018                                          if (type == ALLOW) {
1017 1019                                                  mode |= S_IXGRP;
1018 1020                                          }
1019 1021                                  }
1020 1022                                  if (!(seen & S_IXOTH)) {
1021 1023                                          seen |= S_IXOTH;
1022 1024                                          if (type == ALLOW) {
1023 1025                                                  mode |= S_IXOTH;
1024 1026                                          }
1025 1027                                  }
1026 1028                          }
1027 1029                  } else {
1028 1030                          /*
1029 1031                           * Only care if this IDENTIFIER_GROUP or
1030 1032                           * USER ACE denies execute access to someone,
1031 1033                           * mode is not affected
1032 1034                           */
1033 1035                          if ((access_mask & ACE_EXECUTE) && type == DENY)
1034 1036                                  an_exec_denied = B_TRUE;
1035 1037                  }
1036 1038          }
1037 1039  
1038 1040          /*
1039 1041           * Failure to allow is effectively a deny, so execute permission
1040 1042           * is denied if it was never mentioned or if we explicitly
1041 1043           * weren't allowed it.
1042 1044           */
1043 1045          if (!an_exec_denied &&
1044 1046              ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
1045 1047              (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
1046 1048                  an_exec_denied = B_TRUE;
1047 1049  
1048 1050          if (an_exec_denied)
1049 1051                  *pflags &= ~ZFS_NO_EXECS_DENIED;
1050 1052          else
1051 1053                  *pflags |= ZFS_NO_EXECS_DENIED;
1052 1054  
1053 1055          return (mode);
1054 1056  }
1055 1057  
1056 1058  /*
1057 1059   * Read an external acl object.  If the intent is to modify, always
1058 1060   * create a new acl and leave any cached acl in place.
1059 1061   */
1060 1062  static int
1061 1063  zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp,
1062 1064      boolean_t will_modify)
1063 1065  {
1064 1066          zfs_acl_t       *aclp;
1065 1067          int             aclsize;
1066 1068          int             acl_count;
1067 1069          zfs_acl_node_t  *aclnode;
1068 1070          zfs_acl_phys_t  znode_acl;
1069 1071          int             version;
1070 1072          int             error;
1071 1073          boolean_t       drop_lock = B_FALSE;
1072 1074  
1073 1075          ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1074 1076  
1075 1077          if (zp->z_acl_cached && !will_modify) {
1076 1078                  *aclpp = zp->z_acl_cached;
1077 1079                  return (0);
1078 1080          }
1079 1081  
1080 1082          /*
1081 1083           * close race where znode could be upgrade while trying to
1082 1084           * read the znode attributes.
1083 1085           *
1084 1086           * But this could only happen if the file isn't already an SA
1085 1087           * znode
1086 1088           */
1087 1089          if (!zp->z_is_sa && !have_lock) {
1088 1090                  mutex_enter(&zp->z_lock);
1089 1091                  drop_lock = B_TRUE;
1090 1092          }
1091 1093          version = zfs_znode_acl_version(zp);
1092 1094  
1093 1095          if ((error = zfs_acl_znode_info(zp, &aclsize,
1094 1096              &acl_count, &znode_acl)) != 0) {
1095 1097                  goto done;
1096 1098          }
1097 1099  
1098 1100          aclp = zfs_acl_alloc(version);
1099 1101  
1100 1102          aclp->z_acl_count = acl_count;
1101 1103          aclp->z_acl_bytes = aclsize;
1102 1104  
1103 1105          aclnode = zfs_acl_node_alloc(aclsize);
1104 1106          aclnode->z_ace_count = aclp->z_acl_count;
1105 1107          aclnode->z_size = aclsize;
1106 1108  
1107 1109          if (!zp->z_is_sa) {
1108 1110                  if (znode_acl.z_acl_extern_obj) {
1109 1111                          error = dmu_read(zp->z_zfsvfs->z_os,
1110 1112                              znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
1111 1113                              aclnode->z_acldata, DMU_READ_PREFETCH);
1112 1114                  } else {
1113 1115                          bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
1114 1116                              aclnode->z_size);
1115 1117                  }
1116 1118          } else {
1117 1119                  error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
1118 1120                      aclnode->z_acldata, aclnode->z_size);
1119 1121          }
1120 1122  
1121 1123          if (error != 0) {
1122 1124                  zfs_acl_free(aclp);
1123 1125                  zfs_acl_node_free(aclnode);
1124 1126                  /* convert checksum errors into IO errors */
1125 1127                  if (error == ECKSUM)
1126 1128                          error = SET_ERROR(EIO);
1127 1129                  goto done;
1128 1130          }
1129 1131  
1130 1132          list_insert_head(&aclp->z_acl, aclnode);
1131 1133  
1132 1134          *aclpp = aclp;
1133 1135          if (!will_modify)
1134 1136                  zp->z_acl_cached = aclp;
1135 1137  done:
1136 1138          if (drop_lock)
1137 1139                  mutex_exit(&zp->z_lock);
1138 1140          return (error);
1139 1141  }
1140 1142  
1141 1143  /*ARGSUSED*/
1142 1144  void
1143 1145  zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
1144 1146      boolean_t start, void *userdata)
1145 1147  {
1146 1148          zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
1147 1149  
1148 1150          if (start) {
1149 1151                  cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
1150 1152          } else {
1151 1153                  cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
1152 1154                      cb->cb_acl_node);
1153 1155          }
1154 1156          *dataptr = cb->cb_acl_node->z_acldata;
1155 1157          *length = cb->cb_acl_node->z_size;
1156 1158  }
1157 1159  
1158 1160  int
1159 1161  zfs_acl_chown_setattr(znode_t *zp)
1160 1162  {
1161 1163          int error;
1162 1164          zfs_acl_t *aclp;
1163 1165  
1164 1166          ASSERT(MUTEX_HELD(&zp->z_lock));
1165 1167          ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1166 1168  
1167 1169          if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0)
1168 1170                  zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
1169 1171                      &zp->z_pflags, zp->z_uid, zp->z_gid);
1170 1172          return (error);
1171 1173  }
1172 1174  
/*
 * Common code for setting ACLs.
 *
 * Stores aclp as the ACL of zp within transaction tx:
 *  - recomputes z_mode (and ZFS_NO_EXECS_DENIED) from the new ACL;
 *  - discards any ACL cached on the znode;
 *  - converts an initial-version ACL to the FUID format when the
 *    filesystem uses FUIDs;
 *  - writes the ACL either as SA attributes (SA znodes) or in the legacy
 *    znode ACL header, using an external object when the ACL is larger
 *    than ZFS_ACE_SPACE;
 *  - replaces the ACL-wide bits of z_pflags with aclp->z_hints and sets
 *    ZFS_ACL_TRIVIAL when the ACL is trivial;
 *  - updates ctime and commits everything with one sa_bulk_update().
 *
 * Returns 0 on success, or the error from sa_lookup(), dmu_object_free()
 * or sa_bulk_update().  Note that z_mode and the cached ACL have already
 * been changed by the time one of the early error returns is taken.
 */
int
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
{
        int                     error;
        zfsvfs_t                *zfsvfs = zp->z_zfsvfs;
        dmu_object_type_t       otype;
        zfs_acl_locator_cb_t    locate = { 0 };
        uint64_t                mode;
        /* mode, flags, ctime plus at most two ACL attributes (SA case) */
        sa_bulk_attr_t          bulk[5];
        uint64_t                ctime[2];
        int                     count = 0;

        mode = zp->z_mode;

        mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
            zp->z_uid, zp->z_gid);

        zp->z_mode = mode;
        /*
         * The bulk entries reference z_pflags and ctime by address; both
         * are still modified below, before sa_bulk_update() is called.
         */
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
            &mode, sizeof (mode));
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
            &zp->z_pflags, sizeof (zp->z_pflags));
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
            &ctime, sizeof (ctime));

        /* The cached ACL is about to become stale. */
        if (zp->z_acl_cached) {
                zfs_acl_free(zp->z_acl_cached);
                zp->z_acl_cached = NULL;
        }

        /*
         * Upgrade needed?
         */
        if (!zfsvfs->z_use_fuids) {
                otype = DMU_OT_OLDACL;
        } else {
                if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
                    (zfsvfs->z_version >= ZPL_VERSION_FUID))
                        zfs_acl_xform(zp, aclp, cr);
                ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
                otype = DMU_OT_ACL;
        }

        /*
         * Arrgh, we have to handle old on disk format
         * as well as newer (preferred) SA format.
         */

        if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
                locate.cb_aclp = aclp;
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
                    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
                    NULL, &aclp->z_acl_count, sizeof (uint64_t));
        } else { /* Painful legacy way */
                zfs_acl_node_t *aclnode;
                uint64_t off = 0;
                zfs_acl_phys_t acl_phys;
                uint64_t aoid;

                /* Start from the ACL header currently stored on disk. */
                if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
                    &acl_phys, sizeof (acl_phys))) != 0)
                        return (error);

                aoid = acl_phys.z_acl_extern_obj;

                if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
                        /*
                         * If ACL was previously external and we are now
                         * converting to new ACL format then release old
                         * ACL object and create a new one.
                         */
                        if (aoid &&
                            aclp->z_version != acl_phys.z_acl_version) {
                                error = dmu_object_free(zfsvfs->z_os, aoid, tx);
                                if (error)
                                        return (error);
                                aoid = 0;
                        }
                        if (aoid == 0) {
                                aoid = dmu_object_alloc(zfsvfs->z_os,
                                    otype, aclp->z_acl_bytes,
                                    otype == DMU_OT_ACL ?
                                    DMU_OT_SYSACL : DMU_OT_NONE,
                                    otype == DMU_OT_ACL ?
                                    DN_MAX_BONUSLEN : 0, tx);
                        } else {
                                /* Reuse the object; result is ignored. */
                                (void) dmu_object_set_blocksize(zfsvfs->z_os,
                                    aoid, aclp->z_acl_bytes, 0, tx);
                        }
                        acl_phys.z_acl_extern_obj = aoid;
                        /* Write the non-empty nodes back to back. */
                        for (aclnode = list_head(&aclp->z_acl); aclnode;
                            aclnode = list_next(&aclp->z_acl, aclnode)) {
                                if (aclnode->z_ace_count == 0)
                                        continue;
                                dmu_write(zfsvfs->z_os, aoid, off,
                                    aclnode->z_size, aclnode->z_acldata, tx);
                                off += aclnode->z_size;
                        }
                } else {
                        void *start = acl_phys.z_ace_data;
                        /*
                         * Migrating back embedded?
                         */
                        if (acl_phys.z_acl_extern_obj) {
                                error = dmu_object_free(zfsvfs->z_os,
                                    acl_phys.z_acl_extern_obj, tx);
                                if (error)
                                        return (error);
                                acl_phys.z_acl_extern_obj = 0;
                        }

                        /* Pack the non-empty nodes into the header. */
                        for (aclnode = list_head(&aclp->z_acl); aclnode;
                            aclnode = list_next(&aclp->z_acl, aclnode)) {
                                if (aclnode->z_ace_count == 0)
                                        continue;
                                bcopy(aclnode->z_acldata, start,
                                    aclnode->z_size);
                                start = (caddr_t)start + aclnode->z_size;
                        }
                }
                /*
                 * If Old version then swap count/bytes to match old
                 * layout of znode_acl_phys_t.
                 */
                if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
                        acl_phys.z_acl_size = aclp->z_acl_count;
                        acl_phys.z_acl_count = aclp->z_acl_bytes;
                } else {
                        acl_phys.z_acl_size = aclp->z_acl_bytes;
                        acl_phys.z_acl_count = aclp->z_acl_count;
                }
                acl_phys.z_acl_version = aclp->z_version;

                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
                    &acl_phys, sizeof (acl_phys));
        }

        /*
         * Replace ACL wide bits, but first clear them.
         */
        zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;

        zp->z_pflags |= aclp->z_hints;

        if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
                zp->z_pflags |= ZFS_ACL_TRIVIAL;

        /* Fills in ctime[], which was registered in bulk[] above. */
        zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE);
        return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
}
1331 1333  
1332 1334  static void
1333 1335  zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t split, boolean_t trim,
1334 1336      zfs_acl_t *aclp)
1335 1337  {
1336 1338          void            *acep = NULL;
1337 1339          uint64_t        who;
1338 1340          int             new_count, new_bytes;
1339 1341          int             ace_size;
1340      -        int             entry_type;
     1342 +        int             entry_type;
1341 1343          uint16_t        iflags, type;
1342 1344          uint32_t        access_mask;
1343 1345          zfs_acl_node_t  *newnode;
1344      -        size_t          abstract_size = aclp->z_ops.ace_abstract_size();
1345      -        void            *zacep;
     1346 +        size_t          abstract_size = aclp->z_ops.ace_abstract_size();
     1347 +        void            *zacep;
1346 1348          boolean_t       isdir;
1347 1349          trivial_acl_t   masks;
1348 1350  
1349 1351          new_count = new_bytes = 0;
1350 1352  
1351 1353          isdir = (vtype == VDIR);
1352 1354  
1353 1355          acl_trivial_access_masks((mode_t)mode, isdir, &masks);
1354 1356  
1355 1357          newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
1356 1358  
1357 1359          zacep = newnode->z_acldata;
1358 1360          if (masks.allow0) {
1359 1361                  zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
1360 1362                  zacep = (void *)((uintptr_t)zacep + abstract_size);
1361 1363                  new_count++;
1362 1364                  new_bytes += abstract_size;
1363 1365          }
1364 1366          if (masks.deny1) {
1365 1367                  zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
1366 1368                  zacep = (void *)((uintptr_t)zacep + abstract_size);
1367 1369                  new_count++;
1368 1370                  new_bytes += abstract_size;
1369 1371          }
1370 1372          if (masks.deny2) {
1371 1373                  zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
1372 1374                  zacep = (void *)((uintptr_t)zacep + abstract_size);
1373 1375                  new_count++;
1374 1376                  new_bytes += abstract_size;
1375 1377          }
1376 1378  
1377 1379          while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
1378 1380              &iflags, &type)) {
1379 1381                  entry_type = (iflags & ACE_TYPE_FLAGS);
1380 1382                  /*
1381 1383                   * ACEs used to represent the file mode may be divided
1382 1384                   * into an equivalent pair of inherit-only and regular
1383 1385                   * ACEs, if they are inheritable.
1384 1386                   * Skip regular ACEs, which are replaced by the new mode.
1385 1387                   */
1386 1388                  if (split && (entry_type == ACE_OWNER ||
1387 1389                      entry_type == OWNING_GROUP ||
1388 1390                      entry_type == ACE_EVERYONE)) {
1389 1391                          if (!isdir || !(iflags &
1390 1392                              (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1391 1393                                  continue;
1392 1394                          /*
1393 1395                           * We preserve owner@, group@, or @everyone
1394 1396                           * permissions, if they are inheritable, by
1395 1397                           * copying them to inherit_only ACEs. This
1396 1398                           * prevents inheritable permissions from being
1397 1399                           * altered along with the file mode.
1398 1400                           */
1399 1401                          iflags |= ACE_INHERIT_ONLY_ACE;
1400 1402                  }
1401 1403  
1402 1404                  /*
1403 1405                   * If this ACL has any inheritable ACEs, mark that in
1404 1406                   * the hints (which are later masked into the pflags)
1405 1407                   * so create knows to do inheritance.
1406 1408                   */
1407 1409                  if (isdir && (iflags &
1408 1410                      (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1409 1411                          aclp->z_hints |= ZFS_INHERIT_ACE;
1410 1412  
1411 1413                  if ((type != ALLOW && type != DENY) ||
1412 1414                      (iflags & ACE_INHERIT_ONLY_ACE)) {
1413 1415                          switch (type) {
1414 1416                          case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1415 1417                          case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1416 1418                          case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1417 1419                          case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1418 1420                                  aclp->z_hints |= ZFS_ACL_OBJ_ACE;
1419 1421                                  break;
1420 1422                          }
1421 1423                  } else {
1422 1424                          /*
1423 1425                           * Limit permissions granted by ACEs to be no greater
1424 1426                           * than permissions of the requested group mode.
1425 1427                           * Applies when the "aclmode" property is set to
1426 1428                           * "groupmask".
1427 1429                           */
1428 1430                          if ((type == ALLOW) && trim)
1429 1431                                  access_mask &= masks.group;
1430 1432                  }
1431 1433                  zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
1432 1434                  ace_size = aclp->z_ops.ace_size(acep);
1433 1435                  zacep = (void *)((uintptr_t)zacep + ace_size);
1434 1436                  new_count++;
1435 1437                  new_bytes += ace_size;
1436 1438          }
1437 1439          zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
1438 1440          zacep = (void *)((uintptr_t)zacep + abstract_size);
1439 1441          zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
1440 1442          zacep = (void *)((uintptr_t)zacep + abstract_size);
1441 1443          zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);
1442 1444  
1443 1445          new_count += 3;
1444 1446          new_bytes += abstract_size * 3;
1445 1447          zfs_acl_release_nodes(aclp);
1446 1448          aclp->z_acl_count = new_count;
1447 1449          aclp->z_acl_bytes = new_bytes;
1448 1450          newnode->z_ace_count = new_count;
1449 1451          newnode->z_size = new_bytes;
1450 1452          list_insert_tail(&aclp->z_acl, newnode);
1451 1453  }
1452 1454  
1453 1455  int
1454 1456  zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
1455 1457  {
1456 1458          int error = 0;
1457 1459  
1458 1460          mutex_enter(&zp->z_acl_lock);
1459 1461          mutex_enter(&zp->z_lock);
1460 1462          if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
1461 1463                  *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
1462 1464          else
1463 1465                  error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
1464 1466  
1465 1467          if (error == 0) {
1466 1468                  (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
1467 1469                  zfs_acl_chmod(ZTOV(zp)->v_type, mode, B_TRUE,
1468 1470                      (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
1469 1471          }
1470 1472          mutex_exit(&zp->z_lock);
1471 1473          mutex_exit(&zp->z_acl_lock);
1472 1474  
1473 1475          return (error);
1474 1476  }
1475 1477  
1476 1478  /*
1477 1479   * Should ACE be inherited?
1478 1480   */
1479 1481  static int
1480 1482  zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
1481 1483  {
1482 1484          int     iflags = (acep_flags & 0xf);
1483 1485  
1484 1486          if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
1485 1487                  return (1);
1486 1488          else if (iflags & ACE_FILE_INHERIT_ACE)
1487 1489                  return (!((vtype == VDIR) &&
1488 1490                      (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
1489 1491          return (0);
1490 1492  }
1491 1493  
1492 1494  /*
1493 1495   * inherit inheritable ACEs from parent
1494 1496   */
1495 1497  static zfs_acl_t *
1496 1498  zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
1497 1499      uint64_t mode)
1498 1500  {
1499 1501          void            *pacep = NULL;
1500 1502          void            *acep;
1501 1503          zfs_acl_node_t  *aclnode;
1502 1504          zfs_acl_t       *aclp = NULL;
1503 1505          uint64_t        who;
1504 1506          uint32_t        access_mask;
1505 1507          uint16_t        iflags, newflags, type;
1506 1508          size_t          ace_size;
1507 1509          void            *data1, *data2;
1508 1510          size_t          data1sz, data2sz;
1509 1511          uint_t          aclinherit;
1510 1512          boolean_t       isdir = (vtype == VDIR);
1511 1513  
1512 1514          aclp = zfs_acl_alloc(paclp->z_version);
1513 1515          aclinherit = zfsvfs->z_acl_inherit;
1514 1516          if (aclinherit == ZFS_ACL_DISCARD || vtype == VLNK)
1515 1517                  return (aclp);
1516 1518  
1517 1519          while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
1518 1520              &access_mask, &iflags, &type)) {
1519 1521  
1520 1522                  /*
1521 1523                   * don't inherit bogus ACEs
1522 1524                   */
1523 1525                  if (!zfs_acl_valid_ace_type(type, iflags))
1524 1526                          continue;
1525 1527  
1526 1528                  /*
1527 1529                   * Check if ACE is inheritable by this vnode
1528 1530                   */
1529 1531                  if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
1530 1532                      !zfs_ace_can_use(vtype, iflags))
1531 1533                          continue;
1532 1534  
1533 1535                  /*
1534 1536                   * Strip inherited execute permission from file if
1535 1537                   * not in mode
1536 1538                   */
1537 1539                  if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
1538 1540                      !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
1539 1541                          access_mask &= ~ACE_EXECUTE;
1540 1542                  }
1541 1543  
1542 1544                  /*
1543 1545                   * Strip write_acl and write_owner from permissions
1544 1546                   * when inheriting an ACE
1545 1547                   */
1546 1548                  if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
1547 1549                          access_mask &= ~RESTRICTED_CLEAR;
1548 1550                  }
1549 1551  
1550 1552                  ace_size = aclp->z_ops.ace_size(pacep);
1551 1553                  aclnode = zfs_acl_node_alloc(ace_size);
1552 1554                  list_insert_tail(&aclp->z_acl, aclnode);
1553 1555                  acep = aclnode->z_acldata;
1554 1556  
1555 1557                  zfs_set_ace(aclp, acep, access_mask, type,
1556 1558                      who, iflags|ACE_INHERITED_ACE);
1557 1559  
1558 1560                  /*
1559 1561                   * Copy special opaque data if any
1560 1562                   */
1561 1563                  if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) {
1562 1564                          VERIFY((data2sz = aclp->z_ops.ace_data(acep,
1563 1565                              &data2)) == data1sz);
1564 1566                          bcopy(data1, data2, data2sz);
1565 1567                  }
1566 1568  
1567 1569                  aclp->z_acl_count++;
1568 1570                  aclnode->z_ace_count++;
1569 1571                  aclp->z_acl_bytes += aclnode->z_size;
1570 1572                  newflags = aclp->z_ops.ace_flags_get(acep);
1571 1573  
1572 1574                  /*
1573 1575                   * If ACE is not to be inherited further, or if the vnode is
1574 1576                   * not a directory, remove all inheritance flags
1575 1577                   */
1576 1578                  if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
1577 1579                          newflags &= ~ALL_INHERIT;
1578 1580                          aclp->z_ops.ace_flags_set(acep,
1579 1581                              newflags|ACE_INHERITED_ACE);
1580 1582                          continue;
1581 1583                  }
1582 1584  
1583 1585                  /*
1584 1586                   * This directory has an inheritable ACE
1585 1587                   */
1586 1588                  aclp->z_hints |= ZFS_INHERIT_ACE;
1587 1589  
1588 1590                  /*
1589 1591                   * If only FILE_INHERIT is set then turn on
1590 1592                   * inherit_only
1591 1593                   */
1592 1594                  if ((iflags & (ACE_FILE_INHERIT_ACE |
1593 1595                      ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
1594 1596                          newflags |= ACE_INHERIT_ONLY_ACE;
1595 1597                          aclp->z_ops.ace_flags_set(acep,
1596 1598                              newflags|ACE_INHERITED_ACE);
1597 1599                  } else {
1598 1600                          newflags &= ~ACE_INHERIT_ONLY_ACE;
1599 1601                          aclp->z_ops.ace_flags_set(acep,
1600 1602                              newflags|ACE_INHERITED_ACE);
1601 1603                  }
1602 1604          }
1603 1605  
1604 1606          return (aclp);
1605 1607  }
1606 1608  
1607 1609  /*
1608 1610   * Create file system object initial permissions
1609 1611   * including inheritable ACEs.
1610 1612   * Also, create FUIDs for owner and group.
1611 1613   */
1612 1614  int
1613 1615  zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
1614 1616      vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
1615 1617  {
1616 1618          int             error;
1617 1619          zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1618 1620          zfs_acl_t       *paclp;
1619 1621          gid_t           gid;
1620 1622          boolean_t       trim = B_FALSE;
1621 1623          boolean_t       inherited = B_FALSE;
1622 1624  
1623 1625          bzero(acl_ids, sizeof (zfs_acl_ids_t));
1624 1626          acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
1625 1627  
1626 1628          if (vsecp)
1627 1629                  if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
1628 1630                      &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
1629 1631                          return (error);
1630 1632          /*
1631 1633           * Determine uid and gid.
1632 1634           */
1633 1635          if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
1634 1636              ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
1635 1637                  acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
1636 1638                      (uint64_t)vap->va_uid, cr,
1637 1639                      ZFS_OWNER, &acl_ids->z_fuidp);
1638 1640                  acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1639 1641                      (uint64_t)vap->va_gid, cr,
1640 1642                      ZFS_GROUP, &acl_ids->z_fuidp);
1641 1643                  gid = vap->va_gid;
1642 1644          } else {
1643 1645                  acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
1644 1646                      cr, &acl_ids->z_fuidp);
1645 1647                  acl_ids->z_fgid = 0;
1646 1648                  if (vap->va_mask & AT_GID)  {
1647 1649                          acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1648 1650                              (uint64_t)vap->va_gid,
1649 1651                              cr, ZFS_GROUP, &acl_ids->z_fuidp);
1650 1652                          gid = vap->va_gid;
1651 1653                          if (acl_ids->z_fgid != dzp->z_gid &&
1652 1654                              !groupmember(vap->va_gid, cr) &&
1653 1655                              secpolicy_vnode_create_gid(cr) != 0)
1654 1656                                  acl_ids->z_fgid = 0;
1655 1657                  }
1656 1658                  if (acl_ids->z_fgid == 0) {
1657 1659                          if (dzp->z_mode & S_ISGID) {
1658 1660                                  char            *domain;
1659 1661                                  uint32_t        rid;
1660 1662  
1661 1663                                  acl_ids->z_fgid = dzp->z_gid;
1662 1664                                  gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
1663 1665                                      cr, ZFS_GROUP);
1664 1666  
1665 1667                                  if (zfsvfs->z_use_fuids &&
1666 1668                                      IS_EPHEMERAL(acl_ids->z_fgid)) {
1667 1669                                          domain = zfs_fuid_idx_domain(
1668 1670                                              &zfsvfs->z_fuid_idx,
1669 1671                                              FUID_INDEX(acl_ids->z_fgid));
1670 1672                                          rid = FUID_RID(acl_ids->z_fgid);
1671 1673                                          zfs_fuid_node_add(&acl_ids->z_fuidp,
1672 1674                                              domain, rid,
1673 1675                                              FUID_INDEX(acl_ids->z_fgid),
1674 1676                                              acl_ids->z_fgid, ZFS_GROUP);
1675 1677                                  }
1676 1678                          } else {
1677 1679                                  acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
1678 1680                                      ZFS_GROUP, cr, &acl_ids->z_fuidp);
1679 1681                                  gid = crgetgid(cr);
1680 1682                          }
1681 1683                  }
1682 1684          }
1683 1685  
1684 1686          /*
1685 1687           * If we're creating a directory, and the parent directory has the
1686 1688           * set-GID bit set, set in on the new directory.
1687 1689           * Otherwise, if the user is neither privileged nor a member of the
1688 1690           * file's new group, clear the file's set-GID bit.
1689 1691           */
1690 1692  
1691 1693          if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
1692 1694              (vap->va_type == VDIR)) {
1693 1695                  acl_ids->z_mode |= S_ISGID;
1694 1696          } else {
1695 1697                  if ((acl_ids->z_mode & S_ISGID) &&
1696 1698                      secpolicy_vnode_setids_setgids(cr, gid) != 0)
1697 1699                          acl_ids->z_mode &= ~S_ISGID;
1698 1700          }
1699 1701  
1700 1702          if (acl_ids->z_aclp == NULL) {
1701 1703                  mutex_enter(&dzp->z_acl_lock);
1702 1704                  mutex_enter(&dzp->z_lock);
1703 1705                  if (!(flag & IS_ROOT_NODE) &&
1704 1706                      (dzp->z_pflags & ZFS_INHERIT_ACE) &&
1705 1707                      !(dzp->z_pflags & ZFS_XATTR)) {
1706 1708                          VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
1707 1709                              &paclp, B_FALSE));
1708 1710                          acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
1709 1711                              vap->va_type, paclp, acl_ids->z_mode);
1710 1712                          inherited = B_TRUE;
1711 1713                  } else {
1712 1714                          acl_ids->z_aclp =
1713 1715                              zfs_acl_alloc(zfs_acl_version_zp(dzp));
1714 1716                          acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
1715 1717                  }
1716 1718                  mutex_exit(&dzp->z_lock);
1717 1719                  mutex_exit(&dzp->z_acl_lock);
1718 1720  
1719 1721                  if (vap->va_type == VDIR)
1720 1722                          acl_ids->z_aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
1721 1723  
1722 1724                  if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
1723 1725                      zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
1724 1726                      zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
1725 1727                          trim = B_TRUE;
1726 1728                  zfs_acl_chmod(vap->va_type, acl_ids->z_mode, B_FALSE, trim,
1727 1729                      acl_ids->z_aclp);
1728 1730          }
1729 1731  
1730 1732          if (inherited || vsecp) {
1731 1733                  acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
1732 1734                      acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
1733 1735                      acl_ids->z_fuid, acl_ids->z_fgid);
1734 1736                  if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
1735 1737                          acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
1736 1738          }
1737 1739  
1738 1740          return (0);
1739 1741  }
1740 1742  
1741 1743  /*
1742 1744   * Free ACL and fuid_infop, but not the acl_ids structure
1743 1745   */
1744 1746  void
1745 1747  zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
1746 1748  {
1747 1749          if (acl_ids->z_aclp)
1748 1750                  zfs_acl_free(acl_ids->z_aclp);
1749 1751          if (acl_ids->z_fuidp)
1750 1752                  zfs_fuid_info_free(acl_ids->z_fuidp);
1751 1753          acl_ids->z_aclp = NULL;
1752 1754          acl_ids->z_fuidp = NULL;
1753 1755  }
1754 1756  
1755 1757  boolean_t
1756 1758  zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
1757 1759  {
1758 1760          return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
1759 1761              zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
1760 1762  }
1761 1763  
1762 1764  /*
1763 1765   * Retrieve a file's ACL
1764 1766   */
1765 1767  int
1766 1768  zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1767 1769  {
1768 1770          zfs_acl_t       *aclp;
1769 1771          ulong_t         mask;
1770 1772          int             error;
1771 1773          int             count = 0;
1772 1774          int             largeace = 0;
1773 1775  
1774 1776          mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
1775 1777              VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
1776 1778  
1777 1779          if (mask == 0)
1778 1780                  return (SET_ERROR(ENOSYS));
1779 1781  
1780 1782          if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
1781 1783                  return (error);
1782 1784  
1783 1785          mutex_enter(&zp->z_acl_lock);
1784 1786  
1785 1787          error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
1786 1788          if (error != 0) {
1787 1789                  mutex_exit(&zp->z_acl_lock);
1788 1790                  return (error);
1789 1791          }
1790 1792  
1791 1793          /*
1792 1794           * Scan ACL to determine number of ACEs
1793 1795           */
1794 1796          if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
1795 1797                  void *zacep = NULL;
1796 1798                  uint64_t who;
1797 1799                  uint32_t access_mask;
1798 1800                  uint16_t type, iflags;
1799 1801  
1800 1802                  while (zacep = zfs_acl_next_ace(aclp, zacep,
1801 1803                      &who, &access_mask, &iflags, &type)) {
1802 1804                          switch (type) {
1803 1805                          case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1804 1806                          case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1805 1807                          case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1806 1808                          case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1807 1809                                  largeace++;
1808 1810                                  continue;
1809 1811                          default:
1810 1812                                  count++;
1811 1813                          }
1812 1814                  }
1813 1815                  vsecp->vsa_aclcnt = count;
1814 1816          } else
1815 1817                  count = (int)aclp->z_acl_count;
1816 1818  
1817 1819          if (mask & VSA_ACECNT) {
1818 1820                  vsecp->vsa_aclcnt = count;
1819 1821          }
1820 1822  
1821 1823          if (mask & VSA_ACE) {
1822 1824                  size_t aclsz;
1823 1825  
1824 1826                  aclsz = count * sizeof (ace_t) +
1825 1827                      sizeof (ace_object_t) * largeace;
1826 1828  
1827 1829                  vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
1828 1830                  vsecp->vsa_aclentsz = aclsz;
1829 1831  
1830 1832                  if (aclp->z_version == ZFS_ACL_VERSION_FUID)
1831 1833                          zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
1832 1834                              vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
1833 1835                  else {
1834 1836                          zfs_acl_node_t *aclnode;
1835 1837                          void *start = vsecp->vsa_aclentp;
1836 1838  
1837 1839                          for (aclnode = list_head(&aclp->z_acl); aclnode;
1838 1840                              aclnode = list_next(&aclp->z_acl, aclnode)) {
1839 1841                                  bcopy(aclnode->z_acldata, start,
1840 1842                                      aclnode->z_size);
1841 1843                                  start = (caddr_t)start + aclnode->z_size;
1842 1844                          }
1843 1845                          ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
1844 1846                              aclp->z_acl_bytes);
1845 1847                  }
1846 1848          }
1847 1849          if (mask & VSA_ACE_ACLFLAGS) {
1848 1850                  vsecp->vsa_aclflags = 0;
1849 1851                  if (zp->z_pflags & ZFS_ACL_DEFAULTED)
1850 1852                          vsecp->vsa_aclflags |= ACL_DEFAULTED;
1851 1853                  if (zp->z_pflags & ZFS_ACL_PROTECTED)
1852 1854                          vsecp->vsa_aclflags |= ACL_PROTECTED;
1853 1855                  if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
1854 1856                          vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
1855 1857          }
1856 1858  
1857 1859          mutex_exit(&zp->z_acl_lock);
1858 1860  
1859 1861          return (0);
1860 1862  }
1861 1863  
1862 1864  int
1863 1865  zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
1864 1866      vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
1865 1867  {
1866 1868          zfs_acl_t *aclp;
1867 1869          zfs_acl_node_t *aclnode;
1868 1870          int aclcnt = vsecp->vsa_aclcnt;
1869 1871          int error;
1870 1872  
1871 1873          if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
1872 1874                  return (SET_ERROR(EINVAL));
1873 1875  
1874 1876          aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
1875 1877  
1876 1878          aclp->z_hints = 0;
1877 1879          aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
1878 1880          if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
1879 1881                  if ((error = zfs_copy_ace_2_oldace(obj_type, aclp,
1880 1882                      (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,
1881 1883                      aclcnt, &aclnode->z_size)) != 0) {
1882 1884                          zfs_acl_free(aclp);
1883 1885                          zfs_acl_node_free(aclnode);
1884 1886                          return (error);
1885 1887                  }
1886 1888          } else {
1887 1889                  if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp,
1888 1890                      vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
1889 1891                      &aclnode->z_size, fuidp, cr)) != 0) {
1890 1892                          zfs_acl_free(aclp);
1891 1893                          zfs_acl_node_free(aclnode);
1892 1894                          return (error);
1893 1895                  }
1894 1896          }
1895 1897          aclp->z_acl_bytes = aclnode->z_size;
1896 1898          aclnode->z_ace_count = aclcnt;
1897 1899          aclp->z_acl_count = aclcnt;
1898 1900          list_insert_head(&aclp->z_acl, aclnode);
1899 1901  
1900 1902          /*
1901 1903           * If flags are being set then add them to z_hints
1902 1904           */
1903 1905          if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
1904 1906                  if (vsecp->vsa_aclflags & ACL_PROTECTED)
1905 1907                          aclp->z_hints |= ZFS_ACL_PROTECTED;
1906 1908                  if (vsecp->vsa_aclflags & ACL_DEFAULTED)
1907 1909                          aclp->z_hints |= ZFS_ACL_DEFAULTED;
1908 1910                  if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
1909 1911                          aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
1910 1912          }
1911 1913  
1912 1914          *zaclp = aclp;
1913 1915  
1914 1916          return (0);
1915 1917  }
1916 1918  
1917 1919  /*
1918 1920   * Set a file's ACL
1919 1921   */
1920 1922  int
1921 1923  zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1922 1924  {
1923 1925          zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
1924 1926          zilog_t         *zilog = zfsvfs->z_log;
1925 1927          ulong_t         mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1926 1928          dmu_tx_t        *tx;
1927 1929          int             error;
1928 1930          zfs_acl_t       *aclp;
1929 1931          zfs_fuid_info_t *fuidp = NULL;
1930 1932          boolean_t       fuid_dirtied;
1931 1933          uint64_t        acl_obj;
1932 1934  
1933 1935          if (mask == 0)
1934 1936                  return (SET_ERROR(ENOSYS));
1935 1937  
1936 1938          if (zp->z_pflags & ZFS_IMMUTABLE)
1937 1939                  return (SET_ERROR(EPERM));
1938 1940  
1939 1941          if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
1940 1942                  return (error);
1941 1943  
1942 1944          error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
1943 1945              &aclp);
1944 1946          if (error)
1945 1947                  return (error);
1946 1948  
1947 1949          /*
1948 1950           * If ACL wide flags aren't being set then preserve any
1949 1951           * existing flags.
1950 1952           */
1951 1953          if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
1952 1954                  aclp->z_hints |=
1953 1955                      (zp->z_pflags & V4_ACL_WIDE_FLAGS);
1954 1956          }
1955 1957  top:
1956 1958          mutex_enter(&zp->z_acl_lock);
1957 1959          mutex_enter(&zp->z_lock);
1958 1960  
1959 1961          tx = dmu_tx_create(zfsvfs->z_os);
1960 1962  
1961 1963          dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1962 1964  
1963 1965          fuid_dirtied = zfsvfs->z_fuid_dirty;
1964 1966          if (fuid_dirtied)
1965 1967                  zfs_fuid_txhold(zfsvfs, tx);
1966 1968  
1967 1969          /*
1968 1970           * If old version and ACL won't fit in bonus and we aren't
1969 1971           * upgrading then take out necessary DMU holds
1970 1972           */
1971 1973  
1972 1974          if ((acl_obj = zfs_external_acl(zp)) != 0) {
1973 1975                  if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
1974 1976                      zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
1975 1977                          dmu_tx_hold_free(tx, acl_obj, 0,
1976 1978                              DMU_OBJECT_END);
1977 1979                          dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1978 1980                              aclp->z_acl_bytes);
1979 1981                  } else {
1980 1982                          dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
1981 1983                  }
1982 1984          } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1983 1985                  dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
1984 1986          }
1985 1987  
1986 1988          zfs_sa_upgrade_txholds(tx, zp);
1987 1989          error = dmu_tx_assign(tx, TXG_NOWAIT);
1988 1990          if (error) {
1989 1991                  mutex_exit(&zp->z_acl_lock);
1990 1992                  mutex_exit(&zp->z_lock);
1991 1993  
1992 1994                  if (error == ERESTART) {
1993 1995                          dmu_tx_wait(tx);
1994 1996                          dmu_tx_abort(tx);
1995 1997                          goto top;
1996 1998                  }
1997 1999                  dmu_tx_abort(tx);
1998 2000                  zfs_acl_free(aclp);
1999 2001                  return (error);
2000 2002          }
2001 2003  
2002 2004          error = zfs_aclset_common(zp, aclp, cr, tx);
2003 2005          ASSERT(error == 0);
2004 2006          ASSERT(zp->z_acl_cached == NULL);
2005 2007          zp->z_acl_cached = aclp;
2006 2008  
2007 2009          if (fuid_dirtied)
2008 2010                  zfs_fuid_sync(zfsvfs, tx);
2009 2011  
2010 2012          zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
2011 2013  
2012 2014          if (fuidp)
2013 2015                  zfs_fuid_info_free(fuidp);
2014 2016          dmu_tx_commit(tx);
2015 2017  done:
2016 2018          mutex_exit(&zp->z_lock);
2017 2019          mutex_exit(&zp->z_acl_lock);
2018 2020  
2019 2021          return (error);
2020 2022  }
2021 2023  
2022 2024  /*
2023 2025   * Check accesses of interest (AoI) against attributes of the dataset
2024 2026   * such as read-only.  Returns zero if no AoI conflict with dataset
2025 2027   * attributes, otherwise an appropriate errno is returned.
2026 2028   */
2027 2029  static int
2028 2030  zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
2029 2031  {
2030 2032          if ((v4_mode & WRITE_MASK) &&
2031 2033              (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
2032 2034              (!IS_DEVVP(ZTOV(zp)) ||
2033 2035              (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
2034 2036                  return (SET_ERROR(EROFS));
2035 2037          }
2036 2038  
2037 2039          /*
2038 2040           * Intentionally allow ZFS_READONLY through here.
2039 2041           * See zfs_zaccess_common().
2040 2042           */
2041 2043          if ((v4_mode & WRITE_MASK_DATA) &&
2042 2044              (zp->z_pflags & ZFS_IMMUTABLE)) {
2043 2045                  return (SET_ERROR(EPERM));
2044 2046          }
2045 2047  
2046 2048          if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
2047 2049              (zp->z_pflags & ZFS_NOUNLINK)) {
2048 2050                  return (SET_ERROR(EPERM));
2049 2051          }
2050 2052  
2051 2053          if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
2052 2054              (zp->z_pflags & ZFS_AV_QUARANTINED))) {
2053 2055                  return (SET_ERROR(EACCES));
2054 2056          }
2055 2057  
2056 2058          return (0);
2057 2059  }
2058 2060  
2059 2061  /*
2060 2062   * The primary usage of this function is to loop through all of the
2061 2063   * ACEs in the znode, determining what accesses of interest (AoI) to
2062 2064   * the caller are allowed or denied.  The AoI are expressed as bits in
2063 2065   * the working_mode parameter.  As each ACE is processed, bits covered
2064 2066   * by that ACE are removed from the working_mode.  This removal
2065 2067   * facilitates two things.  The first is that when the working mode is
2066 2068   * empty (= 0), we know we've looked at all the AoI. The second is
2067 2069   * that the ACE interpretation rules don't allow a later ACE to undo
2068 2070   * something granted or denied by an earlier ACE.  Removing the
2069 2071   * discovered access or denial enforces this rule.  At the end of
2070 2072   * processing the ACEs, all AoI that were found to be denied are
2071 2073   * placed into the working_mode, giving the caller a mask of denied
2072 2074   * accesses.  Returns:
2073 2075   *      0               if all AoI granted
2074 2076   *      EACCES          if the denied mask is non-zero
2075 2077   *      other error     if abnormal failure (e.g., IO error)
  
    | 
      ↓ open down ↓ | 
    720 lines elided | 
    
      ↑ open up ↑ | 
  
2076 2078   *
2077 2079   * A secondary usage of the function is to determine if any of the
2078 2080   * AoI are granted.  If an ACE grants any access in
2079 2081   * the working_mode, we immediately short circuit out of the function.
2080 2082   * This mode is chosen by setting anyaccess to B_TRUE.  The
2081 2083   * working_mode is not a denied access mask upon exit if the function
2082 2084   * is used in this manner.
2083 2085   */
2084 2086  static int
2085 2087  zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
2086      -    boolean_t anyaccess, cred_t *cr)
     2088 +    boolean_t anyaccess, cred_t *cr, boolean_t audit)
2087 2089  {
2088 2090          zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
2089 2091          zfs_acl_t       *aclp;
2090 2092          int             error;
2091      -        uid_t           uid = crgetuid(cr);
2092      -        uint64_t        who;
     2093 +        uint64_t        who;            /* FUID from the ACE */
2093 2094          uint16_t        type, iflags;
2094 2095          uint16_t        entry_type;
2095 2096          uint32_t        access_mask;
2096 2097          uint32_t        deny_mask = 0;
     2098 +        uint32_t        sys_smask = 0;
     2099 +        uint32_t        sys_fmask = 0;
2097 2100          zfs_ace_hdr_t   *acep = NULL;
2098      -        boolean_t       checkit;
2099      -        uid_t           gowner;
2100      -        uid_t           fowner;
     2101 +        boolean_t       checkit;        /* ACE ID matches */
     2102 +        t_audit_data_t *tad;
2101 2103  
2102      -        zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
2103      -
2104 2104          mutex_enter(&zp->z_acl_lock);
2105 2105  
2106 2106          error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
2107 2107          if (error != 0) {
2108 2108                  mutex_exit(&zp->z_acl_lock);
2109 2109                  return (error);
2110 2110          }
2111 2111  
2112 2112          ASSERT(zp->z_acl_cached);
2113 2113  
2114 2114          while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
2115 2115              &iflags, &type)) {
  
    | 
      ↓ open down ↓ | 
    2 lines elided | 
    
      ↑ open up ↑ | 
  
2116 2116                  uint32_t mask_matched;
2117 2117  
2118 2118                  if (!zfs_acl_valid_ace_type(type, iflags))
2119 2119                          continue;
2120 2120  
2121 2121                  if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
2122 2122                          continue;
2123 2123  
2124 2124                  /* Skip ACE if it does not affect any AoI */
2125 2125                  mask_matched = (access_mask & *working_mode);
2126      -                if (!mask_matched)
     2126 +                if ((type == DENY || type == ALLOW) && !mask_matched)
2127 2127                          continue;
     2128 +                if (!audit && type != DENY && type != ALLOW)
     2129 +                        continue;
2128 2130  
2129 2131                  entry_type = (iflags & ACE_TYPE_FLAGS);
2130 2132  
2131 2133                  checkit = B_FALSE;
2132 2134  
2133 2135                  switch (entry_type) {
2134 2136                  case ACE_OWNER:
2135      -                        if (uid == fowner)
2136      -                                checkit = B_TRUE;
     2137 +                        who = zp->z_uid;
     2138 +                        /*FALLTHROUGH*/
     2139 +                case 0: /* USER Entry */
     2140 +                        checkit = zfs_user_in_cred(zfsvfs, who, cr);
2137 2141                          break;
2138 2142                  case OWNING_GROUP:
2139      -                        who = gowner;
     2143 +                        who = zp->z_gid;
2140 2144                          /*FALLTHROUGH*/
2141 2145                  case ACE_IDENTIFIER_GROUP:
2142 2146                          checkit = zfs_groupmember(zfsvfs, who, cr);
2143 2147                          break;
2144 2148                  case ACE_EVERYONE:
2145 2149                          checkit = B_TRUE;
2146 2150                          break;
2147 2151  
2148      -                /* USER Entry */
2149 2152                  default:
2150      -                        if (entry_type == 0) {
2151      -                                uid_t newid;
2152      -
2153      -                                newid = zfs_fuid_map_id(zfsvfs, who, cr,
2154      -                                    ZFS_ACE_USER);
2155      -                                if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
2156      -                                    uid == newid)
2157      -                                        checkit = B_TRUE;
2158      -                                break;
2159      -                        } else {
2160      -                                mutex_exit(&zp->z_acl_lock);
2161      -                                return (SET_ERROR(EIO));
2162      -                        }
     2153 +                        /*
     2154 +                         * The zfs_acl_valid_ace_type check above
     2155 +                         * should make this case impossible.
     2156 +                         */
     2157 +                        mutex_exit(&zp->z_acl_lock);
     2158 +                        return (SET_ERROR(EIO));
2163 2159                  }
2164 2160  
2165 2161                  if (checkit) {
2166      -                        if (type == DENY) {
     2162 +                        switch (type) {
     2163 +                        case DENY:
2167 2164                                  DTRACE_PROBE3(zfs__ace__denies,
2168 2165                                      znode_t *, zp,
2169 2166                                      zfs_ace_hdr_t *, acep,
2170 2167                                      uint32_t, mask_matched);
2171 2168                                  deny_mask |= mask_matched;
2172      -                        } else {
     2169 +                                *working_mode &= ~mask_matched;
     2170 +                                break;
     2171 +                        case ACE_SYSTEM_AUDIT_ACE_TYPE:
     2172 +                        case ACE_SYSTEM_ALARM_ACE_TYPE:
     2173 +                                DTRACE_PROBE3(zfs__ace__audit,
     2174 +                                    znode_t *, zp,
     2175 +                                    zfs_ace_hdr_t *, acep,
     2176 +                                    uint32_t, access_mask);
     2177 +                                if ((iflags &
     2178 +                                    ACE_SUCCESSFUL_ACCESS_ACE_FLAG) != 0)
     2179 +                                        sys_smask |= access_mask;
     2180 +                                if ((iflags & ACE_FAILED_ACCESS_ACE_FLAG) != 0)
     2181 +                                        sys_fmask |= access_mask;
     2182 +                                break;
     2183 +                        case ALLOW:
     2184 +                        default:
2173 2185                                  DTRACE_PROBE3(zfs__ace__allows,
2174 2186                                      znode_t *, zp,
2175 2187                                      zfs_ace_hdr_t *, acep,
2176 2188                                      uint32_t, mask_matched);
2177 2189                                  if (anyaccess) {
2178 2190                                          mutex_exit(&zp->z_acl_lock);
2179 2191                                          return (0);
2180 2192                                  }
     2193 +                                *working_mode &= ~mask_matched;
     2194 +                                break;
2181 2195                          }
2182      -                        *working_mode &= ~mask_matched;
2183 2196                  }
2184 2197  
2185      -                /* Are we done? */
2186      -                if (*working_mode == 0)
     2198 +                /*
     2199 +                 * Are we done? If auditing, process the entire list
     2200 +                 * to gather all audit ACEs
     2201 +                 */
     2202 +                if (!audit && *working_mode == 0)
2187 2203                          break;
2188 2204          }
2189 2205  
2190 2206          mutex_exit(&zp->z_acl_lock);
2191 2207  
     2208 +        if (audit) {
     2209 +                tad = T2A(curthread);
     2210 +                tad->tad_sacl_mask.tas_smask = sys_smask;
     2211 +                tad->tad_sacl_mask.tas_fmask = sys_fmask;
     2212 +        }
     2213 +
2192 2214          /* Put the found 'denies' back on the working mode */
2193 2215          if (deny_mask) {
2194 2216                  *working_mode |= deny_mask;
2195 2217                  return (SET_ERROR(EACCES));
2196 2218          } else if (*working_mode) {
2197 2219                  return (-1);
2198 2220          }
2199 2221  
2200 2222          return (0);
2201 2223  }
2202 2224  
2203 2225  /*
2204 2226   * Return true if any access whatsoever is granted; we don't actually
2205 2227   * care what access is granted.
2206 2228   */
2207 2229  boolean_t
2208 2230  zfs_has_access(znode_t *zp, cred_t *cr)
2209 2231  {
2210 2232          uint32_t have = ACE_ALL_PERMS;
2211 2233  
2212      -        if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
     2234 +        if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr, B_FALSE) != 0) {
2213 2235                  uid_t owner;
2214 2236  
2215 2237                  owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2216 2238                  return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0);
2217 2239          }
2218 2240          return (B_TRUE);
2219 2241  }
2220 2242  
2221 2243  static int
2222 2244  zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
2223 2245      boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
2224 2246  {
2225 2247          zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2226 2248          int err;
     2249 +        boolean_t audit = B_FALSE;
2227 2250  
2228 2251          *working_mode = v4_mode;
2229 2252          *check_privs = B_TRUE;
2230 2253  
2231 2254          /*
2232 2255           * Short circuit empty requests
2233 2256           */
2234 2257          if (v4_mode == 0 || zfsvfs->z_replay) {
2235 2258                  *working_mode = 0;
2236 2259                  return (0);
2237 2260          }
2238 2261  
2239 2262          if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
2240 2263                  *check_privs = B_FALSE;
2241 2264                  return (err);
2242 2265          }
2243 2266  
2244 2267          /*
2245 2268           * The caller requested that the ACL check be skipped.  This
2246 2269           * would only happen if the caller checked VOP_ACCESS() with a
2247 2270           * 32 bit ACE mask and already had the appropriate permissions.
2248 2271           */
2249 2272          if (skipaclchk) {
2250 2273                  *working_mode = 0;
2251 2274                  return (0);
2252 2275          }
2253 2276  
2254 2277          /*
2255 2278           * Note: ZFS_READONLY represents the "DOS R/O" attribute.
2256 2279           * When that flag is set, we should behave as if write access
2257 2280           * were not granted by anything in the ACL.  In particular:
2258 2281           * We _must_ allow writes after opening the file r/w, then
2259 2282           * setting the DOS R/O attribute, and writing some more.
2260 2283           * (Similar to how you can write after fchmod(fd, 0444).)
2261 2284           *
  
    | 
      ↓ open down ↓ | 
    25 lines elided | 
    
      ↑ open up ↑ | 
  
2262 2285           * Therefore ZFS_READONLY is ignored in the dataset check
2263 2286           * above, and checked here as if part of the ACL check.
2264 2287           * Also note: DOS R/O is ignored for directories.
2265 2288           */
2266 2289          if ((v4_mode & WRITE_MASK_DATA) &&
2267 2290              (ZTOV(zp)->v_type != VDIR) &&
2268 2291              (zp->z_pflags & ZFS_READONLY)) {
2269 2292                  return (SET_ERROR(EPERM));
2270 2293          }
2271 2294  
2272      -        return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
     2295 +        if (cr != zone_kcred() && AU_ZONE_AUDITING(NULL)) {
     2296 +                t_audit_data_t *tad = T2A(curthread);
     2297 +                if (tad->tad_sacl_ctrl != SACL_AUDIT_NONE &&
     2298 +                    auditev(AUE_SACL, cr) != 0) {
     2299 +                        audit = B_TRUE;
     2300 +                        tad->tad_sacl_ctrl = SACL_AUDIT_NONE;
     2301 +                }
     2302 +        }
     2303 +
     2304 +        return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr, audit));
2273 2305  }
2274 2306  
2275 2307  static int
2276 2308  zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
2277 2309      cred_t *cr)
2278 2310  {
2279 2311          if (*working_mode != ACE_WRITE_DATA)
2280 2312                  return (SET_ERROR(EACCES));
2281 2313  
2282 2314          return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
2283 2315              check_privs, B_FALSE, cr));
2284 2316  }
2285 2317  
2286 2318  int
2287 2319  zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
2288 2320  {
2289 2321          boolean_t owner = B_FALSE;
2290 2322          boolean_t groupmbr = B_FALSE;
2291 2323          boolean_t is_attr;
2292 2324          uid_t uid = crgetuid(cr);
2293 2325          int error;
2294 2326  
2295 2327          if (zdp->z_pflags & ZFS_AV_QUARANTINED)
2296 2328                  return (SET_ERROR(EACCES));
2297 2329  
2298 2330          is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
2299 2331              (ZTOV(zdp)->v_type == VDIR));
2300 2332          if (is_attr)
2301 2333                  goto slow;
2302 2334  
2303 2335  
2304 2336          mutex_enter(&zdp->z_acl_lock);
2305 2337  
2306 2338          if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
2307 2339                  mutex_exit(&zdp->z_acl_lock);
2308 2340                  return (0);
2309 2341          }
2310 2342  
2311 2343          if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) {
2312 2344                  mutex_exit(&zdp->z_acl_lock);
2313 2345                  goto slow;
2314 2346          }
2315 2347  
2316 2348          if (uid == zdp->z_uid) {
2317 2349                  owner = B_TRUE;
2318 2350                  if (zdp->z_mode & S_IXUSR) {
2319 2351                          mutex_exit(&zdp->z_acl_lock);
2320 2352                          return (0);
2321 2353                  } else {
2322 2354                          mutex_exit(&zdp->z_acl_lock);
2323 2355                          goto slow;
2324 2356                  }
2325 2357          }
2326 2358          if (groupmember(zdp->z_gid, cr)) {
2327 2359                  groupmbr = B_TRUE;
2328 2360                  if (zdp->z_mode & S_IXGRP) {
2329 2361                          mutex_exit(&zdp->z_acl_lock);
2330 2362                          return (0);
2331 2363                  } else {
2332 2364                          mutex_exit(&zdp->z_acl_lock);
2333 2365                          goto slow;
2334 2366                  }
2335 2367          }
2336 2368          if (!owner && !groupmbr) {
2337 2369                  if (zdp->z_mode & S_IXOTH) {
2338 2370                          mutex_exit(&zdp->z_acl_lock);
2339 2371                          return (0);
2340 2372                  }
2341 2373          }
2342 2374  
2343 2375          mutex_exit(&zdp->z_acl_lock);
2344 2376  
2345 2377  slow:
2346 2378          DTRACE_PROBE(zfs__fastpath__execute__access__miss);
2347 2379          ZFS_ENTER(zdp->z_zfsvfs);
2348 2380          error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
2349 2381          ZFS_EXIT(zdp->z_zfsvfs);
2350 2382          return (error);
2351 2383  }
2352 2384  
2353 2385  /*
2354 2386   * Determine whether Access should be granted/denied.
2355 2387   *
2356 2388   * The least priv subsystem is always consulted as a basic privilege
2357 2389   * can define any form of access.
2358 2390   */
2359 2391  int
2360 2392  zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
2361 2393  {
2362 2394          uint32_t        working_mode;
2363 2395          int             error;
2364 2396          int             is_attr;
2365 2397          boolean_t       check_privs;
2366 2398          znode_t         *xzp;
2367 2399          znode_t         *check_zp = zp;
2368 2400          mode_t          needed_bits;
2369 2401          uid_t           owner;
2370 2402  
2371 2403          is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));
2372 2404  
2373 2405          /*
2374 2406           * If attribute then validate against base file
2375 2407           */
2376 2408          if (is_attr) {
2377 2409                  uint64_t        parent;
2378 2410  
2379 2411                  if ((error = sa_lookup(zp->z_sa_hdl,
2380 2412                      SA_ZPL_PARENT(zp->z_zfsvfs), &parent,
2381 2413                      sizeof (parent))) != 0)
2382 2414                          return (error);
2383 2415  
2384 2416                  if ((error = zfs_zget(zp->z_zfsvfs,
2385 2417                      parent, &xzp)) != 0)        {
2386 2418                          return (error);
2387 2419                  }
2388 2420  
2389 2421                  check_zp = xzp;
2390 2422  
2391 2423                  /*
2392 2424                   * fixup mode to map to xattr perms
2393 2425                   */
2394 2426  
2395 2427                  if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
2396 2428                          mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
2397 2429                          mode |= ACE_WRITE_NAMED_ATTRS;
2398 2430                  }
2399 2431  
2400 2432                  if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
2401 2433                          mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
2402 2434                          mode |= ACE_READ_NAMED_ATTRS;
2403 2435                  }
2404 2436          }
2405 2437  
2406 2438          owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2407 2439          /*
2408 2440           * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
2409 2441           * in needed_bits.  Map the bits mapped by working_mode (currently
2410 2442           * missing) in checkmode.
2411 2443           * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
2412 2444           * needed_bits.
2413 2445           */
2414 2446          needed_bits = 0;
2415 2447  
2416 2448          working_mode = mode;
2417 2449          if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
2418 2450              owner == crgetuid(cr))
2419 2451                  working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2420 2452  
2421 2453          if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2422 2454              ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2423 2455                  needed_bits |= VREAD;
2424 2456          if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2425 2457              ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2426 2458                  needed_bits |= VWRITE;
2427 2459          if (working_mode & ACE_EXECUTE)
2428 2460                  needed_bits |= VEXEC;
2429 2461  
2430 2462          if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
2431 2463              &check_privs, skipaclchk, cr)) == 0) {
2432 2464                  if (is_attr)
2433 2465                          VN_RELE(ZTOV(xzp));
2434 2466                  return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
  
    | 
      ↓ open down ↓ | 
    152 lines elided | 
    
      ↑ open up ↑ | 
  
2435 2467                      needed_bits, needed_bits));
2436 2468          }
2437 2469  
2438 2470          if (error && !check_privs) {
2439 2471                  if (is_attr)
2440 2472                          VN_RELE(ZTOV(xzp));
2441 2473                  return (error);
2442 2474          }
2443 2475  
2444 2476          if (error && (flags & V_APPEND)) {
     2477 +                /*
     2478 +                 * If zfs_zaccess_common checked aces, then we won't audit here.
     2479 +                 * Otherwise, we'll try and get audit masks here.
     2480 +                 */
2445 2481                  error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
2446 2482          }
2447 2483  
2448 2484          if (error && check_privs) {
2449 2485                  mode_t          checkmode = 0;
2450 2486  
2451 2487                  /*
2452 2488                   * First check for implicit owner permission on
2453 2489                   * read_acl/read_attributes
2454 2490                   */
2455 2491  
2456 2492                  error = 0;
2457 2493                  ASSERT(working_mode != 0);
2458 2494  
2459 2495                  if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
2460 2496                      owner == crgetuid(cr)))
2461 2497                          working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2462 2498  
2463 2499                  if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2464 2500                      ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2465 2501                          checkmode |= VREAD;
2466 2502                  if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2467 2503                      ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2468 2504                          checkmode |= VWRITE;
2469 2505                  if (working_mode & ACE_EXECUTE)
2470 2506                          checkmode |= VEXEC;
2471 2507  
2472 2508                  error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner,
2473 2509                      needed_bits & ~checkmode, needed_bits);
2474 2510  
2475 2511                  if (error == 0 && (working_mode & ACE_WRITE_OWNER))
2476 2512                          error = secpolicy_vnode_chown(cr, owner);
2477 2513                  if (error == 0 && (working_mode & ACE_WRITE_ACL))
2478 2514                          error = secpolicy_vnode_setdac(cr, owner);
2479 2515  
2480 2516                  if (error == 0 && (working_mode &
2481 2517                      (ACE_DELETE|ACE_DELETE_CHILD)))
2482 2518                          error = secpolicy_vnode_remove(cr);
2483 2519  
2484 2520                  if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
2485 2521                          error = secpolicy_vnode_chown(cr, owner);
2486 2522                  }
2487 2523                  if (error == 0) {
2488 2524                          /*
2489 2525                           * See if any bits other than those already checked
2490 2526                           * for are still present.  If so then return EACCES
2491 2527                           */
2492 2528                          if (working_mode & ~(ZFS_CHECKED_MASKS)) {
2493 2529                                  error = SET_ERROR(EACCES);
2494 2530                          }
2495 2531                  }
2496 2532          } else if (error == 0) {
2497 2533                  error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
2498 2534                      needed_bits, needed_bits);
2499 2535          }
2500 2536  
2501 2537  
2502 2538          if (is_attr)
2503 2539                  VN_RELE(ZTOV(xzp));
2504 2540  
2505 2541          return (error);
2506 2542  }
2507 2543  
2508 2544  /*
2509 2545   * Translate traditional unix VREAD/VWRITE/VEXEC mode into
2510 2546   * native ACL format and call zfs_zaccess()
2511 2547   */
2512 2548  int
2513 2549  zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
2514 2550  {
2515 2551          return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
2516 2552  }
2517 2553  
2518 2554  /*
2519 2555   * Access function for secpolicy_vnode_setattr
2520 2556   */
2521 2557  int
2522 2558  zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
2523 2559  {
2524 2560          int v4_mode = zfs_unix_to_v4(mode >> 6);
2525 2561  
2526 2562          return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
2527 2563  }
2528 2564  
2529 2565  /* See zfs_zaccess_delete() */
2530 2566  int zfs_write_implies_delete_child = 1;
2531 2567  
2532 2568  /*
2533 2569   * Determine whether delete access should be granted.
2534 2570   *
2535 2571   * The following chart outlines how we handle delete permissions, which is
2536 2572   * how recent versions of Windows (Windows 2008) handle it.  The efficiency
2537 2573   * comes from not having to check the parent ACL where the object itself grants
2538 2574   * delete:
2539 2575   *
2540 2576   *      -------------------------------------------------------
2541 2577   *      |   Parent Dir  |      Target Object Permissions      |
2542 2578   *      |  permissions  |                                     |
2543 2579   *      -------------------------------------------------------
2544 2580   *      |               | ACL Allows | ACL Denies| Delete     |
2545 2581   *      |               |  Delete    |  Delete   | unspecified|
2546 2582   *      -------------------------------------------------------
2547 2583   *      | ACL Allows    | Permit     | Deny *    | Permit     |
2548 2584   *      | DELETE_CHILD  |            |           |            |
2549 2585   *      -------------------------------------------------------
2550 2586   *      | ACL Denies    | Permit     | Deny      | Deny       |
2551 2587   *      | DELETE_CHILD  |            |           |            |
2552 2588   *      -------------------------------------------------------
2553 2589   *      | ACL specifies |            |           |            |
2554 2590   *      | only allow    | Permit     | Deny *    | Permit     |
2555 2591   *      | write and     |            |           |            |
2556 2592   *      | execute       |            |           |            |
2557 2593   *      -------------------------------------------------------
2558 2594   *      | ACL denies    |            |           |            |
2559 2595   *      | write and     | Permit     | Deny      | Deny       |
2560 2596   *      | execute       |            |           |            |
2561 2597   *      -------------------------------------------------------
2562 2598   *         ^
2563 2599   *         |
2564 2600   *         Re. execute permission on the directory:  if that's missing,
2565 2601   *         the vnode lookup of the target will fail before we get here.
2566 2602   *
2567 2603   * Re [*] in the table above:  NFSv4 would normally Permit delete for
2568 2604   * these two cells of the matrix.
2569 2605   * See acl.h for notes on which ACE_... flags should be checked for which
2570 2606   * operations.  Specifically, the NFSv4 committee recommendation is in
2571 2607   * conflict with the Windows interpretation of DENY ACEs, where DENY ACEs
2572 2608   * should take precedence ahead of ALLOW ACEs.
2573 2609   *
2574 2610   * This implementation always consults the target object's ACL first.
2575 2611   * If a DENY ACE is present on the target object that specifies ACE_DELETE,
2576 2612   * delete access is denied.  If an ALLOW ACE with ACE_DELETE is present on
2577 2613   * the target object, access is allowed.  If and only if no entries with
2578 2614   * ACE_DELETE are present in the object's ACL, check the container's ACL
2579 2615   * for entries with ACE_DELETE_CHILD.
2580 2616   *
2581 2617   * A summary of the logic implemented from the table above is as follows:
2582 2618   *
2583 2619   * First check for DENY ACEs that apply.
2584 2620   * If either target or container has a deny, EACCES.
2585 2621   *
2586 2622   * Delete access can then be summarized as follows:
2587 2623   * 1: The object to be deleted grants ACE_DELETE, or
2588 2624   * 2: The containing directory grants ACE_DELETE_CHILD.
2589 2625   * In a Windows system, that would be the end of the story.
2590 2626   * In this system, (2) has some complications...
2591 2627   * 2a: "sticky" bit on a directory adds restrictions, and
2592 2628   * 2b: existing ACEs from previous versions of ZFS may
2593 2629   * not carry ACE_DELETE_CHILD where they should, so we
2594 2630   * also allow delete when ACE_WRITE_DATA is granted.
2595 2631   *
2596 2632   * Note: 2b is technically a work-around for a prior bug,
2597 2633   * which hopefully can go away some day.  For those who
2598 2634   * no longer need the work around, and for testing, this
2599 2635   * work-around is made conditional via the tunable:
2600 2636   * zfs_write_implies_delete_child
  
    | 
      ↓ open down ↓ | 
    146 lines elided | 
    
      ↑ open up ↑ | 
  
2601 2637   */
2602 2638  int
2603 2639  zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
2604 2640  {
2605 2641          uint32_t wanted_dirperms;
2606 2642          uint32_t dzp_working_mode = 0;
2607 2643          uint32_t zp_working_mode = 0;
2608 2644          int dzp_error, zp_error;
2609 2645          boolean_t dzpcheck_privs;
2610 2646          boolean_t zpcheck_privs;
     2647 +        t_audit_data_t *tad;
2611 2648  
2612 2649          if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
2613 2650                  return (SET_ERROR(EPERM));
2614 2651  
2615 2652          /*
2616 2653           * Case 1:
2617 2654           * If target object grants ACE_DELETE then we are done.  This is
2618 2655           * indicated by a return value of 0.  For this case we don't worry
2619 2656           * about the sticky bit because sticky only applies to the parent
2620 2657           * directory and this is the child access result.
2621 2658           *
2622 2659           * If we encounter a DENY ACE here, we're also done (EACCES).
2623 2660           * Note that if we hit a DENY ACE here (on the target) it should
2624 2661           * take precedence over a DENY ACE on the container, so that when
2625 2662           * we have more complete auditing support we will be able to
2626 2663           * report an access failure against the specific target.
2627 2664           * (This is part of why we're checking the target first.)
2628 2665           */
2629 2666          zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
2630 2667              &zpcheck_privs, B_FALSE, cr);
2631 2668          if (zp_error == EACCES) {
2632 2669                  /* We hit a DENY ACE. */
2633 2670                  if (!zpcheck_privs)
2634 2671                          return (SET_ERROR(zp_error));
2635 2672                  return (secpolicy_vnode_remove(cr));
2636 2673  
2637 2674          }
2638 2675          if (zp_error == 0)
2639 2676                  return (0);
2640 2677  
  
    | 
      ↓ open down ↓ | 
    20 lines elided | 
    
      ↑ open up ↑ | 
  
2641 2678          /*
2642 2679           * Case 2:
2643 2680           * If the containing directory grants ACE_DELETE_CHILD,
2644 2681           * or we're in backward compatibility mode and the
2645 2682           * containing directory has ACE_WRITE_DATA, allow.
2646 2683           * Case 2b is handled with wanted_dirperms.
2647 2684           */
2648 2685          wanted_dirperms = ACE_DELETE_CHILD;
2649 2686          if (zfs_write_implies_delete_child)
2650 2687                  wanted_dirperms |= ACE_WRITE_DATA;
     2688 +        /* never audit the parent directory access check */
     2689 +        if (AU_ZONE_AUDITING(NULL)) {
     2690 +                tad = T2A(curthread);
     2691 +                tad->tad_sacl_ctrl = SACL_AUDIT_NONE;
     2692 +        }
2651 2693          dzp_error = zfs_zaccess_common(dzp, wanted_dirperms,
2652 2694              &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
2653 2695          if (dzp_error == EACCES) {
2654 2696                  /* We hit a DENY ACE. */
2655 2697                  if (!dzpcheck_privs)
2656 2698                          return (SET_ERROR(dzp_error));
2657 2699                  return (secpolicy_vnode_remove(cr));
2658 2700          }
2659 2701  
2660 2702          /*
2661 2703           * Cases 2a, 2b (continued)
2662 2704           *
2663 2705           * Note: dzp_working_mode now contains any permissions
2664 2706           * that were NOT granted.  Therefore, if any of the
2665 2707           * wanted_dirperms WERE granted, we will have:
2666 2708           *   dzp_working_mode != wanted_dirperms
2667 2709           * We're really asking if ANY of those permissions
2668 2710           * were granted, and if so, grant delete access.
2669 2711           */
2670 2712          if (dzp_working_mode != wanted_dirperms)
2671 2713                  dzp_error = 0;
2672 2714  
2673 2715          /*
2674 2716           * dzp_error is 0 if the container granted us permissions to "modify".
2675 2717           * If we do not have permission via one or more ACEs, our current
2676 2718           * privileges may still permit us to modify the container.
2677 2719           *
2678 2720           * dzpcheck_privs is false when, e.g., the FS is read-only.
2679 2721           * Otherwise, do privilege checks for the container.
2680 2722           */
2681 2723          if (dzp_error != 0 && dzpcheck_privs) {
2682 2724                  uid_t owner;
2683 2725  
2684 2726                  /*
2685 2727                   * The secpolicy call needs the requested access and
2686 2728                   * the current access mode of the container, but it
2687 2729                   * only knows about Unix-style modes (VEXEC, VWRITE),
2688 2730                   * so this must condense the fine-grained ACE bits into
2689 2731                   * Unix modes.
2690 2732                   *
2691 2733                   * The VEXEC flag is easy, because we know that has
2692 2734                   * always been checked before we get here (during the
2693 2735                   * lookup of the target vnode).  The container has not
2694 2736                   * granted us permissions to "modify", so we do not set
2695 2737                   * the VWRITE flag in the current access mode.
2696 2738                   */
2697 2739                  owner = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr,
2698 2740                      ZFS_OWNER);
2699 2741                  dzp_error = secpolicy_vnode_access2(cr, ZTOV(dzp),
2700 2742                      owner, VEXEC, VWRITE|VEXEC);
2701 2743          }
2702 2744          if (dzp_error != 0) {
2703 2745                  /*
2704 2746                   * Note: We may have dzp_error = -1 here (from
2705 2747                   * zfs_zaccess_common).  Don't return that.
2706 2748                   */
2707 2749                  return (SET_ERROR(EACCES));
2708 2750          }
2709 2751  
2710 2752          /*
2711 2753           * At this point, we know that the directory permissions allow
2712 2754           * us to modify, but we still need to check for the additional
2713 2755           * restrictions that apply when the "sticky bit" is set.
2714 2756           *
2715 2757           * Yes, zfs_sticky_remove_access() also checks this bit, but
2716 2758           * checking it here and skipping the call below is nice when
2717 2759           * you're watching all of this with dtrace.
2718 2760           */
2719 2761          if ((dzp->z_mode & S_ISVTX) == 0)
2720 2762                  return (0);
2721 2763  
2722 2764          /*
2723 2765           * zfs_sticky_remove_access will succeed if:
2724 2766           * 1. The sticky bit is absent.
2725 2767           * 2. We pass the sticky bit restrictions.
2726 2768           * 3. We have privileges that always allow file removal.
  
    | 
      ↓ open down ↓ | 
    66 lines elided | 
    
      ↑ open up ↑ | 
  
2727 2769           */
2728 2770          return (zfs_sticky_remove_access(dzp, zp, cr));
2729 2771  }
2730 2772  
2731 2773  int
2732 2774  zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
2733 2775      znode_t *tzp, cred_t *cr)
2734 2776  {
2735 2777          int add_perm;
2736 2778          int error;
     2779 +        t_audit_data_t *tad;
     2780 +        sacl_audit_ctrl_t do_audit;
2737 2781  
2738 2782          if (szp->z_pflags & ZFS_AV_QUARANTINED)
2739 2783                  return (SET_ERROR(EACCES));
2740 2784  
2741 2785          add_perm = (ZTOV(szp)->v_type == VDIR) ?
2742 2786              ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
2743 2787  
     2788 +        if (AU_ZONE_AUDITING(NULL)) {
     2789 +                tad = T2A(curthread);
     2790 +                do_audit = tad->tad_sacl_ctrl;
     2791 +        } else {
     2792 +                tad = NULL;
     2793 +                do_audit = SACL_AUDIT_NONE;
     2794 +        }
     2795 +
2744 2796          /*
2745 2797           * Rename permissions are a combination of delete permission +
2746 2798           * add file/subdir permission.
2747 2799           */
2748 2800  
2749 2801          /*
2750 2802           * first make sure we do the delete portion.
2751 2803           *
2752 2804           * If that succeeds then check for add_file/add_subdir permissions
2753 2805           */
2754 2806  
2755      -        if (error = zfs_zaccess_delete(sdzp, szp, cr))
     2807 +        if (do_audit == SACL_AUDIT_NO_SRC)
     2808 +                tad->tad_sacl_ctrl = SACL_AUDIT_NONE;
     2809 +        error = zfs_zaccess_delete(sdzp, szp, cr);
     2810 +
     2811 +        if (do_audit == SACL_AUDIT_ALL) {
     2812 +                tad->tad_sacl_mask_src = tad->tad_sacl_mask;
     2813 +                tad->tad_sacl_mask.tas_smask = 0;
     2814 +                tad->tad_sacl_mask.tas_fmask = 0;
     2815 +        }
     2816 +        if (error != 0)
2756 2817                  return (error);
2757 2818  
     2819 +        if (do_audit != SACL_AUDIT_NONE)
     2820 +                tad->tad_sacl_ctrl = do_audit;
     2821 +
2758 2822          /*
2759 2823           * If we have a tzp, see if we can delete it.
2760 2824           */
2761 2825          if (tzp) {
2762      -                if (error = zfs_zaccess_delete(tdzp, tzp, cr))
     2826 +                error = zfs_zaccess_delete(tdzp, tzp, cr);
     2827 +                if (do_audit != SACL_AUDIT_NONE) {
     2828 +                        tad->tad_sacl_mask_dest = tad->tad_sacl_mask;
     2829 +                        tad->tad_sacl_mask.tas_smask = 0;
     2830 +                        tad->tad_sacl_mask.tas_fmask = 0;
     2831 +                }
     2832 +                if (error != 0)
2763 2833                          return (error);
     2834 +                if (do_audit != SACL_AUDIT_NONE)
     2835 +                        tad->tad_sacl_ctrl = do_audit;
2764 2836          }
2765 2837  
2766 2838          /*
2767 2839           * Now check for add permissions
2768 2840           */
2769 2841          error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
2770 2842  
     2843 +        /* do_audit: leave directory audit info in sacl_mask. */
     2844 +
2771 2845          return (error);
2772 2846  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX