1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/time.h>
  30 #include <sys/systm.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/resource.h>
  33 #include <sys/vfs.h>
  34 #include <sys/vnode.h>
  35 #include <sys/sid.h>
  36 #include <sys/file.h>
  37 #include <sys/stat.h>
  38 #include <sys/kmem.h>
  39 #include <sys/cmn_err.h>
  40 #include <sys/errno.h>
  41 #include <sys/unistd.h>
  42 #include <sys/sdt.h>
  43 #include <sys/fs/zfs.h>
  44 #include <sys/mode.h>
  45 #include <sys/policy.h>
  46 #include <sys/zfs_znode.h>
  47 #include <sys/zfs_fuid.h>
  48 #include <sys/zfs_acl.h>
  49 #include <sys/zfs_dir.h>
  50 #include <sys/zfs_vfsops.h>
  51 #include <sys/dmu.h>
  52 #include <sys/dnode.h>
  53 #include <sys/zap.h>
  54 #include <sys/sa.h>
  55 #include "fs/fs_subr.h"
  56 #include <acl/acl_common.h>
  57 #include <c2/audit.h>
  58 #include <c2/audit_kernel.h>
  59 
/* Shorthand names for the two basic access ACE types. */
#define ALLOW   ACE_ACCESS_ALLOWED_ACE_TYPE
#define DENY    ACE_ACCESS_DENIED_ACE_TYPE
/* Inclusive range of ACE type values accepted by zfs_acl_valid_ace_type(). */
#define MAX_ACE_TYPE    ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
#define MIN_ACE_TYPE    ALLOW

/*
 * Value of (flags & ACE_TYPE_FLAGS) that this file compares against to
 * recognize an owning-group entry.
 */
#define OWNING_GROUP            (ACE_GROUP|ACE_IDENTIFIER_GROUP)
/* Access-mask bit sets; users are outside this part of the file. */
#define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
    ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
#define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
    ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)

#define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
    ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
    ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
    ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)

/* Write-type permission bits, split into data and attribute groups. */
#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
#define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
    ACE_DELETE|ACE_DELETE_CHILD)
#define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)

/* NOTE: OGE_CLEAR and OKAY_MASK_BITS currently expand to the same bit set. */
#define OGE_CLEAR       (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

#define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
    ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)

/* Every inheritance-related ACE flag. */
#define ALL_INHERIT     (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
    ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)

#define RESTRICTED_CLEAR        (ACE_WRITE_ACL|ACE_WRITE_OWNER)

/* ACL-wide hint/flag groupings. */
#define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
    ZFS_ACL_PROTECTED)

#define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
    ZFS_ACL_OBJ_ACE)

/* The execute bits of a POSIX mode for user, group and other. */
#define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
 101 
 102 static uint16_t
 103 zfs_ace_v0_get_type(void *acep)
 104 {
 105         return (((zfs_oldace_t *)acep)->z_type);
 106 }
 107 
 108 static uint16_t
 109 zfs_ace_v0_get_flags(void *acep)
 110 {
 111         return (((zfs_oldace_t *)acep)->z_flags);
 112 }
 113 
 114 static uint32_t
 115 zfs_ace_v0_get_mask(void *acep)
 116 {
 117         return (((zfs_oldace_t *)acep)->z_access_mask);
 118 }
 119 
 120 static uint64_t
 121 zfs_ace_v0_get_who(void *acep)
 122 {
 123         return (((zfs_oldace_t *)acep)->z_fuid);
 124 }
 125 
 126 static void
 127 zfs_ace_v0_set_type(void *acep, uint16_t type)
 128 {
 129         ((zfs_oldace_t *)acep)->z_type = type;
 130 }
 131 
 132 static void
 133 zfs_ace_v0_set_flags(void *acep, uint16_t flags)
 134 {
 135         ((zfs_oldace_t *)acep)->z_flags = flags;
 136 }
 137 
 138 static void
 139 zfs_ace_v0_set_mask(void *acep, uint32_t mask)
 140 {
 141         ((zfs_oldace_t *)acep)->z_access_mask = mask;
 142 }
 143 
 144 static void
 145 zfs_ace_v0_set_who(void *acep, uint64_t who)
 146 {
 147         ((zfs_oldace_t *)acep)->z_fuid = who;
 148 }
 149 
 150 /*ARGSUSED*/
 151 static size_t
 152 zfs_ace_v0_size(void *acep)
 153 {
 154         return (sizeof (zfs_oldace_t));
 155 }
 156 
 157 static size_t
 158 zfs_ace_v0_abstract_size(void)
 159 {
 160         return (sizeof (zfs_oldace_t));
 161 }
 162 
 163 static int
 164 zfs_ace_v0_mask_off(void)
 165 {
 166         return (offsetof(zfs_oldace_t, z_access_mask));
 167 }
 168 
 169 /*ARGSUSED*/
 170 static int
 171 zfs_ace_v0_data(void *acep, void **datap)
 172 {
 173         *datap = NULL;
 174         return (0);
 175 }
 176 
/*
 * Operations vector for old-format (v0) ACEs.  zfs_acl_alloc() installs
 * it for any ACL version other than ZFS_ACL_VERSION_FUID.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of acl_ops_t.
 */
static acl_ops_t zfs_acl_v0_ops = {
        zfs_ace_v0_get_mask,
        zfs_ace_v0_set_mask,
        zfs_ace_v0_get_flags,
        zfs_ace_v0_set_flags,
        zfs_ace_v0_get_type,
        zfs_ace_v0_set_type,
        zfs_ace_v0_get_who,
        zfs_ace_v0_set_who,
        zfs_ace_v0_size,
        zfs_ace_v0_abstract_size,
        zfs_ace_v0_mask_off,
        zfs_ace_v0_data
};
 191 
 192 static uint16_t
 193 zfs_ace_fuid_get_type(void *acep)
 194 {
 195         return (((zfs_ace_hdr_t *)acep)->z_type);
 196 }
 197 
 198 static uint16_t
 199 zfs_ace_fuid_get_flags(void *acep)
 200 {
 201         return (((zfs_ace_hdr_t *)acep)->z_flags);
 202 }
 203 
 204 static uint32_t
 205 zfs_ace_fuid_get_mask(void *acep)
 206 {
 207         return (((zfs_ace_hdr_t *)acep)->z_access_mask);
 208 }
 209 
 210 static uint64_t
 211 zfs_ace_fuid_get_who(void *args)
 212 {
 213         uint16_t entry_type;
 214         zfs_ace_t *acep = args;
 215 
 216         entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
 217 
 218         if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
 219             entry_type == ACE_EVERYONE)
 220                 return (-1);
 221         return (((zfs_ace_t *)acep)->z_fuid);
 222 }
 223 
 224 static void
 225 zfs_ace_fuid_set_type(void *acep, uint16_t type)
 226 {
 227         ((zfs_ace_hdr_t *)acep)->z_type = type;
 228 }
 229 
 230 static void
 231 zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
 232 {
 233         ((zfs_ace_hdr_t *)acep)->z_flags = flags;
 234 }
 235 
 236 static void
 237 zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
 238 {
 239         ((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
 240 }
 241 
 242 static void
 243 zfs_ace_fuid_set_who(void *arg, uint64_t who)
 244 {
 245         zfs_ace_t *acep = arg;
 246 
 247         uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
 248 
 249         if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
 250             entry_type == ACE_EVERYONE)
 251                 return;
 252         acep->z_fuid = who;
 253 }
 254 
 255 static size_t
 256 zfs_ace_fuid_size(void *acep)
 257 {
 258         zfs_ace_hdr_t *zacep = acep;
 259         uint16_t entry_type;
 260 
 261         switch (zacep->z_type) {
 262         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 263         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 264         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 265         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 266                 return (sizeof (zfs_object_ace_t));
 267         case ALLOW:
 268         case DENY:
 269                 entry_type =
 270                     (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
 271                 if (entry_type == ACE_OWNER ||
 272                     entry_type == OWNING_GROUP ||
 273                     entry_type == ACE_EVERYONE)
 274                         return (sizeof (zfs_ace_hdr_t));
 275                 /*FALLTHROUGH*/
 276         default:
 277                 return (sizeof (zfs_ace_t));
 278         }
 279 }
 280 
 281 static size_t
 282 zfs_ace_fuid_abstract_size(void)
 283 {
 284         return (sizeof (zfs_ace_hdr_t));
 285 }
 286 
 287 static int
 288 zfs_ace_fuid_mask_off(void)
 289 {
 290         return (offsetof(zfs_ace_hdr_t, z_access_mask));
 291 }
 292 
 293 static int
 294 zfs_ace_fuid_data(void *acep, void **datap)
 295 {
 296         zfs_ace_t *zacep = acep;
 297         zfs_object_ace_t *zobjp;
 298 
 299         switch (zacep->z_hdr.z_type) {
 300         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 301         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 302         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 303         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 304                 zobjp = acep;
 305                 *datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
 306                 return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
 307         default:
 308                 *datap = NULL;
 309                 return (0);
 310         }
 311 }
 312 
/*
 * Operations vector for FUID-format ACEs.  zfs_acl_alloc() installs it
 * for ZFS_ACL_VERSION_FUID ACLs, and zfs_acl_xform() switches an ACL to
 * it while converting from the old format.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of acl_ops_t.
 */
static acl_ops_t zfs_acl_fuid_ops = {
        zfs_ace_fuid_get_mask,
        zfs_ace_fuid_set_mask,
        zfs_ace_fuid_get_flags,
        zfs_ace_fuid_set_flags,
        zfs_ace_fuid_get_type,
        zfs_ace_fuid_set_type,
        zfs_ace_fuid_get_who,
        zfs_ace_fuid_set_who,
        zfs_ace_fuid_size,
        zfs_ace_fuid_abstract_size,
        zfs_ace_fuid_mask_off,
        zfs_ace_fuid_data
};
 327 
/*
 * The following three functions are provided for compatibility with
 * older ZPL versions in order to determine if the file used to have
 * an external ACL and what version of ACL previously existed on the
 * file.  Would really be nice to not need this, sigh.
 */
 334 uint64_t
 335 zfs_external_acl(znode_t *zp)
 336 {
 337         zfs_acl_phys_t acl_phys;
 338         int error;
 339 
 340         if (zp->z_is_sa)
 341                 return (0);
 342 
 343         /*
 344          * Need to deal with a potential
 345          * race where zfs_sa_upgrade could cause
 346          * z_isa_sa to change.
 347          *
 348          * If the lookup fails then the state of z_is_sa should have
 349          * changed.
 350          */
 351 
 352         if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
 353             &acl_phys, sizeof (acl_phys))) == 0)
 354                 return (acl_phys.z_acl_extern_obj);
 355         else {
 356                 /*
 357                  * after upgrade the SA_ZPL_ZNODE_ACL should have been
 358                  * removed
 359                  */
 360                 VERIFY(zp->z_is_sa && error == ENOENT);
 361                 return (0);
 362         }
 363 }
 364 
 365 /*
 366  * Determine size of ACL in bytes
 367  *
 368  * This is more complicated than it should be since we have to deal
 369  * with old external ACLs.
 370  */
 371 static int
 372 zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
 373     zfs_acl_phys_t *aclphys)
 374 {
 375         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 376         uint64_t acl_count;
 377         int size;
 378         int error;
 379 
 380         ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 381         if (zp->z_is_sa) {
 382                 if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
 383                     &size)) != 0)
 384                         return (error);
 385                 *aclsize = size;
 386                 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
 387                     &acl_count, sizeof (acl_count))) != 0)
 388                         return (error);
 389                 *aclcount = acl_count;
 390         } else {
 391                 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
 392                     aclphys, sizeof (*aclphys))) != 0)
 393                         return (error);
 394 
 395                 if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
 396                         *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
 397                         *aclcount = aclphys->z_acl_size;
 398                 } else {
 399                         *aclsize = aclphys->z_acl_size;
 400                         *aclcount = aclphys->z_acl_count;
 401                 }
 402         }
 403         return (0);
 404 }
 405 
 406 int
 407 zfs_znode_acl_version(znode_t *zp)
 408 {
 409         zfs_acl_phys_t acl_phys;
 410 
 411         if (zp->z_is_sa)
 412                 return (ZFS_ACL_VERSION_FUID);
 413         else {
 414                 int error;
 415 
 416                 /*
 417                  * Need to deal with a potential
 418                  * race where zfs_sa_upgrade could cause
 419                  * z_isa_sa to change.
 420                  *
 421                  * If the lookup fails then the state of z_is_sa should have
 422                  * changed.
 423                  */
 424                 if ((error = sa_lookup(zp->z_sa_hdl,
 425                     SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
 426                     &acl_phys, sizeof (acl_phys))) == 0)
 427                         return (acl_phys.z_acl_version);
 428                 else {
 429                         /*
 430                          * After upgrade SA_ZPL_ZNODE_ACL should have
 431                          * been removed.
 432                          */
 433                         VERIFY(zp->z_is_sa && error == ENOENT);
 434                         return (ZFS_ACL_VERSION_FUID);
 435                 }
 436         }
 437 }
 438 
 439 static int
 440 zfs_acl_version(int version)
 441 {
 442         if (version < ZPL_VERSION_FUID)
 443                 return (ZFS_ACL_VERSION_INITIAL);
 444         else
 445                 return (ZFS_ACL_VERSION_FUID);
 446 }
 447 
 448 static int
 449 zfs_acl_version_zp(znode_t *zp)
 450 {
 451         return (zfs_acl_version(zp->z_zfsvfs->z_version));
 452 }
 453 
 454 zfs_acl_t *
 455 zfs_acl_alloc(int vers)
 456 {
 457         zfs_acl_t *aclp;
 458 
 459         aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
 460         list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
 461             offsetof(zfs_acl_node_t, z_next));
 462         aclp->z_version = vers;
 463         if (vers == ZFS_ACL_VERSION_FUID)
 464                 aclp->z_ops = zfs_acl_fuid_ops;
 465         else
 466                 aclp->z_ops = zfs_acl_v0_ops;
 467         return (aclp);
 468 }
 469 
 470 zfs_acl_node_t *
 471 zfs_acl_node_alloc(size_t bytes)
 472 {
 473         zfs_acl_node_t *aclnode;
 474 
 475         aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
 476         if (bytes) {
 477                 aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP);
 478                 aclnode->z_allocdata = aclnode->z_acldata;
 479                 aclnode->z_allocsize = bytes;
 480                 aclnode->z_size = bytes;
 481         }
 482 
 483         return (aclnode);
 484 }
 485 
 486 static void
 487 zfs_acl_node_free(zfs_acl_node_t *aclnode)
 488 {
 489         if (aclnode->z_allocsize)
 490                 kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
 491         kmem_free(aclnode, sizeof (zfs_acl_node_t));
 492 }
 493 
 494 static void
 495 zfs_acl_release_nodes(zfs_acl_t *aclp)
 496 {
 497         zfs_acl_node_t *aclnode;
 498 
 499         while (aclnode = list_head(&aclp->z_acl)) {
 500                 list_remove(&aclp->z_acl, aclnode);
 501                 zfs_acl_node_free(aclnode);
 502         }
 503         aclp->z_acl_count = 0;
 504         aclp->z_acl_bytes = 0;
 505 }
 506 
/*
 * Destroy an ACL obtained from zfs_acl_alloc(): release all of its
 * nodes, tear down the (now empty) node list, then free the ACL
 * structure itself.
 */
void
zfs_acl_free(zfs_acl_t *aclp)
{
        /* The list must be emptied before list_destroy() is called. */
        zfs_acl_release_nodes(aclp);
        list_destroy(&aclp->z_acl);
        kmem_free(aclp, sizeof (zfs_acl_t));
}
 514 
 515 static boolean_t
 516 zfs_acl_valid_ace_type(uint_t type, uint_t flags)
 517 {
 518         uint16_t entry_type;
 519 
 520         switch (type) {
 521         case ALLOW:
 522         case DENY:
 523         case ACE_SYSTEM_AUDIT_ACE_TYPE:
 524         case ACE_SYSTEM_ALARM_ACE_TYPE:
 525                 entry_type = flags & ACE_TYPE_FLAGS;
 526                 return (entry_type == ACE_OWNER ||
 527                     entry_type == OWNING_GROUP ||
 528                     entry_type == ACE_EVERYONE || entry_type == 0 ||
 529                     entry_type == ACE_IDENTIFIER_GROUP);
 530         default:
 531                 if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
 532                         return (B_TRUE);
 533         }
 534         return (B_FALSE);
 535 }
 536 
 537 static boolean_t
 538 zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
 539 {
 540         /*
 541          * first check type of entry
 542          */
 543 
 544         if (!zfs_acl_valid_ace_type(type, iflags))
 545                 return (B_FALSE);
 546 
 547         switch (type) {
 548         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 549         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 550         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 551         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 552                 if (aclp->z_version < ZFS_ACL_VERSION_FUID)
 553                         return (B_FALSE);
 554                 aclp->z_hints |= ZFS_ACL_OBJ_ACE;
 555         }
 556 
 557         /*
 558          * next check inheritance level flags
 559          */
 560 
 561         if (obj_type == VDIR &&
 562             (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
 563                 aclp->z_hints |= ZFS_INHERIT_ACE;
 564 
 565         if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
 566                 if ((iflags & (ACE_FILE_INHERIT_ACE|
 567                     ACE_DIRECTORY_INHERIT_ACE)) == 0) {
 568                         return (B_FALSE);
 569                 }
 570         }
 571 
 572         return (B_TRUE);
 573 }
 574 
 575 static void *
 576 zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
 577     uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
 578 {
 579         zfs_acl_node_t *aclnode;
 580 
 581         ASSERT(aclp);
 582 
 583         if (start == NULL) {
 584                 aclnode = list_head(&aclp->z_acl);
 585                 if (aclnode == NULL)
 586                         return (NULL);
 587 
 588                 aclp->z_next_ace = aclnode->z_acldata;
 589                 aclp->z_curr_node = aclnode;
 590                 aclnode->z_ace_idx = 0;
 591         }
 592 
 593         aclnode = aclp->z_curr_node;
 594 
 595         if (aclnode == NULL)
 596                 return (NULL);
 597 
 598         if (aclnode->z_ace_idx >= aclnode->z_ace_count) {
 599                 aclnode = list_next(&aclp->z_acl, aclnode);
 600                 if (aclnode == NULL)
 601                         return (NULL);
 602                 else {
 603                         aclp->z_curr_node = aclnode;
 604                         aclnode->z_ace_idx = 0;
 605                         aclp->z_next_ace = aclnode->z_acldata;
 606                 }
 607         }
 608 
 609         if (aclnode->z_ace_idx < aclnode->z_ace_count) {
 610                 void *acep = aclp->z_next_ace;
 611                 size_t ace_size;
 612 
 613                 /*
 614                  * Make sure we don't overstep our bounds
 615                  */
 616                 ace_size = aclp->z_ops.ace_size(acep);
 617 
 618                 if (((caddr_t)acep + ace_size) >
 619                     ((caddr_t)aclnode->z_acldata + aclnode->z_size)) {
 620                         return (NULL);
 621                 }
 622 
 623                 *iflags = aclp->z_ops.ace_flags_get(acep);
 624                 *type = aclp->z_ops.ace_type_get(acep);
 625                 *access_mask = aclp->z_ops.ace_mask_get(acep);
 626                 *who = aclp->z_ops.ace_who_get(acep);
 627                 aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
 628                 aclnode->z_ace_idx++;
 629 
 630                 return ((void *)acep);
 631         }
 632         return (NULL);
 633 }
 634 
 635 /*ARGSUSED*/
 636 static uint64_t
 637 zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
 638     uint16_t *flags, uint16_t *type, uint32_t *mask)
 639 {
 640         zfs_acl_t *aclp = datap;
 641         zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie;
 642         uint64_t who;
 643 
 644         acep = zfs_acl_next_ace(aclp, acep, &who, mask,
 645             flags, type);
 646         return ((uint64_t)(uintptr_t)acep);
 647 }
 648 
 649 static zfs_acl_node_t *
 650 zfs_acl_curr_node(zfs_acl_t *aclp)
 651 {
 652         ASSERT(aclp->z_curr_node);
 653         return (aclp->z_curr_node);
 654 }
 655 
 656 /*
 657  * Copy ACE to internal ZFS format.
 658  * While processing the ACL each ACE will be validated for correctness.
 659  * ACE FUIDs will be created later.
 660  */
 661 int
 662 zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
 663     void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
 664     zfs_fuid_info_t **fuidp, cred_t *cr)
 665 {
 666         int i;
 667         uint16_t entry_type;
 668         zfs_ace_t *aceptr = z_acl;
 669         ace_t *acep = datap;
 670         zfs_object_ace_t *zobjacep;
 671         ace_object_t *aceobjp;
 672 
 673         for (i = 0; i != aclcnt; i++) {
 674                 aceptr->z_hdr.z_access_mask = acep->a_access_mask;
 675                 aceptr->z_hdr.z_flags = acep->a_flags;
 676                 aceptr->z_hdr.z_type = acep->a_type;
 677                 entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
 678                 if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
 679                     entry_type != ACE_EVERYONE) {
 680                         aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
 681                             cr, (entry_type == 0) ?
 682                             ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
 683                 }
 684 
 685                 /*
 686                  * Make sure ACE is valid
 687                  */
 688                 if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
 689                     aceptr->z_hdr.z_flags) != B_TRUE)
 690                         return (SET_ERROR(EINVAL));
 691 
 692                 switch (acep->a_type) {
 693                 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 694                 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 695                 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 696                 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 697                         zobjacep = (zfs_object_ace_t *)aceptr;
 698                         aceobjp = (ace_object_t *)acep;
 699 
 700                         bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
 701                             sizeof (aceobjp->a_obj_type));
 702                         bcopy(aceobjp->a_inherit_obj_type,
 703                             zobjacep->z_inherit_type,
 704                             sizeof (aceobjp->a_inherit_obj_type));
 705                         acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
 706                         break;
 707                 default:
 708                         acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
 709                 }
 710 
 711                 aceptr = (zfs_ace_t *)((caddr_t)aceptr +
 712                     aclp->z_ops.ace_size(aceptr));
 713         }
 714 
 715         *size = (caddr_t)aceptr - (caddr_t)z_acl;
 716 
 717         return (0);
 718 }
 719 
 720 /*
 721  * Copy ZFS ACEs to fixed size ace_t layout
 722  */
 723 static void
 724 zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
 725     void *datap, int filter)
 726 {
 727         uint64_t who;
 728         uint32_t access_mask;
 729         uint16_t iflags, type;
 730         zfs_ace_hdr_t *zacep = NULL;
 731         ace_t *acep = datap;
 732         ace_object_t *objacep;
 733         zfs_object_ace_t *zobjacep;
 734         size_t ace_size;
 735         uint16_t entry_type;
 736 
 737         while (zacep = zfs_acl_next_ace(aclp, zacep,
 738             &who, &access_mask, &iflags, &type)) {
 739 
 740                 switch (type) {
 741                 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 742                 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 743                 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 744                 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 745                         if (filter) {
 746                                 continue;
 747                         }
 748                         zobjacep = (zfs_object_ace_t *)zacep;
 749                         objacep = (ace_object_t *)acep;
 750                         bcopy(zobjacep->z_object_type,
 751                             objacep->a_obj_type,
 752                             sizeof (zobjacep->z_object_type));
 753                         bcopy(zobjacep->z_inherit_type,
 754                             objacep->a_inherit_obj_type,
 755                             sizeof (zobjacep->z_inherit_type));
 756                         ace_size = sizeof (ace_object_t);
 757                         break;
 758                 default:
 759                         ace_size = sizeof (ace_t);
 760                         break;
 761                 }
 762 
 763                 entry_type = (iflags & ACE_TYPE_FLAGS);
 764                 if ((entry_type != ACE_OWNER &&
 765                     entry_type != OWNING_GROUP &&
 766                     entry_type != ACE_EVERYONE)) {
 767                         acep->a_who = zfs_fuid_map_id(zfsvfs, who,
 768                             cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
 769                             ZFS_ACE_GROUP : ZFS_ACE_USER);
 770                 } else {
 771                         acep->a_who = (uid_t)(int64_t)who;
 772                 }
 773                 acep->a_access_mask = access_mask;
 774                 acep->a_flags = iflags;
 775                 acep->a_type = type;
 776                 acep = (ace_t *)((caddr_t)acep + ace_size);
 777         }
 778 }
 779 
 780 static int
 781 zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
 782     zfs_oldace_t *z_acl, int aclcnt, size_t *size)
 783 {
 784         int i;
 785         zfs_oldace_t *aceptr = z_acl;
 786 
 787         for (i = 0; i != aclcnt; i++, aceptr++) {
 788                 aceptr->z_access_mask = acep[i].a_access_mask;
 789                 aceptr->z_type = acep[i].a_type;
 790                 aceptr->z_flags = acep[i].a_flags;
 791                 aceptr->z_fuid = acep[i].a_who;
 792                 /*
 793                  * Make sure ACE is valid
 794                  */
 795                 if (zfs_ace_valid(obj_type, aclp, aceptr->z_type,
 796                     aceptr->z_flags) != B_TRUE)
 797                         return (SET_ERROR(EINVAL));
 798         }
 799         *size = (caddr_t)aceptr - (caddr_t)z_acl;
 800         return (0);
 801 }
 802 
 803 /*
 804  * convert old ACL format to new
 805  */
 806 void
 807 zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
 808 {
 809         zfs_oldace_t *oldaclp;
 810         int i;
 811         uint16_t type, iflags;
 812         uint32_t access_mask;
 813         uint64_t who;
 814         void *cookie = NULL;
 815         zfs_acl_node_t *newaclnode;
 816 
 817         ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
 818         /*
 819          * First create the ACE in a contiguous piece of memory
 820          * for zfs_copy_ace_2_fuid().
 821          *
 822          * We only convert an ACL once, so this won't happen
 823          * everytime.
 824          */
 825         oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
 826             KM_SLEEP);
 827         i = 0;
 828         while (cookie = zfs_acl_next_ace(aclp, cookie, &who,
 829             &access_mask, &iflags, &type)) {
 830                 oldaclp[i].z_flags = iflags;
 831                 oldaclp[i].z_type = type;
 832                 oldaclp[i].z_fuid = who;
 833                 oldaclp[i++].z_access_mask = access_mask;
 834         }
 835 
 836         newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
 837             sizeof (zfs_object_ace_t));
 838         aclp->z_ops = zfs_acl_fuid_ops;
 839         VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
 840             oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
 841             &newaclnode->z_size, NULL, cr) == 0);
 842         newaclnode->z_ace_count = aclp->z_acl_count;
 843         aclp->z_version = ZFS_ACL_VERSION;
 844         kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
 845 
 846         /*
 847          * Release all previous ACL nodes
 848          */
 849 
 850         zfs_acl_release_nodes(aclp);
 851 
 852         list_insert_head(&aclp->z_acl, newaclnode);
 853 
 854         aclp->z_acl_bytes = newaclnode->z_size;
 855         aclp->z_acl_count = newaclnode->z_ace_count;
 856 
 857 }
 858 
 859 /*
 860  * Convert unix access mask to v4 access mask
 861  */
 862 static uint32_t
 863 zfs_unix_to_v4(uint32_t access_mask)
 864 {
 865         uint32_t new_mask = 0;
 866 
 867         if (access_mask & S_IXOTH)
 868                 new_mask |= ACE_EXECUTE;
 869         if (access_mask & S_IWOTH)
 870                 new_mask |= ACE_WRITE_DATA;
 871         if (access_mask & S_IROTH)
 872                 new_mask |= ACE_READ_DATA;
 873         return (new_mask);
 874 }
 875 
 876 static void
 877 zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
 878     uint16_t access_type, uint64_t fuid, uint16_t entry_type)
 879 {
 880         uint16_t type = entry_type & ACE_TYPE_FLAGS;
 881 
 882         aclp->z_ops.ace_mask_set(acep, access_mask);
 883         aclp->z_ops.ace_type_set(acep, access_type);
 884         aclp->z_ops.ace_flags_set(acep, entry_type);
 885         if ((type != ACE_OWNER && type != OWNING_GROUP &&
 886             type != ACE_EVERYONE))
 887                 aclp->z_ops.ace_who_set(acep, fuid);
 888 }
 889 
 890 /*
 891  * Determine mode of file based on ACL.
 892  */
 893 uint64_t
 894 zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
 895     uint64_t *pflags, uint64_t fuid, uint64_t fgid)
 896 {
 897         int             entry_type;
 898         mode_t          mode;
 899         mode_t          seen = 0;
 900         zfs_ace_hdr_t   *acep = NULL;
 901         uint64_t        who;
 902         uint16_t        iflags, type;
 903         uint32_t        access_mask;
 904         boolean_t       an_exec_denied = B_FALSE;
 905 
 906         mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
 907 
 908         while (acep = zfs_acl_next_ace(aclp, acep, &who,
 909             &access_mask, &iflags, &type)) {
 910 
 911                 if (!zfs_acl_valid_ace_type(type, iflags))
 912                         continue;
 913 
 914                 entry_type = (iflags & ACE_TYPE_FLAGS);
 915 
 916                 /*
 917                  * Skip over any inherit_only ACEs
 918                  */
 919                 if (iflags & ACE_INHERIT_ONLY_ACE)
 920                         continue;
 921 
 922                 if (entry_type == ACE_OWNER || (entry_type == 0 &&
 923                     who == fuid)) {
 924                         if ((access_mask & ACE_READ_DATA) &&
 925                             (!(seen & S_IRUSR))) {
 926                                 seen |= S_IRUSR;
 927                                 if (type == ALLOW) {
 928                                         mode |= S_IRUSR;
 929                                 }
 930                         }
 931                         if ((access_mask & ACE_WRITE_DATA) &&
 932                             (!(seen & S_IWUSR))) {
 933                                 seen |= S_IWUSR;
 934                                 if (type == ALLOW) {
 935                                         mode |= S_IWUSR;
 936                                 }
 937                         }
 938                         if ((access_mask & ACE_EXECUTE) &&
 939                             (!(seen & S_IXUSR))) {
 940                                 seen |= S_IXUSR;
 941                                 if (type == ALLOW) {
 942                                         mode |= S_IXUSR;
 943                                 }
 944                         }
 945                 } else if (entry_type == OWNING_GROUP ||
 946                     (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
 947                         if ((access_mask & ACE_READ_DATA) &&
 948                             (!(seen & S_IRGRP))) {
 949                                 seen |= S_IRGRP;
 950                                 if (type == ALLOW) {
 951                                         mode |= S_IRGRP;
 952                                 }
 953                         }
 954                         if ((access_mask & ACE_WRITE_DATA) &&
 955                             (!(seen & S_IWGRP))) {
 956                                 seen |= S_IWGRP;
 957                                 if (type == ALLOW) {
 958                                         mode |= S_IWGRP;
 959                                 }
 960                         }
 961                         if ((access_mask & ACE_EXECUTE) &&
 962                             (!(seen & S_IXGRP))) {
 963                                 seen |= S_IXGRP;
 964                                 if (type == ALLOW) {
 965                                         mode |= S_IXGRP;
 966                                 }
 967                         }
 968                 } else if (entry_type == ACE_EVERYONE) {
 969                         if ((access_mask & ACE_READ_DATA)) {
 970                                 if (!(seen & S_IRUSR)) {
 971                                         seen |= S_IRUSR;
 972                                         if (type == ALLOW) {
 973                                                 mode |= S_IRUSR;
 974                                         }
 975                                 }
 976                                 if (!(seen & S_IRGRP)) {
 977                                         seen |= S_IRGRP;
 978                                         if (type == ALLOW) {
 979                                                 mode |= S_IRGRP;
 980                                         }
 981                                 }
 982                                 if (!(seen & S_IROTH)) {
 983                                         seen |= S_IROTH;
 984                                         if (type == ALLOW) {
 985                                                 mode |= S_IROTH;
 986                                         }
 987                                 }
 988                         }
 989                         if ((access_mask & ACE_WRITE_DATA)) {
 990                                 if (!(seen & S_IWUSR)) {
 991                                         seen |= S_IWUSR;
 992                                         if (type == ALLOW) {
 993                                                 mode |= S_IWUSR;
 994                                         }
 995                                 }
 996                                 if (!(seen & S_IWGRP)) {
 997                                         seen |= S_IWGRP;
 998                                         if (type == ALLOW) {
 999                                                 mode |= S_IWGRP;
1000                                         }
1001                                 }
1002                                 if (!(seen & S_IWOTH)) {
1003                                         seen |= S_IWOTH;
1004                                         if (type == ALLOW) {
1005                                                 mode |= S_IWOTH;
1006                                         }
1007                                 }
1008                         }
1009                         if ((access_mask & ACE_EXECUTE)) {
1010                                 if (!(seen & S_IXUSR)) {
1011                                         seen |= S_IXUSR;
1012                                         if (type == ALLOW) {
1013                                                 mode |= S_IXUSR;
1014                                         }
1015                                 }
1016                                 if (!(seen & S_IXGRP)) {
1017                                         seen |= S_IXGRP;
1018                                         if (type == ALLOW) {
1019                                                 mode |= S_IXGRP;
1020                                         }
1021                                 }
1022                                 if (!(seen & S_IXOTH)) {
1023                                         seen |= S_IXOTH;
1024                                         if (type == ALLOW) {
1025                                                 mode |= S_IXOTH;
1026                                         }
1027                                 }
1028                         }
1029                 } else {
1030                         /*
1031                          * Only care if this IDENTIFIER_GROUP or
1032                          * USER ACE denies execute access to someone,
1033                          * mode is not affected
1034                          */
1035                         if ((access_mask & ACE_EXECUTE) && type == DENY)
1036                                 an_exec_denied = B_TRUE;
1037                 }
1038         }
1039 
1040         /*
1041          * Failure to allow is effectively a deny, so execute permission
1042          * is denied if it was never mentioned or if we explicitly
1043          * weren't allowed it.
1044          */
1045         if (!an_exec_denied &&
1046             ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
1047             (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
1048                 an_exec_denied = B_TRUE;
1049 
1050         if (an_exec_denied)
1051                 *pflags &= ~ZFS_NO_EXECS_DENIED;
1052         else
1053                 *pflags |= ZFS_NO_EXECS_DENIED;
1054 
1055         return (mode);
1056 }
1057 
1058 /*
1059  * Read an external acl object.  If the intent is to modify, always
1060  * create a new acl and leave any cached acl in place.
1061  */
1062 static int
1063 zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp,
1064     boolean_t will_modify)
1065 {
1066         zfs_acl_t       *aclp;
1067         int             aclsize;
1068         int             acl_count;
1069         zfs_acl_node_t  *aclnode;
1070         zfs_acl_phys_t  znode_acl;
1071         int             version;
1072         int             error;
1073         boolean_t       drop_lock = B_FALSE;
1074 
1075         ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1076 
1077         if (zp->z_acl_cached && !will_modify) {
1078                 *aclpp = zp->z_acl_cached;
1079                 return (0);
1080         }
1081 
1082         /*
1083          * close race where znode could be upgrade while trying to
1084          * read the znode attributes.
1085          *
1086          * But this could only happen if the file isn't already an SA
1087          * znode
1088          */
1089         if (!zp->z_is_sa && !have_lock) {
1090                 mutex_enter(&zp->z_lock);
1091                 drop_lock = B_TRUE;
1092         }
1093         version = zfs_znode_acl_version(zp);
1094 
1095         if ((error = zfs_acl_znode_info(zp, &aclsize,
1096             &acl_count, &znode_acl)) != 0) {
1097                 goto done;
1098         }
1099 
1100         aclp = zfs_acl_alloc(version);
1101 
1102         aclp->z_acl_count = acl_count;
1103         aclp->z_acl_bytes = aclsize;
1104 
1105         aclnode = zfs_acl_node_alloc(aclsize);
1106         aclnode->z_ace_count = aclp->z_acl_count;
1107         aclnode->z_size = aclsize;
1108 
1109         if (!zp->z_is_sa) {
1110                 if (znode_acl.z_acl_extern_obj) {
1111                         error = dmu_read(zp->z_zfsvfs->z_os,
1112                             znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
1113                             aclnode->z_acldata, DMU_READ_PREFETCH);
1114                 } else {
1115                         bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
1116                             aclnode->z_size);
1117                 }
1118         } else {
1119                 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
1120                     aclnode->z_acldata, aclnode->z_size);
1121         }
1122 
1123         if (error != 0) {
1124                 zfs_acl_free(aclp);
1125                 zfs_acl_node_free(aclnode);
1126                 /* convert checksum errors into IO errors */
1127                 if (error == ECKSUM)
1128                         error = SET_ERROR(EIO);
1129                 goto done;
1130         }
1131 
1132         list_insert_head(&aclp->z_acl, aclnode);
1133 
1134         *aclpp = aclp;
1135         if (!will_modify)
1136                 zp->z_acl_cached = aclp;
1137 done:
1138         if (drop_lock)
1139                 mutex_exit(&zp->z_lock);
1140         return (error);
1141 }
1142 
1143 /*ARGSUSED*/
1144 void
1145 zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
1146     boolean_t start, void *userdata)
1147 {
1148         zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
1149 
1150         if (start) {
1151                 cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
1152         } else {
1153                 cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
1154                     cb->cb_acl_node);
1155         }
1156         *dataptr = cb->cb_acl_node->z_acldata;
1157         *length = cb->cb_acl_node->z_size;
1158 }
1159 
1160 int
1161 zfs_acl_chown_setattr(znode_t *zp)
1162 {
1163         int error;
1164         zfs_acl_t *aclp;
1165 
1166         ASSERT(MUTEX_HELD(&zp->z_lock));
1167         ASSERT(MUTEX_HELD(&zp->z_acl_lock));
1168 
1169         if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0)
1170                 zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
1171                     &zp->z_pflags, zp->z_uid, zp->z_gid);
1172         return (error);
1173 }
1174 
1175 /*
1176  * common code for setting ACLs.
1177  *
1178  * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl.
1179  * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's
1180  * already checked the acl and knows whether to inherit.
1181  */
1182 int
1183 zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
1184 {
1185         int                     error;
1186         zfsvfs_t                *zfsvfs = zp->z_zfsvfs;
1187         dmu_object_type_t       otype;
1188         zfs_acl_locator_cb_t    locate = { 0 };
1189         uint64_t                mode;
1190         sa_bulk_attr_t          bulk[5];
1191         uint64_t                ctime[2];
1192         int                     count = 0;
1193 
1194         mode = zp->z_mode;
1195 
1196         mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
1197             zp->z_uid, zp->z_gid);
1198 
1199         zp->z_mode = mode;
1200         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
1201             &mode, sizeof (mode));
1202         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
1203             &zp->z_pflags, sizeof (zp->z_pflags));
1204         SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
1205             &ctime, sizeof (ctime));
1206 
1207         if (zp->z_acl_cached) {
1208                 zfs_acl_free(zp->z_acl_cached);
1209                 zp->z_acl_cached = NULL;
1210         }
1211 
1212         /*
1213          * Upgrade needed?
1214          */
1215         if (!zfsvfs->z_use_fuids) {
1216                 otype = DMU_OT_OLDACL;
1217         } else {
1218                 if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
1219                     (zfsvfs->z_version >= ZPL_VERSION_FUID))
1220                         zfs_acl_xform(zp, aclp, cr);
1221                 ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
1222                 otype = DMU_OT_ACL;
1223         }
1224 
1225         /*
1226          * Arrgh, we have to handle old on disk format
1227          * as well as newer (preferred) SA format.
1228          */
1229 
1230         if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
1231                 locate.cb_aclp = aclp;
1232                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
1233                     zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
1234                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
1235                     NULL, &aclp->z_acl_count, sizeof (uint64_t));
1236         } else { /* Painful legacy way */
1237                 zfs_acl_node_t *aclnode;
1238                 uint64_t off = 0;
1239                 zfs_acl_phys_t acl_phys;
1240                 uint64_t aoid;
1241 
1242                 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
1243                     &acl_phys, sizeof (acl_phys))) != 0)
1244                         return (error);
1245 
1246                 aoid = acl_phys.z_acl_extern_obj;
1247 
1248                 if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1249                         /*
1250                          * If ACL was previously external and we are now
1251                          * converting to new ACL format then release old
1252                          * ACL object and create a new one.
1253                          */
1254                         if (aoid &&
1255                             aclp->z_version != acl_phys.z_acl_version) {
1256                                 error = dmu_object_free(zfsvfs->z_os, aoid, tx);
1257                                 if (error)
1258                                         return (error);
1259                                 aoid = 0;
1260                         }
1261                         if (aoid == 0) {
1262                                 aoid = dmu_object_alloc(zfsvfs->z_os,
1263                                     otype, aclp->z_acl_bytes,
1264                                     otype == DMU_OT_ACL ?
1265                                     DMU_OT_SYSACL : DMU_OT_NONE,
1266                                     otype == DMU_OT_ACL ?
1267                                     DN_MAX_BONUSLEN : 0, tx);
1268                         } else {
1269                                 (void) dmu_object_set_blocksize(zfsvfs->z_os,
1270                                     aoid, aclp->z_acl_bytes, 0, tx);
1271                         }
1272                         acl_phys.z_acl_extern_obj = aoid;
1273                         for (aclnode = list_head(&aclp->z_acl); aclnode;
1274                             aclnode = list_next(&aclp->z_acl, aclnode)) {
1275                                 if (aclnode->z_ace_count == 0)
1276                                         continue;
1277                                 dmu_write(zfsvfs->z_os, aoid, off,
1278                                     aclnode->z_size, aclnode->z_acldata, tx);
1279                                 off += aclnode->z_size;
1280                         }
1281                 } else {
1282                         void *start = acl_phys.z_ace_data;
1283                         /*
1284                          * Migrating back embedded?
1285                          */
1286                         if (acl_phys.z_acl_extern_obj) {
1287                                 error = dmu_object_free(zfsvfs->z_os,
1288                                     acl_phys.z_acl_extern_obj, tx);
1289                                 if (error)
1290                                         return (error);
1291                                 acl_phys.z_acl_extern_obj = 0;
1292                         }
1293 
1294                         for (aclnode = list_head(&aclp->z_acl); aclnode;
1295                             aclnode = list_next(&aclp->z_acl, aclnode)) {
1296                                 if (aclnode->z_ace_count == 0)
1297                                         continue;
1298                                 bcopy(aclnode->z_acldata, start,
1299                                     aclnode->z_size);
1300                                 start = (caddr_t)start + aclnode->z_size;
1301                         }
1302                 }
1303                 /*
1304                  * If Old version then swap count/bytes to match old
1305                  * layout of znode_acl_phys_t.
1306                  */
1307                 if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
1308                         acl_phys.z_acl_size = aclp->z_acl_count;
1309                         acl_phys.z_acl_count = aclp->z_acl_bytes;
1310                 } else {
1311                         acl_phys.z_acl_size = aclp->z_acl_bytes;
1312                         acl_phys.z_acl_count = aclp->z_acl_count;
1313                 }
1314                 acl_phys.z_acl_version = aclp->z_version;
1315 
1316                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
1317                     &acl_phys, sizeof (acl_phys));
1318         }
1319 
1320         /*
1321          * Replace ACL wide bits, but first clear them.
1322          */
1323         zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;
1324 
1325         zp->z_pflags |= aclp->z_hints;
1326 
1327         if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
1328                 zp->z_pflags |= ZFS_ACL_TRIVIAL;
1329 
1330         zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE);
1331         return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
1332 }
1333 
1334 static void
1335 zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t split, boolean_t trim,
1336     zfs_acl_t *aclp)
1337 {
1338         void            *acep = NULL;
1339         uint64_t        who;
1340         int             new_count, new_bytes;
1341         int             ace_size;
1342         int             entry_type;
1343         uint16_t        iflags, type;
1344         uint32_t        access_mask;
1345         zfs_acl_node_t  *newnode;
1346         size_t          abstract_size = aclp->z_ops.ace_abstract_size();
1347         void            *zacep;
1348         boolean_t       isdir;
1349         trivial_acl_t   masks;
1350 
1351         new_count = new_bytes = 0;
1352 
1353         isdir = (vtype == VDIR);
1354 
1355         acl_trivial_access_masks((mode_t)mode, isdir, &masks);
1356 
1357         newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
1358 
1359         zacep = newnode->z_acldata;
1360         if (masks.allow0) {
1361                 zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
1362                 zacep = (void *)((uintptr_t)zacep + abstract_size);
1363                 new_count++;
1364                 new_bytes += abstract_size;
1365         }
1366         if (masks.deny1) {
1367                 zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
1368                 zacep = (void *)((uintptr_t)zacep + abstract_size);
1369                 new_count++;
1370                 new_bytes += abstract_size;
1371         }
1372         if (masks.deny2) {
1373                 zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
1374                 zacep = (void *)((uintptr_t)zacep + abstract_size);
1375                 new_count++;
1376                 new_bytes += abstract_size;
1377         }
1378 
1379         while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
1380             &iflags, &type)) {
1381                 entry_type = (iflags & ACE_TYPE_FLAGS);
1382                 /*
1383                  * ACEs used to represent the file mode may be divided
1384                  * into an equivalent pair of inherit-only and regular
1385                  * ACEs, if they are inheritable.
1386                  * Skip regular ACEs, which are replaced by the new mode.
1387                  */
1388                 if (split && (entry_type == ACE_OWNER ||
1389                     entry_type == OWNING_GROUP ||
1390                     entry_type == ACE_EVERYONE)) {
1391                         if (!isdir || !(iflags &
1392                             (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1393                                 continue;
1394                         /*
1395                          * We preserve owner@, group@, or @everyone
1396                          * permissions, if they are inheritable, by
1397                          * copying them to inherit_only ACEs. This
1398                          * prevents inheritable permissions from being
1399                          * altered along with the file mode.
1400                          */
1401                         iflags |= ACE_INHERIT_ONLY_ACE;
1402                 }
1403 
1404                 /*
1405                  * If this ACL has any inheritable ACEs, mark that in
1406                  * the hints (which are later masked into the pflags)
1407                  * so create knows to do inheritance.
1408                  */
1409                 if (isdir && (iflags &
1410                     (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
1411                         aclp->z_hints |= ZFS_INHERIT_ACE;
1412 
1413                 if ((type != ALLOW && type != DENY) ||
1414                     (iflags & ACE_INHERIT_ONLY_ACE)) {
1415                         switch (type) {
1416                         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1417                         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1418                         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1419                         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1420                                 aclp->z_hints |= ZFS_ACL_OBJ_ACE;
1421                                 break;
1422                         }
1423                 } else {
1424                         /*
1425                          * Limit permissions granted by ACEs to be no greater
1426                          * than permissions of the requested group mode.
1427                          * Applies when the "aclmode" property is set to
1428                          * "groupmask".
1429                          */
1430                         if ((type == ALLOW) && trim)
1431                                 access_mask &= masks.group;
1432                 }
1433                 zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
1434                 ace_size = aclp->z_ops.ace_size(acep);
1435                 zacep = (void *)((uintptr_t)zacep + ace_size);
1436                 new_count++;
1437                 new_bytes += ace_size;
1438         }
1439         zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER);
1440         zacep = (void *)((uintptr_t)zacep + abstract_size);
1441         zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP);
1442         zacep = (void *)((uintptr_t)zacep + abstract_size);
1443         zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE);
1444 
1445         new_count += 3;
1446         new_bytes += abstract_size * 3;
1447         zfs_acl_release_nodes(aclp);
1448         aclp->z_acl_count = new_count;
1449         aclp->z_acl_bytes = new_bytes;
1450         newnode->z_ace_count = new_count;
1451         newnode->z_size = new_bytes;
1452         list_insert_tail(&aclp->z_acl, newnode);
1453 }
1454 
1455 int
1456 zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
1457 {
1458         int error = 0;
1459 
1460         mutex_enter(&zp->z_acl_lock);
1461         mutex_enter(&zp->z_lock);
1462         if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
1463                 *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
1464         else
1465                 error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
1466 
1467         if (error == 0) {
1468                 (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
1469                 zfs_acl_chmod(ZTOV(zp)->v_type, mode, B_TRUE,
1470                     (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
1471         }
1472         mutex_exit(&zp->z_lock);
1473         mutex_exit(&zp->z_acl_lock);
1474 
1475         return (error);
1476 }
1477 
1478 /*
1479  * Should ACE be inherited?
1480  */
1481 static int
1482 zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
1483 {
1484         int     iflags = (acep_flags & 0xf);
1485 
1486         if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
1487                 return (1);
1488         else if (iflags & ACE_FILE_INHERIT_ACE)
1489                 return (!((vtype == VDIR) &&
1490                     (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
1491         return (0);
1492 }
1493 
1494 /*
1495  * inherit inheritable ACEs from parent
1496  */
1497 static zfs_acl_t *
1498 zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
1499     uint64_t mode)
1500 {
1501         void            *pacep = NULL;
1502         void            *acep;
1503         zfs_acl_node_t  *aclnode;
1504         zfs_acl_t       *aclp = NULL;
1505         uint64_t        who;
1506         uint32_t        access_mask;
1507         uint16_t        iflags, newflags, type;
1508         size_t          ace_size;
1509         void            *data1, *data2;
1510         size_t          data1sz, data2sz;
1511         uint_t          aclinherit;
1512         boolean_t       isdir = (vtype == VDIR);
1513 
1514         aclp = zfs_acl_alloc(paclp->z_version);
1515         aclinherit = zfsvfs->z_acl_inherit;
1516         if (aclinherit == ZFS_ACL_DISCARD || vtype == VLNK)
1517                 return (aclp);
1518 
1519         while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
1520             &access_mask, &iflags, &type)) {
1521 
1522                 /*
1523                  * don't inherit bogus ACEs
1524                  */
1525                 if (!zfs_acl_valid_ace_type(type, iflags))
1526                         continue;
1527 
1528                 /*
1529                  * Check if ACE is inheritable by this vnode
1530                  */
1531                 if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) ||
1532                     !zfs_ace_can_use(vtype, iflags))
1533                         continue;
1534 
1535                 /*
1536                  * Strip inherited execute permission from file if
1537                  * not in mode
1538                  */
1539                 if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW &&
1540                     !isdir && ((mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) {
1541                         access_mask &= ~ACE_EXECUTE;
1542                 }
1543 
1544                 /*
1545                  * Strip write_acl and write_owner from permissions
1546                  * when inheriting an ACE
1547                  */
1548                 if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) {
1549                         access_mask &= ~RESTRICTED_CLEAR;
1550                 }
1551 
1552                 ace_size = aclp->z_ops.ace_size(pacep);
1553                 aclnode = zfs_acl_node_alloc(ace_size);
1554                 list_insert_tail(&aclp->z_acl, aclnode);
1555                 acep = aclnode->z_acldata;
1556 
1557                 zfs_set_ace(aclp, acep, access_mask, type,
1558                     who, iflags|ACE_INHERITED_ACE);
1559 
1560                 /*
1561                  * Copy special opaque data if any
1562                  */
1563                 if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) {
1564                         VERIFY((data2sz = aclp->z_ops.ace_data(acep,
1565                             &data2)) == data1sz);
1566                         bcopy(data1, data2, data2sz);
1567                 }
1568 
1569                 aclp->z_acl_count++;
1570                 aclnode->z_ace_count++;
1571                 aclp->z_acl_bytes += aclnode->z_size;
1572                 newflags = aclp->z_ops.ace_flags_get(acep);
1573 
1574                 /*
1575                  * If ACE is not to be inherited further, or if the vnode is
1576                  * not a directory, remove all inheritance flags
1577                  */
1578                 if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) {
1579                         newflags &= ~ALL_INHERIT;
1580                         aclp->z_ops.ace_flags_set(acep,
1581                             newflags|ACE_INHERITED_ACE);
1582                         continue;
1583                 }
1584 
1585                 /*
1586                  * This directory has an inheritable ACE
1587                  */
1588                 aclp->z_hints |= ZFS_INHERIT_ACE;
1589 
1590                 /*
1591                  * If only FILE_INHERIT is set then turn on
1592                  * inherit_only
1593                  */
1594                 if ((iflags & (ACE_FILE_INHERIT_ACE |
1595                     ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
1596                         newflags |= ACE_INHERIT_ONLY_ACE;
1597                         aclp->z_ops.ace_flags_set(acep,
1598                             newflags|ACE_INHERITED_ACE);
1599                 } else {
1600                         newflags &= ~ACE_INHERIT_ONLY_ACE;
1601                         aclp->z_ops.ace_flags_set(acep,
1602                             newflags|ACE_INHERITED_ACE);
1603                 }
1604         }
1605 
1606         return (aclp);
1607 }
1608 
1609 /*
1610  * Create file system object initial permissions
1611  * including inheritable ACEs.
1612  * Also, create FUIDs for owner and group.
1613  */
1614 int
1615 zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
1616     vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
1617 {
1618         int             error;
1619         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1620         zfs_acl_t       *paclp;
1621         gid_t           gid;
1622         boolean_t       trim = B_FALSE;
1623         boolean_t       inherited = B_FALSE;
1624 
1625         bzero(acl_ids, sizeof (zfs_acl_ids_t));
1626         acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
1627 
1628         if (vsecp)
1629                 if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
1630                     &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
1631                         return (error);
1632         /*
1633          * Determine uid and gid.
1634          */
1635         if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
1636             ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
1637                 acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
1638                     (uint64_t)vap->va_uid, cr,
1639                     ZFS_OWNER, &acl_ids->z_fuidp);
1640                 acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1641                     (uint64_t)vap->va_gid, cr,
1642                     ZFS_GROUP, &acl_ids->z_fuidp);
1643                 gid = vap->va_gid;
1644         } else {
1645                 acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
1646                     cr, &acl_ids->z_fuidp);
1647                 acl_ids->z_fgid = 0;
1648                 if (vap->va_mask & AT_GID)  {
1649                         acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
1650                             (uint64_t)vap->va_gid,
1651                             cr, ZFS_GROUP, &acl_ids->z_fuidp);
1652                         gid = vap->va_gid;
1653                         if (acl_ids->z_fgid != dzp->z_gid &&
1654                             !groupmember(vap->va_gid, cr) &&
1655                             secpolicy_vnode_create_gid(cr) != 0)
1656                                 acl_ids->z_fgid = 0;
1657                 }
1658                 if (acl_ids->z_fgid == 0) {
1659                         if (dzp->z_mode & S_ISGID) {
1660                                 char            *domain;
1661                                 uint32_t        rid;
1662 
1663                                 acl_ids->z_fgid = dzp->z_gid;
1664                                 gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
1665                                     cr, ZFS_GROUP);
1666 
1667                                 if (zfsvfs->z_use_fuids &&
1668                                     IS_EPHEMERAL(acl_ids->z_fgid)) {
1669                                         domain = zfs_fuid_idx_domain(
1670                                             &zfsvfs->z_fuid_idx,
1671                                             FUID_INDEX(acl_ids->z_fgid));
1672                                         rid = FUID_RID(acl_ids->z_fgid);
1673                                         zfs_fuid_node_add(&acl_ids->z_fuidp,
1674                                             domain, rid,
1675                                             FUID_INDEX(acl_ids->z_fgid),
1676                                             acl_ids->z_fgid, ZFS_GROUP);
1677                                 }
1678                         } else {
1679                                 acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
1680                                     ZFS_GROUP, cr, &acl_ids->z_fuidp);
1681                                 gid = crgetgid(cr);
1682                         }
1683                 }
1684         }
1685 
1686         /*
1687          * If we're creating a directory, and the parent directory has the
1688          * set-GID bit set, set in on the new directory.
1689          * Otherwise, if the user is neither privileged nor a member of the
1690          * file's new group, clear the file's set-GID bit.
1691          */
1692 
1693         if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
1694             (vap->va_type == VDIR)) {
1695                 acl_ids->z_mode |= S_ISGID;
1696         } else {
1697                 if ((acl_ids->z_mode & S_ISGID) &&
1698                     secpolicy_vnode_setids_setgids(cr, gid) != 0)
1699                         acl_ids->z_mode &= ~S_ISGID;
1700         }
1701 
1702         if (acl_ids->z_aclp == NULL) {
1703                 mutex_enter(&dzp->z_acl_lock);
1704                 mutex_enter(&dzp->z_lock);
1705                 if (!(flag & IS_ROOT_NODE) &&
1706                     (dzp->z_pflags & ZFS_INHERIT_ACE) &&
1707                     !(dzp->z_pflags & ZFS_XATTR)) {
1708                         VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
1709                             &paclp, B_FALSE));
1710                         acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
1711                             vap->va_type, paclp, acl_ids->z_mode);
1712                         inherited = B_TRUE;
1713                 } else {
1714                         acl_ids->z_aclp =
1715                             zfs_acl_alloc(zfs_acl_version_zp(dzp));
1716                         acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
1717                 }
1718                 mutex_exit(&dzp->z_lock);
1719                 mutex_exit(&dzp->z_acl_lock);
1720 
1721                 if (vap->va_type == VDIR)
1722                         acl_ids->z_aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
1723 
1724                 if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK &&
1725                     zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH &&
1726                     zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X)
1727                         trim = B_TRUE;
1728                 zfs_acl_chmod(vap->va_type, acl_ids->z_mode, B_FALSE, trim,
1729                     acl_ids->z_aclp);
1730         }
1731 
1732         if (inherited || vsecp) {
1733                 acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
1734                     acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
1735                     acl_ids->z_fuid, acl_ids->z_fgid);
1736                 if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
1737                         acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
1738         }
1739 
1740         return (0);
1741 }
1742 
1743 /*
1744  * Free ACL and fuid_infop, but not the acl_ids structure
1745  */
1746 void
1747 zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
1748 {
1749         if (acl_ids->z_aclp)
1750                 zfs_acl_free(acl_ids->z_aclp);
1751         if (acl_ids->z_fuidp)
1752                 zfs_fuid_info_free(acl_ids->z_fuidp);
1753         acl_ids->z_aclp = NULL;
1754         acl_ids->z_fuidp = NULL;
1755 }
1756 
1757 boolean_t
1758 zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
1759 {
1760         return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
1761             zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
1762 }
1763 
1764 /*
1765  * Retrieve a file's ACL
1766  */
1767 int
1768 zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1769 {
1770         zfs_acl_t       *aclp;
1771         ulong_t         mask;
1772         int             error;
1773         int             count = 0;
1774         int             largeace = 0;
1775 
1776         mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
1777             VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
1778 
1779         if (mask == 0)
1780                 return (SET_ERROR(ENOSYS));
1781 
1782         if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
1783                 return (error);
1784 
1785         mutex_enter(&zp->z_acl_lock);
1786 
1787         error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
1788         if (error != 0) {
1789                 mutex_exit(&zp->z_acl_lock);
1790                 return (error);
1791         }
1792 
1793         /*
1794          * Scan ACL to determine number of ACEs
1795          */
1796         if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
1797                 void *zacep = NULL;
1798                 uint64_t who;
1799                 uint32_t access_mask;
1800                 uint16_t type, iflags;
1801 
1802                 while (zacep = zfs_acl_next_ace(aclp, zacep,
1803                     &who, &access_mask, &iflags, &type)) {
1804                         switch (type) {
1805                         case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
1806                         case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
1807                         case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
1808                         case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
1809                                 largeace++;
1810                                 continue;
1811                         default:
1812                                 count++;
1813                         }
1814                 }
1815                 vsecp->vsa_aclcnt = count;
1816         } else
1817                 count = (int)aclp->z_acl_count;
1818 
1819         if (mask & VSA_ACECNT) {
1820                 vsecp->vsa_aclcnt = count;
1821         }
1822 
1823         if (mask & VSA_ACE) {
1824                 size_t aclsz;
1825 
1826                 aclsz = count * sizeof (ace_t) +
1827                     sizeof (ace_object_t) * largeace;
1828 
1829                 vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
1830                 vsecp->vsa_aclentsz = aclsz;
1831 
1832                 if (aclp->z_version == ZFS_ACL_VERSION_FUID)
1833                         zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
1834                             vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
1835                 else {
1836                         zfs_acl_node_t *aclnode;
1837                         void *start = vsecp->vsa_aclentp;
1838 
1839                         for (aclnode = list_head(&aclp->z_acl); aclnode;
1840                             aclnode = list_next(&aclp->z_acl, aclnode)) {
1841                                 bcopy(aclnode->z_acldata, start,
1842                                     aclnode->z_size);
1843                                 start = (caddr_t)start + aclnode->z_size;
1844                         }
1845                         ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
1846                             aclp->z_acl_bytes);
1847                 }
1848         }
1849         if (mask & VSA_ACE_ACLFLAGS) {
1850                 vsecp->vsa_aclflags = 0;
1851                 if (zp->z_pflags & ZFS_ACL_DEFAULTED)
1852                         vsecp->vsa_aclflags |= ACL_DEFAULTED;
1853                 if (zp->z_pflags & ZFS_ACL_PROTECTED)
1854                         vsecp->vsa_aclflags |= ACL_PROTECTED;
1855                 if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
1856                         vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
1857         }
1858 
1859         mutex_exit(&zp->z_acl_lock);
1860 
1861         return (0);
1862 }
1863 
1864 int
1865 zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
1866     vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
1867 {
1868         zfs_acl_t *aclp;
1869         zfs_acl_node_t *aclnode;
1870         int aclcnt = vsecp->vsa_aclcnt;
1871         int error;
1872 
1873         if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
1874                 return (SET_ERROR(EINVAL));
1875 
1876         aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
1877 
1878         aclp->z_hints = 0;
1879         aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
1880         if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
1881                 if ((error = zfs_copy_ace_2_oldace(obj_type, aclp,
1882                     (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata,
1883                     aclcnt, &aclnode->z_size)) != 0) {
1884                         zfs_acl_free(aclp);
1885                         zfs_acl_node_free(aclnode);
1886                         return (error);
1887                 }
1888         } else {
1889                 if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp,
1890                     vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
1891                     &aclnode->z_size, fuidp, cr)) != 0) {
1892                         zfs_acl_free(aclp);
1893                         zfs_acl_node_free(aclnode);
1894                         return (error);
1895                 }
1896         }
1897         aclp->z_acl_bytes = aclnode->z_size;
1898         aclnode->z_ace_count = aclcnt;
1899         aclp->z_acl_count = aclcnt;
1900         list_insert_head(&aclp->z_acl, aclnode);
1901 
1902         /*
1903          * If flags are being set then add them to z_hints
1904          */
1905         if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
1906                 if (vsecp->vsa_aclflags & ACL_PROTECTED)
1907                         aclp->z_hints |= ZFS_ACL_PROTECTED;
1908                 if (vsecp->vsa_aclflags & ACL_DEFAULTED)
1909                         aclp->z_hints |= ZFS_ACL_DEFAULTED;
1910                 if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
1911                         aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
1912         }
1913 
1914         *zaclp = aclp;
1915 
1916         return (0);
1917 }
1918 
1919 /*
1920  * Set a file's ACL
1921  */
1922 int
1923 zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
1924 {
1925         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
1926         zilog_t         *zilog = zfsvfs->z_log;
1927         ulong_t         mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1928         dmu_tx_t        *tx;
1929         int             error;
1930         zfs_acl_t       *aclp;
1931         zfs_fuid_info_t *fuidp = NULL;
1932         boolean_t       fuid_dirtied;
1933         uint64_t        acl_obj;
1934 
1935         if (mask == 0)
1936                 return (SET_ERROR(ENOSYS));
1937 
1938         if (zp->z_pflags & ZFS_IMMUTABLE)
1939                 return (SET_ERROR(EPERM));
1940 
1941         if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
1942                 return (error);
1943 
1944         error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
1945             &aclp);
1946         if (error)
1947                 return (error);
1948 
1949         /*
1950          * If ACL wide flags aren't being set then preserve any
1951          * existing flags.
1952          */
1953         if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
1954                 aclp->z_hints |=
1955                     (zp->z_pflags & V4_ACL_WIDE_FLAGS);
1956         }
1957 top:
1958         mutex_enter(&zp->z_acl_lock);
1959         mutex_enter(&zp->z_lock);
1960 
1961         tx = dmu_tx_create(zfsvfs->z_os);
1962 
1963         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1964 
1965         fuid_dirtied = zfsvfs->z_fuid_dirty;
1966         if (fuid_dirtied)
1967                 zfs_fuid_txhold(zfsvfs, tx);
1968 
1969         /*
1970          * If old version and ACL won't fit in bonus and we aren't
1971          * upgrading then take out necessary DMU holds
1972          */
1973 
1974         if ((acl_obj = zfs_external_acl(zp)) != 0) {
1975                 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
1976                     zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
1977                         dmu_tx_hold_free(tx, acl_obj, 0,
1978                             DMU_OBJECT_END);
1979                         dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1980                             aclp->z_acl_bytes);
1981                 } else {
1982                         dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
1983                 }
1984         } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1985                 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
1986         }
1987 
1988         zfs_sa_upgrade_txholds(tx, zp);
1989         error = dmu_tx_assign(tx, TXG_NOWAIT);
1990         if (error) {
1991                 mutex_exit(&zp->z_acl_lock);
1992                 mutex_exit(&zp->z_lock);
1993 
1994                 if (error == ERESTART) {
1995                         dmu_tx_wait(tx);
1996                         dmu_tx_abort(tx);
1997                         goto top;
1998                 }
1999                 dmu_tx_abort(tx);
2000                 zfs_acl_free(aclp);
2001                 return (error);
2002         }
2003 
2004         error = zfs_aclset_common(zp, aclp, cr, tx);
2005         ASSERT(error == 0);
2006         ASSERT(zp->z_acl_cached == NULL);
2007         zp->z_acl_cached = aclp;
2008 
2009         if (fuid_dirtied)
2010                 zfs_fuid_sync(zfsvfs, tx);
2011 
2012         zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
2013 
2014         if (fuidp)
2015                 zfs_fuid_info_free(fuidp);
2016         dmu_tx_commit(tx);
2017 done:
2018         mutex_exit(&zp->z_lock);
2019         mutex_exit(&zp->z_acl_lock);
2020 
2021         return (error);
2022 }
2023 
2024 /*
2025  * Check accesses of interest (AoI) against attributes of the dataset
2026  * such as read-only.  Returns zero if no AoI conflict with dataset
2027  * attributes, otherwise an appropriate errno is returned.
2028  */
2029 static int
2030 zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
2031 {
2032         if ((v4_mode & WRITE_MASK) &&
2033             (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
2034             (!IS_DEVVP(ZTOV(zp)) ||
2035             (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
2036                 return (SET_ERROR(EROFS));
2037         }
2038 
2039         /*
2040          * Intentionally allow ZFS_READONLY through here.
2041          * See zfs_zaccess_common().
2042          */
2043         if ((v4_mode & WRITE_MASK_DATA) &&
2044             (zp->z_pflags & ZFS_IMMUTABLE)) {
2045                 return (SET_ERROR(EPERM));
2046         }
2047 
2048         if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
2049             (zp->z_pflags & ZFS_NOUNLINK)) {
2050                 return (SET_ERROR(EPERM));
2051         }
2052 
2053         if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
2054             (zp->z_pflags & ZFS_AV_QUARANTINED))) {
2055                 return (SET_ERROR(EACCES));
2056         }
2057 
2058         return (0);
2059 }
2060 
/*
 * The primary usage of this function is to loop through all of the
 * ACEs in the znode, determining what accesses of interest (AoI) to
 * the caller are allowed or denied.  The AoI are expressed as bits in
 * the working_mode parameter.  As each ACE is processed, bits covered
 * by that ACE are removed from the working_mode.  This removal
 * facilitates two things.  The first is that when the working mode is
 * empty (= 0), we know we've looked at all the AoI. The second is
 * that the ACE interpretation rules don't allow a later ACE to undo
 * something granted or denied by an earlier ACE.  Removing the
 * discovered access or denial enforces this rule.  At the end of
 * processing the ACEs, all AoI that were found to be denied are
 * placed into the working_mode, giving the caller a mask of denied
 * accesses.  Returns:
 *	0		if all AoI granted
 *	EACCES		if the denied mask is non-zero
 *	-1		if nothing was denied but some AoI were neither
 *			granted nor denied by any ACE (they remain set
 *			in working_mode)
 *	other error	if abnormal failure (e.g., IO error)
 *
 * A secondary usage of the function is to determine if any of the
 * AoI are granted.  If an ACE grants any access in
 * the working_mode, we immediately short circuit out of the function.
 * This mode is chosen by setting anyaccess to B_TRUE.  The
 * working_mode is not a denied access mask upon exit if the function
 * is used in this manner.
 *
 * When audit is B_TRUE, audit and alarm ACEs are examined as well and
 * the whole ACL is walked even after working_mode becomes empty.  The
 * access masks of matching audit/alarm ACEs are accumulated per
 * success/failure flag and stored in the current thread's audit data
 * (tad_sacl_mask) before returning.
 */
static int
zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
    boolean_t anyaccess, cred_t *cr, boolean_t audit)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zfs_acl_t	*aclp;
	int		error;
	uint64_t	who;		/* FUID from the ACE */
	uint16_t	type, iflags;
	uint16_t	entry_type;
	uint32_t	access_mask;
	uint32_t	deny_mask = 0;	/* AoI bits denied so far */
	uint32_t	sys_smask = 0;	/* audit mask: successful access */
	uint32_t	sys_fmask = 0;	/* audit mask: failed access */
	zfs_ace_hdr_t	*acep = NULL;
	boolean_t	checkit;	/* ACE ID matches */
	t_audit_data_t *tad;

	mutex_enter(&zp->z_acl_lock);

	error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
	if (error != 0) {
		mutex_exit(&zp->z_acl_lock);
		return (error);
	}

	ASSERT(zp->z_acl_cached);

	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
	    &iflags, &type)) {
		uint32_t mask_matched;

		if (!zfs_acl_valid_ace_type(type, iflags))
			continue;

		/* Inherit-only ACEs on a directory do not apply to it. */
		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
			continue;

		/* Skip ACE if it does not affect any AoI */
		mask_matched = (access_mask & *working_mode);
		if ((type == DENY || type == ALLOW) && !mask_matched)
			continue;
		/* Anything other than ALLOW/DENY only matters for audit. */
		if (!audit && type != DENY && type != ALLOW)
			continue;

		entry_type = (iflags & ACE_TYPE_FLAGS);

		checkit = B_FALSE;

		/* Decide whether this ACE's subject covers the credential. */
		switch (entry_type) {
		case ACE_OWNER:
			who = zp->z_uid;
			/*FALLTHROUGH*/
		case 0:	/* USER Entry */
			checkit = zfs_user_in_cred(zfsvfs, who, cr);
			break;
		case OWNING_GROUP:
			who = zp->z_gid;
			/*FALLTHROUGH*/
		case ACE_IDENTIFIER_GROUP:
			checkit = zfs_groupmember(zfsvfs, who, cr);
			break;
		case ACE_EVERYONE:
			checkit = B_TRUE;
			break;

		default:
			/*
			 * The zfs_acl_valid_ace_type check above
			 * should make this case impossible.
			 */
			mutex_exit(&zp->z_acl_lock);
			return (SET_ERROR(EIO));
		}

		if (checkit) {
			switch (type) {
			case DENY:
				DTRACE_PROBE3(zfs__ace__denies,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				/* Remember the denial; the bits are settled. */
				deny_mask |= mask_matched;
				*working_mode &= ~mask_matched;
				break;
			case ACE_SYSTEM_AUDIT_ACE_TYPE:
			case ACE_SYSTEM_ALARM_ACE_TYPE:
				DTRACE_PROBE3(zfs__ace__audit,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, access_mask);
				/* Uses the full ACE mask, not just the AoI. */
				if ((iflags &
				    ACE_SUCCESSFUL_ACCESS_ACE_FLAG) != 0)
					sys_smask |= access_mask;
				if ((iflags & ACE_FAILED_ACCESS_ACE_FLAG) != 0)
					sys_fmask |= access_mask;
				break;
			case ALLOW:
			default:
				DTRACE_PROBE3(zfs__ace__allows,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				/* One grant is enough in "any access" mode. */
				if (anyaccess) {
					mutex_exit(&zp->z_acl_lock);
					return (0);
				}
				*working_mode &= ~mask_matched;
				break;
			}
		}

		/*
		 * Are we done? If auditing, process the entire list
		 * to gather all audit ACEs
		 */
		if (!audit && *working_mode == 0)
			break;
	}

	mutex_exit(&zp->z_acl_lock);

	/* Publish the collected audit masks to the current thread. */
	if (audit) {
		tad = T2A(curthread);
		tad->tad_sacl_mask.tas_smask = sys_smask;
		tad->tad_sacl_mask.tas_fmask = sys_fmask;
	}

	/* Put the found 'denies' back on the working mode */
	if (deny_mask) {
		*working_mode |= deny_mask;
		return (SET_ERROR(EACCES));
	} else if (*working_mode) {
		/* Undecided AoI remain: not denied, but not granted. */
		return (-1);
	}

	return (0);
}
2224 
2225 /*
2226  * Return true if any access whatsoever granted, we don't actually
2227  * care what access is granted.
2228  */
2229 boolean_t
2230 zfs_has_access(znode_t *zp, cred_t *cr)
2231 {
2232         uint32_t have = ACE_ALL_PERMS;
2233 
2234         if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr, B_FALSE) != 0) {
2235                 uid_t owner;
2236 
2237                 owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2238                 return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0);
2239         }
2240         return (B_TRUE);
2241 }
2242 
2243 static int
2244 zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
2245     boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
2246 {
2247         zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2248         int err;
2249         boolean_t audit = B_FALSE;
2250 
2251         *working_mode = v4_mode;
2252         *check_privs = B_TRUE;
2253 
2254         /*
2255          * Short circuit empty requests
2256          */
2257         if (v4_mode == 0 || zfsvfs->z_replay) {
2258                 *working_mode = 0;
2259                 return (0);
2260         }
2261 
2262         if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
2263                 *check_privs = B_FALSE;
2264                 return (err);
2265         }
2266 
2267         /*
2268          * The caller requested that the ACL check be skipped.  This
2269          * would only happen if the caller checked VOP_ACCESS() with a
2270          * 32 bit ACE mask and already had the appropriate permissions.
2271          */
2272         if (skipaclchk) {
2273                 *working_mode = 0;
2274                 return (0);
2275         }
2276 
2277         /*
2278          * Note: ZFS_READONLY represents the "DOS R/O" attribute.
2279          * When that flag is set, we should behave as if write access
2280          * were not granted by anything in the ACL.  In particular:
2281          * We _must_ allow writes after opening the file r/w, then
2282          * setting the DOS R/O attribute, and writing some more.
2283          * (Similar to how you can write after fchmod(fd, 0444).)
2284          *
2285          * Therefore ZFS_READONLY is ignored in the dataset check
2286          * above, and checked here as if part of the ACL check.
2287          * Also note: DOS R/O is ignored for directories.
2288          */
2289         if ((v4_mode & WRITE_MASK_DATA) &&
2290             (ZTOV(zp)->v_type != VDIR) &&
2291             (zp->z_pflags & ZFS_READONLY)) {
2292                 return (SET_ERROR(EPERM));
2293         }
2294 
2295         if (cr != zone_kcred() && AU_ZONE_AUDITING(NULL)) {
2296                 t_audit_data_t *tad = T2A(curthread);
2297                 if (tad->tad_sacl_ctrl != SACL_AUDIT_NONE &&
2298                     auditev(AUE_SACL, cr) != 0) {
2299                         audit = B_TRUE;
2300                         tad->tad_sacl_ctrl = SACL_AUDIT_NONE;
2301                 }
2302         }
2303 
2304         return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr, audit));
2305 }
2306 
2307 static int
2308 zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
2309     cred_t *cr)
2310 {
2311         if (*working_mode != ACE_WRITE_DATA)
2312                 return (SET_ERROR(EACCES));
2313 
2314         return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
2315             check_privs, B_FALSE, cr));
2316 }
2317 
2318 int
2319 zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
2320 {
2321         boolean_t owner = B_FALSE;
2322         boolean_t groupmbr = B_FALSE;
2323         boolean_t is_attr;
2324         uid_t uid = crgetuid(cr);
2325         int error;
2326 
2327         if (zdp->z_pflags & ZFS_AV_QUARANTINED)
2328                 return (SET_ERROR(EACCES));
2329 
2330         is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
2331             (ZTOV(zdp)->v_type == VDIR));
2332         if (is_attr)
2333                 goto slow;
2334 
2335 
2336         mutex_enter(&zdp->z_acl_lock);
2337 
2338         if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
2339                 mutex_exit(&zdp->z_acl_lock);
2340                 return (0);
2341         }
2342 
2343         if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) {
2344                 mutex_exit(&zdp->z_acl_lock);
2345                 goto slow;
2346         }
2347 
2348         if (uid == zdp->z_uid) {
2349                 owner = B_TRUE;
2350                 if (zdp->z_mode & S_IXUSR) {
2351                         mutex_exit(&zdp->z_acl_lock);
2352                         return (0);
2353                 } else {
2354                         mutex_exit(&zdp->z_acl_lock);
2355                         goto slow;
2356                 }
2357         }
2358         if (groupmember(zdp->z_gid, cr)) {
2359                 groupmbr = B_TRUE;
2360                 if (zdp->z_mode & S_IXGRP) {
2361                         mutex_exit(&zdp->z_acl_lock);
2362                         return (0);
2363                 } else {
2364                         mutex_exit(&zdp->z_acl_lock);
2365                         goto slow;
2366                 }
2367         }
2368         if (!owner && !groupmbr) {
2369                 if (zdp->z_mode & S_IXOTH) {
2370                         mutex_exit(&zdp->z_acl_lock);
2371                         return (0);
2372                 }
2373         }
2374 
2375         mutex_exit(&zdp->z_acl_lock);
2376 
2377 slow:
2378         DTRACE_PROBE(zfs__fastpath__execute__access__miss);
2379         ZFS_ENTER(zdp->z_zfsvfs);
2380         error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
2381         ZFS_EXIT(zdp->z_zfsvfs);
2382         return (error);
2383 }
2384 
2385 /*
2386  * Determine whether Access should be granted/denied.
2387  *
2388  * The least priv subsystem is always consulted as a basic privilege
2389  * can define any form of access.
2390  */
2391 int
2392 zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
2393 {
2394         uint32_t        working_mode;
2395         int             error;
2396         int             is_attr;
2397         boolean_t       check_privs;
2398         znode_t         *xzp;
2399         znode_t         *check_zp = zp;
2400         mode_t          needed_bits;
2401         uid_t           owner;
2402 
2403         is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));
2404 
2405         /*
2406          * If attribute then validate against base file
2407          */
2408         if (is_attr) {
2409                 uint64_t        parent;
2410 
2411                 if ((error = sa_lookup(zp->z_sa_hdl,
2412                     SA_ZPL_PARENT(zp->z_zfsvfs), &parent,
2413                     sizeof (parent))) != 0)
2414                         return (error);
2415 
2416                 if ((error = zfs_zget(zp->z_zfsvfs,
2417                     parent, &xzp)) != 0)    {
2418                         return (error);
2419                 }
2420 
2421                 check_zp = xzp;
2422 
2423                 /*
2424                  * fixup mode to map to xattr perms
2425                  */
2426 
2427                 if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
2428                         mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
2429                         mode |= ACE_WRITE_NAMED_ATTRS;
2430                 }
2431 
2432                 if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
2433                         mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
2434                         mode |= ACE_READ_NAMED_ATTRS;
2435                 }
2436         }
2437 
2438         owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
2439         /*
2440          * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
2441          * in needed_bits.  Map the bits mapped by working_mode (currently
2442          * missing) in missing_bits.
2443          * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
2444          * needed_bits.
2445          */
2446         needed_bits = 0;
2447 
2448         working_mode = mode;
2449         if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
2450             owner == crgetuid(cr))
2451                 working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2452 
2453         if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2454             ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2455                 needed_bits |= VREAD;
2456         if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2457             ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2458                 needed_bits |= VWRITE;
2459         if (working_mode & ACE_EXECUTE)
2460                 needed_bits |= VEXEC;
2461 
2462         if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
2463             &check_privs, skipaclchk, cr)) == 0) {
2464                 if (is_attr)
2465                         VN_RELE(ZTOV(xzp));
2466                 return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
2467                     needed_bits, needed_bits));
2468         }
2469 
2470         if (error && !check_privs) {
2471                 if (is_attr)
2472                         VN_RELE(ZTOV(xzp));
2473                 return (error);
2474         }
2475 
2476         if (error && (flags & V_APPEND)) {
2477                 /*
2478                  * If zfs_zaccess_common checked aces, then we won't audit here.
2479                  * Otherwise, we'll try and get audit masks here.
2480                  */
2481                 error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
2482         }
2483 
2484         if (error && check_privs) {
2485                 mode_t          checkmode = 0;
2486 
2487                 /*
2488                  * First check for implicit owner permission on
2489                  * read_acl/read_attributes
2490                  */
2491 
2492                 error = 0;
2493                 ASSERT(working_mode != 0);
2494 
2495                 if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
2496                     owner == crgetuid(cr)))
2497                         working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
2498 
2499                 if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
2500                     ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
2501                         checkmode |= VREAD;
2502                 if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
2503                     ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
2504                         checkmode |= VWRITE;
2505                 if (working_mode & ACE_EXECUTE)
2506                         checkmode |= VEXEC;
2507 
2508                 error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner,
2509                     needed_bits & ~checkmode, needed_bits);
2510 
2511                 if (error == 0 && (working_mode & ACE_WRITE_OWNER))
2512                         error = secpolicy_vnode_chown(cr, owner);
2513                 if (error == 0 && (working_mode & ACE_WRITE_ACL))
2514                         error = secpolicy_vnode_setdac(cr, owner);
2515 
2516                 if (error == 0 && (working_mode &
2517                     (ACE_DELETE|ACE_DELETE_CHILD)))
2518                         error = secpolicy_vnode_remove(cr);
2519 
2520                 if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
2521                         error = secpolicy_vnode_chown(cr, owner);
2522                 }
2523                 if (error == 0) {
2524                         /*
2525                          * See if any bits other than those already checked
2526                          * for are still present.  If so then return EACCES
2527                          */
2528                         if (working_mode & ~(ZFS_CHECKED_MASKS)) {
2529                                 error = SET_ERROR(EACCES);
2530                         }
2531                 }
2532         } else if (error == 0) {
2533                 error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
2534                     needed_bits, needed_bits);
2535         }
2536 
2537 
2538         if (is_attr)
2539                 VN_RELE(ZTOV(xzp));
2540 
2541         return (error);
2542 }
2543 
2544 /*
2545  * Translate traditional unix VREAD/VWRITE/VEXEC mode into
2546  * native ACL format and call zfs_zaccess()
2547  */
2548 int
2549 zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
2550 {
2551         return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
2552 }
2553 
2554 /*
2555  * Access function for secpolicy_vnode_setattr
2556  */
2557 int
2558 zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
2559 {
2560         int v4_mode = zfs_unix_to_v4(mode >> 6);
2561 
2562         return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
2563 }
2564 
/*
 * Tunable consulted by zfs_zaccess_delete().  When non-zero (the default),
 * ACE_WRITE_DATA on the containing directory is accepted in addition to
 * ACE_DELETE_CHILD when the container's permissions are checked.
 */
int zfs_write_implies_delete_child = 1;
2567 
2568 /*
2569  * Determine whether delete access should be granted.
2570  *
 * The following chart outlines how we handle delete permissions, which is
 * how recent versions of Windows (Windows 2008) handle them.  The efficiency
 * comes from not having to check the parent ACL where the object itself grants
 * delete:
2575  *
2576  *      -------------------------------------------------------
2577  *      |   Parent Dir  |      Target Object Permissions      |
2578  *      |  permissions  |                                     |
2579  *      -------------------------------------------------------
2580  *      |               | ACL Allows | ACL Denies| Delete     |
2581  *      |               |  Delete    |  Delete   | unspecified|
2582  *      -------------------------------------------------------
2583  *      | ACL Allows    | Permit     | Deny *    | Permit     |
2584  *      | DELETE_CHILD  |            |           |            |
2585  *      -------------------------------------------------------
2586  *      | ACL Denies    | Permit     | Deny      | Deny       |
2587  *      | DELETE_CHILD  |            |           |            |
2588  *      -------------------------------------------------------
2589  *      | ACL specifies |            |           |            |
2590  *      | only allow    | Permit     | Deny *    | Permit     |
2591  *      | write and     |            |           |            |
2592  *      | execute       |            |           |            |
2593  *      -------------------------------------------------------
2594  *      | ACL denies    |            |           |            |
2595  *      | write and     | Permit     | Deny      | Deny       |
2596  *      | execute       |            |           |            |
2597  *      -------------------------------------------------------
2598  *         ^
2599  *         |
2600  *         Re. execute permission on the directory:  if that's missing,
2601  *         the vnode lookup of the target will fail before we get here.
2602  *
2603  * Re [*] in the table above:  NFSv4 would normally Permit delete for
2604  * these two cells of the matrix.
2605  * See acl.h for notes on which ACE_... flags should be checked for which
2606  * operations.  Specifically, the NFSv4 committee recommendation is in
2607  * conflict with the Windows interpretation of DENY ACEs, where DENY ACEs
2608  * should take precedence ahead of ALLOW ACEs.
2609  *
2610  * This implementation always consults the target object's ACL first.
2611  * If a DENY ACE is present on the target object that specifies ACE_DELETE,
2612  * delete access is denied.  If an ALLOW ACE with ACE_DELETE is present on
2613  * the target object, access is allowed.  If and only if no entries with
2614  * ACE_DELETE are present in the object's ACL, check the container's ACL
2615  * for entries with ACE_DELETE_CHILD.
2616  *
2617  * A summary of the logic implemented from the table above is as follows:
2618  *
2619  * First check for DENY ACEs that apply.
2620  * If either target or container has a deny, EACCES.
2621  *
2622  * Delete access can then be summarized as follows:
2623  * 1: The object to be deleted grants ACE_DELETE, or
2624  * 2: The containing directory grants ACE_DELETE_CHILD.
2625  * In a Windows system, that would be the end of the story.
2626  * In this system, (2) has some complications...
2627  * 2a: "sticky" bit on a directory adds restrictions, and
2628  * 2b: existing ACEs from previous versions of ZFS may
2629  * not carry ACE_DELETE_CHILD where they should, so we
2630  * also allow delete when ACE_WRITE_DATA is granted.
2631  *
2632  * Note: 2b is technically a work-around for a prior bug,
2633  * which hopefully can go away some day.  For those who
2634  * no longer need the work around, and for testing, this
2635  * work-around is made conditional via the tunable:
2636  * zfs_write_implies_delete_child
2637  */
2638 int
2639 zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
2640 {
2641         uint32_t wanted_dirperms;
2642         uint32_t dzp_working_mode = 0;
2643         uint32_t zp_working_mode = 0;
2644         int dzp_error, zp_error;
2645         boolean_t dzpcheck_privs;
2646         boolean_t zpcheck_privs;
2647         t_audit_data_t *tad;
2648 
2649         if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
2650                 return (SET_ERROR(EPERM));
2651 
2652         /*
2653          * Case 1:
2654          * If target object grants ACE_DELETE then we are done.  This is
2655          * indicated by a return value of 0.  For this case we don't worry
2656          * about the sticky bit because sticky only applies to the parent
2657          * directory and this is the child access result.
2658          *
2659          * If we encounter a DENY ACE here, we're also done (EACCES).
2660          * Note that if we hit a DENY ACE here (on the target) it should
2661          * take precedence over a DENY ACE on the container, so that when
2662          * we have more complete auditing support we will be able to
2663          * report an access failure against the specific target.
2664          * (This is part of why we're checking the target first.)
2665          */
2666         zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
2667             &zpcheck_privs, B_FALSE, cr);
2668         if (zp_error == EACCES) {
2669                 /* We hit a DENY ACE. */
2670                 if (!zpcheck_privs)
2671                         return (SET_ERROR(zp_error));
2672                 return (secpolicy_vnode_remove(cr));
2673 
2674         }
2675         if (zp_error == 0)
2676                 return (0);
2677 
2678         /*
2679          * Case 2:
2680          * If the containing directory grants ACE_DELETE_CHILD,
2681          * or we're in backward compatibility mode and the
2682          * containing directory has ACE_WRITE_DATA, allow.
2683          * Case 2b is handled with wanted_dirperms.
2684          */
2685         wanted_dirperms = ACE_DELETE_CHILD;
2686         if (zfs_write_implies_delete_child)
2687                 wanted_dirperms |= ACE_WRITE_DATA;
2688         /* never audit the parent directory access check */
2689         if (AU_ZONE_AUDITING(NULL)) {
2690                 tad = T2A(curthread);
2691                 tad->tad_sacl_ctrl = SACL_AUDIT_NONE;
2692         }
2693         dzp_error = zfs_zaccess_common(dzp, wanted_dirperms,
2694             &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
2695         if (dzp_error == EACCES) {
2696                 /* We hit a DENY ACE. */
2697                 if (!dzpcheck_privs)
2698                         return (SET_ERROR(dzp_error));
2699                 return (secpolicy_vnode_remove(cr));
2700         }
2701 
2702         /*
2703          * Cases 2a, 2b (continued)
2704          *
2705          * Note: dzp_working_mode now contains any permissions
2706          * that were NOT granted.  Therefore, if any of the
2707          * wanted_dirperms WERE granted, we will have:
2708          *   dzp_working_mode != wanted_dirperms
2709          * We're really asking if ANY of those permissions
2710          * were granted, and if so, grant delete access.
2711          */
2712         if (dzp_working_mode != wanted_dirperms)
2713                 dzp_error = 0;
2714 
2715         /*
2716          * dzp_error is 0 if the container granted us permissions to "modify".
2717          * If we do not have permission via one or more ACEs, our current
2718          * privileges may still permit us to modify the container.
2719          *
2720          * dzpcheck_privs is false when i.e. the FS is read-only.
2721          * Otherwise, do privilege checks for the container.
2722          */
2723         if (dzp_error != 0 && dzpcheck_privs) {
2724                 uid_t owner;
2725 
2726                 /*
2727                  * The secpolicy call needs the requested access and
2728                  * the current access mode of the container, but it
2729                  * only knows about Unix-style modes (VEXEC, VWRITE),
2730                  * so this must condense the fine-grained ACE bits into
2731                  * Unix modes.
2732                  *
2733                  * The VEXEC flag is easy, because we know that has
2734                  * always been checked before we get here (during the
2735                  * lookup of the target vnode).  The container has not
2736                  * granted us permissions to "modify", so we do not set
2737                  * the VWRITE flag in the current access mode.
2738                  */
2739                 owner = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr,
2740                     ZFS_OWNER);
2741                 dzp_error = secpolicy_vnode_access2(cr, ZTOV(dzp),
2742                     owner, VEXEC, VWRITE|VEXEC);
2743         }
2744         if (dzp_error != 0) {
2745                 /*
2746                  * Note: We may have dzp_error = -1 here (from
2747                  * zfs_zacess_common).  Don't return that.
2748                  */
2749                 return (SET_ERROR(EACCES));
2750         }
2751 
2752         /*
2753          * At this point, we know that the directory permissions allow
2754          * us to modify, but we still need to check for the additional
2755          * restrictions that apply when the "sticky bit" is set.
2756          *
2757          * Yes, zfs_sticky_remove_access() also checks this bit, but
2758          * checking it here and skipping the call below is nice when
2759          * you're watching all of this with dtrace.
2760          */
2761         if ((dzp->z_mode & S_ISVTX) == 0)
2762                 return (0);
2763 
2764         /*
2765          * zfs_sticky_remove_access will succeed if:
2766          * 1. The sticky bit is absent.
2767          * 2. We pass the sticky bit restrictions.
2768          * 3. We have privileges that always allow file removal.
2769          */
2770         return (zfs_sticky_remove_access(dzp, zp, cr));
2771 }
2772 
2773 int
2774 zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
2775     znode_t *tzp, cred_t *cr)
2776 {
2777         int add_perm;
2778         int error;
2779         t_audit_data_t *tad;
2780         sacl_audit_ctrl_t do_audit;
2781 
2782         if (szp->z_pflags & ZFS_AV_QUARANTINED)
2783                 return (SET_ERROR(EACCES));
2784 
2785         add_perm = (ZTOV(szp)->v_type == VDIR) ?
2786             ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
2787 
2788         if (AU_ZONE_AUDITING(NULL)) {
2789                 tad = T2A(curthread);
2790                 do_audit = tad->tad_sacl_ctrl;
2791         } else {
2792                 tad = NULL;
2793                 do_audit = SACL_AUDIT_NONE;
2794         }
2795 
2796         /*
2797          * Rename permissions are combination of delete permission +
2798          * add file/subdir permission.
2799          */
2800 
2801         /*
2802          * first make sure we do the delete portion.
2803          *
2804          * If that succeeds then check for add_file/add_subdir permissions
2805          */
2806 
2807         if (do_audit == SACL_AUDIT_NO_SRC)
2808                 tad->tad_sacl_ctrl = SACL_AUDIT_NONE;
2809         error = zfs_zaccess_delete(sdzp, szp, cr);
2810 
2811         if (do_audit == SACL_AUDIT_ALL) {
2812                 tad->tad_sacl_mask_src = tad->tad_sacl_mask;
2813                 tad->tad_sacl_mask.tas_smask = 0;
2814                 tad->tad_sacl_mask.tas_fmask = 0;
2815         }
2816         if (error != 0)
2817                 return (error);
2818 
2819         if (do_audit != SACL_AUDIT_NONE)
2820                 tad->tad_sacl_ctrl = do_audit;
2821 
2822         /*
2823          * If we have a tzp, see if we can delete it?
2824          */
2825         if (tzp) {
2826                 error = zfs_zaccess_delete(tdzp, tzp, cr);
2827                 if (do_audit != SACL_AUDIT_NONE) {
2828                         tad->tad_sacl_mask_dest = tad->tad_sacl_mask;
2829                         tad->tad_sacl_mask.tas_smask = 0;
2830                         tad->tad_sacl_mask.tas_fmask = 0;
2831                 }
2832                 if (error != 0)
2833                         return (error);
2834                 if (do_audit != SACL_AUDIT_NONE)
2835                         tad->tad_sacl_ctrl = do_audit;
2836         }
2837 
2838         /*
2839          * Now check for add permissions
2840          */
2841         error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr);
2842 
2843         /* do_audit: leave directory audit info in sacl_mask. */
2844 
2845         return (error);
2846 }