big-one Wdiff usr/src/common/smbsrv/smb_string.c

Print this page

NEX-19025 CIFS gets confused with filenames containing enhanced Unicode
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
and: (fix build, check-rtime)
NEX-2460 libfksmbd should not link with libsmb
SMB-65 SMB server in non-global zones (data structure changes)
Many things move to the smb_server_t object, and
many functions gain an sv arg (which server).

Split	Close
Expand all
Collapse all

          --- old/usr/src/common/smbsrv/smb_string.c
          +++ new/usr/src/common/smbsrv/smb_string.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   *
  25   25   * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26   26   * Copyright (c) 2017 by Delphix. All rights reserved.
  27   27   */
  28   28  
  29   29  #if defined(_KERNEL) || defined(_FAKE_KERNEL)
  30   30  #include <sys/types.h>
  31   31  #include <sys/sunddi.h>
  32   32  #else
  33   33  #include <stdio.h>
  34   34  #include <stdlib.h>
  35   35  #include <string.h>
  36   36  #include <strings.h>
  37   37  #endif
  38   38  #include <sys/u8_textprep.h>
  39   39  #include <smbsrv/alloc.h>
  40   40  #include <sys/errno.h>
  41   41  #include <smbsrv/string.h>
  42   42  #include <smbsrv/cp_usascii.h>
  43   43  #include <smbsrv/cp_unicode.h>
  44   44  
  45   45  #define UNICODE_N_ENTRIES       (sizeof (a_unicode) / sizeof (a_unicode[0]))
  46   46  
  47   47  /*
  48   48   * Global pointer to the current codepage: defaults to ASCII,
  49   49   * and a flag indicating whether the codepage is Unicode or ASCII.
  50   50   */
  51   51  static const smb_codepage_t *current_codepage = usascii_codepage;
  52   52  static boolean_t is_unicode = B_FALSE;
  53   53  
  54   54  static smb_codepage_t *unicode_codepage = NULL;
  55   55  
  56   56  static smb_codepage_t *smb_unicode_init(void);
  57   57  
  58   58  /*
  59   59   * strsubst
  60   60   *
  61   61   * Scan a string replacing all occurrences of orgchar with newchar.
  62   62   * Returns a pointer to s, or null if s is null.
  63   63   */
  64   64  char *
  65   65  strsubst(char *s, char orgchar, char newchar)
  66   66  {
  67   67          char *p = s;
  68   68  
  69   69          if (p == 0)
  70   70                  return (0);
  71   71  
  72   72          while (*p) {
  73   73                  if (*p == orgchar)
  74   74                          *p = newchar;
  75   75                  ++p;
  76   76          }
  77   77  
  78   78          return (s);
  79   79  }
  80   80  
  81   81  /*
  82   82   * strcanon
  83   83   *
  84   84   * Normalize a string by collapsing each run of a repeated character
  85   85   * from class in buf into a single occurrence. For example:
  86   86   *
  87   87   *              char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
  88   88   *              strcanon(buf, "/\\");
  89   89   *
  90   90   * Would result in buf containing the following string:
  91   91   *
  92   92   *              /d1/d2/d3\d4\f1.txt
  93   93   *
  94   94   * This function modifies the contents of buf in place and returns
  95   95   * a pointer to buf.
  96   96   */
  97   97  char *
  98   98  strcanon(char *buf, const char *class)
  99   99  {
 100  100          char *p = buf;
 101  101          char *q = buf;
 102  102          char *r;
 103  103  
 104  104          while (*p) {
 105  105                  *q++ = *p;
 106  106  
 107  107                  if ((r = strchr(class, *p)) != 0) {
 108  108                          while (*p == *r)
 109  109                                  ++p;
 110  110                  } else
 111  111                          ++p;
 112  112          }
 113  113  
 114  114          *q = '\0';
 115  115          return (buf);
 116  116  }
 117  117  
 118  118  void
 119  119  smb_codepage_init(void)
 120  120  {
 121  121          smb_codepage_t *cp;
 122  122  
 123  123          if (is_unicode)
 124  124                  return;
 125  125  
 126  126          if ((cp = smb_unicode_init()) != NULL) {
 127  127                  current_codepage = cp;
 128  128                  unicode_codepage = cp;
 129  129                  is_unicode = B_TRUE;
 130  130          } else {
 131  131                  current_codepage = usascii_codepage;
 132  132                  is_unicode = B_FALSE;
 133  133          }
 134  134  }
 135  135  
 136  136  void
 137  137  smb_codepage_fini(void)
 138  138  {
 139  139          if (unicode_codepage != NULL) {
 140  140                  MEM_FREE("unicode", unicode_codepage);
 141  141                  unicode_codepage = NULL;
 142  142                  current_codepage = NULL;
 143  143          }
 144  144  }
 145  145  
 146  146  /*
 147  147   * Determine whether or not a character is an uppercase character.
 148  148   * This function operates on the current codepage table. Returns
 149  149   * non-zero if the character is uppercase. Otherwise returns zero.
 150  150   */
 151  151  int
 152  152  smb_isupper(int c)
 153  153  {
 154  154          uint16_t mask = is_unicode ? 0xffff : 0xff;
 155  155  
 156  156          return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
 157  157  }
 158  158  
 159  159  /*
 160  160   * Determine whether or not a character is a lowercase character.
 161  161   * This function operates on the current codepage table. Returns
 162  162   * non-zero if the character is lowercase. Otherwise returns zero.
 163  163   */
 164  164  int
 165  165  smb_islower(int c)
 166  166  {

↓ open down ↓

166 lines elided

↑ open up ↑

 167  167          uint16_t mask = is_unicode ? 0xffff : 0xff;
 168  168  
 169  169          return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
 170  170  }
 171  171  
 172  172  /*
 173  173   * Convert individual characters to their uppercase equivalent value.
 174  174   * If the specified character is lowercase, the uppercase value will
 175  175   * be returned. Otherwise the original value will be returned.
 176  176   */
 177      -int
 178      -smb_toupper(int c)
      177 +uint32_t
      178 +smb_toupper(uint32_t c)
 179  179  {
 180  180          uint16_t mask = is_unicode ? 0xffff : 0xff;
 181  181  
 182  182          return (current_codepage[c & mask].upper);
 183  183  }
 184  184  
 185  185  /*
 186  186   * Convert individual characters to their lowercase equivalent value.
 187  187   * If the specified character is uppercase, the lowercase value will
 188  188   * be returned. Otherwise the original value will be returned.
 189  189   */
 190      -int
 191      -smb_tolower(int c)
      190 +uint32_t
      191 +smb_tolower(uint32_t c)
 192  192  {
 193  193          uint16_t mask = is_unicode ? 0xffff : 0xff;
 194  194  
 195  195          return (current_codepage[c & mask].lower);
 196  196  }
 197  197  
 198  198  /*
 199  199   * Convert a string to uppercase using the appropriate codepage. The
 200  200   * string is converted in place. A pointer to the string is returned.
 201  201   * There is an assumption here that the uppercase and lowercase forms
 202  202   * of a character always encode to the same length.
 203  203   */
 204  204  char *
 205  205  smb_strupr(char *s)
 206  206  {
 207      -        smb_wchar_t c;
      207 +        uint32_t c;
 208  208          char *p = s;
 209  209  
 210  210          while (*p) {
 211  211                  if (smb_isascii(*p)) {
 212  212                          *p = smb_toupper(*p);
 213  213                          p++;
 214  214                  } else {
 215  215                          if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
 216  216                                  return (0);
 217  217

 218  218                          if (c == 0)
 219  219                                  break;
 220  220  
 221  221                          c = smb_toupper(c);
 222  222                          p += smb_wctomb(p, c);
 223  223                  }
 224  224          }
 225  225  
 226  226          return (s);
 227  227  }

↓ open down ↓

10 lines elided

↑ open up ↑

 228  228  
 229  229  /*
 230  230   * Convert a string to lowercase using the appropriate codepage. The
 231  231   * string is converted in place. A pointer to the string is returned.
 232  232   * There is an assumption here that the uppercase and lowercase forms
 233  233   * of a character always encode to the same length.
 234  234   */
 235  235  char *
 236  236  smb_strlwr(char *s)
 237  237  {
 238      -        smb_wchar_t c;
      238 +        uint32_t c;
 239  239          char *p = s;
 240  240  
 241  241          while (*p) {
 242  242                  if (smb_isascii(*p)) {
 243  243                          *p = smb_tolower(*p);
 244  244                          p++;
 245  245                  } else {
 246  246                          if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
 247  247                                  return (0);
 248  248

 249  249                          if (c == 0)
 250  250                                  break;
 251  251  
 252  252                          c = smb_tolower(c);
 253  253                          p += smb_wctomb(p, c);
 254  254                  }
 255  255          }
 256  256

↓ open down ↓

8 lines elided

↑ open up ↑

 257  257          return (s);
 258  258  }
 259  259  
 260  260  /*
 261  261   * Returns 1 if the string contains no uppercase characters, 0 otherwise.
 262  262   * However, -1 is returned if "s" is not a valid multi-byte string.
 263  263   */
 264  264  int
 265  265  smb_isstrlwr(const char *s)
 266  266  {
 267      -        smb_wchar_t c;
      267 +        uint32_t c;
 268  268          int n;
 269  269          const char *p = s;
 270  270  
 271  271          while (*p) {
 272  272                  if (smb_isascii(*p) && smb_isupper(*p))
 273  273                          return (0);
 274  274                  else {
 275  275                          if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
 276  276                                  return (-1);
 277  277

 278  278                          if (c == 0)
 279  279                                  break;
 280  280  
 281  281                          if (smb_isupper(c))
 282  282                                  return (0);
 283  283  
 284  284                          p += n;
 285  285                  }
 286  286          }
 287  287

↓ open down ↓

10 lines elided

↑ open up ↑

 288  288          return (1);
 289  289  }
 290  290  
 291  291  /*
 292  292   * Returns 1 if the string contains no lowercase characters, 0 otherwise.
 293  293   * However, -1 is returned if "s" is not a valid multi-byte string.
 294  294   */
 295  295  int
 296  296  smb_isstrupr(const char *s)
 297  297  {
 298      -        smb_wchar_t c;
      298 +        uint32_t c;
 299  299          int n;
 300  300          const char *p = s;
 301  301  
 302  302          while (*p) {
 303  303                  if (smb_isascii(*p) && smb_islower(*p))
 304  304                          return (0);
 305  305                  else {
 306  306                          if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
 307  307                                  return (-1);
 308  308

 309  309                          if (c == 0)
 310  310                                  break;
 311  311  
 312  312                          if (smb_islower(c))
 313  313                                  return (0);
 314  314  
 315  315                          p += n;
 316  316                  }
 317  317          }
 318  318  
 319  319          return (1);
 320  320  }
 321  321  
 322  322  /*
 323  323   * Compare the null-terminated strings s1 and s2 and return an integer
 324  324   * greater than, equal to or less than 0 dependent on whether s1 is
 325  325   * lexicographically greater than, equal to or less than s2 after
 326  326   * translation of each character to lowercase.  The original strings
 327  327   * are not modified.
 328  328   *
 329  329   * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
 330  330   * are compared until a null terminator is encountered.
 331  331   *
 332  332   * Out:    0 if strings are equal
 333  333   *       < 0 if first string < second string
 334  334   *       > 0 if first string > second string
 335  335   */
 336  336  int
 337  337  smb_strcasecmp(const char *s1, const char *s2, size_t n)
 338  338  {
 339  339          int     err = 0;
 340  340          int     rc;
 341  341  
 342  342          rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
 343  343          if (err != 0)
 344  344                  return (-1);
 345  345          return (rc);
 346  346  }
 347  347  
 348  348  /*
 349  349   * First build a codepage based on cp_unicode.h.  Then build the unicode
 350  350   * codepage from this interim codepage by copying the entries over while
 351  351   * fixing them and filling in the gaps.
 352  352   */
 353  353  static smb_codepage_t *
 354  354  smb_unicode_init(void)
 355  355  {
 356  356          smb_codepage_t  *unicode;
 357  357          uint32_t        a = 0;
 358  358          uint32_t        b = 0;
 359  359  
 360  360          unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
 361  361          if (unicode == NULL)
 362  362                  return (NULL);
 363  363  
 364  364          while (b != 0xffff) {
 365  365                  /*
 366  366                   * If there is a gap in the standard,
 367  367                   * fill in the gap with no-case entries.
 368  368                   */
 369  369                  if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
 370  370                          unicode[b].ctype = CODEPAGE_ISNONE;
 371  371                          unicode[b].upper = (smb_wchar_t)b;
 372  372                          unicode[b].lower = (smb_wchar_t)b;
 373  373                          b++;
 374  374                          continue;
 375  375                  }
 376  376  
 377  377                  /*
 378  378                   * Copy the entry and fixup as required.
 379  379                   */
 380  380                  switch (a_unicode[a].ctype) {
 381  381                  case CODEPAGE_ISNONE:
 382  382                          /*
 383  383                           * Replace 0xffff in upper/lower fields with its val.
 384  384                           */
 385  385                          unicode[b].ctype = CODEPAGE_ISNONE;
 386  386                          unicode[b].upper = (smb_wchar_t)b;
 387  387                          unicode[b].lower = (smb_wchar_t)b;
 388  388                          break;
 389  389                  case CODEPAGE_ISUPPER:
 390  390                          /*
 391  391                           * Some characters may have case yet not have
 392  392                           * case conversion.  Treat them as no-case.
 393  393                           */
 394  394                          if (a_unicode[a].lower == 0xffff) {
 395  395                                  unicode[b].ctype = CODEPAGE_ISNONE;
 396  396                                  unicode[b].upper = (smb_wchar_t)b;
 397  397                                  unicode[b].lower = (smb_wchar_t)b;
 398  398                          } else {
 399  399                                  unicode[b].ctype = CODEPAGE_ISUPPER;
 400  400                                  unicode[b].upper = (smb_wchar_t)b;
 401  401                                  unicode[b].lower = a_unicode[a].lower;
 402  402                          }
 403  403                          break;
 404  404                  case CODEPAGE_ISLOWER:
 405  405                          /*
 406  406                           * Some characters may have case yet not have
 407  407                           * case conversion.  Treat them as no-case.
 408  408                           */
 409  409                          if (a_unicode[a].upper == 0xffff) {
 410  410                                  unicode[b].ctype = CODEPAGE_ISNONE;
 411  411                                  unicode[b].upper = (smb_wchar_t)b;
 412  412                                  unicode[b].lower = (smb_wchar_t)b;
 413  413                          } else {
 414  414                                  unicode[b].ctype = CODEPAGE_ISLOWER;
 415  415                                  unicode[b].upper = a_unicode[a].upper;
 416  416                                  unicode[b].lower = (smb_wchar_t)b;
 417  417                          }
 418  418                          break;
 419  419                  default:
 420  420                          MEM_FREE("unicode", unicode);
 421  421                          return (NULL);
 422  422                  }
 423  423  
 424  424                  a++;
 425  425                  b++;
 426  426          };
 427  427  
 428  428          return (unicode);
 429  429  }
 430  430  
 431  431  /*
 432  432   * Parse a UNC path (\\server\share\path) into its components.

↓ open down ↓

124 lines elided

↑ open up ↑

 433  433   * Although a standard UNC path starts with two '\', in DFS
 434  434   * all UNC paths start with one '\'. So, this function only
 435  435   * checks for one.
 436  436   *
 437  437   * A valid UNC must contain at least two components, i.e. server
 438  438   * and share. The path is parsed to:
 439  439   *
 440  440   * unc_server   server or domain name with no leading/trailing '\'
 441  441   * unc_share    share name with no leading/trailing '\'
 442  442   * unc_path     relative path to the share with no leading/trailing '\'
 443      - *              it is valid for unc_path to be NULL.
      443 + *              it is valid for unc_path to be NULL.
 444  444   *
 445  445   * Upon successful return of this function, smb_unc_free()
 446  446   * MUST be called when returned 'unc' is no longer needed.
 447  447   *
 448  448   * Returns 0 on success, otherwise returns an errno code.
 449  449   */
 450  450  int
 451  451  smb_unc_init(const char *path, smb_unc_t *unc)
 452  452  {
 453  453          char *p;

 454  454  
 455  455          if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
 456  456                  return (EINVAL);
 457  457  
 458  458          bzero(unc, sizeof (smb_unc_t));
 459  459  
 460  460  #if defined(_KERNEL) || defined(_FAKE_KERNEL)
 461  461          unc->unc_buf = smb_mem_strdup(path);
 462  462  #else
 463  463          if ((unc->unc_buf = strdup(path)) == NULL)
 464  464                  return (ENOMEM);
 465  465  #endif
 466  466  
 467  467          (void) strsubst(unc->unc_buf, '\\', '/');
 468  468          (void) strcanon(unc->unc_buf, "/");
 469  469  
 470  470          unc->unc_server = unc->unc_buf + 1;
 471  471          if (*unc->unc_server == '\0') {
 472  472                  smb_unc_free(unc);
 473  473                  return (EINVAL);
 474  474          }
 475  475  
 476  476          if ((p = strchr(unc->unc_server, '/')) == NULL) {
 477  477                  smb_unc_free(unc);
 478  478                  return (EINVAL);
 479  479          }
 480  480  
 481  481          *p++ = '\0';
 482  482          unc->unc_share = p;
 483  483  
 484  484          if (*unc->unc_share == '\0') {
 485  485                  smb_unc_free(unc);
 486  486                  return (EINVAL);
 487  487          }
 488  488  
 489  489          unc->unc_path = strchr(unc->unc_share, '/');
 490  490          if ((p = unc->unc_path) == NULL)
 491  491                  return (0);
 492  492  
 493  493          unc->unc_path++;
 494  494          *p = '\0';
 495  495  
 496  496          /* remove the last '/' if any */
 497  497          if ((p = strchr(unc->unc_path, '\0')) != NULL) {
 498  498                  if (*(--p) == '/')
 499  499                          *p = '\0';
 500  500          }
 501  501  
 502  502          return (0);
 503  503  }
 504  504  
 505  505  void
 506  506  smb_unc_free(smb_unc_t *unc)
 507  507  {
 508  508          if (unc == NULL)
 509  509                  return;
 510  510  
 511  511  #if defined(_KERNEL) || defined(_FAKE_KERNEL)
 512  512          smb_mem_free(unc->unc_buf);
 513  513  #else
 514  514          free(unc->unc_buf);
 515  515  #endif
 516  516          unc->unc_buf = NULL;
 517  517  }

↓ open down ↓

64 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX