Print this page
    
2964 need POSIX 2008 locale object support (more C++ fixes)
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/lib/libc/port/locale/localeimpl.c
          +++ new/usr/src/lib/libc/port/locale/localeimpl.c
   1    1  /*
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13   13   * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  14   14   */
  15   15  
  16   16  /*
  17   17   * This file implements the 2008 newlocale and friends handling.
  18   18   */
  19   19  
  20   20  #ifndef _LCONV_C99
  21   21  #define _LCONV_C99
  22   22  #endif
  23   23  
  24   24  #include "lint.h"
  25   25  #include <atomic.h>
  26   26  #include <locale.h>
  27   27  #include <sys/types.h>
  28   28  #include <sys/mman.h>
  29   29  #include <errno.h>
  30   30  #include <string.h>
  31   31  #include "libc.h"
  32   32  #include "mtlib.h"
  33   33  #include "tsd.h"
  34   34  #include "localeimpl.h"
  35   35  #include "lctype.h"
  36   36  
  37   37  /*
  38   38   * Big Theory of Locales:
  39   39   *
  40   40   * (It is recommended that readers familiarize themselves with the POSIX
  41   41   * 2008 (XPG Issue 7) specifications for locales, first.)
  42   42   *
  43   43   * Historically, we had a bunch of global variables that stored locale
  44   44   * data.  While this worked well, it limited applications to a single locale
  45   45   * at a time.  This doesn't work well in certain server applications.
  46   46   *
  47   47   * In Issue 7, X/Open introduced the concept of a locale_t object, along with
  48   48   * versions of functions that can take this object as a parameter, along
  49   49   * with functions to clone and manipulate these locale objects.  The new
  50   50   * functions are named with a _l() suffix.
  51   51   *
  52   52   * Additionally uselocale() is introduced which can change the locale
  53   53   * of a single thread.  However, setlocale() can still be used to change
  54   54   * the global locale.
  55   55   *
  56   56   * In our implementation, we use libc's TSD to store the locale data that
  57   57   * was previously global.  We still have global data because some applications
  58   58   * have had those global objects compiled into them.  (Such applications will
  59   59   * be unable to benefit from uselocale(), btw.)  The legacy routines are
  60   60   * reimplemented as wrappers that use the appropriate locale object by
  61   61   * calling uselocale().  uselocale() when passed a NULL pointer returns the
  62   62   * thread-specific locale object if one is present, or the global locale
  63   63   * object otherwise.  Note that once the TSD data is set, the only way
  64   64   * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
  65   65   * to uselocale().
  66   66   *
  67   67   * We are careful to minimize performance impact of multiple calls to
  68   68   * uselocale() or setlocale() by using a cache of locale data whenever possible.
  69   69   * As a consequence of this, applications that iterate over all possible
  70   70   * locales will burn through a lot of virtual memory, but we find such
  71   71   * applications rare.  (locale -a might be an exception, but it is short lived.)
  72   72   *
  73   73   * Category data is never released (although enclosing locale objects might be),
  74   74   * in order to guarantee thread-safety.  Calling freelocale() on an object
  75   75   * while it is in use by another thread is a programmer error (use-after-free)
  76   76   * and we don't bother to note it further.
  77   77   *
  78   78   * Locale objects (global locales) established by setlocale() are also
  79   79   * never freed (for MT safety), but we will save previous locale objects
  80   80   * and reuse them when we can.
  81   81   */
  82   82  
  83   83  typedef struct locdata *(*loadfn_t)(const char *);
  84   84  
  85   85  static const loadfn_t loaders[LC_ALL] = {
  86   86          __lc_ctype_load,
  87   87          __lc_numeric_load,
  88   88          __lc_time_load,
  89   89          __lc_collate_load,
  90   90          __lc_monetary_load,
  
    | 
      ↓ open down ↓ | 
    90 lines elided | 
    
      ↑ open up ↑ | 
  
  91   91          __lc_messages_load,
  92   92  };
  93   93  
  94   94  extern struct lc_monetary lc_monetary_posix;
  95   95  extern struct lc_numeric lc_numeric_posix;
  96   96  extern struct lc_messages lc_messages_posix;
  97   97  extern struct lc_time lc_time_posix;
  98   98  extern struct lc_ctype lc_ctype_posix;
  99   99  extern struct lc_collate lc_collate_posix;
 100  100  
 101      -static struct locale posix_locale = {
      101 +static struct _locale posix_locale = {
 102  102          /* locdata */
 103  103          .locdata = {
 104  104                  &__posix_ctype_locdata,
 105  105                  &__posix_numeric_locdata,
 106  106                  &__posix_time_locdata,
 107  107                  &__posix_collate_locdata,
 108  108                  &__posix_monetary_locdata,
 109  109                  &__posix_messages_locdata,
 110  110          },
 111  111          .locname = "C",
 112  112          .ctype = &lc_ctype_posix,
 113  113          .numeric = &lc_numeric_posix,
 114  114          .collate = &lc_collate_posix,
 115  115          .monetary = &lc_monetary_posix,
 116  116          .messages = &lc_messages_posix,
 117  117          .time = &lc_time_posix,
 118  118          .runelocale = &_DefaultRuneLocale,
 119  119  };
 120  120  
 121  121  locale_t ___global_locale = &posix_locale;
 122  122  
           /*
            * Accessor for ___global_locale, the global locale pointer defined
            * just above (it is initialized to point at the built-in posix_locale).
            */
 123  123  locale_t
 124  124  __global_locale(void)
 125  125  {
 126  126          return (___global_locale);
 127  127  }
 128  128  
 129  129  /*
 130  130   * Category names for getenv().  Note that this was modified
 131  131   * for Solaris.  See <iso/locale_iso.h>.
            *
            * The table is indexed by category number: get_locale_env() below uses
            * categories[LC_ALL] and categories[category] as the names of the
            * environment variables to look up.
            *
            * NOTE(review): the array bound is written as a literal 7 rather than
            * NUM_CATS; the two are presumably meant to stay in sync.
 132  132   */
 133  133  #define NUM_CATS        7
 134  134  static char *categories[7] = {
 135  135          "LC_CTYPE",
 136  136          "LC_NUMERIC",
 137  137          "LC_TIME",
 138  138          "LC_COLLATE",
 139  139          "LC_MONETARY",
 140  140          "LC_MESSAGES",
 141  141          "LC_ALL",
 142  142  };
 143  143  
 144  144  /*
 145  145   * Prototypes for the file-local helpers defined further down.  The
            * int parameter of the first three is a category number.
 146  146   */
 147  147  static const char *get_locale_env(int);
 148  148  static struct locdata *locdata_get(int, const char *);
 149  149  static struct locdata *locdata_get_cache(int, const char *);
 150  150  static locale_t mklocname(locale_t);
 151  151  
 152  152  /*
 153  153   * Some utility routines.
 154  154   */
 155  155  
           /*
            * Allocate a locdata for the locale called "name".  The structure itself
            * comes from lmalloc(), and a payload buffer of memsz bytes obtained from
            * libc_malloc() is stored in l_data[0].  The name is copied into l_lname
            * with strlcpy(), so it is truncated if it does not fit.
            *
            * Returns the new locdata, or NULL if either allocation fails.  errno is
            * set to ENOMEM here only when the payload allocation fails; the lmalloc()
            * failure path returns without touching errno in this function.
            */
 156  156  struct locdata *
 157  157  __locdata_alloc(const char *name, size_t memsz)
 158  158  {
 159  159          struct locdata *ldata;
 160  160  
 161  161          if ((ldata = lmalloc(sizeof (*ldata))) == NULL) {
 162  162                  return (NULL);
 163  163          }
 164  164          if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) {
                           /* Undo the first allocation so nothing leaks. */
 165  165                  lfree(ldata, sizeof (*ldata));
 166  166                  errno = ENOMEM;
 167  167                  return (NULL);
 168  168          }
 169  169          (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
 170  170  
 171  171          return (ldata);
 172  172  }
 173  173  
 174  174  /*
 175  175   * Normally we never free locale data truly, but if we failed to load it
 176  176   * for some reason, this routine is used to clean up the partial mess.
            *
            * Releases every l_data[] slot (all NLOCDATA of them), unmaps l_map when
            * it is non-NULL and l_map_len is non-zero, and finally returns the
            * locdata structure itself with lfree().  ldata must not be NULL; it is
            * dereferenced unconditionally.
 177  177   */
 178  178  void
 179  179  __locdata_free(struct locdata *ldata)
 180  180  {
 181  181          for (int i = 0; i < NLOCDATA; i++)
 182  182                  libc_free(ldata->l_data[i]);
 183  183          if (ldata->l_map != NULL && ldata->l_map_len)
 184  184                  (void) munmap(ldata->l_map, ldata->l_map_len);
 185  185          lfree(ldata, sizeof (*ldata));
 186  186  }
 187  187  
 188  188  /*
 189  189   * It turns out that for performance reasons we would really like to
 190  190   * cache the most recently referenced locale data to avoid wasteful
 191  191   * loading from files.
            *
            * cache_data[] holds, per category, the locdata most recently found or
            * loaded by locdata_get_cache().  cat_data[] holds, per category, the
            * head of the l_next-linked list of every locdata that routine has
            * inserted.  locdata_get_cache() reads and writes both arrays with
            * cache_lock held.
 192  192   */
 193  193  
 194  194  static struct locdata *cache_data[LC_ALL];
 195  195  static struct locdata *cat_data[LC_ALL];
 196  196  static mutex_t cache_lock = DEFAULTMUTEX;
 197  197  
 198  198  /*
 199  199   * Returns the locale data for the given category and locale name.  The
 200  200   * single-entry cache is checked first, then the list of previously
 201  201   * loaded data, and finally the category's loader is invoked.  Returns
 202  202   * NULL if the category is out of range or the loader fails.
            *
            * NOTE(review): an earlier version of this comment said that a cache
            * miss returns NULL and that the result carries a hold which the caller
            * must drop.  Nothing in this function takes a hold, and a miss falls
            * through to the list search and the loader -- confirm and reconcile.
 203  203   */
 204  204  static struct locdata *
 205  205  locdata_get_cache(int category, const char *locname)
 206  206  {
 207  207          struct locdata *loc;
 208  208  
 209  209          if (category < 0 || category >= LC_ALL)
 210  210                  return (NULL);
 211  211  
 212  212          /* Try cache first. */
 213  213          lmutex_lock(&cache_lock);
 214  214          loc = cache_data[category];
 215  215  
 216  216          if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
 217  217                  lmutex_unlock(&cache_lock);
 218  218                  return (loc);
 219  219          }
 220  220  
 221  221          /*
 222  222           * Failing that try previously loaded locales (linear search) --
 223  223           * this could be optimized to a hash, but it's unlikely that a single
 224  224           * application will ever need to work with more than a few locales.
 225  225           */
 226  226          for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
 227  227                  if (strcmp(locname, loc->l_lname) == 0) {
 228  228                          break;
 229  229                  }
 230  230          }
 231  231  
 232  232          /*
 233  233           * Finally, if we still don't have one, try loading the locale
 234  234           * data from the actual on-disk data.
 235  235           *
 236  236           * We drop the lock (libc wants to ensure no internal locks
 237  237           * are held when we call other routines required to read from
 238  238           * files, allocate memory, etc.)  There is a small race here,
 239  239           * but the consequences of the race are benign -- if multiple
 240  240           * threads hit this at precisely the same point, we could
 241  241           * wind up with duplicates of the locale data in the cache.
 242  242           *
 243  243           * This wastes the memory for an extra copy of the locale
 244  244           * data, but there is no further harm beyond that.  It's not
 245  245           * worth the effort to recode this to something "safe"
 246  246           * (which would require rescanning the list, etc.), given
 247  247           * that this race will probably never actually occur.
 248  248           */
 249  249          if (loc == NULL) {
 250  250                  lmutex_unlock(&cache_lock);
 251  251                  loc = (*loaders[category])(locname);
 252  252                  lmutex_lock(&cache_lock);
                           /* Stamp freshly loaded data with the name it was asked for. */
 253  253                  if (loc != NULL)
 254  254                          (void) strlcpy(loc->l_lname, locname,
 255  255                              sizeof (loc->l_lname));
 256  256          }
 257  257  
 258  258          /*
 259  259           * Assuming we got one, update the cache, and stick us on the list
 260  260           * of loaded locale data.  We insert into the head (more recent
 261  261           * use is likely to win.)
 262  262           */
 263  263          if (loc != NULL) {
 264  264                  cache_data[category] = loc;
                           /* l_cached marks data already linked, so link only once. */
 265  265                  if (!loc->l_cached) {
 266  266                          loc->l_cached = 1;
 267  267                          loc->l_next = cat_data[category];
 268  268                          cat_data[category] = loc;
 269  269                  }
 270  270          }
 271  271  
 272  272          lmutex_unlock(&cache_lock);
 273  273          return (loc);
 274  274  }
 275  275  
 276  276  /*
 277  277   * Routine to get the locdata for a given category and locale.
 278  278   * This includes retrieving it from cache, retrieving it from
 279  279   * a file, etc.
            *
            * A NULL or empty locname means "consult the environment" (see
            * get_locale_env()).  A name containing '/' is treated as a composite
            * and the component belonging to this category is extracted.  "C" and
            * "POSIX" are served directly from the built-in posix_locale; anything
            * else goes through locdata_get_cache().
 280  280   */
 281  281  static struct locdata *
 282  282  locdata_get(int category, const char *locname)
 283  283  {
 284  284          char scratch[ENCODING_LEN + 1];
 285  285          char *slash;
 286  286          int cnt;
 287  287          int len;
 288  288  
 289  289          if (locname == NULL || *locname == 0) {
 290  290                  locname = get_locale_env(category);
 291  291          }
 292  292  
 293  293          /*
 294  294           * Extract the locale name for the category if it is a composite
 295  295           * locale.
 296  296           */
 297  297          if ((slash = strchr(locname, '/')) != NULL) {
                           /*
                            * Step over one '/'-separated component per category
                            * number, stopping early if the name runs out of slashes.
                            */
 298  298                  for (cnt = category; cnt && slash != NULL; cnt--) {
 299  299                          locname = slash + 1;
 300  300                          slash = strchr(locname, '/');
 301  301                  }
                           /*
                            * len is the size handed to strlcpy(), which copies at most
                            * len - 1 bytes: the +1 leaves room for the terminating NUL,
                            * and the value is clamped to the scratch buffer.
                            */
 302  302                  if (slash) {
 303  303                          len = slash - locname + 1;
 304  304                          if (len >= sizeof (scratch)) {
 305  305                                  len = sizeof (scratch);
 306  306                          }
 307  307                  } else {
 308  308                          len = sizeof (scratch);
 309  309                  }
 310  310                  (void) strlcpy(scratch, locname, len);
 311  311                  locname = scratch;
 312  312          }
 313  313  
                   /* The built-in locale needs neither the cache nor a loader. */
 314  314          if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
 315  315                  return (posix_locale.locdata[category]);
 316  316  
 317  317          return (locdata_get_cache(category, locname));
 318  318  }
 319  319  
 320  320  /* tsd destructor */
           /*
            * Registered by uselocale() through tsdalloc().  arg points at the
            * per-thread locale_t slot; any locale still stored there is handed to
            * freelocale().
            *
            * NOTE(review): uselocale() stores caller-supplied locale objects in this
            * slot and states that the caller is responsible for freeing them --
            * confirm that freeing here cannot double-free an application-owned
            * locale.
            */
 321  321  static void
 322  322  freelocptr(void *arg)
 323  323  {
 324  324          locale_t *locptr = arg;
 325  325          if (*locptr != NULL)
 326  326                  freelocale(*locptr);
 327  327  }
 328  328  
           /*
            * Determine the locale name for a category from the environment.  The
            * first variable that is set and non-empty wins, in the order LC_ALL,
            * the category's own LC_* variable, then LANG; if none qualifies, "C"
            * is used.  The returned pointer is either getenv()'s result or a
            * string literal.
            */
 329  329  static const char *
 330  330  get_locale_env(int category)
 331  331  {
 332  332          const char *env;
 333  333  
 334  334          /* 1. check LC_ALL. */
 335  335          env = getenv(categories[LC_ALL]);
 336  336  
 337  337          /* 2. check LC_* */
 338  338          if (env == NULL || *env == '\0')
 339  339                  env = getenv(categories[category]);
 340  340  
 341  341          /* 3. check LANG */
 342  342          if (env == NULL || *env == '\0')
 343  343                  env = getenv("LANG");
 344  344  
 345  345          /* 4. if none is set, fall to "C" */
 346  346          if (env == NULL || *env == '\0')
 347  347                  env = "C";
 348  348  
 349  349          return (env);
 350  350  }
 351  351  
 352  352  
 353  353  /*
 354  354   * This routine is exposed via the MB_CUR_MAX macro.  Note that legacy
 355  355   * code will continue to use _ctype[520], but we prefer this function as
 356  356   * it is the only way to get thread-specific information.
            *
            * Returns the lc_max_mblen field of the ctype data attached to loc.
            * loc is dereferenced without a NULL check.
 357  357   */
 358  358  unsigned char
 359  359  __mb_cur_max_l(locale_t loc)
 360  360  {
 361  361          return (loc->ctype->lc_max_mblen);
 362  362  }
 363  363  
           /*
            * Same as __mb_cur_max_l(), applied to the locale that uselocale(NULL)
            * reports for the calling thread.
            */
 364  364  unsigned char
 365  365  __mb_cur_max(void)
 366  366  {
 367  367          return (__mb_cur_max_l(uselocale(NULL)));
 368  368  }
 369  369  
 370  370  /*
 371  371   * Public interfaces.
 372  372   */
 373  373  
           /*
            * Create a new locale object that refers to the same per-category
            * locdata as src.  A NULL src is treated as the global locale.  The
            * loaded[] flags of the copy are cleared and its convenience pointers
            * (collate, ctype, runelocale, ...) are derived from the copied locdata.
            * The category data itself is shared, not duplicated.
            *
            * Returns the new object, or NULL if lmalloc() fails.
            *
            * NOTE(review): locname is not copied from src here; the copy keeps
            * whatever lmalloc() left in that field -- confirm this is intended.
            */
 374  374  locale_t
 375  375  duplocale(locale_t src)
 376  376  {
 377  377          locale_t        loc;
 378  378          int             i;
 379  379  
 380  380          loc = lmalloc(sizeof (*loc));
 381  381          if (loc == NULL) {
 382  382                  return (NULL);
 383  383          }
 384  384          if (src == NULL) {
 385  385                  /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
 386  386                  src = ___global_locale;
 387  387          }
 388  388          for (i = 0; i < LC_ALL; i++) {
 389  389                  loc->locdata[i] = src->locdata[i];
 390  390                  loc->loaded[i] = 0;
 391  391          }
                   /* l_data[0] is each category's payload; ctype also uses l_data[1]. */
 392  392          loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
 393  393          loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
 394  394          loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
 395  395          loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
 396  396          loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
 397  397          loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
 398  398          loc->time = loc->locdata[LC_TIME]->l_data[0];
 399  399          return (loc);
 400  400  }
 401  401  
           /*
            * Release a locale object with lfree().  A NULL pointer, the built-in
            * posix_locale, and any object whose on_list flag is set are left
            * alone.  Only the locale structure is released; the category data it
            * points to is not touched.
            */
 402  402  void
 403  403  freelocale(locale_t loc)
 404  404  {
 405  405          /*
 406  406           * We take extra care never to free a saved locale created by
 407  407           * setlocale().  This shouldn't be strictly necessary, but a little
 408  408           * extra safety doesn't hurt here.
 409  409           */
 410  410          if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list))
 411  411                  lfree(loc, sizeof (*loc));
 412  412  }
 413  413  
           /*
            * Build a new locale object from base, replacing the categories
            * selected by catmask with data for locname.
            *
            * catmask: bit (1 << i) selects category i; any bit outside LC_ALL_MASK
            *          fails with EINVAL.
            * locname: passed to locdata_get() for each selected category (so a
            *          NULL or empty name falls back to the environment there).
            * base:    starting point; NULL or the global locale means "copy the
            *          global locale".
            *
            * On success the new object is named by mklocname() and returned, and
            * base is passed to freelocale().  On failure NULL is returned, the
            * partially built object is released, errno from the failing step is
            * preserved, and base is left untouched.
            */
 414  414  locale_t
 415  415  newlocale(int catmask, const char *locname, locale_t base)
 416  416  {
 417  417          locale_t loc;
 418  418          int i, e;
 419  419  
 420  420          if (catmask & ~(LC_ALL_MASK)) {
 421  421                  errno = EINVAL;
 422  422                  return (NULL);
 423  423          }
 424  424  
 425  425          /*
 426  426           * Technically passing LC_GLOBAL_LOCALE here is illegal,
 427  427           * but we allow it.
 428  428           */
 429  429          if (base == NULL || base == ___global_locale) {
 430  430                  loc = duplocale(___global_locale);
 431  431          } else {
 432  432                  loc = duplocale(base);
 433  433          }
 434  434          if (loc == NULL) {
 435  435                  return (NULL);
 436  436          }
 437  437  
 438  438          for (i = 0; i < LC_ALL; i++) {
 439  439                  struct locdata *ldata;
 440  440                  loc->loaded[i] = 0;
 441  441                  if (((1 << i) & catmask) == 0) {
 442  442                          /* Default to base locale if not overriding */
 443  443                          continue;
 444  444                  }
 445  445                  ldata = locdata_get(i, locname);
 446  446                  if (ldata == NULL) {
                                   /* Save errno so the cleanup cannot disturb it. */
 447  447                          e = errno;
 448  448                          freelocale(loc);
 449  449                          errno = e;
 450  450                          return (NULL);
 451  451                  }
 452  452                  loc->locdata[i] = ldata;
 453  453          }
                   /* Refresh the convenience pointers now that locdata[] is final. */
 454  454          loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
 455  455          loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
 456  456          loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
 457  457          loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
 458  458          loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
 459  459          loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
 460  460          loc->time = loc->locdata[LC_TIME]->l_data[0];
                   /* freelocale() itself ignores NULL, posix_locale and on_list objects. */
 461  461          freelocale(base);
 462  462  
 463  463          return (mklocname(loc));
 464  464  }
 465  465  
           /*
            * Query or set the calling thread's locale.  The thread's slot is
            * obtained from tsdalloc() (with freelocptr as its destructor).
            *
            * loc == NULL:              query only; nothing is changed.
            * loc == ___global_locale:  clear the slot, so the thread follows the
            *                           global locale again.
            * anything else:            store loc in the slot as-is.
            *
            * Returns the locale that was in effect before the call: the slot's
            * previous value if it was set, otherwise ___global_locale.  Returns
            * NULL with errno set to EINVAL if tsdalloc() fails.
            */
 466  466  locale_t
 467  467  uselocale(locale_t loc)
 468  468  {
 469  469          locale_t lastloc = ___global_locale;
 470  470          locale_t *locptr;
 471  471  
 472  472          locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
 473  473          /* Should never occur */
 474  474          if (locptr == NULL) {
 475  475                  errno = EINVAL;
 476  476                  return (NULL);
 477  477          }
 478  478  
 479  479          if (*locptr != NULL)
 480  480                  lastloc = *locptr;
 481  481  
 482  482          /* Argument loc is NULL if we are just querying. */
 483  483          if (loc != NULL) {
 484  484                  /*
 485  485                   * Set it to LC_GLOBAL_LOCALE to return to using
 486  486                   * the global locale (setlocale).
 487  487                   */
 488  488                  if (loc == ___global_locale) {
 489  489                          *locptr = NULL;
 490  490                  } else {
 491  491                          /* No validation of the provided locale at present */
 492  492                          *locptr = loc;
 493  493                  }
 494  494          }
 495  495  
 496  496          /*
 497  497           * The caller is responsible for freeing, of course it would be
 498  498           * gross error to call freelocale() on a locale object that is still
 499  499           * in use.
 500  500           */
 501  501          return (lastloc);
 502  502  }
 503  503  
           /*
            * Fill in loc->locname from the names of the per-category locdata and
            * return loc.  If every category carries the same l_lname as category 0,
            * that single name is used; otherwise a composite name is built by
            * joining the six category names with '/'.  Both paths are bounded by
            * sizeof (loc->locname), so an over-long result is truncated.
            */
 504  504  static locale_t
 505  505  mklocname(locale_t loc)
 506  506  {
 507  507          int composite = 0;
 508  508  
 509  509          /* Look to see if any category is different */
 510  510          for (int i = 1; i < LC_ALL; ++i) {
 511  511                  if (strcmp(loc->locdata[0]->l_lname,
 512  512                      loc->locdata[i]->l_lname) != 0) {
 513  513                          composite = 1;
 514  514                          break;
 515  515                  }
 516  516          }
 517  517  
 518  518          if (composite) {
 519  519                  /*
 520  520                   * Note ordering of these follows the numeric order,
 521  521                   * if the order is changed, then setlocale() will need
 522  522                   * to be changed as well.
 523  523                   */
 524  524                  (void) snprintf(loc->locname, sizeof (loc->locname),
 525  525                      "%s/%s/%s/%s/%s/%s",
 526  526                      loc->locdata[LC_CTYPE]->l_lname,
 527  527                      loc->locdata[LC_NUMERIC]->l_lname,
 528  528                      loc->locdata[LC_TIME]->l_lname,
 529  529                      loc->locdata[LC_COLLATE]->l_lname,
 530  530                      loc->locdata[LC_MONETARY]->l_lname,
 531  531                      loc->locdata[LC_MESSAGES]->l_lname);
 532  532          } else {
 533  533                  (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname,
 534  534                      sizeof (loc->locname));
 535  535          }
 536  536          return (loc);
 537  537  }
  
    | 
      ↓ open down ↓ | 
    426 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX