Print this page
2964 need POSIX 2008 locale object support (more C++ fixes)
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/lib/libc/port/locale/localeimpl.c
+++ new/usr/src/lib/libc/port/locale/localeimpl.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
14 14 */
15 15
16 16 /*
17 17 * This file implements the 2008 newlocale and friends handling.
18 18 */
19 19
20 20 #ifndef _LCONV_C99
21 21 #define _LCONV_C99
22 22 #endif
23 23
24 24 #include "lint.h"
25 25 #include <atomic.h>
26 26 #include <locale.h>
27 27 #include <sys/types.h>
28 28 #include <sys/mman.h>
29 29 #include <errno.h>
30 30 #include <string.h>
31 31 #include "libc.h"
32 32 #include "mtlib.h"
33 33 #include "tsd.h"
34 34 #include "localeimpl.h"
35 35 #include "lctype.h"
36 36
37 37 /*
38 38 * Big Theory of Locales:
39 39 *
40 40 * (It is recommended that readers familiarize themselves with the POSIX
41 41 * 2008 (XPG Issue 7) specifications for locales, first.)
42 42 *
43 43 * Historically, we had a bunch of global variables that stored locale
44 44 * data. While this worked well, it limited applications to a single locale
45 45 * at a time. This doesn't work well in certain server applications.
46 46 *
47 47 * In Issue 7, X/Open introduced the concept of a locale_t object, along with
48 48 * versions of functions that can take this object as a parameter, along
49 49 * with functions to clone and manipulate these locale objects. The new
50 50 * functions are named with a _l() suffix.
51 51 *
52 52 * Additionally, uselocale() is introduced, which can change the locale
53 53 * of a single thread. However, setlocale() can still be used to change
54 54 * the global locale.
55 55 *
56 56 * In our implementation, we use libc's TSD to store the locale data that
57 57 * was previously global. We still have global data because some applications
58 58 * have had those global objects compiled into them. (Such applications will
59 59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 60 * reimplemented as wrappers that use the appropriate locale object by
61 61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
62 62 * thread-specific locale object if one is present, or the global locale
63 63 * object otherwise. Note that once the TSD data is set, the only way
64 64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
65 65 * to uselocale().
66 66 *
67 67 * We are careful to minimize performance impact of multiple calls to
68 68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 69 * As a consequence of this, applications that iterate over all possible
70 70 * locales will burn through a lot of virtual memory, but we find such
71 71 * applications rare. (locale -a might be an exception, but it is short lived.)
72 72 *
73 73 * Category data is never released (although enclosing locale objects might be),
74 74 * in order to guarantee thread-safety. Calling freelocale() on an object
75 75 * while it is in use by another thread is a programmer error (use-after-free)
76 76 * and we don't bother to note it further.
77 77 *
78 78 * Locale objects (global locales) established by setlocale() are also
79 79 * never freed (for MT safety), but we will save previous locale objects
80 80 * and reuse them when we can.
81 81 */
82 82
83 83 typedef struct locdata *(*loadfn_t)(const char *);
84 84
85 85 static const loadfn_t loaders[LC_ALL] = {
86 86 __lc_ctype_load,
87 87 __lc_numeric_load,
88 88 __lc_time_load,
89 89 __lc_collate_load,
90 90 __lc_monetary_load,
|
↓ open down ↓ |
90 lines elided |
↑ open up ↑ |
91 91 __lc_messages_load,
92 92 };
93 93
94 94 extern struct lc_monetary lc_monetary_posix;
95 95 extern struct lc_numeric lc_numeric_posix;
96 96 extern struct lc_messages lc_messages_posix;
97 97 extern struct lc_time lc_time_posix;
98 98 extern struct lc_ctype lc_ctype_posix;
99 99 extern struct lc_collate lc_collate_posix;
100 100
101 -static struct locale posix_locale = {
101 +static struct _locale posix_locale = {
102 102 /* locdata */
103 103 .locdata = {
104 104 &__posix_ctype_locdata,
105 105 &__posix_numeric_locdata,
106 106 &__posix_time_locdata,
107 107 &__posix_collate_locdata,
108 108 &__posix_monetary_locdata,
109 109 &__posix_messages_locdata,
110 110 },
111 111 .locname = "C",
112 112 .ctype = &lc_ctype_posix,
113 113 .numeric = &lc_numeric_posix,
114 114 .collate = &lc_collate_posix,
115 115 .monetary = &lc_monetary_posix,
116 116 .messages = &lc_messages_posix,
117 117 .time = &lc_time_posix,
118 118 .runelocale = &_DefaultRuneLocale,
119 119 };
120 120
121 121 locale_t ___global_locale = &posix_locale;
122 122
123 123 locale_t
124 124 __global_locale(void)
125 125 {
126 126 return (___global_locale);
127 127 }
128 128
/*
 * Category names for getenv().  Note that this was modified
 * for Solaris.  See <iso/locale_iso.h>.
 *
 * The table is indexed by category number; the last slot (index LC_ALL
 * as used by get_locale_env()) holds "LC_ALL" itself.  The array is sized
 * by NUM_CATS so the count is stated in exactly one place.
 */
#define	NUM_CATS	7
static char *categories[NUM_CATS] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MONETARY",
	"LC_MESSAGES",
	"LC_ALL",
};
143 143
144 144 /*
145 145 * Prototypes.
146 146 */
147 147 static const char *get_locale_env(int);
148 148 static struct locdata *locdata_get(int, const const char *);
149 149 static struct locdata *locdata_get_cache(int, const char *);
150 150 static locale_t mklocname(locale_t);
151 151
152 152 /*
153 153 * Some utility routines.
154 154 */
155 155
156 156 struct locdata *
157 157 __locdata_alloc(const char *name, size_t memsz)
158 158 {
159 159 struct locdata *ldata;
160 160
161 161 if ((ldata = lmalloc(sizeof (*ldata))) == NULL) {
162 162 return (NULL);
163 163 }
164 164 if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) {
165 165 lfree(ldata, sizeof (*ldata));
166 166 errno = ENOMEM;
167 167 return (NULL);
168 168 }
169 169 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
170 170
171 171 return (ldata);
172 172 }
173 173
174 174 /*
175 175 * Normally we never free locale data truly, but if we failed to load it
176 176 * for some reason, this routine is used to cleanup the partial mess.
177 177 */
178 178 void
179 179 __locdata_free(struct locdata *ldata)
180 180 {
181 181 for (int i = 0; i < NLOCDATA; i++)
182 182 libc_free(ldata->l_data[i]);
183 183 if (ldata->l_map != NULL && ldata->l_map_len)
184 184 (void) munmap(ldata->l_map, ldata->l_map_len);
185 185 lfree(ldata, sizeof (*ldata));
186 186 }
187 187
188 188 /*
189 189 * It turns out that for performance reasons we would really like to
190 190 * cache the most recently referenced locale data to avoid wasteful
191 191 * loading from files.
192 192 */
193 193
194 194 static struct locdata *cache_data[LC_ALL];
195 195 static struct locdata *cat_data[LC_ALL];
196 196 static mutex_t cache_lock = DEFAULTMUTEX;
197 197
198 198 /*
199 199 * Returns the cached data if the locale name is the same. If not,
200 200 * returns NULL (cache miss). The locdata is returned with a hold on
201 201 * it, taken on behalf of the caller. The caller should drop the hold
202 202 * when it is finished.
203 203 */
204 204 static struct locdata *
205 205 locdata_get_cache(int category, const char *locname)
206 206 {
207 207 struct locdata *loc;
208 208
209 209 if (category < 0 || category >= LC_ALL)
210 210 return (NULL);
211 211
212 212 /* Try cache first. */
213 213 lmutex_lock(&cache_lock);
214 214 loc = cache_data[category];
215 215
216 216 if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
217 217 lmutex_unlock(&cache_lock);
218 218 return (loc);
219 219 }
220 220
221 221 /*
222 222 * Failing that try previously loaded locales (linear search) --
223 223 * this could be optimized to a hash, but its unlikely that a single
224 224 * application will ever need to work with more than a few locales.
225 225 */
226 226 for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
227 227 if (strcmp(locname, loc->l_lname) == 0) {
228 228 break;
229 229 }
230 230 }
231 231
232 232 /*
233 233 * Finally, if we still don't have one, try loading the locale
234 234 * data from the actual on-disk data.
235 235 *
236 236 * We drop the lock (libc wants to ensure no internal locks
237 237 * are held when we call other routines required to read from
238 238 * files, allocate memory, etc.) There is a small race here,
239 239 * but the consequences of the race are benign -- if multiple
240 240 * threads hit this at precisely the same point, we could
241 241 * wind up with duplicates of the locale data in the cache.
242 242 *
243 243 * This wastes the memory for an extra copy of the locale
244 244 * data, but there is no further harm beyond that. Its not
245 245 * worth the effort to recode this to something "safe"
246 246 * (which would require rescanning the list, etc.), given
247 247 * that this race will probably never actually occur.
248 248 */
249 249 if (loc == NULL) {
250 250 lmutex_unlock(&cache_lock);
251 251 loc = (*loaders[category])(locname);
252 252 lmutex_lock(&cache_lock);
253 253 if (loc != NULL)
254 254 (void) strlcpy(loc->l_lname, locname,
255 255 sizeof (loc->l_lname));
256 256 }
257 257
258 258 /*
259 259 * Assuming we got one, update the cache, and stick us on the list
260 260 * of loaded locale data. We insert into the head (more recent
261 261 * use is likely to win.)
262 262 */
263 263 if (loc != NULL) {
264 264 cache_data[category] = loc;
265 265 if (!loc->l_cached) {
266 266 loc->l_cached = 1;
267 267 loc->l_next = cat_data[category];
268 268 cat_data[category] = loc;
269 269 }
270 270 }
271 271
272 272 lmutex_unlock(&cache_lock);
273 273 return (loc);
274 274 }
275 275
276 276 /*
277 277 * Routine to get the locdata for a given category and locale.
278 278 * This includes retrieving it from cache, retrieving it from
279 279 * a file, etc.
280 280 */
281 281 static struct locdata *
282 282 locdata_get(int category, const char *locname)
283 283 {
284 284 char scratch[ENCODING_LEN + 1];
285 285 char *slash;
286 286 int cnt;
287 287 int len;
288 288
289 289 if (locname == NULL || *locname == 0) {
290 290 locname = get_locale_env(category);
291 291 }
292 292
293 293 /*
294 294 * Extract the locale name for the category if it is a composite
295 295 * locale.
296 296 */
297 297 if ((slash = strchr(locname, '/')) != NULL) {
298 298 for (cnt = category; cnt && slash != NULL; cnt--) {
299 299 locname = slash + 1;
300 300 slash = strchr(locname, '/');
301 301 }
302 302 if (slash) {
303 303 len = slash - locname + 1;
304 304 if (len >= sizeof (scratch)) {
305 305 len = sizeof (scratch);
306 306 }
307 307 } else {
308 308 len = sizeof (scratch);
309 309 }
310 310 (void) strlcpy(scratch, locname, len);
311 311 locname = scratch;
312 312 }
313 313
314 314 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
315 315 return (posix_locale.locdata[category]);
316 316
317 317 return (locdata_get_cache(category, locname));
318 318 }
319 319
320 320 /* tsd destructor */
321 321 static void
322 322 freelocptr(void *arg)
323 323 {
324 324 locale_t *locptr = arg;
325 325 if (*locptr != NULL)
326 326 freelocale(*locptr);
327 327 }
328 328
329 329 static const char *
330 330 get_locale_env(int category)
331 331 {
332 332 const char *env;
333 333
334 334 /* 1. check LC_ALL. */
335 335 env = getenv(categories[LC_ALL]);
336 336
337 337 /* 2. check LC_* */
338 338 if (env == NULL || *env == '\0')
339 339 env = getenv(categories[category]);
340 340
341 341 /* 3. check LANG */
342 342 if (env == NULL || *env == '\0')
343 343 env = getenv("LANG");
344 344
345 345 /* 4. if none is set, fall to "C" */
346 346 if (env == NULL || *env == '\0')
347 347 env = "C";
348 348
349 349 return (env);
350 350 }
351 351
352 352
353 353 /*
354 354 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy
355 355 * code will continue to use _ctype[520], but we prefer this function as
356 356 * it is the only way to get thread-specific information.
357 357 */
358 358 unsigned char
359 359 __mb_cur_max_l(locale_t loc)
360 360 {
361 361 return (loc->ctype->lc_max_mblen);
362 362 }
363 363
364 364 unsigned char
365 365 __mb_cur_max(void)
366 366 {
367 367 return (__mb_cur_max_l(uselocale(NULL)));
368 368 }
369 369
370 370 /*
371 371 * Public interfaces.
372 372 */
373 373
374 374 locale_t
375 375 duplocale(locale_t src)
376 376 {
377 377 locale_t loc;
378 378 int i;
379 379
380 380 loc = lmalloc(sizeof (*loc));
381 381 if (loc == NULL) {
382 382 return (NULL);
383 383 }
384 384 if (src == NULL) {
385 385 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
386 386 src = ___global_locale;
387 387 }
388 388 for (i = 0; i < LC_ALL; i++) {
389 389 loc->locdata[i] = src->locdata[i];
390 390 loc->loaded[i] = 0;
391 391 }
392 392 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
393 393 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
394 394 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
395 395 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
396 396 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
397 397 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
398 398 loc->time = loc->locdata[LC_TIME]->l_data[0];
399 399 return (loc);
400 400 }
401 401
402 402 void
403 403 freelocale(locale_t loc)
404 404 {
405 405 /*
406 406 * We take extra care never to free a saved locale created by
407 407 * setlocale(). This shouldn't be strictly necessary, but a little
408 408 * extra safety doesn't hurt here.
409 409 */
410 410 if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list))
411 411 lfree(loc, sizeof (*loc));
412 412 }
413 413
414 414 locale_t
415 415 newlocale(int catmask, const char *locname, locale_t base)
416 416 {
417 417 locale_t loc;
418 418 int i, e;
419 419
420 420 if (catmask & ~(LC_ALL_MASK)) {
421 421 errno = EINVAL;
422 422 return (NULL);
423 423 }
424 424
425 425 /*
426 426 * Technically passing LC_GLOBAL_LOCALE here is illegal,
427 427 * but we allow it.
428 428 */
429 429 if (base == NULL || base == ___global_locale) {
430 430 loc = duplocale(___global_locale);
431 431 } else {
432 432 loc = duplocale(base);
433 433 }
434 434 if (loc == NULL) {
435 435 return (NULL);
436 436 }
437 437
438 438 for (i = 0; i < LC_ALL; i++) {
439 439 struct locdata *ldata;
440 440 loc->loaded[i] = 0;
441 441 if (((1 << i) & catmask) == 0) {
442 442 /* Default to base locale if not overriding */
443 443 continue;
444 444 }
445 445 ldata = locdata_get(i, locname);
446 446 if (ldata == NULL) {
447 447 e = errno;
448 448 freelocale(loc);
449 449 errno = e;
450 450 return (NULL);
451 451 }
452 452 loc->locdata[i] = ldata;
453 453 }
454 454 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
455 455 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
456 456 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
457 457 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
458 458 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
459 459 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
460 460 loc->time = loc->locdata[LC_TIME]->l_data[0];
461 461 freelocale(base);
462 462
463 463 return (mklocname(loc));
464 464 }
465 465
466 466 locale_t
467 467 uselocale(locale_t loc)
468 468 {
469 469 locale_t lastloc = ___global_locale;
470 470 locale_t *locptr;
471 471
472 472 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
473 473 /* Should never occur */
474 474 if (locptr == NULL) {
475 475 errno = EINVAL;
476 476 return (NULL);
477 477 }
478 478
479 479 if (*locptr != NULL)
480 480 lastloc = *locptr;
481 481
482 482 /* Argument loc is NULL if we are just querying. */
483 483 if (loc != NULL) {
484 484 /*
485 485 * Set it to LC_GLOBAL_LOCAL to return to using
486 486 * the global locale (setlocale).
487 487 */
488 488 if (loc == ___global_locale) {
489 489 *locptr = NULL;
490 490 } else {
491 491 /* No validation of the provided locale at present */
492 492 *locptr = loc;
493 493 }
494 494 }
495 495
496 496 /*
497 497 * The caller is responsible for freeing, of course it would be
498 498 * gross error to call freelocale() on a locale object that is still
499 499 * in use.
500 500 */
501 501 return (lastloc);
502 502 }
503 503
504 504 static locale_t
505 505 mklocname(locale_t loc)
506 506 {
507 507 int composite = 0;
508 508
509 509 /* Look to see if any category is different */
510 510 for (int i = 1; i < LC_ALL; ++i) {
511 511 if (strcmp(loc->locdata[0]->l_lname,
512 512 loc->locdata[i]->l_lname) != 0) {
513 513 composite = 1;
514 514 break;
515 515 }
516 516 }
517 517
518 518 if (composite) {
519 519 /*
520 520 * Note ordering of these follows the numeric order,
521 521 * if the order is changed, then setlocale() will need
522 522 * to be changed as well.
523 523 */
524 524 (void) snprintf(loc->locname, sizeof (loc->locname),
525 525 "%s/%s/%s/%s/%s/%s",
526 526 loc->locdata[LC_CTYPE]->l_lname,
527 527 loc->locdata[LC_NUMERIC]->l_lname,
528 528 loc->locdata[LC_TIME]->l_lname,
529 529 loc->locdata[LC_COLLATE]->l_lname,
530 530 loc->locdata[LC_MONETARY]->l_lname,
531 531 loc->locdata[LC_MESSAGES]->l_lname);
532 532 } else {
533 533 (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname,
534 534 sizeof (loc->locname));
535 535 }
536 536 return (loc);
537 537 }
|
↓ open down ↓ |
426 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX