Print this page
NEX-19025 CIFS gets confused with filenames containing enhanced Unicode
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
and: (fix build, check-rtime)
NEX-2460 libfksmbd should not link with libsmb
SMB-65 SMB server in non-global zones (data structure changes)
Many things move to the smb_server_t object, and
many functions gain an sv arg (which server).
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/common/smbsrv/smb_string.c
+++ new/usr/src/common/smbsrv/smb_string.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 *
25 25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 26 * Copyright (c) 2017 by Delphix. All rights reserved.
27 27 */
28 28
29 29 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
30 30 #include <sys/types.h>
31 31 #include <sys/sunddi.h>
32 32 #else
33 33 #include <stdio.h>
34 34 #include <stdlib.h>
35 35 #include <string.h>
36 36 #include <strings.h>
37 37 #endif
38 38 #include <sys/u8_textprep.h>
39 39 #include <smbsrv/alloc.h>
40 40 #include <sys/errno.h>
41 41 #include <smbsrv/string.h>
42 42 #include <smbsrv/cp_usascii.h>
43 43 #include <smbsrv/cp_unicode.h>
44 44
45 45 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0]))
46 46
47 47 /*
48 48 * Global pointer to the current codepage: defaults to ASCII,
49 49 * and a flag indicating whether the codepage is Unicode or ASCII.
50 50 */
51 51 static const smb_codepage_t *current_codepage = usascii_codepage;
52 52 static boolean_t is_unicode = B_FALSE;
53 53
54 54 static smb_codepage_t *unicode_codepage = NULL;
55 55
56 56 static smb_codepage_t *smb_unicode_init(void);
57 57
58 58 /*
59 59 * strsubst
60 60 *
61 61 * Scan a string replacing all occurrences of orgchar with newchar.
62 62 * Returns a pointer to s, or null if s is null.
63 63 */
64 64 char *
65 65 strsubst(char *s, char orgchar, char newchar)
66 66 {
67 67 char *p = s;
68 68
69 69 if (p == 0)
70 70 return (0);
71 71
72 72 while (*p) {
73 73 if (*p == orgchar)
74 74 *p = newchar;
75 75 ++p;
76 76 }
77 77
78 78 return (s);
79 79 }
80 80
81 81 /*
82 82 * strcanon
83 83 *
84 84 * Normalize a string by reducing all the repeated characters in
85 85 * buf as defined by class. For example:
86 86 *
87 87 * char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
88 88 * strcanon(buf, "/\\");
89 89 *
90 90 * Would result in buf containing the following string:
91 91 *
92 92 * /d1/d2/d3\d4\f1.txt
93 93 *
94 94 * This function modifies the contents of buf in place and returns
95 95 * a pointer to buf.
96 96 */
97 97 char *
98 98 strcanon(char *buf, const char *class)
99 99 {
100 100 char *p = buf;
101 101 char *q = buf;
102 102 char *r;
103 103
104 104 while (*p) {
105 105 *q++ = *p;
106 106
107 107 if ((r = strchr(class, *p)) != 0) {
108 108 while (*p == *r)
109 109 ++p;
110 110 } else
111 111 ++p;
112 112 }
113 113
114 114 *q = '\0';
115 115 return (buf);
116 116 }
117 117
118 118 void
119 119 smb_codepage_init(void)
120 120 {
121 121 smb_codepage_t *cp;
122 122
123 123 if (is_unicode)
124 124 return;
125 125
126 126 if ((cp = smb_unicode_init()) != NULL) {
127 127 current_codepage = cp;
128 128 unicode_codepage = cp;
129 129 is_unicode = B_TRUE;
130 130 } else {
131 131 current_codepage = usascii_codepage;
132 132 is_unicode = B_FALSE;
133 133 }
134 134 }
135 135
136 136 void
137 137 smb_codepage_fini(void)
138 138 {
139 139 if (unicode_codepage != NULL) {
140 140 MEM_FREE("unicode", unicode_codepage);
141 141 unicode_codepage = NULL;
142 142 current_codepage = NULL;
143 143 }
144 144 }
145 145
146 146 /*
147 147 * Determine whether or not a character is an uppercase character.
148 148 * This function operates on the current codepage table. Returns
149 149 * non-zero if the character is uppercase. Otherwise returns zero.
150 150 */
151 151 int
152 152 smb_isupper(int c)
153 153 {
154 154 uint16_t mask = is_unicode ? 0xffff : 0xff;
155 155
156 156 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
157 157 }
158 158
159 159 /*
160 160 * Determine whether or not a character is a lowercase character.
161 161 * This function operates on the current codepage table. Returns
162 162 * non-zero if the character is lowercase. Otherwise returns zero.
163 163 */
164 164 int
165 165 smb_islower(int c)
166 166 {
|
↓ open down ↓ |
166 lines elided |
↑ open up ↑ |
167 167 uint16_t mask = is_unicode ? 0xffff : 0xff;
168 168
169 169 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
170 170 }
171 171
172 172 /*
173 173 * Convert individual characters to their uppercase equivalent value.
174 174 * If the specified character is lowercase, the uppercase value will
175 175 * be returned. Otherwise the original value will be returned.
176 176 */
177 -int
178 -smb_toupper(int c)
177 +uint32_t
178 +smb_toupper(uint32_t c)
179 179 {
180 180 uint16_t mask = is_unicode ? 0xffff : 0xff;
181 181
182 182 return (current_codepage[c & mask].upper);
183 183 }
184 184
185 185 /*
186 186 * Convert individual characters to their lowercase equivalent value.
187 187 * If the specified character is uppercase, the lowercase value will
188 188 * be returned. Otherwise the original value will be returned.
189 189 */
190 -int
191 -smb_tolower(int c)
190 +uint32_t
191 +smb_tolower(uint32_t c)
192 192 {
193 193 uint16_t mask = is_unicode ? 0xffff : 0xff;
194 194
195 195 return (current_codepage[c & mask].lower);
196 196 }
197 197
198 198 /*
199 199 * Convert a string to uppercase using the appropriate codepage. The
200 200 * string is converted in place. A pointer to the string is returned.
201 201 * There is an assumption here that uppercase and lowercase values
202 202 * always encode to the same length.
203 203 */
204 204 char *
205 205 smb_strupr(char *s)
206 206 {
207 - smb_wchar_t c;
207 + uint32_t c;
208 208 char *p = s;
209 209
210 210 while (*p) {
211 211 if (smb_isascii(*p)) {
212 212 *p = smb_toupper(*p);
213 213 p++;
214 214 } else {
215 215 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
216 216 return (0);
217 217
218 218 if (c == 0)
219 219 break;
220 220
221 221 c = smb_toupper(c);
222 222 p += smb_wctomb(p, c);
223 223 }
224 224 }
225 225
226 226 return (s);
227 227 }
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
228 228
229 229 /*
230 230 * Convert a string to lowercase using the appropriate codepage. The
231 231 * string is converted in place. A pointer to the string is returned.
232 232 * There is an assumption here that uppercase and lowercase values
233 233 * always encode to the same length.
234 234 */
235 235 char *
236 236 smb_strlwr(char *s)
237 237 {
238 - smb_wchar_t c;
238 + uint32_t c;
239 239 char *p = s;
240 240
241 241 while (*p) {
242 242 if (smb_isascii(*p)) {
243 243 *p = smb_tolower(*p);
244 244 p++;
245 245 } else {
246 246 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
247 247 return (0);
248 248
249 249 if (c == 0)
250 250 break;
251 251
252 252 c = smb_tolower(c);
253 253 p += smb_wctomb(p, c);
254 254 }
255 255 }
256 256
|
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
257 257 return (s);
258 258 }
259 259
260 260 /*
261 261 * Returns 1 if string contains NO uppercase chars, 0 otherwise. However,
262 262 * -1 is returned if "s" is not a valid multi-byte string.
263 263 */
264 264 int
265 265 smb_isstrlwr(const char *s)
266 266 {
267 - smb_wchar_t c;
267 + uint32_t c;
268 268 int n;
269 269 const char *p = s;
270 270
271 271 while (*p) {
272 272 if (smb_isascii(*p) && smb_isupper(*p))
273 273 return (0);
274 274 else {
275 275 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
276 276 return (-1);
277 277
278 278 if (c == 0)
279 279 break;
280 280
281 281 if (smb_isupper(c))
282 282 return (0);
283 283
284 284 p += n;
285 285 }
286 286 }
287 287
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
288 288 return (1);
289 289 }
290 290
291 291 /*
292 292 * Returns 1 if string contains NO lowercase chars, 0 otherwise. However,
293 293 * -1 is returned if "s" is not a valid multi-byte string.
294 294 */
295 295 int
296 296 smb_isstrupr(const char *s)
297 297 {
298 - smb_wchar_t c;
298 + uint32_t c;
299 299 int n;
300 300 const char *p = s;
301 301
302 302 while (*p) {
303 303 if (smb_isascii(*p) && smb_islower(*p))
304 304 return (0);
305 305 else {
306 306 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
307 307 return (-1);
308 308
309 309 if (c == 0)
310 310 break;
311 311
312 312 if (smb_islower(c))
313 313 return (0);
314 314
315 315 p += n;
316 316 }
317 317 }
318 318
319 319 return (1);
320 320 }
321 321
322 322 /*
323 323 * Compare the null-terminated strings s1 and s2 and return an integer
324 324 * greater than, equal to or less than 0 dependent on whether s1 is
325 325 * lexicographically greater than, equal to or less than s2 after
326 326 * translation of each character to lowercase. The original strings
327 327 * are not modified.
328 328 *
329 329 * If n is non-zero, at most n bytes are compared. Otherwise, the strings
330 330 * are compared until a null terminator is encountered.
331 331 *
332 332 * Out: 0 if strings are equal
333 333 * < 0 if first string < second string
334 334 * > 0 if first string > second string
335 335 */
336 336 int
337 337 smb_strcasecmp(const char *s1, const char *s2, size_t n)
338 338 {
339 339 int err = 0;
340 340 int rc;
341 341
342 342 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
343 343 if (err != 0)
344 344 return (-1);
345 345 return (rc);
346 346 }
347 347
348 348 /*
349 349 * First build a codepage based on cp_unicode.h. Then build the unicode
350 350 * codepage from this interim codepage by copying the entries over while
351 351 * fixing them and filling in the gaps.
352 352 */
353 353 static smb_codepage_t *
354 354 smb_unicode_init(void)
355 355 {
356 356 smb_codepage_t *unicode;
357 357 uint32_t a = 0;
358 358 uint32_t b = 0;
359 359
360 360 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
361 361 if (unicode == NULL)
362 362 return (NULL);
363 363
364 364 while (b != 0xffff) {
365 365 /*
366 366 * If there is a gap in the standard,
367 367 * fill in the gap with no-case entries.
368 368 */
369 369 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
370 370 unicode[b].ctype = CODEPAGE_ISNONE;
371 371 unicode[b].upper = (smb_wchar_t)b;
372 372 unicode[b].lower = (smb_wchar_t)b;
373 373 b++;
374 374 continue;
375 375 }
376 376
377 377 /*
378 378 * Copy the entry and fixup as required.
379 379 */
380 380 switch (a_unicode[a].ctype) {
381 381 case CODEPAGE_ISNONE:
382 382 /*
383 383 * Replace 0xffff in upper/lower fields with its val.
384 384 */
385 385 unicode[b].ctype = CODEPAGE_ISNONE;
386 386 unicode[b].upper = (smb_wchar_t)b;
387 387 unicode[b].lower = (smb_wchar_t)b;
388 388 break;
389 389 case CODEPAGE_ISUPPER:
390 390 /*
391 391 * Some characters may have case yet not have
392 392 * case conversion. Treat them as no-case.
393 393 */
394 394 if (a_unicode[a].lower == 0xffff) {
395 395 unicode[b].ctype = CODEPAGE_ISNONE;
396 396 unicode[b].upper = (smb_wchar_t)b;
397 397 unicode[b].lower = (smb_wchar_t)b;
398 398 } else {
399 399 unicode[b].ctype = CODEPAGE_ISUPPER;
400 400 unicode[b].upper = (smb_wchar_t)b;
401 401 unicode[b].lower = a_unicode[a].lower;
402 402 }
403 403 break;
404 404 case CODEPAGE_ISLOWER:
405 405 /*
406 406 * Some characters may have case yet not have
407 407 * case conversion. Treat them as no-case.
408 408 */
409 409 if (a_unicode[a].upper == 0xffff) {
410 410 unicode[b].ctype = CODEPAGE_ISNONE;
411 411 unicode[b].upper = (smb_wchar_t)b;
412 412 unicode[b].lower = (smb_wchar_t)b;
413 413 } else {
414 414 unicode[b].ctype = CODEPAGE_ISLOWER;
415 415 unicode[b].upper = a_unicode[a].upper;
416 416 unicode[b].lower = (smb_wchar_t)b;
417 417 }
418 418 break;
419 419 default:
420 420 MEM_FREE("unicode", unicode);
421 421 return (NULL);
422 422 }
423 423
424 424 a++;
425 425 b++;
426 426 };
427 427
428 428 return (unicode);
429 429 }
430 430
431 431 /*
432 432 * Parse a UNC path (\\server\share\path) into its components.
|
↓ open down ↓ |
124 lines elided |
↑ open up ↑ |
433 433 * Although a standard UNC path starts with two '\', in DFS
434 434 * all UNC paths start with one '\'. So, this function only
435 435 * checks for one.
436 436 *
437 437 * A valid UNC must at least contain two components i.e. server
438 438 * and share. The path is parsed to:
439 439 *
440 440 * unc_server server or domain name with no leading/trailing '\'
441 441 * unc_share share name with no leading/trailing '\'
442 442 * unc_path relative path to the share with no leading/trailing '\'
443 - * it is valid for unc_path to be NULL.
443 + * it is valid for unc_path to be NULL.
444 444 *
445 445 * Upon successful return of this function, smb_unc_free()
446 446 * MUST be called when returned 'unc' is no longer needed.
447 447 *
448 448 * Returns 0 on success, otherwise returns an errno code.
449 449 */
450 450 int
451 451 smb_unc_init(const char *path, smb_unc_t *unc)
452 452 {
453 453 char *p;
454 454
455 455 if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
456 456 return (EINVAL);
457 457
458 458 bzero(unc, sizeof (smb_unc_t));
459 459
460 460 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
461 461 unc->unc_buf = smb_mem_strdup(path);
462 462 #else
463 463 if ((unc->unc_buf = strdup(path)) == NULL)
464 464 return (ENOMEM);
465 465 #endif
466 466
467 467 (void) strsubst(unc->unc_buf, '\\', '/');
468 468 (void) strcanon(unc->unc_buf, "/");
469 469
470 470 unc->unc_server = unc->unc_buf + 1;
471 471 if (*unc->unc_server == '\0') {
472 472 smb_unc_free(unc);
473 473 return (EINVAL);
474 474 }
475 475
476 476 if ((p = strchr(unc->unc_server, '/')) == NULL) {
477 477 smb_unc_free(unc);
478 478 return (EINVAL);
479 479 }
480 480
481 481 *p++ = '\0';
482 482 unc->unc_share = p;
483 483
484 484 if (*unc->unc_share == '\0') {
485 485 smb_unc_free(unc);
486 486 return (EINVAL);
487 487 }
488 488
489 489 unc->unc_path = strchr(unc->unc_share, '/');
490 490 if ((p = unc->unc_path) == NULL)
491 491 return (0);
492 492
493 493 unc->unc_path++;
494 494 *p = '\0';
495 495
496 496 /* remove the last '/' if any */
497 497 if ((p = strchr(unc->unc_path, '\0')) != NULL) {
498 498 if (*(--p) == '/')
499 499 *p = '\0';
500 500 }
501 501
502 502 return (0);
503 503 }
504 504
505 505 void
506 506 smb_unc_free(smb_unc_t *unc)
507 507 {
508 508 if (unc == NULL)
509 509 return;
510 510
511 511 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
512 512 smb_mem_free(unc->unc_buf);
513 513 #else
514 514 free(unc->unc_buf);
515 515 #endif
516 516 unc->unc_buf = NULL;
517 517 }
|
↓ open down ↓ |
64 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX