1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2016 Joyent, Inc.
14 */
15
16 /*
17 * The illumos kernel provides two clock backends: CLOCK_REALTIME, the
18 * adjustable system wall clock; and CLOCK_HIGHRES, the monotonically
19 * increasing time source that is not subject to drift or adjustment. By
20 * contrast, the Linux kernel is furnished with an overabundance of narrowly
21 * differentiated clock types.
22 *
23 * Fortunately, most of the commonly used Linux clock types are either similar
24 * enough to the native clock backends that they can be directly mapped, or
25 * represent queries to the per-process and per-LWP microstate counters.
26 *
27 * CLOCK_BOOTTIME is identical to CLOCK_MONOTONIC, except that it takes into
28 * account time that the system is suspended. Since that is uninteresting to
29 * us, we treat it the same.
30 */
31
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/brand.h>
36 #include <sys/lx_brand.h>
37 #include <sys/lx_impl.h>
38 #include <lx_signum.h>
39
40 /*
41 * From "uts/common/os/timer.c":
42 */
43 extern int clock_settime(clockid_t, timespec_t *);
44 extern int clock_gettime(clockid_t, timespec_t *);
45 extern int clock_getres(clockid_t, timespec_t *);
46 extern int nanosleep(timespec_t *, timespec_t *);
47
48
49 static int lx_emul_clock_getres(clockid_t, timespec_t *);
50 static int lx_emul_clock_gettime(clockid_t, timespec_t *);
51 static int lx_emul_clock_settime(clockid_t, timespec_t *);
52
/*
 * One dispatch entry per Linux clock: the native clock ID that is handed to
 * the handlers, and the getres/gettime/settime routines to call with it.
 */
typedef struct lx_clock_backend {
	clockid_t lclk_ntv_id;	/* native clock ID passed to the handlers */
	int (*lclk_clock_getres)(clockid_t, timespec_t *);
	int (*lclk_clock_gettime)(clockid_t, timespec_t *);
	int (*lclk_clock_settime)(clockid_t, timespec_t *);
} lx_clock_backend_t;
59
/*
 * NOTE: The Linux man pages state this structure is obsolete and is
 * unsupported, so it is declared here for sizing purposes only.
 * lx_gettimeofday() only ever copies out a zero-filled instance of it.
 */
struct lx_timezone {
	int tz_minuteswest;	/* minutes W of Greenwich */
	int tz_dsttime;		/* type of dst correction */
};
68
/*
 * Use the native clock_* system call implementation, but with a translated
 * clock identifier.  Expands to an lx_clock_backend_t initializer.
 */
#define	NATIVE(ntv_id)	\
	{ ntv_id, clock_getres, clock_gettime, clock_settime }

/*
 * This backend is not supported, so we provide an emulation handler.
 * Expands to an lx_clock_backend_t initializer that routes all three
 * operations to the lx_emul_clock_* functions in this file.
 */
#define	EMUL(ntv_id)	\
	{ ntv_id, lx_emul_clock_getres, lx_emul_clock_gettime,	\
	    lx_emul_clock_settime }
82
/*
 * Backend table.  LX_CLOCK_BACKEND() indexes it directly with the clock ID
 * supplied by the Linux caller, so entry order matters; the comment on each
 * entry names the Linux clock occupying that slot.
 */
static lx_clock_backend_t lx_clock_backends[] = {
	NATIVE(CLOCK_REALTIME),		/* LX_CLOCK_REALTIME */
	NATIVE(CLOCK_HIGHRES),		/* LX_CLOCK_MONOTONIC */
	EMUL(CLOCK_PROCESS_CPUTIME_ID),	/* LX_CLOCK_PROCESS_CPUTIME_ID */
	EMUL(CLOCK_THREAD_CPUTIME_ID),	/* LX_CLOCK_THREAD_CPUTIME_ID */
	NATIVE(CLOCK_HIGHRES),		/* LX_CLOCK_MONOTONIC_RAW */
	NATIVE(CLOCK_REALTIME),		/* LX_CLOCK_REALTIME_COARSE */
	NATIVE(CLOCK_HIGHRES),		/* LX_CLOCK_MONOTONIC_COARSE */
	NATIVE(CLOCK_HIGHRES)		/* LX_CLOCK_BOOTTIME */
};
93
/* Number of entries in lx_clock_backends. */
#define	LX_CLOCK_MAX	\
	(sizeof (lx_clock_backends) / sizeof (lx_clock_backends[0]))
/*
 * Map a Linux clock ID to its backend entry.  Evaluates to NULL when the ID
 * is negative or past the end of the table.  Note that the argument is
 * evaluated more than once.
 */
#define	LX_CLOCK_BACKEND(clk)	(((clk) < LX_CLOCK_MAX && (clk) >= 0) ? \
	&lx_clock_backends[(clk)] : NULL)
98
/*
 * Linux defines the size of the sigevent structure to be 64 bytes.  In order
 * to meet that definition, the trailing union includes a member which pads it
 * out to the desired length for the given architecture.  The pad is counted
 * in ints: whatever is left of the 64 bytes after the two int members and the
 * sigval union.
 */
#define	LX_SIGEV_PAD_SIZE	((64 - \
	(sizeof (int) * 2 + sizeof (union sigval))) / sizeof (int))

typedef struct {
	union sigval lx_sigev_value;	/* value delivered with the event */
	int lx_sigev_signo;		/* Linux signal number */
	int lx_sigev_notify;		/* one of the LX_SIGEV_* methods */
	union {
		int lx_pad[LX_SIGEV_PAD_SIZE];	/* pads struct to 64 bytes */
		int lx_tid;	/* target thread for LX_SIGEV_THREAD_ID */
		struct {
			void (*lx_notify_function)(union sigval);
			void *lx_notify_attribute;
		} lx_sigev_thread;
	} lx_sigev_un;
} lx_sigevent_t;
120
121
#ifdef _SYSCALL32_IMPL

/*
 * The same sigevent layout as presented by a 32-bit caller: the value is a
 * sigval32 and the pointer members are caddr32_t.  The pad is sized so that
 * the structure is still 64 bytes.
 */
#define	LX_SIGEV32_PAD_SIZE	((64 - \
	(sizeof (int) * 2 + sizeof (union sigval32))) / sizeof (int))

typedef struct {
	union sigval32 lx_sigev_value;
	int lx_sigev_signo;
	int lx_sigev_notify;
	union {
		int lx_pad[LX_SIGEV32_PAD_SIZE];
		int lx_tid;
		struct {
			caddr32_t lx_notify_function;
			caddr32_t lx_notify_attribute;
		} lx_sigev_thread;
	} lx_sigev_un;
} lx_sigevent32_t;

#endif /* _SYSCALL32_IMPL */
142
/*
 * Notification methods as they appear in lx_sigev_notify; lx_ltos_sigev()
 * translates them to the native SIGEV_* values.
 */
#define	LX_SIGEV_SIGNAL		0
#define	LX_SIGEV_NONE		1
#define	LX_SIGEV_THREAD		2
#define	LX_SIGEV_THREAD_ID	4
147
/*
 * Access private SIGEV_THREAD_ID callback state in itimer_t.  Slot 0 of
 * it_cb_data holds the Linux pid named by the caller as the target thread;
 * slot 1 holds the native thread ID that pid was translated to when the timer
 * was created.
 */
#define	LX_SIGEV_THREAD_ID_LPID(it)	((it)->it_cb_data[0])
#define	LX_SIGEV_THREAD_ID_TID(it)	((it)->it_cb_data[1])
153
154
/*
 * settime handler for the emulated clocks: neither argument is examined and
 * the call always fails with EINVAL.
 */
/* ARGSUSED */
static int
lx_emul_clock_settime(clockid_t clock, timespec_t *tp)
{
	return (set_errno(EINVAL));
}
161
162 static int
163 lx_emul_clock_gettime(clockid_t clock, timespec_t *tp)
164 {
165 timespec_t t;
166
167 switch (clock) {
168 case CLOCK_PROCESS_CPUTIME_ID: {
169 proc_t *p = ttoproc(curthread);
170 hrtime_t snsecs, unsecs;
171
172 /*
173 * Based on getrusage() in "rusagesys.c":
174 */
175 mutex_enter(&p->p_lock);
176 unsecs = mstate_aggr_state(p, LMS_USER);
177 snsecs = mstate_aggr_state(p, LMS_SYSTEM);
178 mutex_exit(&p->p_lock);
179
180 hrt2ts(unsecs + snsecs, &t);
181 break;
182 }
183
184 case CLOCK_THREAD_CPUTIME_ID: {
185 klwp_t *lwp = ttolwp(curthread);
186 struct mstate *ms = &lwp->lwp_mstate;
187 hrtime_t snsecs, unsecs;
188
189 /*
190 * Based on getrusage_lwp() in "rusagesys.c":
191 */
192 unsecs = ms->ms_acct[LMS_USER];
193 snsecs = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
194
195 scalehrtime(&unsecs);
196 scalehrtime(&snsecs);
197
198 hrt2ts(unsecs + snsecs, &t);
199 break;
200 }
201
202 default:
203 return (set_errno(EINVAL));
204 }
205
206 #if defined(_SYSCALL32_IMPL)
207 if (get_udatamodel() != DATAMODEL_NATIVE) {
208 timespec32_t t32;
209
210 if (TIMESPEC_OVERFLOW(&t)) {
211 return (set_errno(EOVERFLOW));
212 }
213 TIMESPEC_TO_TIMESPEC32(&t32, &t);
214
215 if (copyout(&t32, tp, sizeof (t32)) != 0) {
216 return (set_errno(EFAULT));
217 }
218
219 return (0);
220 }
221 #endif
222
223 if (copyout(&t, tp, sizeof (t)) != 0) {
224 return (set_errno(EFAULT));
225 }
226
227 return (0);
228 }
229
230 static int
231 lx_emul_clock_getres(clockid_t clock, timespec_t *tp)
232 {
233 timespec_t t;
234
235 if (tp == NULL) {
236 return (0);
237 }
238
239 switch (clock) {
240 case CLOCK_PROCESS_CPUTIME_ID:
241 case CLOCK_THREAD_CPUTIME_ID:
242 /*
243 * These clock backends return microstate accounting values for
244 * the LWP or the entire process. The Linux kernel claims they
245 * have nanosecond resolution; so will we.
246 */
247 t.tv_sec = 0;
248 t.tv_nsec = 1;
249 break;
250
251 default:
252 return (set_errno(EINVAL));
253 }
254
255 #if defined(_SYSCALL32_IMPL)
256 if (get_udatamodel() != DATAMODEL_NATIVE) {
257 timespec32_t t32;
258
259 if (TIMESPEC_OVERFLOW(&t)) {
260 return (set_errno(EOVERFLOW));
261 }
262 TIMESPEC_TO_TIMESPEC32(&t32, &t);
263
264 if (copyout(&t32, tp, sizeof (t32)) != 0) {
265 return (set_errno(EFAULT));
266 }
267
268 return (0);
269 }
270 #endif
271
272 if (copyout(&t, tp, sizeof (t)) != 0) {
273 return (set_errno(EFAULT));
274 }
275
276 return (0);
277 }
278
/*
 * Report a clock ID that has no backend: format the ID into a short message
 * and hand it to lx_unsupported().
 */
static void
lx_clock_unsupported(int clock)
{
	char msg[100];

	(void) snprintf(msg, sizeof (msg), "unsupported clock: %d", clock);
	lx_unsupported(msg);
}
287
288 long
289 lx_clock_settime(int clock, timespec_t *tp)
290 {
291 lx_clock_backend_t *backend;
292
293 if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
294 lx_clock_unsupported(clock);
295 return (set_errno(EINVAL));
296 }
297
298 return (backend->lclk_clock_settime(backend->lclk_ntv_id, tp));
299 }
300
301 long
302 lx_clock_gettime(int clock, timespec_t *tp)
303 {
304 lx_clock_backend_t *backend;
305
306 if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
307 lx_clock_unsupported(clock);
308 return (set_errno(EINVAL));
309 }
310
311 return (backend->lclk_clock_gettime(backend->lclk_ntv_id, tp));
312 }
313
314 long
315 lx_clock_getres(int clock, timespec_t *tp)
316 {
317 lx_clock_backend_t *backend;
318
319 if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
320 lx_clock_unsupported(clock);
321 return (set_errno(EINVAL));
322 }
323
324 /*
325 * It is important this check is performed after the clock
326 * check. Both glibc and musl, in their clock_getcpuclockid(),
327 * use clock_getres() with a NULL tp to validate a clock
328 * value. Performing the tp check before the clock check could
329 * indicate a valid clock to libc when it shouldn't.
330 */
331 if (tp == NULL) {
332 return (0);
333 }
334
335 return (backend->lclk_clock_getres(backend->lclk_ntv_id, tp));
336 }
337
338 static int
339 lx_ltos_sigev(lx_sigevent_t *lev, struct sigevent *sev)
340 {
341 bzero(sev, sizeof (*sev));
342
343 switch (lev->lx_sigev_notify) {
344 case LX_SIGEV_NONE:
345 sev->sigev_notify = SIGEV_NONE;
346 break;
347
348 case LX_SIGEV_SIGNAL:
349 case LX_SIGEV_THREAD_ID:
350 sev->sigev_notify = SIGEV_SIGNAL;
351 break;
352
353 case LX_SIGEV_THREAD:
354 /*
355 * Just as in illumos, SIGEV_THREAD handling is performed in
356 * userspace with the help of SIGEV_SIGNAL/SIGEV_THREAD_ID.
357 *
358 * It's not expected to make an appearance in the syscall.
359 */
360 default:
361 return (EINVAL);
362 }
363
364 sev->sigev_signo = lx_ltos_signo(lev->lx_sigev_signo, 0);
365 sev->sigev_value = lev->lx_sigev_value;
366
367 /* Ensure SIGEV_SIGNAL has a valid signo to work with. */
368 if (sev->sigev_notify == SIGEV_SIGNAL && sev->sigev_signo == 0) {
369 return (EINVAL);
370 }
371 return (0);
372 }
373
374 static int
375 lx_sigev_copyin(lx_sigevent_t *userp, lx_sigevent_t *levp)
376 {
377 #ifdef _SYSCALL32_IMPL
378 if (get_udatamodel() != DATAMODEL_NATIVE) {
379 lx_sigevent32_t lev32;
380
381 if (copyin(userp, &lev32, sizeof (lev32)) != 0) {
382 return (EFAULT);
383 }
384 levp->lx_sigev_value.sival_int = lev32.lx_sigev_value.sival_int;
385 levp->lx_sigev_signo = lev32.lx_sigev_signo;
386 levp->lx_sigev_notify = lev32.lx_sigev_notify;
387 levp->lx_sigev_un.lx_tid = lev32.lx_sigev_un.lx_tid;
388 } else
389 #endif /* _SYSCALL32_IMPL */
390 {
391 if (copyin(userp, levp, sizeof (lx_sigevent_t)) != 0) {
392 return (EFAULT);
393 }
394 }
395 return (0);
396 }
397
398 static void
399 lx_sigev_thread_fire(itimer_t *it)
400 {
401 proc_t *p = it->it_proc;
402 pid_t lpid = (pid_t)LX_SIGEV_THREAD_ID_LPID(it);
403 id_t tid = (id_t)LX_SIGEV_THREAD_ID_TID(it);
404 lwpdir_t *ld;
405
406 ASSERT(MUTEX_HELD(&it->it_mutex));
407 ASSERT(it->it_pending == 0);
408 ASSERT(it->it_flags & IT_SIGNAL);
409 ASSERT(MUTEX_HELD(&p->p_lock));
410
411 ld = lwp_hash_lookup(p, tid);
412 if (ld != NULL) {
413 lx_lwp_data_t *lwpd;
414 kthread_t *t;
415
416 t = ld->ld_entry->le_thread;
417 lwpd = ttolxlwp(t);
418 if (lwpd != NULL && lwpd->br_pid == lpid) {
419 /*
420 * A thread matching the LX pid is still present in the
421 * process. Send a targeted signal as requested.
422 */
423 it->it_pending = 1;
424 mutex_exit(&it->it_mutex);
425 sigaddqa(p, t, it->it_sigq);
426 return;
427 }
428 }
429
430 mutex_exit(&it->it_mutex);
431 }
432
433 long
434 lx_timer_create(int clock, lx_sigevent_t *sevp, timer_t *tidp)
435 {
436 int error;
437 lx_sigevent_t lev;
438 struct sigevent sev;
439 clock_backend_t *backend = NULL;
440 proc_t *p = curproc;
441 itimer_t *itp;
442 timer_t tid;
443
444 if (clock == -2) {
445 /*
446 * A change was made to the old userspace timer emulation to
447 * handle this specific clock ID for MapR. It was wrongly
448 * mapped to CLOCK_REALTIME rather than CLOCK_THREAD_CPUTIME_ID
449 * which it maps to. Until the CLOCK_*_CPUTIME_ID timers can
450 * be emulated, the admittedly incorrect mapping will remain.
451 */
452 backend = clock_get_backend(CLOCK_REALTIME);
453 } else {
454 lx_clock_backend_t *lback = LX_CLOCK_BACKEND(clock);
455
456 if (lback != NULL) {
457 backend = clock_get_backend(lback->lclk_ntv_id);
458 }
459 }
460 if (backend == NULL) {
461 return (set_errno(EINVAL));
462 }
463
464 /* We have to convert the Linux sigevent layout to the illumos layout */
465 if (sevp != NULL) {
466 if ((error = lx_sigev_copyin(sevp, &lev)) != 0) {
467 return (set_errno(error));
468 }
469 if ((error = lx_ltos_sigev(&lev, &sev)) != 0) {
470 return (set_errno(error));
471 }
472 } else {
473 bzero(&sev, sizeof (sev));
474 sev.sigev_notify = SIGEV_SIGNAL;
475 sev.sigev_signo = SIGALRM;
476 }
477
478 if ((error = timer_setup(backend, &sev, NULL, &itp, &tid)) != 0) {
479 return (set_errno(error));
480 }
481
482 /*
483 * The SIGEV_THREAD_ID notification method in Linux allows the caller
484 * to target a specific thread to receive the signal. The IT_CALLBACK
485 * timer functionality is used to fulfill this need. After translating
486 * the LX pid to a SunOS thread ID (ensuring it exists in the current
487 * process), those IDs are attached to the timer along with the custom
488 * lx_sigev_thread_fire callback. This targets the signal notification
489 * properly when the timer fires.
490 */
491 if (lev.lx_sigev_notify == LX_SIGEV_THREAD_ID) {
492 pid_t lpid, spid;
493 id_t stid;
494
495 lpid = (pid_t)lev.lx_sigev_un.lx_tid;
496 if (lx_lpid_to_spair(lpid, &spid, &stid) != 0 ||
497 spid != curproc->p_pid) {
498 error = EINVAL;
499 goto err;
500 }
501
502 itp->it_flags |= IT_CALLBACK;
503 itp->it_cb_func = lx_sigev_thread_fire;
504 LX_SIGEV_THREAD_ID_LPID(itp) = lpid;
505 LX_SIGEV_THREAD_ID_TID(itp) = stid;
506 }
507
508 /*
509 * When the sigevent is not specified, its sigev_value field is
510 * expected to be populated with the timer ID.
511 */
512 if (sevp == NULL) {
513 itp->it_sigq->sq_info.si_value.sival_int = tid;
514 }
515
516 if (copyout(&tid, tidp, sizeof (timer_t)) != 0) {
517 error = EFAULT;
518 goto err;
519 }
520
521 timer_release(p, itp);
522 return (0);
523
524 err:
525 timer_delete_grabbed(p, tid, itp);
526 return (set_errno(error));
527 }
528
529 long
530 lx_gettimeofday(struct timeval *tvp, struct lx_timezone *tzp)
531 {
532 struct lx_timezone tz;
533
534 bzero(&tz, sizeof (tz));
535
536 /*
537 * We want to be similar to libc which just does a fasttrap to
538 * gethrestime and simply converts that result. We follow how uniqtime
539 * does the conversion but we can't use that code since it does some
540 * extra work which can cause the result to bounce around based on which
541 * CPU we run on.
542 */
543 if (tvp != NULL) {
544 struct timeval tv;
545 timestruc_t ts;
546 int usec, nsec;
547
548 gethrestime(&ts);
549 nsec = ts.tv_nsec;
550 usec = nsec + (nsec >> 2);
551 usec = nsec + (usec >> 1);
552 usec = nsec + (usec >> 2);
553 usec = nsec + (usec >> 4);
554 usec = nsec - (usec >> 3);
555 usec = nsec + (usec >> 2);
556 usec = nsec + (usec >> 3);
557 usec = nsec + (usec >> 4);
558 usec = nsec + (usec >> 1);
559 usec = nsec + (usec >> 6);
560 usec = usec >> 10;
561
562 tv.tv_sec = ts.tv_sec;
563 tv.tv_usec = usec;
564
565 if (get_udatamodel() == DATAMODEL_NATIVE) {
566 if (copyout(&tv, tvp, sizeof (tv)) != 0)
567 return (set_errno(EFAULT));
568 }
569 #ifdef _SYSCALL32_IMPL
570 else {
571 struct timeval32 tv32;
572
573 if (TIMEVAL_OVERFLOW(&tv))
574 return (set_errno(EOVERFLOW));
575 TIMEVAL_TO_TIMEVAL32(&tv32, &tv);
576
577 if (copyout(&tv32, tvp, sizeof (tv32)))
578 return (set_errno(EFAULT));
579 }
580 #endif
581 }
582
583 /*
584 * The Linux man page states use of the second parameter is obsolete,
585 * but gettimeofday(2) should still return EFAULT if it is set
586 * to a bad non-NULL pointer (sigh...)
587 */
588 if (tzp != NULL && copyout(&tz, tzp, sizeof (tz)) != 0)
589 return (set_errno(EFAULT));
590
591 return (0);
592 }
593
594 /*
595 * On Linux a bad buffer will set errno to EFAULT, and on Illumos the failure
596 * mode is documented as "undefined."
597 */
598 long
599 lx_time(time_t *tp)
600 {
601 timestruc_t ts;
602 struct timeval tv;
603
604 gethrestime(&ts);
605 tv.tv_sec = ts.tv_sec;
606 tv.tv_usec = 0;
607
608 if (get_udatamodel() == DATAMODEL_NATIVE) {
609 if (tp != NULL &&
610 copyout(&tv.tv_sec, tp, sizeof (tv.tv_sec)) != 0)
611 return (set_errno(EFAULT));
612
613 return (tv.tv_sec);
614 }
615 #ifdef _SYSCALL32_IMPL
616 else {
617 struct timeval32 tv32;
618
619 if (TIMEVAL_OVERFLOW(&tv))
620 return (set_errno(EOVERFLOW));
621 TIMEVAL_TO_TIMEVAL32(&tv32, &tv);
622
623 if (tp != NULL &&
624 copyout(&tv32.tv_sec, tp, sizeof (tv32.tv_sec)))
625 return (set_errno(EFAULT));
626
627 return (tv32.tv_sec);
628 }
629 #endif /* _SYSCALL32_IMPL */
630 /* NOTREACHED */
631 }
632
/*
 * Emulate Linux nanosleep(2) by passing both user pointers straight through
 * to the native nanosleep() implementation and returning its result.
 */
long
lx_nanosleep(timespec_t *rqtp, timespec_t *rmtp)
{
	return (nanosleep(rqtp, rmtp));
}