1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2020 Joyent, Inc.
14 */
15
16 /*
17 * The illumos kernel provides two clock backends: CLOCK_REALTIME, the
18 * adjustable system wall clock; and CLOCK_HIGHRES, the monotonically
19 * increasing time source that is not subject to drift or adjustment. By
20 * contrast, the Linux kernel is furnished with an overabundance of narrowly
21 * differentiated clock types.
22 *
23 * Fortunately, most of the commonly used Linux clock types are similar
24 * enough to the native clock backends that they can be directly mapped.
25 *
 * Unfortunately, while CLOCK_{THREAD,PROCESS}_CPUTIME_ID are *somewhat*
 * implemented in the main illumos kernel as itimers (see setitimer(2)), we
 * would need an entire bringup of CLOCK_{THREAD,PROCESS}_CPUTIME_ID to
 * implement timer_create() and friends. For now, we opt to map them to
 * CLOCK_HIGHRES for timer_create() and friends, but continue to emulate
 * more-accurate queries to per-process or per-LWP microstate accounting for
 * getres, gettime, or settime.
33 *
34 * CLOCK_BOOTTIME is identical to CLOCK_MONOTONIC, except that it takes into
35 * account time that the system is suspended. Since that is uninteresting to
36 * us, we treat it the same.
37 */
38
39 #include <sys/time.h>
40 #include <sys/systm.h>
41 #include <sys/cmn_err.h>
42 #include <sys/brand.h>
43 #include <sys/lx_brand.h>
44 #include <sys/lx_impl.h>
45 #include <lx_signum.h>
46
47 /*
48 * From "uts/common/os/timer.c":
49 */
50 extern int clock_settime(clockid_t, timespec_t *);
51 extern int clock_gettime(clockid_t, timespec_t *);
52 extern int clock_getres(clockid_t, timespec_t *);
53 extern int nanosleep(timespec_t *, timespec_t *);
54
55
56 static int lx_emul_clock_getres(clockid_t, timespec_t *);
57 static int lx_emul_clock_gettime(clockid_t, timespec_t *);
58 static int lx_emul_clock_settime(clockid_t, timespec_t *);
59
60 typedef struct lx_clock_backend {
61 clockid_t lclk_ntv_id;
62 int (*lclk_clock_getres)(clockid_t, timespec_t *);
63 int (*lclk_clock_gettime)(clockid_t, timespec_t *);
64 int (*lclk_clock_settime)(clockid_t, timespec_t *);
65 } lx_clock_backend_t;
66
/*
 * NOTE: The Linux man pages describe this structure as obsolete and
 * unsupported.  It is declared here for sizing purposes only;
 * lx_gettimeofday() copies out a zero-filled instance.
 */
struct lx_timezone {
	int	tz_minuteswest;		/* minutes W of Greenwich */
	int	tz_dsttime;		/* type of dst correction */
};
75
/*
 * Table-entry initializer for a clock that is handled by the native clock_*
 * system call implementation, using a translated clock identifier:
 */
#define	NATIVE(ntv_id)	\
	{ (ntv_id), clock_getres, clock_gettime, clock_settime }

/*
 * Table-entry initializer for a clock whose backend is not supported
 * natively, so the lx_emul_clock_* emulation handlers are used instead:
 */
#define	EMUL(ntv_id)	\
	{ (ntv_id), lx_emul_clock_getres, lx_emul_clock_gettime,	\
	    lx_emul_clock_settime }
89
90 static lx_clock_backend_t lx_clock_backends[] = {
91 NATIVE(CLOCK_REALTIME), /* LX_CLOCK_REALTIME */
92 NATIVE(CLOCK_HIGHRES), /* LX_CLOCK_MONOTONIC */
93 EMUL(CLOCK_HIGHRES), /* LX_CLOCK_PROCESS_CPUTIME_ID */
94 EMUL(CLOCK_HIGHRES), /* LX_CLOCK_THREAD_CPUTIME_ID */
95 NATIVE(CLOCK_HIGHRES), /* LX_CLOCK_MONOTONIC_RAW */
96 NATIVE(CLOCK_REALTIME), /* LX_CLOCK_REALTIME_COARSE */
97 NATIVE(CLOCK_HIGHRES), /* LX_CLOCK_MONOTONIC_COARSE */
98 NATIVE(CLOCK_HIGHRES) /* LX_CLOCK_BOOTTIME */
99 };
100
/* Number of entries in lx_clock_backends. */
#define	LX_CLOCK_MAX	\
	(sizeof (lx_clock_backends) / sizeof (lx_clock_backends[0]))

/*
 * Map a Linux clock ID to its table entry; evaluates to NULL when the ID
 * lies outside the table.
 */
#define	LX_CLOCK_BACKEND(clk)					\
	(((clk) < LX_CLOCK_MAX && (clk) >= 0) ?			\
	    &lx_clock_backends[(clk)] : NULL)
105
106 /*
107 * Linux defines the size of the sigevent structure to be 64 bytes. In order
108 * to meet that definition, the trailing union includes a member which pads it
109 * out to the desired length for the given architecture.
110 */
111 #define LX_SIGEV_PAD_SIZE ((64 - \
112 (sizeof (int) * 2 + sizeof (union sigval))) / sizeof (int))
113
114 typedef struct {
115 union sigval lx_sigev_value;
116 int lx_sigev_signo;
117 int lx_sigev_notify;
118 union {
119 int lx_pad[LX_SIGEV_PAD_SIZE];
120 int lx_tid;
121 struct {
122 void (*lx_notify_function)(union sigval);
123 void *lx_notify_attribute;
124 } lx_sigev_thread;
125 } lx_sigev_un;
126 } lx_sigevent_t;
127
128
#ifdef _SYSCALL32_IMPL

/*
 * Layout of the Linux sigevent as seen by a 32-bit process: the same fields
 * as lx_sigevent_t, with 32-bit sigval and pointer members, again padded to
 * 64 bytes.
 */
#define	LX_SIGEV32_PAD_SIZE	((64 - \
	(sizeof (int) * 2 + sizeof (union sigval32))) / sizeof (int))

typedef struct {
	union sigval32	lx_sigev_value;
	int		lx_sigev_signo;
	int		lx_sigev_notify;
	union {
		int	lx_pad[LX_SIGEV32_PAD_SIZE];	/* sizing only */
		int	lx_tid;
		struct {
			caddr32_t	lx_notify_function;
			caddr32_t	lx_notify_attribute;
		} lx_sigev_thread;
	} lx_sigev_un;
} lx_sigevent32_t;

#endif /* _SYSCALL32_IMPL */
149
/* Values of lx_sigev_notify understood by lx_ltos_sigev(). */
#define	LX_SIGEV_SIGNAL		0
#define	LX_SIGEV_NONE		1
#define	LX_SIGEV_THREAD		2
#define	LX_SIGEV_THREAD_ID	4

/*
 * Access the private SIGEV_THREAD_ID callback state in an itimer_t.
 * lx_timer_create() stores the Linux pid of the target thread in slot 0 and
 * the corresponding native thread ID in slot 1.
 */
#define	LX_SIGEV_THREAD_ID_LPID(it)	((it)->it_cb_data[0])
#define	LX_SIGEV_THREAD_ID_TID(it)	((it)->it_cb_data[1])
160
161
162 /* ARGSUSED */
163 static int
164 lx_emul_clock_settime(clockid_t clock, timespec_t *tp)
165 {
166 return (set_errno(EINVAL));
167 }
168
169 static int
170 lx_emul_clock_gettime(clockid_t clock, timespec_t *tp)
171 {
172 timespec_t t;
173
174 switch (clock) {
175 case CLOCK_PROCESS_CPUTIME_ID: {
176 proc_t *p = ttoproc(curthread);
177 hrtime_t snsecs, unsecs;
178
179 /*
180 * Based on getrusage() in "rusagesys.c":
181 */
182 mutex_enter(&p->p_lock);
183 unsecs = mstate_aggr_state(p, LMS_USER);
184 snsecs = mstate_aggr_state(p, LMS_SYSTEM);
185 mutex_exit(&p->p_lock);
186
187 hrt2ts(unsecs + snsecs, &t);
188 break;
189 }
190
191 case CLOCK_THREAD_CPUTIME_ID: {
192 klwp_t *lwp = ttolwp(curthread);
193 struct mstate *ms = &lwp->lwp_mstate;
194 hrtime_t snsecs, unsecs;
195
196 /*
197 * Based on getrusage_lwp() in "rusagesys.c":
198 */
199 unsecs = ms->ms_acct[LMS_USER];
200 snsecs = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
201
202 scalehrtime(&unsecs);
203 scalehrtime(&snsecs);
204
205 hrt2ts(unsecs + snsecs, &t);
206 break;
207 }
208
209 default:
210 return (set_errno(EINVAL));
211 }
212
213 #if defined(_SYSCALL32_IMPL)
214 if (get_udatamodel() != DATAMODEL_NATIVE) {
215 timespec32_t t32;
216
217 if (TIMESPEC_OVERFLOW(&t)) {
218 return (set_errno(EOVERFLOW));
219 }
220 TIMESPEC_TO_TIMESPEC32(&t32, &t);
221
222 if (copyout(&t32, tp, sizeof (t32)) != 0) {
223 return (set_errno(EFAULT));
224 }
225
226 return (0);
227 }
228 #endif
229
230 if (copyout(&t, tp, sizeof (t)) != 0) {
231 return (set_errno(EFAULT));
232 }
233
234 return (0);
235 }
236
237 static int
238 lx_emul_clock_getres(clockid_t clock, timespec_t *tp)
239 {
240 timespec_t t;
241
242 if (tp == NULL) {
243 return (0);
244 }
245
246 switch (clock) {
247 case CLOCK_PROCESS_CPUTIME_ID:
248 case CLOCK_THREAD_CPUTIME_ID:
249 /*
250 * These clock backends return microstate accounting values for
251 * the LWP or the entire process. The Linux kernel claims they
252 * have nanosecond resolution; so will we.
253 */
254 t.tv_sec = 0;
255 t.tv_nsec = 1;
256 break;
257
258 default:
259 return (set_errno(EINVAL));
260 }
261
262 #if defined(_SYSCALL32_IMPL)
263 if (get_udatamodel() != DATAMODEL_NATIVE) {
264 timespec32_t t32;
265
266 if (TIMESPEC_OVERFLOW(&t)) {
267 return (set_errno(EOVERFLOW));
268 }
269 TIMESPEC_TO_TIMESPEC32(&t32, &t);
270
271 if (copyout(&t32, tp, sizeof (t32)) != 0) {
272 return (set_errno(EFAULT));
273 }
274
275 return (0);
276 }
277 #endif
278
279 if (copyout(&t, tp, sizeof (t)) != 0) {
280 return (set_errno(EFAULT));
281 }
282
283 return (0);
284 }
285
/*
 * Report a Linux clock ID that has no entry in the backend table by
 * formatting a message and passing it to lx_unsupported().
 */
static void
lx_clock_unsupported(int clock)
{
	char msg[100];

	(void) snprintf(msg, sizeof (msg), "unsupported clock: %d", clock);
	lx_unsupported(msg);
}
294
295 long
296 lx_clock_settime(int clock, timespec_t *tp)
297 {
298 lx_clock_backend_t *backend;
299
300 if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
301 lx_clock_unsupported(clock);
302 return (set_errno(EINVAL));
303 }
304
305 return (backend->lclk_clock_settime(backend->lclk_ntv_id, tp));
306 }
307
308 long
309 lx_clock_gettime(int clock, timespec_t *tp)
310 {
311 lx_clock_backend_t *backend;
312
313 if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
314 lx_clock_unsupported(clock);
315 return (set_errno(EINVAL));
316 }
317
318 return (backend->lclk_clock_gettime(backend->lclk_ntv_id, tp));
319 }
320
321 long
322 lx_clock_getres(int clock, timespec_t *tp)
323 {
324 lx_clock_backend_t *backend;
325
326 if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
327 lx_clock_unsupported(clock);
328 return (set_errno(EINVAL));
329 }
330
331 /*
332 * It is important this check is performed after the clock
333 * check. Both glibc and musl, in their clock_getcpuclockid(),
334 * use clock_getres() with a NULL tp to validate a clock
335 * value. Performing the tp check before the clock check could
336 * indicate a valid clock to libc when it shouldn't.
337 */
338 if (tp == NULL) {
339 return (0);
340 }
341
342 return (backend->lclk_clock_getres(backend->lclk_ntv_id, tp));
343 }
344
345 static int
346 lx_ltos_sigev(lx_sigevent_t *lev, struct sigevent *sev)
347 {
348 bzero(sev, sizeof (*sev));
349
350 switch (lev->lx_sigev_notify) {
351 case LX_SIGEV_NONE:
352 sev->sigev_notify = SIGEV_NONE;
353 break;
354
355 case LX_SIGEV_SIGNAL:
356 case LX_SIGEV_THREAD_ID:
357 sev->sigev_notify = SIGEV_SIGNAL;
358 break;
359
360 case LX_SIGEV_THREAD:
361 /*
362 * Just as in illumos, SIGEV_THREAD handling is performed in
363 * userspace with the help of SIGEV_SIGNAL/SIGEV_THREAD_ID.
364 *
365 * It's not expected to make an appearance in the syscall.
366 */
367 default:
368 return (EINVAL);
369 }
370
371 sev->sigev_signo = lx_ltos_signo(lev->lx_sigev_signo, 0);
372 sev->sigev_value = lev->lx_sigev_value;
373
374 /* Ensure SIGEV_SIGNAL has a valid signo to work with. */
375 if (sev->sigev_notify == SIGEV_SIGNAL && sev->sigev_signo == 0) {
376 return (EINVAL);
377 }
378 return (0);
379 }
380
381 static int
382 lx_sigev_copyin(lx_sigevent_t *userp, lx_sigevent_t *levp)
383 {
384 #ifdef _SYSCALL32_IMPL
385 if (get_udatamodel() != DATAMODEL_NATIVE) {
386 lx_sigevent32_t lev32;
387
388 if (copyin(userp, &lev32, sizeof (lev32)) != 0) {
389 return (EFAULT);
390 }
391 levp->lx_sigev_value.sival_int = lev32.lx_sigev_value.sival_int;
392 levp->lx_sigev_signo = lev32.lx_sigev_signo;
393 levp->lx_sigev_notify = lev32.lx_sigev_notify;
394 levp->lx_sigev_un.lx_tid = lev32.lx_sigev_un.lx_tid;
395 } else
396 #endif /* _SYSCALL32_IMPL */
397 {
398 if (copyin(userp, levp, sizeof (lx_sigevent_t)) != 0) {
399 return (EFAULT);
400 }
401 }
402 return (0);
403 }
404
405 static void
406 lx_sigev_thread_fire(itimer_t *it)
407 {
408 proc_t *p = it->it_proc;
409 pid_t lpid = (pid_t)LX_SIGEV_THREAD_ID_LPID(it);
410 id_t tid = (id_t)LX_SIGEV_THREAD_ID_TID(it);
411 lwpdir_t *ld;
412
413 ASSERT(MUTEX_HELD(&it->it_mutex));
414 ASSERT(it->it_pending == 0);
415 ASSERT(it->it_flags & IT_SIGNAL);
416 ASSERT(MUTEX_HELD(&p->p_lock));
417
418 ld = lwp_hash_lookup(p, tid);
419 if (ld != NULL) {
420 lx_lwp_data_t *lwpd;
421 kthread_t *t;
422
423 t = ld->ld_entry->le_thread;
424 lwpd = ttolxlwp(t);
425 if (lwpd != NULL && lwpd->br_pid == lpid) {
426 /*
427 * A thread matching the LX pid is still present in the
428 * process. Send a targeted signal as requested.
429 */
430 it->it_pending = 1;
431 mutex_exit(&it->it_mutex);
432 sigaddqa(p, t, it->it_sigq);
433 return;
434 }
435 }
436
437 mutex_exit(&it->it_mutex);
438 }
439
440 long
441 lx_timer_create(int clock, lx_sigevent_t *sevp, timer_t *tidp)
442 {
443 int error;
444 lx_sigevent_t lev;
445 struct sigevent sev;
446 clock_backend_t *backend = NULL;
447 proc_t *p = curproc;
448 itimer_t *itp;
449 timer_t tid;
450
451 if (clock == -2) {
452 /*
453 * A change was made to the old userspace timer emulation to
454 * handle this specific clock ID for MapR. It was wrongly
455 * mapped to CLOCK_REALTIME rather than CLOCK_THREAD_CPUTIME_ID
456 * which it maps to. Until the CLOCK_*_CPUTIME_ID timers can
457 * be emulated, the admittedly incorrect mapping will remain.
458 */
459 backend = clock_get_backend(CLOCK_REALTIME);
460 } else {
461 lx_clock_backend_t *lback = LX_CLOCK_BACKEND(clock);
462
463 if (lback != NULL) {
464 backend = clock_get_backend(lback->lclk_ntv_id);
465 }
466 }
467 if (backend == NULL) {
468 return (set_errno(EINVAL));
469 }
470
471 /* We have to convert the Linux sigevent layout to the illumos layout */
472 if (sevp != NULL) {
473 if ((error = lx_sigev_copyin(sevp, &lev)) != 0) {
474 return (set_errno(error));
475 }
476 if ((error = lx_ltos_sigev(&lev, &sev)) != 0) {
477 return (set_errno(error));
478 }
479 } else {
480 bzero(&sev, sizeof (sev));
481 sev.sigev_notify = SIGEV_SIGNAL;
482 sev.sigev_signo = SIGALRM;
483 }
484
485 if ((error = timer_setup(backend, &sev, NULL, &itp, &tid)) != 0) {
486 return (set_errno(error));
487 }
488
489 /*
490 * The SIGEV_THREAD_ID notification method in Linux allows the caller
491 * to target a specific thread to receive the signal. The IT_CALLBACK
492 * timer functionality is used to fulfill this need. After translating
493 * the LX pid to a SunOS thread ID (ensuring it exists in the current
494 * process), those IDs are attached to the timer along with the custom
495 * lx_sigev_thread_fire callback. This targets the signal notification
496 * properly when the timer fires.
497 */
498 if (lev.lx_sigev_notify == LX_SIGEV_THREAD_ID) {
499 pid_t lpid, spid;
500 id_t stid;
501
502 lpid = (pid_t)lev.lx_sigev_un.lx_tid;
503 if (lx_lpid_to_spair(lpid, &spid, &stid) != 0 ||
504 spid != curproc->p_pid) {
505 error = EINVAL;
506 goto err;
507 }
508
509 itp->it_flags |= IT_CALLBACK;
510 itp->it_cb_func = lx_sigev_thread_fire;
511 LX_SIGEV_THREAD_ID_LPID(itp) = lpid;
512 LX_SIGEV_THREAD_ID_TID(itp) = stid;
513 }
514
515 /*
516 * When the sigevent is not specified, its sigev_value field is
517 * expected to be populated with the timer ID.
518 */
519 if (sevp == NULL) {
520 itp->it_sigq->sq_info.si_value.sival_int = tid;
521 }
522
523 if (copyout(&tid, tidp, sizeof (timer_t)) != 0) {
524 error = EFAULT;
525 goto err;
526 }
527
528 timer_release(p, itp);
529 return (0);
530
531 err:
532 timer_delete_grabbed(p, tid, itp);
533 return (set_errno(error));
534 }
535
536 long
537 lx_gettimeofday(struct timeval *tvp, struct lx_timezone *tzp)
538 {
539 struct lx_timezone tz;
540
541 bzero(&tz, sizeof (tz));
542
543 /*
544 * We want to be similar to libc which just does a fasttrap to
545 * gethrestime and simply converts that result. We follow how uniqtime
546 * does the conversion but we can't use that code since it does some
547 * extra work which can cause the result to bounce around based on which
548 * CPU we run on.
549 */
550 if (tvp != NULL) {
551 struct timeval tv;
552 timestruc_t ts;
553 int usec, nsec;
554
555 gethrestime(&ts);
556 nsec = ts.tv_nsec;
557 usec = nsec + (nsec >> 2);
558 usec = nsec + (usec >> 1);
559 usec = nsec + (usec >> 2);
560 usec = nsec + (usec >> 4);
561 usec = nsec - (usec >> 3);
562 usec = nsec + (usec >> 2);
563 usec = nsec + (usec >> 3);
564 usec = nsec + (usec >> 4);
565 usec = nsec + (usec >> 1);
566 usec = nsec + (usec >> 6);
567 usec = usec >> 10;
568
569 tv.tv_sec = ts.tv_sec;
570 tv.tv_usec = usec;
571
572 if (get_udatamodel() == DATAMODEL_NATIVE) {
573 if (copyout(&tv, tvp, sizeof (tv)) != 0)
574 return (set_errno(EFAULT));
575 }
576 #ifdef _SYSCALL32_IMPL
577 else {
578 struct timeval32 tv32;
579
580 if (TIMEVAL_OVERFLOW(&tv))
581 return (set_errno(EOVERFLOW));
582 TIMEVAL_TO_TIMEVAL32(&tv32, &tv);
583
584 if (copyout(&tv32, tvp, sizeof (tv32)))
585 return (set_errno(EFAULT));
586 }
587 #endif
588 }
589
590 /*
591 * The Linux man page states use of the second parameter is obsolete,
592 * but gettimeofday(2) should still return EFAULT if it is set
593 * to a bad non-NULL pointer (sigh...)
594 */
595 if (tzp != NULL && copyout(&tz, tzp, sizeof (tz)) != 0)
596 return (set_errno(EFAULT));
597
598 return (0);
599 }
600
601 /*
602 * On Linux a bad buffer will set errno to EFAULT, and on Illumos the failure
603 * mode is documented as "undefined."
604 */
605 long
606 lx_time(time_t *tp)
607 {
608 timestruc_t ts;
609 struct timeval tv;
610
611 gethrestime(&ts);
612 tv.tv_sec = ts.tv_sec;
613 tv.tv_usec = 0;
614
615 if (get_udatamodel() == DATAMODEL_NATIVE) {
616 if (tp != NULL &&
617 copyout(&tv.tv_sec, tp, sizeof (tv.tv_sec)) != 0)
618 return (set_errno(EFAULT));
619
620 return (tv.tv_sec);
621 }
622 #ifdef _SYSCALL32_IMPL
623 else {
624 struct timeval32 tv32;
625
626 if (TIMEVAL_OVERFLOW(&tv))
627 return (set_errno(EOVERFLOW));
628 TIMEVAL_TO_TIMEVAL32(&tv32, &tv);
629
630 if (tp != NULL &&
631 copyout(&tv32.tv_sec, tp, sizeof (tv32.tv_sec)))
632 return (set_errno(EFAULT));
633
634 return (tv32.tv_sec);
635 }
636 #endif /* _SYSCALL32_IMPL */
637 /* NOTREACHED */
638 }
639
640 long
641 lx_nanosleep(timespec_t *rqtp, timespec_t *rmtp)
642 {
643 return (nanosleep(rqtp, rmtp));
644 }