/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2014 by Delphix. All rights reserved.
 * Copyright 2019 Nexenta Systems, Inc.
 */

#ifndef	_VM_HAT_I86_H
#define	_VM_HAT_I86_H


#ifdef	__cplusplus
extern "C" {
#endif

/*
 * VM - Hardware Address Translation management.
 *
 * This file describes the contents of the x86_64 HAT data structures.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/vm_machparam.h>
#include <sys/promif.h>
#include <vm/hat_pte.h>
#include <vm/htable.h>
#include <vm/hment.h>

/*
 * The essential data types involved:
 *
 * htable_t	- There is one of these for each page table and it is used
 *		  by the HAT to manage the page table.
 *
 * hment_t	- Links together multiple PTEs to a single page.
 */
/*
 * VLP processes have a 32 bit address range, so their top level is 2,
 * with only 4 PTEs in that top level table.
 */
#define	VLP_LEVEL	(2)
#define	VLP_NUM_PTES	(4)
#define	VLP_SIZE	(VLP_NUM_PTES * sizeof (x86pte_t))
#define	TOP_LEVEL(h)	(((h)->hat_flags & HAT_VLP) ? VLP_LEVEL : mmu.max_level)
#define	VLP_COPY(fromptep, toptep) { \
	toptep[0] = fromptep[0]; \
	toptep[1] = fromptep[1]; \
	toptep[2] = fromptep[2]; \
	toptep[3] = fromptep[3]; \
}
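
/*
 * Illustrative sketch (not part of this header): TOP_LEVEL() lets page
 * table walks start at the correct level for both VLP and normal hats,
 * and VLP_COPY() copies the four top-level PTEs, e.g. into a
 * hypothetical destination array "dest":
 *
 *	uint_t top = TOP_LEVEL(hat);		(2 for a VLP hat)
 *	VLP_COPY(hat->hat_vlp_ptes, dest);
 */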

/*
 * The hat struct exists for each address space.
 */
struct hat {
	kmutex_t	hat_mutex;
	struct as	*hat_as;
	uint_t		hat_stats;
	pgcnt_t		hat_pages_mapped[MAX_PAGE_LEVEL + 1];
	pgcnt_t		hat_ism_pgcnt;
	cpuset_t	hat_cpus;
	uint16_t	hat_flags;
	uint16_t	hat_unmaps;	/* stop hat from being freed during unmap */
	htable_t	*hat_htable;	/* top level htable */
	struct hat	*hat_next;
	struct hat	*hat_prev;
	uint_t		hat_num_hash;	/* number of htable hash buckets */
	htable_t	**hat_ht_hash;	/* htable hash buckets */
	htable_t	*hat_ht_cached;	/* cached free htables */
	x86pte_t	hat_vlp_ptes[VLP_NUM_PTES];
#if defined(__amd64) && defined(__xpv)
	pfn_t		hat_user_ptable; /* alt top ptable for user mode */
#endif
};
typedef struct hat hat_t;

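/*
 * Atomically bump or drop the count of pages mapped at a given page size
 * level in a hat's hat_pages_mapped[] array.
 */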
#define	PGCNT_INC(hat, level) \
	atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);
#define	PGCNT_DEC(hat, level) \
	atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);

/*
 * Flags for the hat_flags field
 *
 * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
 *	demap()s can be avoided.
 *
 * HAT_VLP - indicates a 32 bit process has a virtual address range less than
 *	the hardware's physical address range. (VLP->Virtual Less-than Physical)
 *	Note - never used on the hypervisor.
 *
 * HAT_VICTIM - This is set while a hat is being examined for page table
 *	stealing and prevents it from being freed.
 *
 * HAT_SHARED - The hat has exported its page tables via hat_share()
 *
 * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
 */
#define	HAT_FREEING	(0x0001)
#define	HAT_VLP		(0x0002)
#define	HAT_VICTIM	(0x0004)
#define	HAT_SHARED	(0x0008)
#define	HAT_PINNED	(0x0010)
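
/*
 * Illustrative sketch (not part of this header; the exact locking rules for
 * hat_flags live in the HAT implementation): consumers typically just test
 * a flag, e.g.
 *
 *	if ((hat->hat_flags & HAT_FREEING) != 0)
 *		return;		(the hat is going away, skip the demap)
 */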

/*
 * Additional platform attribute for hat_devload() to force no caching.
 */
#define	HAT_PLAT_NOCACHE	(0x100000)

/*
 * Simple statistics for the HAT. These are just counters that are
 * atomically incremented. They can be reset directly from the kernel
 * debugger.
 */
struct hatstats {
	ulong_t	hs_reap_attempts;
	ulong_t	hs_reaped;
	ulong_t	hs_steals;
	ulong_t	hs_ptable_allocs;
	ulong_t	hs_ptable_frees;
	ulong_t	hs_htable_rgets;	/* allocs from reserve */
	ulong_t	hs_htable_rputs;	/* putbacks to reserve */
	ulong_t	hs_htable_shared;	/* number of htables shared */
	ulong_t	hs_htable_unshared;	/* number of htables unshared */
	ulong_t	hs_hm_alloc;
	ulong_t	hs_hm_free;
	ulong_t	hs_hm_put_reserve;
	ulong_t	hs_hm_get_reserve;
	ulong_t	hs_hm_steals;
	ulong_t	hs_hm_steal_exam;
	ulong_t	hs_tlb_inval_delayed;
};
extern struct hatstats hatstat;
#ifdef DEBUG
#define	HATSTAT_INC(x)	(++hatstat.x)
#else
#define	HATSTAT_INC(x)	(0)
#endif
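
/*
 * Illustrative sketch (not part of this header): implementation code bumps
 * a counter by field name; on non-DEBUG kernels the macro compiles to a
 * no-op, e.g.
 *
 *	HATSTAT_INC(hs_ptable_allocs);
 */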

#if defined(_KERNEL)

/*
 * Useful macros to page align hat_XXX() address arguments and to check
 * that they are already page aligned.
 */
#define	ALIGN2PAGE(a)		((uintptr_t)(a) & MMU_PAGEMASK)
#define	IS_PAGEALIGNED(a)	(((uintptr_t)(a) & MMU_PAGEOFFSET) == 0)
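
/*
 * Illustrative sketch (not part of this header): hat_XXX() entry points
 * typically check or enforce alignment with these, e.g.
 *
 *	ASSERT(IS_PAGEALIGNED(vaddr));
 *	vaddr = (caddr_t)ALIGN2PAGE(vaddr);
 */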

extern uint_t	khat_running;	/* set at end of hat_kern_setup() */
extern cpuset_t khat_cpuset;	/* cpuset for kernel address demap Xcalls */
extern kmutex_t hat_list_lock;
extern kcondvar_t hat_list_cv;

/*
 * Interfaces to set up a cpu private mapping (i.e. preemption disabled).
 * The attr and flags arguments are the same as for hat_devload().
 * setup() must be called once, then any number of calls to remap(),
 * followed by a final call to release().
 *
 * Used by ppcopy(), page_zero(), the memscrubber, and the kernel debugger.
 */
typedef paddr_t hat_mempte_t;	/* phys addr of PTE */
extern hat_mempte_t hat_mempte_setup(caddr_t addr);
extern void hat_mempte_remap(pfn_t, caddr_t, hat_mempte_t,
	uint_t attr, uint_t flags);
extern void hat_mempte_release(caddr_t addr, hat_mempte_t);
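
/*
 * Illustrative sketch of the call sequence (not part of this header). The
 * attr/flags values shown are only plausible hat_devload() style arguments,
 * and "va" is assumed to be a private per-cpu virtual address:
 *
 *	hat_mempte_t pte_pa = hat_mempte_setup(va);	(once, at setup)
 *	...
 *	kpreempt_disable();
 *	hat_mempte_remap(pfn, va, pte_pa,
 *	    PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
 *	... access the page through va ...
 *	kpreempt_enable();
 *	...
 *	hat_mempte_release(va, pte_pa);			(final teardown)
 */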

/*
 * Interfaces to manage which thread has access to htable and hment reserves.
 * The USE_HAT_RESERVES macro should always be recomputed in full. Its value
 * (due to curthread) can change after any call into kmem/vmem.
 */
extern uint_t can_steal_post_boot;
extern uint_t use_boot_reserve;
#define	USE_HAT_RESERVES()					\
	(use_boot_reserve || curthread->t_hatdepth > 1 ||	\
	    panicstr != NULL || vmem_is_populator())
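
/*
 * Illustrative sketch (not part of this header): allocation paths test the
 * macro each time the answer is needed instead of caching the result, since
 * a kmem/vmem call in between can change what it evaluates to:
 *
 *	if (USE_HAT_RESERVES())
 *		take the object from the reserve list
 *	else
 *		allocate normally, then re-test before any further
 *		reserve decisions
 */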

/*
 * initialization stuff needed by startup, mp_startup...
 */
extern void hat_cpu_online(struct cpu *);
extern void hat_cpu_offline(struct cpu *);
extern void setup_vaddr_for_ppcopy(struct cpu *);
extern void teardown_vaddr_for_ppcopy(struct cpu *);
extern void clear_boot_mappings(uintptr_t, uintptr_t);

/*
 * magic value to indicate that all TLB entries should be demapped.
 */
#define	DEMAP_ALL_ADDR	(~(uintptr_t)0)
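
/*
 * Illustrative sketch (not part of this header): passing DEMAP_ALL_ADDR as
 * the virtual address asks for a full TLB flush for the hat rather than a
 * single-page invalidation, e.g. with hat_tlb_inval() declared below:
 *
 *	hat_tlb_inval(hat, DEMAP_ALL_ADDR);
 */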

/*
 * halt() is not declared in any other header, so declare it here.
 */
extern void halt(char *fmt);

/*
 * x86 specific routines for use only in setup or i86pc/vm files
 */
extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
	caddr_t ekernelheap);
extern void hat_kern_setup(void);
extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
	x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
extern void hat_init_finish(void);
extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
extern pfn_t hat_kpm_va2pfn(caddr_t);
extern page_t *hat_kpm_vaddr2page(caddr_t);
extern uintptr_t hat_kernelbase(uintptr_t);
extern void hat_kmap_init(uintptr_t base, size_t len);

extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);

#if !defined(__xpv)
/*
 * routines to deal with delayed TLB invalidations for idle CPUs
 */
extern void tlb_going_idle(void);
extern void tlb_service(void);
#endif

/*
 * Hat switch function invoked to load a new context into %cr3
 */
extern void hat_switch(struct hat *hat);

#ifdef __xpv
/*
 * Interfaces to use around code that maps/unmaps grant table references.
 */
extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
extern void hat_release_mapping(hat_t *, caddr_t);

#define	XPV_DISALLOW_MIGRATE()	xen_block_migrate()
#define	XPV_ALLOW_MIGRATE()	xen_allow_migrate()
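
/*
 * Illustrative sketch only (not part of this header, and not the exact
 * sequence used by the grant table code): the migrate blockers bracket
 * work that relies on machine frame numbers staying stable, e.g.
 *
 *	XPV_DISALLOW_MIGRATE();
 *	hat_prepare_mapping(kas.a_hat, va, &pte_ma);
 *	... map the grant reference at va ...
 *	XPV_ALLOW_MIGRATE();
 *
 * with a matching hat_release_mapping(kas.a_hat, va) once the grant
 * reference has been unmapped.
 */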

#else

#define	XPV_DISALLOW_MIGRATE()	/* nothing */
#define	XPV_ALLOW_MIGRATE()	/* nothing */

#define	pfn_is_foreign(pfn)	__lintzero

#endif


#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _VM_HAT_I86_H */