// SPDX-License-Identifier: GPL-2.0-or-later
/*  Paravirtualization interfaces
    Copyright (C) 2006 Rusty Russell IBM Corporation


    2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/highmem.h>
#include <linux/kprobes.h>
#include <linux/pgtable.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
#include <asm/io_bitmap.h>

/*
 * nop stub, which must not clobber anything *including the stack* to
 * avoid confusing the entry prologues.
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     "ret\n\t"
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");

void __init default_banner(void)
{
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
}

/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };

struct branch {
        unsigned char opcode;
        u32 delta;
} __attribute__((packed));
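
/*
 * Worked example (illustrative values): patching a site at addr with a
 * call to a target 0x100 bytes past addr gives delta = 0x100 - 5 = 0xfb,
 * so the five patched bytes are e8 fb 00 00 00 ("call rel32", with rel32
 * taken relative to the end of the instruction).
 */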

static unsigned paravirt_patch_call(void *insn_buff, const void *target,
                                    unsigned long addr, unsigned len)
{
        const int call_len = 5;
        struct branch *b = insn_buff;
        unsigned long delta = (unsigned long)target - (addr+call_len);

        if (len < call_len) {
                pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
                /* Kernel might not be viable if patching fails, bail out: */
                BUG_ON(1);
        }

        b->opcode = 0xe8; /* call */
        b->delta = delta;
        BUILD_BUG_ON(sizeof(*b) != call_len);

        return call_len;
}

#ifdef CONFIG_PARAVIRT_XXL
/* identity function, which can be inlined */
u64 notrace _paravirt_ident_64(u64 x)
{
        return x;
}
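
/*
 * If the patch site is large enough, paravirt_patch_ident_64() (in
 * paravirt_patch.c) should replace the call to this function with an
 * inline "mov %rdi, %rax" instead; see paravirt_patch_default() below.
 */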

static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
                                   unsigned long addr, unsigned len)
{
        struct branch *b = insn_buff;
        unsigned long delta = (unsigned long)target - (addr+5);

        if (len < 5) {
#ifdef CONFIG_RETPOLINE
                WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
                return len;     /* call too long for patch site */
        }

        b->opcode = 0xe9;       /* jmp */
        b->delta = delta;

        return 5;
}
#endif

DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);

void __init native_pv_lock_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
                static_branch_disable(&virt_spin_lock_key);
}
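
/*
 * The key defaults to true and is disabled only when we know we run on
 * bare metal: with the key enabled, queued spinlocks fall back to a simple
 * test-and-set lock (see virt_spin_lock()), which copes better with
 * preempted vCPUs than queueing does.
 */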

unsigned paravirt_patch_default(u8 type, void *insn_buff,
                                unsigned long addr, unsigned len)
{
        /*
         * Neat trick to map patch type back to the call within the
         * corresponding structure.
         */
        void *opfunc = *((void **)&pv_ops + type);
        unsigned ret;

        if (opfunc == NULL)
                /* If there's no function, patch it with a ud2a (BUG) */
                ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
        else if (opfunc == _paravirt_nop)
                ret = 0;

#ifdef CONFIG_PARAVIRT_XXL
        /* identity functions just return their single argument */
        else if (opfunc == _paravirt_ident_64)
                ret = paravirt_patch_ident_64(insn_buff, len);

        else if (type == PARAVIRT_PATCH(cpu.iret))
                /* If operation requires a jmp, then jmp */
                ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
#endif
        else
                /* Otherwise call the function. */
                ret = paravirt_patch_call(insn_buff, opfunc, addr, len);

        return ret;
}
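
/*
 * Patching policy, in summary: a missing op becomes ud2a (BUG), a nop op
 * becomes NOP padding, a 64-bit identity op becomes an inline mov,
 * cpu.iret becomes a direct jmp (it never returns), and everything else
 * becomes a direct call to the op.
 */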

unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
                              const char *start, const char *end)
{
        unsigned insn_len = end - start;

        /* Alternative instruction is too large for the patch site and we cannot continue: */
        BUG_ON(insn_len > len || start == NULL);

        memcpy(insn_buff, start, insn_len);

        return insn_len;
}

struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

static u64 native_steal_clock(int cpu)
{
        return 0;
}

/* These are in entry.S */
extern void native_iret(void);

static struct resource reserve_ioports = {
        .start = 0,
        .end = IO_SPACE_LIMIT,
        .name = "paravirt-ioport",
        .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware.  This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
        return request_resource(&ioport_resource, &reserve_ioports);
}

static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;

static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);

        this_cpu_write(paravirt_lazy_mode, mode);
}

static void leave_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);

        this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}

void paravirt_enter_lazy_mmu(void)
{
        enter_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_leave_lazy_mmu(void)
{
        leave_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_flush_lazy_mmu(void)
{
        preempt_disable();

        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                arch_enter_lazy_mmu_mode();
        }

        preempt_enable();
}
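
/*
 * Lazy MMU mode lets a hypervisor batch page-table updates issued between
 * arch_enter_lazy_mmu_mode() and arch_leave_lazy_mmu_mode().  Flushing is
 * implemented above as leave + re-enter, which pushes out any queued
 * updates while staying in lazy mode.
 */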

#ifdef CONFIG_PARAVIRT_XXL
void paravirt_start_context_switch(struct task_struct *prev)
{
        BUG_ON(preemptible());

        if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
        }
        enter_lazy(PARAVIRT_LAZY_CPU);
}

void paravirt_end_context_switch(struct task_struct *next)
{
        BUG_ON(preemptible());

        leave_lazy(PARAVIRT_LAZY_CPU);

        if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
                arch_enter_lazy_mmu_mode();
}
#endif
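
/*
 * A context switch can interrupt a batch of lazy MMU updates, so the
 * outgoing task leaves lazy MMU mode (flushing its pending updates) and is
 * marked with TIF_LAZY_MMU_UPDATES; the flag makes the task re-enter lazy
 * MMU mode when it is eventually switched back in.
 */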

enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
        if (in_interrupt())
                return PARAVIRT_LAZY_NONE;

        return this_cpu_read(paravirt_lazy_mode);
}

struct pv_info pv_info = {
        .name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
        .extra_user_64bit_cs = __USER_CS,
#endif
};

/* 64-bit pagetable entries */
#define PTE_IDENT       __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
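
/*
 * On bare hardware no translation is needed between the kernel's view of a
 * page-table entry and what the MMU sees (unlike e.g. Xen PV), so the
 * val/make conversions below are simply the 64-bit identity function,
 * which the patcher can inline away.
 */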

struct paravirt_patch_template pv_ops = {
        /* Init ops. */
        .init.patch = native_patch,

        /* Time ops. */
        .time.sched_clock = native_sched_clock,
        .time.steal_clock = native_steal_clock,

        /* Cpu ops. */
        .cpu.io_delay = native_io_delay,

#ifdef CONFIG_PARAVIRT_XXL
        .cpu.cpuid = native_cpuid,
        .cpu.get_debugreg = native_get_debugreg,
        .cpu.set_debugreg = native_set_debugreg,
        .cpu.read_cr0 = native_read_cr0,
        .cpu.write_cr0 = native_write_cr0,
        .cpu.write_cr4 = native_write_cr4,
        .cpu.wbinvd = native_wbinvd,
        .cpu.read_msr = native_read_msr,
        .cpu.write_msr = native_write_msr,
        .cpu.read_msr_safe = native_read_msr_safe,
        .cpu.write_msr_safe = native_write_msr_safe,
        .cpu.read_pmc = native_read_pmc,
        .cpu.load_tr_desc = native_load_tr_desc,
        .cpu.set_ldt = native_set_ldt,
        .cpu.load_gdt = native_load_gdt,
        .cpu.load_idt = native_load_idt,
        .cpu.store_tr = native_store_tr,
        .cpu.load_tls = native_load_tls,
        .cpu.load_gs_index = native_load_gs_index,
        .cpu.write_ldt_entry = native_write_ldt_entry,
        .cpu.write_gdt_entry = native_write_gdt_entry,
        .cpu.write_idt_entry = native_write_idt_entry,

        .cpu.alloc_ldt = paravirt_nop,
        .cpu.free_ldt = paravirt_nop,

        .cpu.load_sp0 = native_load_sp0,

        .cpu.iret = native_iret,

#ifdef CONFIG_X86_IOPL_IOPERM
        .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap,
        .cpu.update_io_bitmap = native_tss_update_io_bitmap,
#endif

        .cpu.start_context_switch = paravirt_nop,
        .cpu.end_context_switch = paravirt_nop,

        /* Irq ops. */
        .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
        .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
        .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
        .irq.safe_halt = native_safe_halt,
        .irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */

        /* Mmu ops. */
        .mmu.flush_tlb_user = native_flush_tlb_local,
        .mmu.flush_tlb_kernel = native_flush_tlb_global,
        .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
        .mmu.flush_tlb_others = native_flush_tlb_others,
        .mmu.tlb_remove_table =
                        (void (*)(struct mmu_gather *, void *))tlb_remove_page,

        .mmu.exit_mmap = paravirt_nop,

#ifdef CONFIG_PARAVIRT_XXL
        .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
        .mmu.write_cr2 = native_write_cr2,
        .mmu.read_cr3 = __native_read_cr3,
        .mmu.write_cr3 = native_write_cr3,

        .mmu.pgd_alloc = __paravirt_pgd_alloc,
        .mmu.pgd_free = paravirt_nop,

        .mmu.alloc_pte = paravirt_nop,
        .mmu.alloc_pmd = paravirt_nop,
        .mmu.alloc_pud = paravirt_nop,
        .mmu.alloc_p4d = paravirt_nop,
        .mmu.release_pte = paravirt_nop,
        .mmu.release_pmd = paravirt_nop,
        .mmu.release_pud = paravirt_nop,
        .mmu.release_p4d = paravirt_nop,

        .mmu.set_pte = native_set_pte,
        .mmu.set_pmd = native_set_pmd,

        .mmu.ptep_modify_prot_start = __ptep_modify_prot_start,
        .mmu.ptep_modify_prot_commit = __ptep_modify_prot_commit,

        .mmu.set_pud = native_set_pud,

        .mmu.pmd_val = PTE_IDENT,
        .mmu.make_pmd = PTE_IDENT,

        .mmu.pud_val = PTE_IDENT,
        .mmu.make_pud = PTE_IDENT,

        .mmu.set_p4d = native_set_p4d,

#if CONFIG_PGTABLE_LEVELS >= 5
        .mmu.p4d_val = PTE_IDENT,
        .mmu.make_p4d = PTE_IDENT,

        .mmu.set_pgd = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */

        .mmu.pte_val = PTE_IDENT,
        .mmu.pgd_val = PTE_IDENT,

        .mmu.make_pte = PTE_IDENT,
        .mmu.make_pgd = PTE_IDENT,

        .mmu.dup_mmap = paravirt_nop,
        .mmu.activate_mm = paravirt_nop,

        .mmu.lazy_mode = {
                .enter = paravirt_nop,
                .leave = paravirt_nop,
                .flush = paravirt_nop,
        },

        .mmu.set_fixmap = native_set_fixmap,
#endif /* CONFIG_PARAVIRT_XXL */

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
        /* Lock ops. */
#ifdef CONFIG_SMP
        .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
        .lock.queued_spin_unlock =
                        PV_CALLEE_SAVE(__native_queued_spin_unlock),
        .lock.wait = paravirt_nop,
        .lock.kick = paravirt_nop,
        .lock.vcpu_is_preempted =
                        PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
#endif
};
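
/*
 * The above are only the native defaults: a hypervisor guest (Xen PV, KVM,
 * Hyper-V, ...) overrides individual pv_ops entries early during boot,
 * before the call sites are patched.
 */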

#ifdef CONFIG_PARAVIRT_XXL
/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
#endif

EXPORT_SYMBOL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);