Introduction to PV IPI
本文将介绍PV IPI技术。部分内容转载自:kvm performance optimization technologies, part one。
1. Idea
Instead of sending the IPI to vcpu one by one, the pv ipi send uses a bitmap to to record the IPI vcpu and then make a hypercall thus reduce the VM-exit. The patchset is here.
2. Usage
Doc:
6. KVM_HC_SEND_IPI
------------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.
a0: lower part of the bitmap of destination APIC IDs
a1: higher part of the bitmap of destination APIC IDs
a2: the lowest APIC ID in bitmap
a3: APIC ICR
The hypercall lets a guest send multicast IPIs, with at most 128
128 destinations per hypercall in 64-bit mode and 64 vCPUs per
hypercall in 32-bit mode. The destinations are represented by a
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
The test code in KVM unit test:1
2
3
4
5
6
7
8static void test_pv_ipi(void)
{
int ret;
unsigned long a0 = 0xFFFFFFFF, a1 = 0, a2 = 0xFFFFFFFF, a3 = 0x0;
asm volatile("vmcall" : "=a"(ret) :"a"(KVM_HC_SEND_IPI), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
report(!ret, "PV IPIs testing");
}
a3
就是kvm_pv_send_ipi
函数中的icr
参数。1
2
3
4
5
6
7
8
9
10
11
12
13
14int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
...
if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
return -KVM_EINVAL;
irq.vector = icr & APIC_VECTOR_MASK;
irq.delivery_mode = icr & APIC_MODE_MASK;
irq.level = (icr & APIC_INT_ASSERT) != 0;
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
...
}
3. Implementation
源码基于Kernel v5.17.0-rc1。
3.1 kvm side
Expose PV_SEND_IPI CPUID feature bit to guest
KVM_FEATURE_PV_SEND_IPI
Implement PV IPIs send hypercall
KVM_HC_SEND_IPI
1 | int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, |
1 | static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, |
3.2 guest side
Set the IPI entry points
1
2
3
4
5
6static void kvm_setup_pv_ipi(void)
{
apic->send_IPI_mask = kvm_send_ipi_mask;
apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
pr_info("setup PV IPIs\n");
}Guest trigger IPI
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55static void __send_ipi_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
int cpu, apic_id, icr;
int min = 0, max = 0;
__uint128_t ipi_bitmap = 0;
u64 ipi_bitmap = 0;
long ret;
if (cpumask_empty(mask))
return;
local_irq_save(flags);
switch (vector) {
default:
icr = APIC_DM_FIXED | vector;
break;
case NMI_VECTOR:
icr = APIC_DM_NMI;
break;
}
for_each_cpu(cpu, mask) {
apic_id = per_cpu(x86_cpu_to_apicid, cpu);
if (!ipi_bitmap) {
min = max = apic_id;
} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
ipi_bitmap <<= min - apic_id;
min = apic_id;
} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
max = apic_id < max ? max : apic_id;
} else {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
ret);
min = max = apic_id;
ipi_bitmap = 0;
}
__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
}
if (ipi_bitmap) {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
ret);
}
local_irq_restore(flags);
}
It will set the bitmap accross the IPI target vcpu and finally call the kvm_hypercall4(KVM_HC_SEND_IPI)
.
参考资料: