本文将介绍PV IPI技术。部分内容转载自:kvm performance optimization technologies, part one

1. Idea

Instead of sending the IPI to each vCPU one by one, PV IPI send uses a bitmap to record the target vCPUs and then makes a single hypercall, thus reducing the number of VM exits. The patchset is here.

2. Usage

Doc:

6. KVM_HC_SEND_IPI
------------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.

a0: lower part of the bitmap of destination APIC IDs
a1: higher part of the bitmap of destination APIC IDs
a2: the lowest APIC ID in bitmap
a3: APIC ICR

The hypercall lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 vCPUs per
hypercall in 32-bit mode.  The destinations are represented by a
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.

Returns the number of CPUs to which the IPIs were delivered successfully.

The test code in KVM unit test:

1
2
3
4
5
6
7
8
/*
 * Issue one KVM_HC_SEND_IPI hypercall and report the outcome.
 *
 * The hypercall number is passed in rax and the four arguments in
 * rbx, rcx, rdx and rsi; they carry, in order, the low half of the
 * destination bitmap, the high half, the lowest APIC ID covered by the
 * bitmap, and the APIC ICR value.  The hypercall's return value comes back
 * in rax.
 */
static void test_pv_ipi(void)
{
    unsigned long bitmap_low = 0xFFFFFFFF;
    unsigned long bitmap_high = 0;
    unsigned long lowest_apic_id = 0xFFFFFFFF;
    unsigned long icr = 0x0;
    int ret;

    asm volatile("vmcall"
                 : "=a"(ret)
                 : "a"(KVM_HC_SEND_IPI),
                   "b"(bitmap_low),
                   "c"(bitmap_high),
                   "d"(lowest_apic_id),
                   "S"(icr));

    /* The check is on a zero return value from the hypercall. */
    report(!ret, "PV IPIs testing");
}

a3就是kvm_pv_send_ipi函数中的icr参数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Excerpt: how the hypercall's ICR argument (a3) is validated and decoded.
 * The "..." lines mark code elided from this excerpt.
 */
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
...
/* Reject the request if any bit covered by these two masks is set in icr. */
if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
return -KVM_EINVAL;

/* Copy the remaining ICR fields into the interrupt descriptor. */
irq.vector = icr & APIC_VECTOR_MASK;
irq.delivery_mode = icr & APIC_MODE_MASK;
/* level is normalized to 0 or 1. */
irq.level = (icr & APIC_INT_ASSERT) != 0;
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
...
}

3. Implementation

源码基于Kernel v5.17.0-rc1。

3.1 kvm side

  • Expose PV_SEND_IPI CPUID feature bit to guest
    KVM_FEATURE_PV_SEND_IPI

  • Implement PV IPIs send hypercall
    KVM_HC_SEND_IPI

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*
 * Handle KVM_HC_SEND_IPI: deliver one interrupt to every vCPU selected by a
 * two-word bitmap of APIC IDs.
 *
 * @kvm:             VM whose APIC map is consulted.
 * @ipi_bitmap_low:  first bitmap word; bit 0 corresponds to APIC ID @min.
 * @ipi_bitmap_high: second bitmap word; bit 0 corresponds to APIC ID
 *                   @min + 64 when @op_64_bit is non-zero, @min + 32 otherwise.
 * @min:             lowest APIC ID represented in the bitmap.
 * @icr:             ICR value describing the interrupt to send.
 * @op_64_bit:       non-zero when each bitmap word carries 64 valid bits.
 *
 * Returns the sum of the values returned by __pv_send_ipi() for both words,
 * -KVM_EINVAL when @icr has any APIC_DEST_MASK or APIC_SHORT_MASK bit set,
 * or -EOPNOTSUPP when the VM has no APIC map.
 */
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
                    unsigned long ipi_bitmap_high, u32 min,
                    unsigned long icr, int op_64_bit)
{
    const int bits_per_half = op_64_bit ? 64 : 32;
    struct kvm_lapic_irq irq = {
        .vector = icr & APIC_VECTOR_MASK,
        .delivery_mode = icr & APIC_MODE_MASK,
        .level = (icr & APIC_INT_ASSERT) != 0,
        .trig_mode = icr & APIC_INT_LEVELTRIG,
    };
    struct kvm_apic_map *apic_map;
    int delivered = -EOPNOTSUPP;

    if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
        return -KVM_EINVAL;

    /* The APIC map is dereferenced and used inside the RCU read section. */
    rcu_read_lock();
    apic_map = rcu_dereference(kvm->arch.apic_map);
    if (likely(apic_map)) {
        delivered = __pv_send_ipi(&ipi_bitmap_low, apic_map, &irq, min);
        /* The second word starts bits_per_half APIC IDs above the first. */
        delivered += __pv_send_ipi(&ipi_bitmap_high, apic_map, &irq,
                                   min + bits_per_half);
    }
    rcu_read_unlock();

    return delivered;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Deliver @irq to every vCPU whose bit is set in the single word @ipi_bitmap.
 *
 * Bit n of @ipi_bitmap selects APIC ID @min + n.  Only bits that map to an
 * APIC ID no larger than map->max_apic_id are examined, and IDs with no
 * entry in map->phys_map are skipped.
 *
 * Returns the sum of the kvm_apic_set_irq() return values, or 0 when @min
 * itself is beyond map->max_apic_id.
 */
static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
                         struct kvm_lapic_irq *irq, u32 min)
{
    struct kvm_vcpu *vcpu;
    int bit, delivered = 0;
    u32 nbits;

    if (min > map->max_apic_id)
        return 0;

    /* Never scan past one word, nor past the last valid APIC ID. */
    nbits = min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1));

    for_each_set_bit(bit, ipi_bitmap, nbits) {
        if (!map->phys_map[min + bit])
            continue;

        vcpu = map->phys_map[min + bit]->vcpu;
        delivered += kvm_apic_set_irq(vcpu, irq, NULL);
    }

    return delivered;
}

3.2 guest side

  • Set the IPI entry points

    1
    2
    3
    4
    5
    6
    /*
     * Point the APIC driver's mask-based IPI callbacks at the KVM
     * paravirtual implementations and log that this was done.
     */
    static void kvm_setup_pv_ipi(void)
    {
        /* Both callbacks are replaced; the two assignments are independent. */
        apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
        apic->send_IPI_mask = kvm_send_ipi_mask;

        pr_info("setup PV IPIs\n");
    }
  • Guest trigger IPI

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    /*
     * Send the IPI @vector to every CPU in @mask through KVM_HC_SEND_IPI
     * hypercalls.
     *
     * The APIC IDs of the target CPUs are collected in ipi_bitmap, where bit 0
     * stands for APIC ID `min` and `max` tracks the highest ID recorded so far.
     * Whenever the next APIC ID cannot be represented in the current window,
     * the pending bitmap is sent with one hypercall and a new window is
     * started at that ID.  Whatever is left after the loop is sent with a
     * final hypercall.  A negative hypercall return triggers a one-time
     * warning.
     *
     * The whole sequence runs with local interrupts disabled.
     *
     * NOTE: the third branch below additionally requires `apic_id > min`.
     * That is a bug fix over the original snippet; as far as I recall it
     * matches the later upstream change "KVM: x86: fix sending PV IPI".
     */
    static void __send_ipi_mask(const struct cpumask *mask, int vector)
    {
        unsigned long flags;
        int cpu, apic_id, icr;
        int min = 0, max = 0;
    #ifdef CONFIG_X86_64
        __uint128_t ipi_bitmap = 0;
    #else
        u64 ipi_bitmap = 0;
    #endif
        long ret;

        if (cpumask_empty(mask))
            return;

        local_irq_save(flags);

        /* Build the ICR value: NMIs use their own delivery mode, no vector. */
        switch (vector) {
        default:
            icr = APIC_DM_FIXED | vector;
            break;
        case NMI_VECTOR:
            icr = APIC_DM_NMI;
            break;
        }

        for_each_cpu(cpu, mask) {
            apic_id = per_cpu(x86_cpu_to_apicid, cpu);
            if (!ipi_bitmap) {
                /* Empty bitmap: open a new window at this APIC ID. */
                min = max = apic_id;
            } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
                /*
                 * The ID is below the window base but still close enough
                 * to max: shift the recorded bits up and lower the base.
                 */
                ipi_bitmap <<= min - apic_id;
                min = apic_id;
            } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) {
                /*
                 * The ID lies above the base and inside the window.
                 *
                 * The `apic_id > min` test is required: an ID below min
                 * that the previous branch rejected (too far from max)
                 * would otherwise also satisfy
                 * `apic_id < min + KVM_IPI_CLUSTER_SIZE`, and the
                 * __set_bit() below would get a negative bit offset.
                 * Example ID sequence: 1, KVM_IPI_CLUSTER_SIZE, 0.
                 */
                max = apic_id < max ? max : apic_id;
            } else {
                /* Does not fit: flush the pending bitmap and restart. */
                ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                    (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
                WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
                      ret);
                min = max = apic_id;
                ipi_bitmap = 0;
            }
            __set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
        }

        /* Send whatever is still pending after the last CPU. */
        if (ipi_bitmap) {
            ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
            WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
                  ret);
        }

        local_irq_restore(flags);
    }

It sets a bit in the bitmap for each IPI target vCPU and finally calls kvm_hypercall4(KVM_HC_SEND_IPI).


参考资料:

  1. KVM: X86: Implement Exit-less IPIs support
  2. kvm performance optimization technologies, part one
  3. Boosting Dedicated Instance via KVM Tax Cut
  4. IOMMU(六)-post interrupt