QEMU Firmware Configuration (fw_cfg) Device

QEMU provides a facility for passing strings and files into the VM. This facility is useful for passing kernel parameters, files, or other resources into a guest.

在使用 vfio-pci 进行 Intel Graphics Device (IGD) assignment(直通)时,会用到 “etc/igd-opregion” 这一 fw_cfg 文件:This fw_cfg file exposes the OpRegion for the IGD device. 问题来了,这一 fw_cfg 文件的内容是从哪里来的呢?

QEMU源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Read the IGD OpRegion through the VFIO device fd and hand it to fw_cfg
 * as the file "etc/igd-opregion".
 *
 * @vdev: device whose igd_opregion buffer is allocated and filled here
 * @info: region descriptor; info->size and info->offset select what is read
 * @errp: receives an error description when the read fails
 *
 * Returns -EINVAL when the read does not yield exactly info->size bytes.
 * This listing is an excerpt: "..." marks code that was left out, so the
 * success return value is not shown here.
 */
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
struct vfio_region_info *info, Error **errp)
{
int ret;

/* Zero-filled buffer of info->size bytes to hold the region contents. */
vdev->igd_opregion = g_malloc0(info->size);
/* A single pread() on the device fd is expected to return the whole region. */
ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
info->size, info->offset);
/* A short read and a pread() error are both treated as failure. */
if (ret != info->size) {
error_setg(errp, "failed to read IGD OpRegion");
/* Drop the buffer and clear the pointer so no stale copy is left behind. */
g_free(vdev->igd_opregion);
vdev->igd_opregion = NULL;
return -EINVAL;
}
...
/*
* Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
* allocate 32bit reserved memory for, copy these contents into, and write
* the reserved memory base address to the device ASLS register at 0xFC.
*/
fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
vdev->igd_opregion, info->size);
...
}
1
2
3
4
5
6
7
8
/*
 * Excerpt of the device realize path ("..." marks omitted code).
 *
 * The part shown asks for the device-specific region whose type is
 * VFIO_REGION_TYPE_PCI_VENDOR_TYPE combined with PCI_VENDOR_ID_INTEL and
 * whose subtype is VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION; the lookup
 * result is stored through &opregion.
 */
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
...
ret = vfio_get_dev_region_info(&vdev->vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
...
}

kernel vfio源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
 * Kernel-side excerpt ("..." marks omitted code).
 *
 * The part shown registers a device region with the Intel vendor type and
 * the IGD OpRegion subtype. The region is size + rvds bytes long, is
 * flagged VFIO_REGION_INFO_FLAG_READ, and uses vfio_pci_igd_regops as its
 * callbacks. base is passed as the final argument (presumably the data
 * pointer later read back as region[i].data - confirm against
 * vfio_pci_register_dev_region()).
 */
static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
{
...
ret = vfio_pci_register_dev_region(vdev,
PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
&vfio_pci_igd_regops, size + rvds, VFIO_REGION_INFO_FLAG_READ, base);
...
}

/* Callback table for the IGD device region: its rw and release handlers. */
static const struct vfio_pci_regops vfio_pci_igd_regops = {
	.release	= vfio_pci_igd_release,
	.rw		= vfio_pci_igd_rw,
};

/*
 * vfio_pci_igd_rw - read handler for an IGD device region
 * @vdev:    device owning the region
 * @buf:     user buffer that receives the data
 * @count:   number of bytes requested; clamped to what is left in the region
 * @ppos:    file position; it encodes the region index
 *           (VFIO_PCI_OFFSET_TO_INDEX) and the offset inside that region
 *           (VFIO_PCI_OFFSET_MASK), and is advanced by the bytes copied
 * @iswrite: true for a write access, which is always rejected
 *
 * For an OpRegion region that reports version 2.0 with both RVDA and RVDS
 * set, the data handed to userspace is assembled on the fly: the OpRegion
 * itself, followed directly by the VBT found at RVDA, with the version
 * patched to 2.1 and RVDA rewritten to OPREGION_SIZE. Every other case is
 * a plain copy out of the region's data.
 *
 * Returns the number of bytes copied, or -EINVAL, -ENOMEM or -EFAULT on
 * failure (carried in the size_t return type, which is left unchanged so
 * the function still matches the .rw callback it is registered as).
 */
static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
			      size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
	void *base = vdev->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void *opregionvbt;
	void *vbt_base;
	size_t ret;
	u16 version;
	u64 rvda;
	u32 rvds;

	if (pos >= vdev->region[i].size || iswrite)
		return -EINVAL;

	count = min(count, (size_t)(vdev->region[i].size - pos));

	version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION));
	rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA));
	rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS));

	/* Common case: nothing to stitch, copy straight from the region. */
	if (vdev->region[i].subtype != VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION ||
	    version != 0x0200 || !rvda || !rvds) {
		if (copy_to_user(buf, base + pos, count))
			return -EFAULT;

		*ppos += count;
		return count;
	}

	/*
	 * The scratch buffer below is region[i].size bytes; refuse to build
	 * the image if OpRegion + VBT would not fit rather than overrun it.
	 */
	if ((size_t)OPREGION_SIZE + rvds > vdev->region[i].size)
		return -EINVAL;

	vbt_base = memremap(rvda, rvds, MEMREMAP_WB);
	if (!vbt_base)
		return -ENOMEM;

	opregionvbt = kzalloc(vdev->region[i].size, GFP_KERNEL);
	if (!opregionvbt) {
		ret = -ENOMEM;
		goto out_unmap;
	}

	/* Stitch VBT after OpRegion if noncontiguous */
	memcpy(opregionvbt, base, OPREGION_SIZE);
	memcpy(opregionvbt + OPREGION_SIZE, vbt_base, rvds);

	/* Patch OpRegion 2.0 to 2.1; the field is little-endian in memory */
	*(__le16 *)(opregionvbt + OPREGION_VERSION) = cpu_to_le16(0x0201);
	/* Patch RVDA location after OpRegion */
	*(__le64 *)(opregionvbt + OPREGION_RVDA) = cpu_to_le64(OPREGION_SIZE);

	if (copy_to_user(buf, opregionvbt + pos, count)) {
		ret = -EFAULT;
	} else {
		*ppos += count;
		ret = count;
	}

	kfree(opregionvbt);
out_unmap:
	memunmap(vbt_base);

	return ret;
}


参考资料:

  1. https://wiki.osdev.org/QEMU_fw_cfg
  2. https://github.com/qemu/qemu/blob/master/docs/specs/fw_cfg.txt