QEMU fw_cfg
QEMU Firmware Configuration (fw_cfg) Device
QEMU provides a facility for passing strings and files into the VM. This facility is useful for passing kernel parameters, files, or other resources into a guest.
在使用 vfio-pci 进行 Intel Graphics Device (IGD) assignment 时,有 “etc/igd-opregion” 这一 fw_cfg 文件: This fw_cfg file exposes the OpRegion for the IGD device. 问题来了,这个 fw_cfg 文件的内容是从哪里来的呢?
QEMU源码:
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, ...)
static void vfio_realize(PCIDevice *pdev, Error **errp)
kernel vfio源码:
static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
{
	...
	ret = vfio_pci_register_dev_region(vdev,
		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
		VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
		&vfio_pci_igd_regops, size + rvds, VFIO_REGION_INFO_FLAG_READ, base);
	...
}
/*
 * Operations table for the IGD OpRegion device region.  Its address is
 * handed to vfio_pci_register_dev_region() when the OpRegion is registered;
 * .rw points at the read handler vfio_pci_igd_rw() and .release at
 * vfio_pci_igd_release().
 */
static const struct vfio_pci_regops vfio_pci_igd_regops = {
	.rw		= vfio_pci_igd_rw,
	.release	= vfio_pci_igd_release,
};
/*
 * Read handler for an IGD device region (read-only; writes are rejected).
 *
 * @vdev:    device owning the region
 * @buf:     user buffer that receives the data
 * @count:   number of bytes requested; clamped to the end of the region
 * @ppos:    file offset encoding region index and in-region position;
 *           advanced by the number of bytes copied on success
 * @iswrite: true for a write access, which always fails with -EINVAL
 *
 * For an OpRegion whose header reports version 2.0 with a non-zero RVDA
 * and RVDS, the VBT lives outside the OpRegion.  In that case a temporary
 * image is built with the VBT placed directly after the OpRegion, the
 * version is patched to 2.1 and RVDA is rewritten to OPREGION_SIZE, and
 * the user read is served from that image.  Otherwise the region data is
 * copied out as is.
 *
 * Returns the number of bytes copied, or -EINVAL / -ENOMEM / -EFAULT.
 * NOTE(review): the negative errno is returned through a size_t return
 * type; the signature is left unchanged here because it has to match the
 * .rw callback slot it is installed in -- confirm against the regops type.
 */
static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
			      size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
	void *base = vdev->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void *vbt_base = NULL;
	void *opregionvbt = NULL;
	const void *src = base;
	size_t ret;
	u16 version;
	u64 rvda;
	u32 rvds;

	if (pos >= vdev->region[i].size || iswrite)
		return -EINVAL;

	count = min(count, (size_t)(vdev->region[i].size - pos));

	version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION));
	rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA));
	rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS));

	if (vdev->region[i].subtype == VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION &&
	    version == 0x0200 && rvda && rvds) {
		/*
		 * The stitched image is allocated with the region size, so
		 * refuse to build it if OpRegion + VBT would not fit; the
		 * memcpy() calls below would otherwise overrun the buffer.
		 */
		if ((u64)OPREGION_SIZE + rvds > vdev->region[i].size)
			return -EINVAL;

		vbt_base = memremap(rvda, rvds, MEMREMAP_WB);
		if (!vbt_base)
			return -ENOMEM;

		opregionvbt = kzalloc(vdev->region[i].size, GFP_KERNEL);
		if (!opregionvbt) {
			ret = -ENOMEM;
			goto out;
		}

		/* Stitch VBT after OpRegion if noncontiguous */
		memcpy(opregionvbt, base, OPREGION_SIZE);
		memcpy(opregionvbt + OPREGION_SIZE, vbt_base, rvds);

		/*
		 * Patch OpRegion 2.0 to 2.1.  The header fields are read with
		 * le*_to_cpu() above, so store them in little-endian form.
		 */
		*(__le16 *)(opregionvbt + OPREGION_VERSION) = cpu_to_le16(0x0201);

		/* Patch RVDA location after OpRegion */
		*(__le64 *)(opregionvbt + OPREGION_RVDA) = cpu_to_le64(OPREGION_SIZE);

		src = opregionvbt;
	}

	if (copy_to_user(buf, src + pos, count)) {
		ret = -EFAULT;
		goto out;
	}

	*ppos += count;
	ret = count;

out:
	/* Both are NULL on the plain-copy path; kfree(NULL) is a no-op. */
	kfree(opregionvbt);
	if (vbt_base)
		memunmap(vbt_base);

	return ret;
}
参考资料: