[PATCH v2 1/1] drm/bridge: anx7625: send DPCD command to downstream
Send a DPCD command to the downstream sink before anx7625 powers down,
so that the downstream monitor enters standby mode.

Signed-off-by: Xin Ji
---
 drivers/gpu/drm/bridge/analogix/anx7625.c | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c
index 33383f83255d..fd2217ae455e 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -129,6 +129,23 @@ static int anx7625_reg_write(struct anx7625_data *ctx,
 	return ret;
 }
 
+static int anx7625_reg_block_write(struct anx7625_data *ctx,
+				   struct i2c_client *client,
+				   u8 reg_addr, u8 len, u8 *buf)
+{
+	int ret;
+	struct device *dev = &client->dev;
+
+	i2c_access_workaround(ctx, client);
+
+	ret = i2c_smbus_write_i2c_block_data(client, reg_addr, len, buf);
+	if (ret < 0)
+		DRM_DEV_ERROR(dev, "write i2c block failed id=%x:%x\n",
+			      client->addr, reg_addr);
+
+	return ret;
+}
+
 static int anx7625_write_or(struct anx7625_data *ctx,
 			    struct i2c_client *client,
 			    u8 offset, u8 mask)
@@ -214,8 +231,8 @@ static int wait_aux_op_finish(struct anx7625_data *ctx)
 	return 0;
 }
 
-static int anx7625_aux_dpcd_read(struct anx7625_data *ctx,
-				 u32 address, u8 len, u8 *buf)
+static int anx7625_aux_dpcd_trans(struct anx7625_data *ctx, u8 op,
+				  u32 address, u8 len, u8 *buf)
 {
 	struct device *dev = &ctx->client->dev;
 	int ret;
@@ -231,8 +248,7 @@ static int anx7625_aux_dpcd_read(struct anx7625_data *ctx,
 	addrm = (address >> 8) & 0xFF;
 	addrh = (address >> 16) & 0xFF;
 
-	cmd = DPCD_CMD(len, DPCD_READ);
-	cmd = ((len - 1) << 4) | 0x09;
+	cmd = DPCD_CMD(len, op);
 
 	/* Set command and length */
 	ret = anx7625_reg_write(ctx, ctx->i2c.rx_p0_client,
@@ -246,6 +262,9 @@ static int anx7625_aux_dpcd_read(struct anx7625_data *ctx,
 	ret |= anx7625_reg_write(ctx, ctx->i2c.rx_p0_client,
 				 AP_AUX_ADDR_19_16, addrh);
 
+	if (op == DPCD_WRITE)
+		ret |= anx7625_reg_block_write(ctx, ctx->i2c.rx_p0_client,
+					       AP_AUX_BUFF_START, len, buf);
 	/* Enable aux access */
 	ret |= anx7625_write_or(ctx, ctx->i2c.rx_p0_client,
 				AP_AUX_CTRL_STATUS, AP_AUX_CTRL_OP_EN);
@@ -263,6 +282,11 @@ static int anx7625_aux_dpcd_read(struct anx7625_data *ctx,
 		return ret;
 	}
 
+	/* Write done */
+	if (op == DPCD_WRITE)
+		return 0;
+
+	/* Read done, read out dpcd data */
 	ret = anx7625_reg_block_read(ctx, ctx->i2c.rx_p0_client,
 				     AP_AUX_BUFF_START, len, buf);
 	if (ret < 0) {
@@ -845,7 +869,7 @@ static int anx7625_hdcp_enable(struct anx7625_data *ctx)
 	}
 
 	/* Read downstream capability */
-	anx7625_aux_dpcd_read(ctx, 0x68028, 1, &bcap);
+	anx7625_aux_dpcd_trans(ctx, DPCD_READ, 0x68028, 1, &bcap);
 	if (!(bcap & 0x01)) {
 		pr_warn("downstream not support HDCP 1.4, cap(%x).\n", bcap);
 		return 0;
@@ -918,6 +942,7 @@ static void anx7625_dp_stop(struct anx7625_data *ctx)
 {
 	struct device *dev = &ctx->client->dev;
 	int ret;
+	u8 data;
 
 	DRM_DEV_DEBUG_DRIVER(dev, "stop dp output\n");
 
@@ -929,6 +954,11 @@ static void anx7625_dp_stop(struct anx7625_data *ctx)
 	ret |= anx7625_write_and(ctx, ctx->i2c.tx_p2_client, 0x08, 0x7f);
 
 	ret |= anx7625_video_mute_control(ctx, 1);
+
+	DRM_DEV_DEBUG_DRIVER(dev, "notify downstream enter into standby\n");
+	/* Downstream monitor enter into standby mode */
+	data = 2;
+	ret |= anx7625_aux_dpcd_trans(ctx, DPCD_WRITE, 0x000600, 1, &data);
 	if (ret < 0)
 		DRM_DEV_ERROR(dev, "IO error : mute video fail\n");
 
-- 
2.25.1
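For reference, DPCD register 0x600 is SET_POWER and the value 2 written
above is the D3 (standby) state. A sketch of the same sink write expressed
through the core DP helpers - illustrative only, since anx7625 drives AUX
through its own registers here and does not register a struct drm_dp_aux:

#include <drm/drm_dp_helper.h>

/* Illustrative: ask the DP sink to enter standby via the generic
 * helpers. Returns 1 (bytes written) on success, negative errno on
 * failure. */
static ssize_t sink_enter_standby(struct drm_dp_aux *aux)
{
	/* DP_SET_POWER (DPCD 0x600) = DP_SET_POWER_D3 (2) */
	return drm_dp_dpcd_writeb(aux, DP_SET_POWER, DP_SET_POWER_D3);
}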
Re: Phyr Starter
On Mon, Jan 10, 2022 at 08:41:26PM -0400, Jason Gunthorpe wrote:
> On Mon, Jan 10, 2022 at 07:34:49PM +0000, Matthew Wilcox wrote:
>
> > Finally, it may be possible to stop using scatterlist to describe the
> > input to the DMA-mapping operation.  We may be able to get struct
> > scatterlist down to just dma_address and dma_length, with chaining
> > handled through an enclosing struct.
>
> Can you talk about this some more? IMHO one of the key properties of
> the scatterlist is that it can hold huge amounts of pages without
> having to do any kind of special allocation due to the chaining.
>
> The same will be true of the phyr idea right?

My thinking is that we'd pass a relatively small array of phyr (maybe 16
entries) to get_user_phyr().  If that turned out not to be big enough,
then we have two options; one is to map those 16 ranges with sg and use
the sg chaining functionality before throwing away the phyr and calling
get_user_phyr() again.  The other is to stash those 16 ranges somewhere
(eg a resizing array of some kind) and keep calling get_user_phyr() to
get the next batch of 16; once we've got the entire range, call
sg_map_phyr() passing all of the phyrs.

> > I would like to see phyr replace bio_vec everywhere it's currently used.
> > I don't have time to do that work now because I'm busy with folios.
> > If someone else wants to take that on, I shall cheer from the sidelines.
> > What I do intend to do is:
>
> I wonder if we mixed things though..
>
> IMHO there is a lot of optimization to be had by having a
> datastructure that is expressly 'the physical pages underlying a
> contiguous chunk of va'
>
> If you limit to that scenario then we can be more optimal because
> things like byte granular offsets and size in the interior pages don't
> need to exist. Every interior chunk is always aligned to its order and
> we only need to record the order.
>
> An overall starting offset and total length allow computing the slice
> of the first/last entry.
>
> If the physical address is always aligned then we get 12 free bits
> from the min 4k alignment and also only need to store order, not an
> arbitrary byte granular length.
>
> The win is I think we can meaningfully cover most common cases using
> only 8 bytes per physical chunk. The 12 bits can be used to encode the
> common orders (4k, 2M, 1G, etc) and some smart mechanism to get
> another 16 bits to cover 'everything'.
>
> IMHO storage density here is quite important, we end up having to keep
> this stuff around for a long time.
>
> I say this here, because I've always thought bio_vec/etc are more
> general than we actually need, being byte granular at every chunk.

Oh, I can do you one better on the bit-packing scheme.  There's a
representation of every power-of-two that is naturally aligned, using
just one extra bit.  Let's say we have 3 bits of address space and 4 bits
to represent any power of two allocation within that address space:

0000	index-0, order-0
0010	index-1, order-0
...
1110	index-7, order-0
0001	index-0, order-1
0101	index-2, order-1
1001	index-4, order-1
1101	index-6, order-1
0011	index-0, order-2
1011	index-4, order-2
0111	index-0, order-3

1111 has no meaning and can be used to represent an invalid range, if
that's useful.  The lowest clear bit decodes to the order, and
(x & (x+1))/2 gets you the index.

That leaves you with another 11 bits to represent something smart about
partial pages.

The question is whether this is the right kind of optimisation to be
doing.
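For concreteness, a small decode sketch (an editorial illustration, not
part of the proposal) for the 4-bit example, using exactly the two rules
above:

#include <stdio.h>

/* Decode a 4-bit naturally-aligned power-of-two code: the lowest clear
 * bit gives the order, and (x & (x + 1)) / 2 gives the index. */
static void decode(unsigned int x)
{
	if (x == 0xf) {			/* all bits set: invalid range */
		printf("%#3x: invalid\n", x);
		return;
	}
	printf("%#3x: index-%u, order-%u\n", x,
	       (x & (x + 1)) / 2,	/* index */
	       __builtin_ctz(~x));	/* position of lowest clear bit */
}

int main(void)
{
	for (unsigned int x = 0; x < 16; x++)
		decode(x);		/* reproduces the table above */
	return 0;
}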
I hear you that we want a dense format, but it's questionable whether
the kind of thing you're suggesting is actually denser than this scheme.
For example, if we have 1GB pages and userspace happens to have allocated
pages (3, 4, 5, 6, 7, 8, 9, 10) then this can be represented as a single
phyr.  A power-of-two scheme would have us use four entries (3, 4-7,
8-9, 10).

Using a (dma_addr, size_t) tuple makes coalescing adjacent pages very
cheap.  If I have to walk PTEs looking for pages which can be combined
together, I end up with interesting behaviour where the length of the
list shrinks and expands.  Using the example above, as I walk successive
PUDs, the data struct looks like this:

(3)
(3, 4)
(3, 4-5)
(3, 4-5, 6)
(3, 4-7)
(3, 4-7, 8)
(3, 4-7, 8-9)
(3, 4-7, 8-9, 10)

We could end up with a situation where we stop because the array is
full, even though if we kept going, it'd shrink back down below the
length of the array (in this example, an array of length 2 would stop
when it saw page 6, even though page 7 shrinks it back down again).

> What is needed is a full scatterlist replacement, including the IOMMU
> part.
>
> For the IOMMU I would expect the datastructure to be re-used, we start
> with a list of physical pages and then 'dma map' gives us a list of
> IOVA physical pages, in another allocation, but exactly the same
> datastructure.
>
> This 'dma map' could return a pointer to the first datastructure if
> there is no iommu, allocate a single entry
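The shrink-and-expand behaviour above falls out of buddy-style merging.
A self-contained sketch (editorial illustration, not proposed kernel
code) that reproduces the (3) ... (3, 4-7, 8-9, 10) walk:

#include <stdint.h>
#include <stddef.h>

struct pot_range {
	uint64_t index;		/* page index, aligned to 1 << order */
	unsigned int order;
};

/* Append 'page' as an order-0 entry, then merge trailing buddies of
 * equal order; returns the new length, which can shrink as well as
 * grow (e.g. adding page 7 to (3, 4-5, 6) collapses it to (3, 4-7)).
 * The caller guarantees room for one more entry. */
static size_t pot_append(struct pot_range *v, size_t n, uint64_t page)
{
	v[n].index = page;
	v[n].order = 0;
	n++;
	while (n >= 2 &&
	       v[n - 2].order == v[n - 1].order &&
	       (v[n - 2].index & ((2ULL << v[n - 2].order) - 1)) == 0 &&
	       v[n - 2].index + (1ULL << v[n - 2].order) == v[n - 1].index) {
		v[n - 2].order++;	/* two buddies become one entry */
		n--;
	}
	return n;
}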
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 6:44 PM Linus Torvalds wrote: > > I'll double-check to see if a revert fixes it at the top of my tree. Yup. It reverts cleanly, and the end result builds and works fine, and doesn't show the horrendous flickering. I have done that revert, and will continue the merge window work. Somebody else gets to figure out what the actual bug is, but that commit was horribly broken on my machine (Sapphire Pulse RX 580 8GB, fwiw). Linus
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 6:22 PM Linus Torvalds wrote:
>
> and I guess I'll do the few more bisections to pick out the exact one.

a896f870f8a5f23ec961d16baffd3fda1f8be57c is the first bad commit.
Attaching the BISECT_LOG in case anybody cares.

I'll double-check to see if a revert fixes it at the top of my tree.

              Linus

[Attachment: BISECT_LOG]
Re: [PATCH] drm/panel: Extend ACX424AKP bindings to ACX424AKM
On Mon, 03 Jan 2022 10:35:01 +0100, Linus Walleij wrote: > The panel ACX424AKP seems to only be used in prototypes, whereas > real products use the 10 pixels shorter ACX424AKM. Extend the > ACX424AKP bindings to also cover the ACX424AKM. The ACX424AKM > was used in a few different mobile phones from Sony Mobile. > > Cc: devicet...@vger.kernel.org > Cc: phone-de...@vger.kernel.org > Signed-off-by: Linus Walleij > --- > .../bindings/display/panel/sony,acx424akp.yaml| 11 +-- > 1 file changed, 9 insertions(+), 2 deletions(-) > Acked-by: Rob Herring
[PATCH 1/1] Add test for new hsaKmtAvailableMemory library call
Using DefaultGPUNode now instead of system memory, usage similar to
other tests. Also cleaned up pSmall, which I originally intended to just
let float away on the mistaken assumption that it would be cleaned up
automatically at the end of the test.

Basic test for the new hsaKmtAvailableMemory library call. This is a
standalone test, does not modify any of the other tests just to be on
the safe side. More elaborate tests coming soon.

Signed-off-by: Daniel Phillips
Change-Id: I645006a89bd8d55ef7b1605611e8ef0c010dad1a
---
 tests/kfdtest/src/KFDMemoryTest.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp
index 9f62727..d9016de 100644
--- a/tests/kfdtest/src/KFDMemoryTest.cpp
+++ b/tests/kfdtest/src/KFDMemoryTest.cpp
@@ -595,6 +595,26 @@ TEST_F(KFDMemoryTest, MemoryAlloc) {
     TEST_END
 }
 
+// Basic test for hsaKmtAvailableMemory
+TEST_F(KFDMemoryTest, MemoryAllocAll) {
+    TEST_START(TESTPROFILE_RUNALL)
+
+    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
+    unsigned int* pBig = NULL;
+    unsigned int* pSmall = NULL;
+    m_MemoryFlags.ui32.NoNUMABind = 1;
+    HSAuint64 available;
+
+    EXPECT_SUCCESS(hsaKmtAvailableMemory(defaultGPUNode, &available));
+    EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, available, m_MemoryFlags, reinterpret_cast<void**>(&pBig)));
+    EXPECT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE, m_MemoryFlags, reinterpret_cast<void**>(&pSmall)));
+    EXPECT_SUCCESS(hsaKmtFreeMemory(pBig, available));
+    EXPECT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE, m_MemoryFlags, reinterpret_cast<void**>(&pSmall)));
+    EXPECT_SUCCESS(hsaKmtFreeMemory(pSmall, PAGE_SIZE));
+
+    TEST_END
+}
+
 TEST_F(KFDMemoryTest, AccessPPRMem) {
     TEST_START(TESTPROFILE_RUNALL)
-- 
2.34.1
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 5:21 PM Linus Torvalds wrote:
>
> I'll see if I can bisect it at least partially.

It seems to be very reliable. I can see the flickering even at early
boot before gdm has started - the graphical screen where you type the
encrypted disk password at boot already shows it as you type.

Right now it is

  bad:  9602044d1cc12280e20c5f2cd640ae80f69e
  good: 3867e3704f136beadf5e004b61696ef7f990bee4

so it's going to be one of these:

9602044d1cc1 drm/amd/display: Fix for the no Audio bug with Tiled Displays
a896f870f8a5 drm/amd/display: Fix for otg synchronization logic
aba3c3fede54 drm/amd/display: Clear DPCD lane settings after repeater training
9311ed1e1241 drm/amd/display: add hdmi disable debug check
6421f7c750e9 drm/amd/display: Allow DSC on supported MST branch devices
ebe5ffd8e271 drm/amd/display: Enable P010 for DCN3x ASICs
c022375ae095 drm/amd/display: Add DP-HDMI FRL PCON Support in DC
50b1f44ec547 drm/amd/display: Add DP-HDMI FRL PCON SST Support in DM
81d104f4afbf drm/amdgpu: Don't halt RLC on GFX suspend
fe9c5c9affc9 drm/amdgpu: Use MAX_HWIP instead of HW_ID_MAX
370016988665 drm/amdgpu: fix the missed handling for SDMA2 and SDMA3
6c18ecefaba7 drm/amdgpu: declare static function to fix compiler warning
94a80b5bc7a2 amdgpu/pm: Modify implmentations of get_power_profile_mode to use amdgpu_pp_profile_name

and I guess I'll do the few more bisections to pick out the exact one.

              Linus
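For reference, narrowing the 13 remaining candidates takes about
ceil(log2(13)) = 4 more build-and-boot steps; a typical session, reusing
the good/bad points above, looks roughly like:

git bisect start
git bisect bad 9602044d1cc1
git bisect good 3867e3704f13
# build and boot the commit git suggests, then mark it:
git bisect good    # or: git bisect bad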
[PATCH 1/1] Add available memory ioctl for libhsakmt
Add an ioctl to inquire memory available for allocation by libhsakmt per node, allowing for space consumed by page translation tables. This ioctl is the underlying mechanism for the new memory availability library call posted for review here: https://lists.freedesktop.org/archives/amd-gfx/2022-January/073352.html Signed-off-by: Daniel Phillips --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 14 ++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c| 17 + include/uapi/linux/kfd_ioctl.h | 14 -- 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index fcbc8a9c9e06..64c6c36685d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 86a1a6c109d9..b7490a659173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available_memory; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available_memory = + adev->gmc.real_vram_size - + adev->kfd.vram_used - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return available_memory; +} + static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4bfc0c8ab764..5c2f6d97ff1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file *filep, return r; } +static int kfd_ioctl_get_available_memory(struct file *filep, +struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_available_memory_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + args->available = amdgpu_amdkfd_get_available_memory(dev->adev); + return 0; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..94a99add2432 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 
- Add available_memory ioctl */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version;/* from KFD */ @@ -98,6 +99,12 @@ struct kfd_ioctl_get_queue_wave_state_args { __u32 pad; }; +struct kfd_ioctl_get_available_memory_args { + __u64 available;/* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -742,7 +749,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_AVAILABLE_MEMORY\ + AMDKFD_IOR(0x22, struct kfd_ioctl_get_available_memory_args)
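A userspace sketch of exercising the new ioctl (hypothetical example; the
gpu_id value is a placeholder that real code would read from
/sys/class/kfd/kfd/topology/nodes/<N>/gpu_id, and error handling is
minimal):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	struct kfd_ioctl_get_available_memory_args args = {
		.gpu_id = 0x1002,	/* placeholder, see note above */
	};
	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

	if (fd < 0)
		return 1;
	if (ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args) == -1)
		return 1;
	printf("available on gpu 0x%x: %llu bytes\n",
	       args.gpu_id, (unsigned long long)args.available);
	return 0;
}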
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 5:21 PM Linus Torvalds wrote: > > It also seems to depend a bit on the screen contents - or possibly on > what else is going on. Hiding the browser window makes it happen less, > I think. But I suspect that's about "less gpu activity" than anything > else. Actually, sometimes "more activity" makes it go away too. Moving a window around wildly with the mouse makes it *stop* happen. But moving the mouse over different elements of the screen - or writing text in the web browser email window - seems to make it worse. Funky. It does "feel" to me like some bandwidth limitation, it has kind of the same behavior that I remember from the bad old times when you were pushing the video card past a resolution that it could really handle. But that can't be the case, this card has had no problems with this before. Linus
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 5:11 PM Alex Deucher wrote: > > We are putting together a system to try and repro the issue. Does it > happen with a single monitor or only with two? Nope. With a single monitor everything seems to look fine. And when I plug in the second monitor, it immediately starts happening again. It also seems to depend a bit on the screen contents - or possibly on what else is going on. Hiding the browser window makes it happen less, I think. But I suspect that's about "less gpu activity" than anything else. I'll see if I can bisect it at least partially. Linus
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 8:04 PM Linus Torvalds wrote: > > On Mon, Jan 10, 2022 at 2:13 PM Alex Deucher wrote: > > > > Sounds like something related to watermarks. That said, we haven't > > really touched the display code for DCE11 cards in quite a while. Can > > you provide your dmesg output? > > I'm not seeing anything that would look interesting, but here's the > parts that look relevant for drm.. We are putting together a system to try and repro the issue. Does it happen with a single monitor or only with two? Thanks, Alex
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 2:13 PM Alex Deucher wrote:
>
> Sounds like something related to watermarks. That said, we haven't
> really touched the display code for DCE11 cards in quite a while. Can
> you provide your dmesg output?

I'm not seeing anything that would look interesting, but here's the
parts that look relevant for drm..

              Linus

[Attachment: dmesg-gpu]
Re: Phyr Starter
On Mon, Jan 10, 2022 at 07:34:49PM +0000, Matthew Wilcox wrote:

> Finally, it may be possible to stop using scatterlist to describe the
> input to the DMA-mapping operation.  We may be able to get struct
> scatterlist down to just dma_address and dma_length, with chaining
> handled through an enclosing struct.

Can you talk about this some more? IMHO one of the key properties of
the scatterlist is that it can hold huge amounts of pages without
having to do any kind of special allocation due to the chaining.

The same will be true of the phyr idea right?

> I would like to see phyr replace bio_vec everywhere it's currently used.
> I don't have time to do that work now because I'm busy with folios.
> If someone else wants to take that on, I shall cheer from the sidelines.
> What I do intend to do is:

I wonder if we mixed things though..

IMHO there is a lot of optimization to be had by having a
datastructure that is expressly 'the physical pages underlying a
contiguous chunk of va'

If you limit to that scenario then we can be more optimal because
things like byte granular offsets and size in the interior pages don't
need to exist. Every interior chunk is always aligned to its order and
we only need to record the order.

An overall starting offset and total length allow computing the slice
of the first/last entry.

If the physical address is always aligned then we get 12 free bits
from the min 4k alignment and also only need to store order, not an
arbitrary byte granular length.

The win is I think we can meaningfully cover most common cases using
only 8 bytes per physical chunk. The 12 bits can be used to encode the
common orders (4k, 2M, 1G, etc) and some smart mechanism to get
another 16 bits to cover 'everything'.

IMHO storage density here is quite important, we end up having to keep
this stuff around for a long time.

I say this here, because I've always thought bio_vec/etc are more
general than we actually need, being byte granular at every chunk.

> - Add an interface to gup.c to pin/unpin N phyrs
> - Add a sg_map_phyrs()
>   This will take an array of phyrs and allocate an sg for them
> - Whatever else I need to do to make one RDMA driver happy with
>   this scheme

I spent a lot of time already cleaning all the DMA code in RDMA - it is
now nicely uniform and ready to do this sort of change. I was expecting
it to be a bio_vec, but this is fine too.

What is needed is a full scatterlist replacement, including the IOMMU
part.

For the IOMMU I would expect the datastructure to be re-used, we start
with a list of physical pages and then 'dma map' gives us a list of
IOVA physical pages, in another allocation, but exactly the same
datastructure.

This 'dma map' could return a pointer to the first datastructure if
there is no iommu, allocate a single entry list if the whole thing can
be linearly mapped with the iommu, and other baroque cases (like pci
offset/etc) will need to allocate full array. ie good HW runs fast and
is memory efficient.

It would be nice to see a patch sketching showing what this
datastructure could look like.

VFIO would like this structure as well as it currently is a very
inefficient page at a time loop when it iommu maps things.

Jason
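A rough sketch of the 8-byte-per-chunk encoding described above
(illustrative layout only; nothing here was agreed on in the thread):

#include <linux/types.h>

/*
 * One 8-byte entry per physically contiguous chunk. Bits [63:12] hold
 * the 4k-aligned physical address; the low 12 bits, free because of
 * that alignment, hold the order, so each chunk spans (1 << order)
 * 4k pages and is aligned to its own size.
 */
struct phyr_packed {
	u64 val;
};

#define PHYR_ORDER_MASK		0xfffULL

static inline u64 phyr_packed_addr(struct phyr_packed p)
{
	return p.val & ~PHYR_ORDER_MASK;
}

static inline unsigned int phyr_packed_order(struct phyr_packed p)
{
	return p.val & PHYR_ORDER_MASK;
}

static inline u64 phyr_packed_len(struct phyr_packed p)
{
	return 4096ULL << phyr_packed_order(p);	/* bytes covered */
}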
[PATCH] dma-buf-map: Fix dot vs comma in example
Fix typo: separate arguments with comma rather than dot. Signed-off-by: Lucas De Marchi --- include/linux/dma-buf-map.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 278d489e4bdd..19fa0b5ae5ec 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -52,13 +52,13 @@ * * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf); * - * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); + * dma_buf_map_set_vaddr(&map, 0xdeadbeaf); * * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem(). * * .. code-block:: c * - * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); + * dma_buf_map_set_vaddr_iomem(&map, 0xdeadbeaf); * * Instances of struct dma_buf_map do not have to be cleaned up, but * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings -- 2.34.1
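For context, the reason callers set the mapping through these helpers is
that accesses must branch on the memory type afterwards - roughly (a
sketch, not part of the patch):

#include <linux/dma-buf-map.h>

/* Copy into a mapping that may live in system or I/O memory. */
static void buf_write(struct dma_buf_map *map, const void *src, size_t len)
{
	if (map->is_iomem)
		memcpy_toio(map->vaddr_iomem, src, len);  /* I/O memory */
	else
		memcpy(map->vaddr, src, len);             /* system memory */
}

The header also provides dma_buf_map_memcpy_to() which wraps exactly
this branch.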
Re: [Patch v4 18/24] drm/amdkfd: CRIU checkpoint and restore xnack mode
On 2022-01-05 10:22 a.m., philip yang wrote: On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote: Recoverable page faults are represented by the xnack mode setting inside a kfd process and are used to represent the device page faults. For CR, we don't consider negative values which are typically used for querying the current xnack mode without modifying it. Signed-off-by: Rajneesh Bhardwaj --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 178b0ccfb286..446eb9310915 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1845,6 +1845,11 @@ static int criu_checkpoint_process(struct kfd_process *p, memset(&process_priv, 0, sizeof(process_priv)); process_priv.version = KFD_CRIU_PRIV_VERSION; + /* For CR, we don't consider negative xnack mode which is used for +* querying without changing it, here 0 simply means disabled and 1 +* means enabled so retry for finding a valid PTE. +*/ Negative value to query xnack mode is for kfd_ioctl_set_xnack_mode user space ioctl interface, which is not used by CRIU, I think this comment is misleading, + process_priv.xnack_mode = p->xnack_enabled ? 1 : 0; change to process_priv.xnack_enabled ret = copy_to_user(user_priv_data + *priv_offset, &process_priv, sizeof(process_priv)); @@ -2231,6 +2236,16 @@ static int criu_restore_process(struct kfd_process *p, return -EINVAL; } + pr_debug("Setting XNACK mode\n"); + if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) { + pr_err("xnack mode cannot be set\n"); + ret = -EPERM; + goto exit; + } else { On GFXv9 GPUs except Aldebaran, this means the process checkpointed is xnack off, it can restore and resume on GPU with xnack on, then shader will continue running successfully, but driver is not guaranteed to map svm ranges on GPU all the time, if retry fault happens, the shader will not recover. Maybe change to: If (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2) { The code here was correct. The xnack mode applies to the whole process, not just one GPU. The logic for checking the capabilities of all GPUs is already in kfd_process_xnack_mode. If XNACK cannot be supported by all GPUs, restoring a non-0 XNACK mode will fail. Any GPU can run in XNACK-disabled mode. So we don't need any limitations for process_priv.xnack_enabled == 0. Regards, Felix if (process_priv.xnack_enabled != kfd_process_xnack_mode(p, true)) { pr_err("xnack mode cannot be set\n"); ret = -EPERM; goto exit; } } pr_debug("set xnack mode: %d\n", process_priv.xnack_enabled); p->xnack_enabled = process_priv.xnack_enabled; + pr_debug("set xnack mode: %d\n", process_priv.xnack_mode); + p->xnack_enabled = process_priv.xnack_mode; + } + exit: return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 855c162b85ea..d72dda84c18c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1057,6 +1057,7 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, struct kfd_criu_process_priv_data { uint32_t version; + uint32_t xnack_mode; bool xnack_enabled; Regards, Philip }; struct kfd_criu_device_priv_data {
Re: [Patch v4 24/24] drm/amdkfd: CRIU resume shared virtual memory ranges
On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote: In CRIU resume stage, resume all the shared virtual memory ranges from the data stored inside the resuming kfd process during CRIU restore phase. Also setup xnack mode and free up the resources. Signed-off-by: Rajneesh Bhardwaj --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 55 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 6 +++ 3 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f7aa15b18f95..6191e37656dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2759,7 +2759,17 @@ static int criu_resume(struct file *filep, } mutex_lock(&target->mutex); + ret = kfd_criu_resume_svm(target); + if (ret) { + pr_err("kfd_criu_resume_svm failed for %i\n", args->pid); + goto exit; + } + ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info); + if (ret) + pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid); + +exit: mutex_unlock(&target->mutex); kfd_unref_process(target); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e9f6c63c2a26..bd2dce37f345 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3427,6 +3427,61 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm, return 0; } +int kfd_criu_resume_svm(struct kfd_process *p) +{ + int nattr_common = 4, nattr_accessibility = 1; + struct criu_svm_metadata *criu_svm_md = NULL; + struct criu_svm_metadata *next = NULL; + struct svm_range_list *svms = &p->svms; + int i, j, num_attrs, ret = 0; + struct mm_struct *mm; + + if (list_empty(&svms->criu_svm_metadata_list)) { + pr_debug("No SVM data from CRIU restore stage 2\n"); + return ret; + } + + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_err("failed to get mm for the target process\n"); + return -ESRCH; + } + + num_attrs = nattr_common + (nattr_accessibility * p->n_pdds); + + i = j = 0; + list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) { + pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx (npages)\n", +i, criu_svm_md->start_addr, criu_svm_md->size); + for (j = 0; j < num_attrs; j++) { + pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x \ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n", +i,j, criu_svm_md->attrs[j].type, +i,j, criu_svm_md->attrs[j].value); + } Is this super-detailed debug output really needed? 
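(If some output is kept, a single condensed line per range would convey
the same information - a sketch:

	pr_debug("criu_svm_md[%d]: start 0x%llx npages 0x%llx nattrs %d\n",
		 i, criu_svm_md->start_addr, criu_svm_md->size, num_attrs);
)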
Regards, Felix + + ret = svm_range_set_attr(p, mm, criu_svm_md->start_addr, +criu_svm_md->size, num_attrs, +criu_svm_md->attrs); + if (ret) { + pr_err("CRIU: failed to set range attributes\n"); + goto exit; + } + + i++; + } + +exit: + list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) { + pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n", + criu_svm_md->start_addr); + kfree(criu_svm_md); + } + + mmput(mm); + return ret; + +} + int svm_criu_prepare_for_resume(struct kfd_process *p, struct kfd_criu_svm_range_priv_data *svm_priv) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index e0c0853f085c..3b5bcb52723c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -195,6 +195,7 @@ int kfd_criu_restore_svm(struct kfd_process *p, uint8_t __user *user_priv_ptr, uint64_t *priv_data_offset, uint64_t max_priv_data_size); +int kfd_criu_resume_svm(struct kfd_process *p); struct kfd_process_device * svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm); @@ -256,6 +257,11 @@ static inline int kfd_criu_restore_svm(struct kfd_process *p, return -EINVAL; } +static inline int kfd_criu_resume_svm(struct kfd_process *p) +{ + return 0; +} + #define KFD_IS_SVM_API_SUPPORTED(dev) false #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
Re: [Patch v4 23/24] drm/amdkfd: CRIU prepare for svm resume
On 2022-01-05 9:43 a.m., philip yang wrote: On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote: During CRIU restore phase, the VMAs for the virtual address ranges are not at their final location yet so in this stage, only cache the data required to successfully resume the svm ranges during an imminent CRIU resume phase. Signed-off-by: Rajneesh Bhardwaj --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 5 ++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 99 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 12 +++ 4 files changed, 118 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 916b8d000317..f7aa15b18f95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2638,8 +2638,8 @@ static int criu_restore_objects(struct file *filep, goto exit; break; case KFD_CRIU_OBJECT_TYPE_SVM_RANGE: - /* TODO: Implement SVM range */ - *priv_offset += sizeof(struct kfd_criu_svm_range_priv_data); + ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data, +priv_offset, max_priv_data_size); if (ret) goto exit; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 87eb6739a78e..92191c541c29 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -790,6 +790,7 @@ struct svm_range_list { struct list_headlist; struct work_struct deferred_list_work; struct list_headdeferred_range_list; + struct list_headcriu_svm_metadata_list; spinlock_t deferred_list_lock; atomic_tevicted_ranges; booldrain_pagefaults; @@ -1148,6 +1149,10 @@ int kfd_criu_restore_event(struct file *devkfd, uint8_t __user *user_priv_data, uint64_t *priv_data_offset, uint64_t max_priv_data_size); +int kfd_criu_restore_svm(struct kfd_process *p, +uint8_t __user *user_priv_data, +uint64_t *priv_data_offset, +uint64_t max_priv_data_size); /* CRIU - End */ /* Queue Context Management */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 6d59f1bedcf2..e9f6c63c2a26 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -45,6 +45,14 @@ */ #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000 +struct criu_svm_metadata { + struct list_head list; + __u64 start_addr; + __u64 size; + /* Variable length array of attributes */ + struct kfd_ioctl_svm_attribute attrs[0]; +}; This data structure is struct kfd_criu_svm_range_priv_data plus list_head, maybe you can add list_head to struct kfd_criu_svm_range_priv_data and remove this new data structure, then you can remove extra kzalloc, kfree for each svm object resume and function svm_criu_prepare_for_resume could be removed. Adding list_head to the private structure is a bad idea, because that structure is copied to/from user mode. Kernel mode pointers should not be exposed to user mode, even in an opaque structure. That's just begging for an exploit. But you could define criu_svm_metadata as struct criu_svm_metadata { struct list_head list; kfd_criu_svm_range_priv_data data; }; Then copy_from_user directly into criu_svm_md->data in kfd_criu_restore_svm to avoid the double allocation. 
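Spelled out, that single-allocation shape looks roughly like this (a
sketch of Felix's suggestion, not tested code):

struct criu_svm_metadata {
	struct list_head list;
	struct kfd_criu_svm_range_priv_data data;
};

/* in kfd_criu_restore_svm(): one allocation, then copy the header and
 * the trailing attrs array straight from the user buffer */
criu_svm_md = kzalloc(sizeof(*criu_svm_md) +
		      num_attrs * sizeof(struct kfd_ioctl_svm_attribute),
		      GFP_KERNEL);
if (!criu_svm_md)
	return -ENOMEM;

if (copy_from_user(&criu_svm_md->data,
		   user_priv_ptr + *priv_data_offset,
		   sizeof(criu_svm_md->data) +
		   num_attrs * sizeof(struct kfd_ioctl_svm_attribute))) {
	kfree(criu_svm_md);
	return -EFAULT;
}
list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);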
Regards, Felix + static void svm_range_evict_svm_bo_worker(struct work_struct *work); static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, @@ -2753,6 +2761,7 @@ int svm_range_list_init(struct kfd_process *p) INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work); INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work); INIT_LIST_HEAD(&svms->deferred_range_list); + INIT_LIST_HEAD(&svms->criu_svm_metadata_list); spin_lock_init(&svms->deferred_list_lock); for (i = 0; i < p->n_pdds; i++) @@ -3418,6 +3427,96 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm, return 0; } +int svm_criu_prepare_for_resume(struct kfd_process *p, + struct kfd_criu_svm_range_priv_data *svm_priv) +{ + int nattr_common = 4, nattr_accessibility = 1; + struct criu_svm_metadata *criu_svm_md = NULL; + uint64_t svm_attrs_size, svm_object_md
Re: [PATCH 1/1] Add test for hsaKmtAvailableMemory available memory inquiry
On 2022-01-10 4:48 p.m., Daniel Phillips wrote: Basic test for the new hsaKmtAvailableMemory library call. This is a standalone test, does not modify any of the other tests just to be on the safe side. More elaborate tests coming soon. Change-Id: I738600d4b74cc5dba6b857e4c793f6b14b7d2283 Signed-off-by: Daniel Phillips --- tests/kfdtest/src/KFDMemoryTest.cpp | 17 + 1 file changed, 17 insertions(+) diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp index 9f62727..1f93928 100644 --- a/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/tests/kfdtest/src/KFDMemoryTest.cpp @@ -595,6 +595,23 @@ TEST_F(KFDMemoryTest, MemoryAlloc) { TEST_END } +// Basic test for hsaKmtAllocMemory +TEST_F(KFDMemoryTest, MemoryAllocAll) { +TEST_START(TESTPROFILE_RUNALL) + +unsigned int* pBig = NULL; +unsigned int* pSmall = NULL; +m_MemoryFlags.ui32.NoNUMABind = 1; +HSAuint64 available; +EXPECT_SUCCESS(hsaKmtAvailableMemory(0 /* system */, &available)); I don't think you've even implemented this API for system memory. The system memory node doesn't have a valid GPUID, so the ioctl will fail. I'd expect this test to work only for VRAM. +EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, available, m_MemoryFlags, reinterpret_cast(&pBig))); +EXPECT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast(&pSmall))); +EXPECT_SUCCESS(hsaKmtFreeMemory(pBig, available)); +EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast(&pSmall))); You're leaking pSmall here. Regards, Felix + +TEST_END +} + TEST_F(KFDMemoryTest, AccessPPRMem) { TEST_START(TESTPROFILE_RUNALL)
Re: [PATCH 1/1] Add available memory ioctl for libhsakmt
On 2022-01-10 3:54 p.m., Daniel Phillips wrote: From: Daniel Phillips This is weird. Looks like you've set up the your user email in your .gitconfig incorrectly. Or you changed it after you commited this patch locally. Add an ioctl to inquire memory available for allocation by libhsakmt per node, allowing for space consumed by page translation tables. Other than the missing signed-off-by, this patch is Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 14 ++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c| 17 + include/uapi/linux/kfd_ioctl.h | 14 -- 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index fcbc8a9c9e06..64c6c36685d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 86a1a6c109d9..b7490a659173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available_memory; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available_memory = + adev->gmc.real_vram_size - + adev->kfd.vram_used - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return available_memory; +} + static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4bfc0c8ab764..5c2f6d97ff1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file *filep, return r; } +static int kfd_ioctl_get_available_memory(struct file *filep, +struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_available_memory_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + args->available = amdgpu_amdkfd_get_available_memory(dev->adev); + return 0; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..94a99add2432 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device 
properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Add available_memory ioctl */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version;/* from KFD */ @@ -98,6 +99,12 @@ struct kfd_ioctl_get_queue_wave_state_args { __u32 pad; }; +struct kfd_ioctl_get_available_memory_args { + __u64 available;/* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -742,7 +749,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +
Re: [PATCH v3 1/2] drm/panel: panel-boe-tv101wum-nl6: tune the power sequence to avoid leakage
Jitao Shi writes: > "auo,kd101n80-45na" 2st LCD SPEC update, need to modify the timing > between IOVCC and mipi data. > The 2st version of SPEC modifies the timing requirements from IOVCC to > Mipi Data. IOVCC is now required to take precedence over MIPI DATA, > otherwise there is a risk of leakage. It is recommended that the time > for MIPI to enter LP11 be postponed after IOVCC (delay20ms). Similar to what Daniel said on v2: You're changing the behavior of *all* users of this panel driver with this patch, in order to fix a single user (in the next patch.) > Signed-off-by: Jitao Shi > Change-Id: Ic5212e2145a7dbf2efef9e5585904a93e1bc5a28 Please drop gerrit IDs from upstream submissions. Kevin > --- > drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 88 > +++--- > include/drm/panel_boe_tv101wum_nl6.h | 28 > 2 files changed, 94 insertions(+), 22 deletions(-) > create mode 100644 include/drm/panel_boe_tv101wum_nl6.h > > diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c > b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c > index db9d0b86d542..02efee06c430 100644 > --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c > +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c > @@ -49,7 +49,7 @@ struct boe_panel { > struct regulator *avee; > struct regulator *avdd; > struct gpio_desc *enable_gpio; > - > + int powered_refcnt; > bool prepared; > }; > > @@ -488,19 +488,15 @@ static int boe_panel_enter_sleep_mode(struct boe_panel > *boe) > return 0; > } > > -static int boe_panel_unprepare(struct drm_panel *panel) > +static int boe_panel_power_off(struct drm_panel *panel) > { > struct boe_panel *boe = to_boe_panel(panel); > - int ret; > > - if (!boe->prepared) > - return 0; > + if (WARN_ON(boe->powered_refcnt == 0)) > + return -EINVAL; > > - ret = boe_panel_enter_sleep_mode(boe); > - if (ret < 0) { > - dev_err(panel->dev, "failed to set panel off: %d\n", ret); > - return ret; > - } > + if (--boe->powered_refcnt != 0) > + return 0; > > msleep(150); > > @@ -520,17 +516,45 @@ static int boe_panel_unprepare(struct drm_panel *panel) > regulator_disable(boe->pp1800); > } > > + return 0; > +} > + > +int panel_unprepare_power(struct drm_panel *panel) > +{ > + if (of_device_is_compatible(panel->dev->of_node, "auo,kd101n80-45na")) > + return boe_panel_power_off(panel); > + > + return 0; > +} > +EXPORT_SYMBOL(panel_unprepare_power); > + > +static int boe_panel_unprepare(struct drm_panel *panel) > +{ > + struct boe_panel *boe = to_boe_panel(panel); > + int ret; > + > + if (!boe->prepared) > + return 0; > + > + ret = boe_panel_enter_sleep_mode(boe); > + if (ret < 0) { > + dev_err(panel->dev, "failed to set panel off: %d\n", ret); > + return ret; > + } > + > + boe_panel_power_off(panel); > + > boe->prepared = false; > > return 0; > } > > -static int boe_panel_prepare(struct drm_panel *panel) > +static int boe_panel_power_on(struct drm_panel *panel) > { > struct boe_panel *boe = to_boe_panel(panel); > int ret; > > - if (boe->prepared) > + if (++boe->powered_refcnt != 1) > return 0; > > gpiod_set_value(boe->enable_gpio, 0); > @@ -558,18 +582,8 @@ static int boe_panel_prepare(struct drm_panel *panel) > gpiod_set_value(boe->enable_gpio, 1); > usleep_range(6000, 1); > > - ret = boe_panel_init_dcs_cmd(boe); > - if (ret < 0) { > - dev_err(panel->dev, "failed to init panel: %d\n", ret); > - goto poweroff; > - } > - > - boe->prepared = true; > - > return 0; > > -poweroff: > - regulator_disable(boe->avee); > poweroffavdd: > regulator_disable(boe->avdd); > poweroff1v8: > @@ -580,6 +594,36 @@ static int 
boe_panel_prepare(struct drm_panel *panel) > return ret; > } > > +int panel_prepare_power(struct drm_panel *panel) > +{ > + if (of_device_is_compatible(panel->dev->of_node, "auo,kd101n80-45na")) > + return boe_panel_power_on(panel); > + > + return 0; > +} > +EXPORT_SYMBOL(panel_prepare_power); > + > +static int boe_panel_prepare(struct drm_panel *panel) > +{ > + struct boe_panel *boe = to_boe_panel(panel); > + int ret; > + > + boe_panel_power_on(panel); > + > + if (boe->prepared) > + return 0; > + > + ret = boe_panel_init_dcs_cmd(boe); > + if (ret < 0) { > + dev_err(panel->dev, "failed to init panel: %d\n", ret); > + return ret; > + } > + > + boe->prepared = true; > + > + return 0; > +} > + > static int boe_panel_enable(struct drm_panel *panel) > { > msleep(130); > diff --git a/include/drm/panel_boe_tv101wum_nl6.h > b/include/drm/panel_boe_tv101wum_nl6.h > new file mode 100644 > index 0
Re: [Patch v4 13/24] drm/amdkfd: CRIU checkpoint and restore queue mqds
On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote: From: David Yat Sin Checkpoint contents of queue MQD's on CRIU dump and restore them during CRIU restore. Signed-off-by: David Yat Sin David has an update for this patch to fix up the doorbell offset in the restored SDMA MQD. Regards, Felix --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 72 +++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 14 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 7 + .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 67 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 68 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 68 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 69 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 + .../amd/amdkfd/kfd_process_queue_manager.c| 158 -- 11 files changed, 506 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 3fb155f756fd..146879cd3f2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -312,7 +312,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 0c50e67e2b51..3a5303ebcabf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) properties.type = KFD_QUEUE_TYPE_DIQ; status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, &qid, NULL, NULL); + &properties, &qid, NULL, NULL, NULL); if (status) { pr_err("Failed to create DIQ\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index a0f5b8533a03..a92274f9f1f7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -331,7 +331,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm, static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, - const struct kfd_criu_queue_priv_data *qd) + const struct kfd_criu_queue_priv_data *qd, + const void *restore_mqd) { struct mqd_manager *mqd_mgr; int retval; @@ -390,8 +391,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, retval = -ENOMEM; goto out_deallocate_doorbell; } - mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, - &q->gart_mqd_addr, &q->properties); + + if (qd) + mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, +&q->properties, restore_mqd); + else + mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (q->properties.is_active) { if (!dqm->sched_running) { WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); @@ -1339,7 +1346,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, - const struct kfd_criu_queue_priv_data *qd) + const struct kfd_criu_queue_priv_data *qd, + const void *restore_mqd) { int retval; struct mqd_manager 
*mqd_mgr; @@ -1385,8 +1393,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, * updates the is_evicted flag but is a no-op otherwise. */ q->properties.is_evicted = !!qpd->evicted; - mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, - &q->gart_mqd_addr, &q->properties); + if (qd) + mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, +&q->properties, restore_mqd); + else +
Re: [Patch v4 07/24] drm/amdkfd: CRIU Implement KFD resume ioctl
On 2021-12-22 7:36 p.m., Rajneesh Bhardwaj wrote: This adds support to create userptr BOs on restore and introduces a new ioctl to restart memory notifiers for the restored userptr BOs. When doing CRIU restore MMU notifications can happen anytime after we call amdgpu_mn_register. Prevent MMU notifications until we reach stage-4 of the restore process i.e. criu_resume ioctl is received, and the process is ready to be resumed. This ioctl is different from other KFD CRIU ioctls since its called by CRIU master restore process for all the target processes being resumed by CRIU. Signed-off-by: David Yat Sin Signed-off-by: Rajneesh Bhardwaj --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 6 ++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 51 +-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 44 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 35 +++-- 5 files changed, 123 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index fcbc8a9c9e06..5c5fc839f701 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -131,6 +131,7 @@ struct amdkfd_process_info { atomic_t evicted_bos; struct delayed_work restore_userptr_work; struct pid *pid; + bool block_mmu_notifications; }; int amdgpu_amdkfd_init(void); @@ -269,7 +270,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, - uint64_t *offset, uint32_t flags); + uint64_t *offset, uint32_t flags, bool criu_resume); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size); @@ -297,6 +298,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev); +void amdgpu_amdkfd_block_mmu_notifications(void *p); +int amdgpu_amdkfd_criu_resume(void *p); + #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 90b985436878..5679fb75ec88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -846,7 +846,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, * * Returns 0 for success, negative errno for errors. */ -static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, + bool criu_resume) { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; @@ -868,6 +869,17 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) goto out; } + if (criu_resume) { + /* +* During a CRIU restore operation, the userptr buffer objects +* will be validated in the restore_userptr_work worker at a +* later stage when it is scheduled by another ioctl called by +* CRIU master process for the target pid for restore. 
+*/ + atomic_inc(&mem->invalid); + mutex_unlock(&process_info->lock); + return 0; + } ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); @@ -1240,6 +1252,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, INIT_DELAYED_WORK(&info->restore_userptr_work, amdgpu_amdkfd_restore_userptr_worker); + info->block_mmu_notifications = false; *process_info = info; *ef = dma_fence_get(&info->eviction_fence->base); } @@ -1456,10 +1469,37 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) return avm->pd_phys_addr; } +void amdgpu_amdkfd_block_mmu_notifications(void *p) +{ + struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; + + pinfo->block_mmu_notifications = true; +} + +int amdgpu_amdkfd_criu_resume(void *p) +{ + int ret = 0; + struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; + + mutex_lock(&pinfo->lock); + pr_debug("scheduling work
Re: [Patch v4 21/24] drm/amdkfd: CRIU Discover svm ranges
On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote: A KFD process may contain a number of virtual address ranges for shared virtual memory management and each such range can have many SVM attributes spanning across various nodes within the process boundary. This change reports the total number of such SVM ranges and their total private data size by extending the PROCESS_INFO op of the the CRIU IOCTL to discover the svm ranges in the target process and a future patches brings in the required support for checkpoint and restore for SVM ranges. Signed-off-by: Rajneesh Bhardwaj --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 +++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 5 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 60 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 11 + 4 files changed, 82 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 446eb9310915..1c25d5e9067c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2089,10 +2089,9 @@ static int criu_get_process_object_info(struct kfd_process *p, uint32_t *num_objects, uint64_t *objs_priv_size) { - int ret; - uint64_t priv_size; + uint64_t queues_priv_data_size, svm_priv_data_size, priv_size; uint32_t num_queues, num_events, num_svm_ranges; - uint64_t queues_priv_data_size; + int ret; *num_devices = p->n_pdds; *num_bos = get_process_num_bos(p); @@ -2102,7 +2101,10 @@ static int criu_get_process_object_info(struct kfd_process *p, return ret; num_events = kfd_get_num_events(p); - num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */ + + ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size); + if (ret) + return ret; *num_objects = num_queues + num_events + num_svm_ranges; @@ -2112,7 +2114,7 @@ static int criu_get_process_object_info(struct kfd_process *p, priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data); priv_size += queues_priv_data_size; priv_size += num_events * sizeof(struct kfd_criu_event_priv_data); - /* TODO: Add SVM ranges priv size */ + priv_size += svm_priv_data_size; *objs_priv_size = priv_size; } return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d72dda84c18c..87eb6739a78e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1082,7 +1082,10 @@ enum kfd_criu_object_type { struct kfd_criu_svm_range_priv_data { uint32_t object_type; - uint64_t reserved; + uint64_t start_addr; + uint64_t size; + /* Variable length array of attributes */ + struct kfd_ioctl_svm_attribute attrs[0]; }; struct kfd_criu_queue_priv_data { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 7c92116153fe..49e05fb5c898 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3418,6 +3418,66 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm, return 0; } +int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges, + uint64_t *svm_priv_data_size) +{ + uint64_t total_size, accessibility_size, common_attr_size; + int nattr_common = 4, naatr_accessibility = 1; + int num_devices = p->n_pdds; + struct svm_range_list *svms; + struct svm_range *prange; + uint32_t count = 0; + + *svm_priv_data_size = 0; + + svms = &p->svms; svms is defined as structure inside kfd_process, not pointer, so &p->svms will never be NULL. 
+	if (!svms)
+		return -EINVAL;
+
+	mutex_lock(&svms->lock);
+	list_for_each_entry(prange, &svms->list, list) {
+		pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 0x%llx\n",
+			 prange, prange->start, prange->npages,
+			 prange->start + prange->npages - 1);
+		count++;
+	}
+	mutex_unlock(&svms->lock);
+
+	*num_svm_ranges = count;
+	/* Only the accessibility attributes need to be queried for all the gpus
+	 * individually, remaining ones are spanned across the entire process
+	 * regardless of the various gpu nodes. Of the remaining attributes,
+	 * KFD_IOCTL_SVM_ATTR_CLR_FLAGS need not be saved.
+	 *
+	 * KFD_IOCTL_SVM_ATTR_PREFERRED_LOC
+	 * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC
+	 * KFD_IOCTL_SVM_ATTR_SET_FLAGS
+	 * KFD_IOCTL_SVM_ATTR_GRANULARITY
+	 *
+	 * ** ACCESSIBILITY ATTRIBUTES **
+	 * (Considered as one, type is altered during query, value is gpuid)
+	 * KFD_IOCTL_SVM_ATTR_ACCESS
+	 * KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE
+	 * KFD_IOCTL_SVM_ATTR_NO_ACCESS
+	 */
+	if (*num_svm_ranges > 0) {
+		common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+			nattr_common;
+		accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
+			naatr_accessibility * num_devices;
+
+		total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+			common_attr_size + accessibility_size;
+
+		*svm_priv_data_size = *num_svm_ranges * total_
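An aside on the attrs[0] flexible-array layout used by kfd_criu_svm_range_priv_data above: it implies that the checkpoint side will allocate each per-range blob with the attribute array appended to the fixed header. A minimal sketch of that allocation pattern follows; num_attrs, object_type, start_addr, range_size and attrs are illustrative placeholders, not names from this patch:

	/* Sketch only: building one per-range private-data blob whose
	 * kfd_ioctl_svm_attribute entries follow the fixed header, as the
	 * attrs[0] flexible array implies.
	 */
	struct kfd_criu_svm_range_priv_data *priv;
	size_t size = sizeof(*priv) +
		      num_attrs * sizeof(struct kfd_ioctl_svm_attribute);

	priv = kzalloc(size, GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->object_type = object_type;	/* the SVM-range enum value */
	priv->start_addr = start_addr;
	priv->size = range_size;
	memcpy(priv->attrs, attrs, num_attrs * sizeof(priv->attrs[0]));
	/* ...copy_to_user() the whole blob at the current offset, kfree(priv)... */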
Re: [Patch v4 06/24] drm/amdkfd: CRIU Implement KFD restore ioctl
On 2021-12-22 7:36 p.m., Rajneesh Bhardwaj wrote:

This implements the KFD CRIU Restore ioctl that lays the basic foundation for the CRIU restore operation. It provides support to create the buffer objects corresponding to Non-Paged system memory mapped for GPU and/or CPU access, and lays the basic foundation for the userptr buffer objects which will be added in a separate patch. This ioctl creates various types of buffer objects such as VRAM, MMIO, Doorbell, GTT based on the data sent from the userspace plugin. The data mostly contains the previously checkpointed KFD images from some KFD process.

While restoring a criu process, attach old IDR values to newly created BOs. This also adds the minimal gpu mapping support for a single gpu checkpoint restore use case.

Signed-off-by: David Yat Sin
Signed-off-by: Rajneesh Bhardwaj
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 298 ++++++++++++++++++++++-
 1 file changed, 297 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index cdbb92972338..c93f74ad073f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2069,11 +2069,307 @@ static int criu_checkpoint(struct file *filep,
 	return ret;
 }

+static int criu_restore_process(struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args,
+				uint64_t *priv_offset,
+				uint64_t max_priv_data_size)
+{
+	int ret = 0;
+	struct kfd_criu_process_priv_data process_priv;
+
+	if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
+		return -EINVAL;
+
+	ret = copy_from_user(&process_priv,
+				(void __user *)(args->priv_data + *priv_offset),
+				sizeof(process_priv));
+	if (ret) {
+		pr_err("Failed to copy process private information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_offset += sizeof(process_priv);
+
+	if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
+		pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
+			process_priv.version, KFD_CRIU_PRIV_VERSION);
+		return -EINVAL;
+	}
+
+exit:
+	return ret;
+}
+
+static int criu_restore_bos(struct kfd_process *p,
+			    struct kfd_ioctl_criu_args *args,
+			    uint64_t *priv_offset,
+			    uint64_t max_priv_data_size)
+{
+	struct kfd_criu_bo_bucket *bo_buckets;
+	struct kfd_criu_bo_priv_data *bo_privs;
+	bool flush_tlbs = false;
+	int ret = 0, j = 0;
+	uint32_t i;
+
+	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
+		return -EINVAL;
+
+	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
+	if (!bo_buckets)
+		return -ENOMEM;
+
+	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+			     args->num_bos * sizeof(*bo_buckets));
+	if (ret) {
+		pr_err("Failed to copy BOs information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+	if (!bo_privs) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
+			     args->num_bos * sizeof(*bo_privs));
+	if (ret) {
+		pr_err("Failed to copy BOs information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_offset += args->num_bos * sizeof(*bo_privs);
+
+	/* Create and map new BOs */
+	for (i = 0; i < args->num_bos; i++) {
+		struct kfd_criu_bo_bucket *bo_bucket;
+		struct kfd_criu_bo_priv_data *bo_priv;
+		struct kfd_dev *dev;
+		struct kfd_process_device *pdd;
+		void *mem;
+		u64 offset;
+		int idr_handle;
+
+		bo_bucket = &bo_buckets[i];
+		bo_priv = &bo_privs[i];
+
+		dev = kfd_device_by_id(bo_bucket->gpu_id);
+		if (!dev) {
+			ret = -EINVAL;
+			pr_err("Failed to get pdd\n");
+			goto exit;
+		}
+		pdd = kfd_get_process_device_data(dev, p);
+		if (!pdd) {
+			ret = -EINVAL;
+			pr_err("Failed to get pdd\n");
+			goto exit;
+		}
+
+		pr_debug("kfd restore ioctl - bo_bucket[%d]:\n", i);
+		pr_debug("size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
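The restore helpers above share one contract: each consumer reads its slice of the opaque priv_data blob at *priv_offset, bounds-checks against max_priv_data_size, and advances the offset on success. A condensed sketch of that pattern, factored into a hypothetical helper (criu_read_priv is not a function in the patch, only an illustration):

	/* Illustrative only: the bounds-check-then-advance contract used by
	 * criu_restore_process() and criu_restore_bos() when consuming the
	 * checkpointed private-data blob.
	 */
	static int criu_read_priv(struct kfd_ioctl_criu_args *args,
				  uint64_t *priv_offset,
				  uint64_t max_priv_data_size,
				  void *dst, size_t len)
	{
		if (*priv_offset + len > max_priv_data_size)
			return -EINVAL;	/* blob too small or offset corrupted */

		if (copy_from_user(dst,
				   (void __user *)(args->priv_data + *priv_offset),
				   len))
			return -EFAULT;

		*priv_offset += len;
		return 0;
	}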
Re: [Patch v4 04/24] drm/amdkfd: CRIU Implement KFD process_info ioctl
On 2021-12-22 7:36 p.m., Rajneesh Bhardwaj wrote:

This IOCTL is expected to be called as a precursor to the actual Checkpoint operation. This does the basic discovery into the target process seized by CRIU and relays the information to the userspace that utilizes it to start the Checkpoint operation via another dedicated IOCTL. The process_info IOCTL determines the number of GPUs and buffer objects that are associated with the target process, and its process id in the caller's namespace, since the /proc/pid/mem interface may be used to drain the contents of the discovered buffer objects in userspace and getpid returns the pid of the CRIU dumper process. Also, the pid of a process inside a container might be different from its global pid, so return the ns pid.

Signed-off-by: Rajneesh Bhardwaj
Signed-off-by: David Yat Sin
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 55 +++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1b863bd84c96..53d7a20e3c06 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1857,6 +1857,41 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 }
 #endif

+uint64_t get_process_num_bos(struct kfd_process *p)
+{
+	uint64_t num_of_bos = 0, i;
+
+	/* Run over all PDDs of the process */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		void *mem;
+		int id;
+
+		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+			struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+
+			if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
+				num_of_bos++;
+		}
+	}
+	return num_of_bos;
+}
+
+static void criu_get_process_object_info(struct kfd_process *p,
+					 uint32_t *num_bos,
+					 uint64_t *objs_priv_size)
+{
+	uint64_t priv_size;
+
+	*num_bos = get_process_num_bos(p);
+
+	if (objs_priv_size) {
+		priv_size = sizeof(struct kfd_criu_process_priv_data);
+		priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+		*objs_priv_size = priv_size;
+	}
+}
+
 static int criu_checkpoint(struct file *filep,
 			   struct kfd_process *p,
 			   struct kfd_ioctl_criu_args *args)
@@ -1889,7 +1924,25 @@ static int criu_process_info(struct file *filep,
 			     struct kfd_process *p,
 			     struct kfd_ioctl_criu_args *args)
 {
-	return 0;
+	int ret = 0;
+
+	mutex_lock(&p->mutex);
+
+	if (!kfd_has_process_device_data(p)) {
+		pr_err("No pdd for given process\n");
+		ret = -ENODEV;
+		goto err_unlock;
+	}
+
+	args->pid = task_pid_nr_ns(p->lead_thread,
+				   task_active_pid_ns(p->lead_thread));
+
+	criu_get_process_object_info(p, &args->num_bos, &args->priv_data_size);
+
+	dev_dbg(kfd_device, "Num of bos:%u\n", args->num_bos);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return ret;
 }

 static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index e68f692362bb..4d9bc7af03af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -950,6 +950,8 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
 					int handle);

+bool kfd_has_process_device_data(struct kfd_process *p);
+
 /* PASIDs */
 int kfd_pasid_init(void);
 void kfd_pasid_exit(void);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d4c8a6948a9f..f77d556ca0fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1456,6 +1456,20 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
 	return 0;
 }

+bool kfd_has_process_device_data(struct kfd_process *p)
+{
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];

I think checking p->n_pdds is sufficient. All the pdds with i < n_pdds should be non-NULL.

Regards,
  Felix

+
+		if (pdd)
+			return true;
+	}
+
+	return false;
+}
+
 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
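Taking Felix's comment at face value, the helper can collapse to a single test on n_pdds, since every pdd below n_pdds is expected to be non-NULL. A sketch of the simplified version:

	/* Simplified per the review comment: all pdds with i < n_pdds are
	 * non-NULL, so the loop is unnecessary.
	 */
	bool kfd_has_process_device_data(struct kfd_process *p)
	{
		return p->n_pdds != 0;
	}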
[PATCH v3 06/10] lib: test_hmm add ioctl to get zone device type
A new ioctl command is added to query the zone device type. This will be used once test_hmm adds the zone device coherent type.

Signed-off-by: Alex Sierra
---
 lib/test_hmm.c      | 14 ++++++++++++++
 lib/test_hmm_uapi.h |  8 ++++++++
 2 files changed, 22 insertions(+)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index c259842f6d44..97e48164d56a 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -84,6 +84,7 @@ struct dmirror_chunk {
 struct dmirror_device {
 	struct cdev		cdevice;
 	struct hmm_devmem	*devmem;
+	unsigned int		zone_device_type;

 	unsigned int		devmem_capacity;
 	unsigned int		devmem_count;
@@ -470,6 +471,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 	if (IS_ERR(res))
 		goto err_devmem;

+	mdevice->zone_device_type = HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
 	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 	devmem->pagemap.range.start = res->start;
 	devmem->pagemap.range.end = res->end;
@@ -1025,6 +1027,15 @@ static int dmirror_snapshot(struct dmirror *dmirror,
 	return ret;
 }

+static int dmirror_get_device_type(struct dmirror *dmirror,
+				   struct hmm_dmirror_cmd *cmd)
+{
+	mutex_lock(&dmirror->mutex);
+	cmd->zone_device_type = dmirror->mdevice->zone_device_type;
+	mutex_unlock(&dmirror->mutex);
+
+	return 0;
+}
+
 static long dmirror_fops_unlocked_ioctl(struct file *filp,
 					unsigned int command,
 					unsigned long arg)
@@ -1075,6 +1086,9 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
 		ret = dmirror_snapshot(dmirror, &cmd);
 		break;

+	case HMM_DMIRROR_GET_MEM_DEV_TYPE:
+		ret = dmirror_get_device_type(dmirror, &cmd);
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index f14dea5dcd06..17f842f1aa02 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h
@@ -19,6 +19,7 @@
  * @npages: (in) number of pages to read/write
  * @cpages: (out) number of pages copied
  * @faults: (out) number of device page faults seen
+ * @zone_device_type: (out) zone device memory type
  */
 struct hmm_dmirror_cmd {
 	__u64		addr;
@@ -26,6 +27,7 @@ struct hmm_dmirror_cmd {
 	__u64		npages;
 	__u64		cpages;
 	__u64		faults;
+	__u64		zone_device_type;
 };

 /* Expose the address space of the calling process through hmm device file */
@@ -35,6 +37,7 @@ struct hmm_dmirror_cmd {
 #define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x03, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x04, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x05, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_GET_MEM_DEV_TYPE	_IOWR('H', 0x06, struct hmm_dmirror_cmd)

 /*
  * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
@@ -62,4 +65,9 @@ enum {
 	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
 };

+enum {
+	/* 0 is reserved to catch uninitialized type fields */
+	HMM_DMIRROR_MEMORY_DEVICE_PRIVATE = 1,
+};
+
 #endif /* _LIB_TEST_HMM_UAPI_H */
--
2.32.0
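From user space, the new command is exercised like the other HMM_DMIRROR ioctls: open one of the /dev/hmm_dmirror* nodes and read back cmd.zone_device_type. A minimal sketch (error handling trimmed; setting npages to 1 is an assumption to satisfy the generic range sanity checks in the common ioctl path):

	/* Minimal user-space sketch of HMM_DMIRROR_GET_MEM_DEV_TYPE. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include "test_hmm_uapi.h"

	int main(void)
	{
		struct hmm_dmirror_cmd cmd = {};
		int fd = open("/dev/hmm_dmirror0", O_RDWR);

		cmd.npages = 1;	/* keep the driver's generic checks happy */
		if (fd < 0 || ioctl(fd, HMM_DMIRROR_GET_MEM_DEV_TYPE, &cmd))
			return 1;
		printf("zone device type: %llu\n",
		       (unsigned long long)cmd.zone_device_type);
		return 0;
	}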
[PATCH v3 10/10] tools: update test_hmm script to support SP config
Add two more parameters to set the spm_addr_dev0 & spm_addr_dev1 addresses. These two parameters configure the start SP addresses for each device in the test_hmm driver. Consequently, this configures the zone device type as coherent.

Signed-off-by: Alex Sierra
---
v2:
Add more mknods for the device coherent type. These are represented under /dev/hmm_mirror2 and /dev/hmm_mirror3, only in case they have been created when probing the hmm-test driver.
---
 tools/testing/selftests/vm/test_hmm.sh | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
index 0647b525a625..539c9371e592 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -40,11 +40,26 @@ check_test_requirements()

 load_driver()
 {
-	modprobe $DRIVER > /dev/null 2>&1
+	if [ $# -eq 0 ]; then
+		modprobe $DRIVER > /dev/null 2>&1
+	else
+		if [ $# -eq 2 ]; then
+			modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2
+				> /dev/null 2>&1
+		else
+			echo "Missing module parameters. Make sure to pass"\
+				"spm_addr_dev0 and spm_addr_dev1"
+			usage
+		fi
+	fi
 	if [ $? == 0 ]; then
 		major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
 		mknod /dev/hmm_dmirror0 c $major 0
 		mknod /dev/hmm_dmirror1 c $major 1
+		if [ $# -eq 2 ]; then
+			mknod /dev/hmm_dmirror2 c $major 2
+			mknod /dev/hmm_dmirror3 c $major 3
+		fi
 	fi
 }

@@ -58,7 +73,7 @@ run_smoke()
 {
 	echo "Running smoke test. Note, this test provides basic coverage."

-	load_driver
+	load_driver $1 $2
 	$(dirname "${BASH_SOURCE[0]}")/hmm-tests
 	unload_driver
 }

@@ -75,6 +90,9 @@ usage()
 	echo "# Smoke testing"
 	echo "./${TEST_NAME}.sh smoke"
 	echo
+	echo "# Smoke testing with SPM enabled"
+	echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>"
+	echo
 	exit 0
 }

@@ -84,7 +102,7 @@ function run_test()
 		usage
 	else
 		if [ "$1" = "smoke" ]; then
-			run_smoke
+			run_smoke $2 $3
 		else
 			usage
 		fi
--
2.32.0
[PATCH v3 08/10] lib: add support for device coherent type in test_hmm
Device Coherent type uses device memory that is coherently accessible by the CPU. This could be shown as SP (special purpose) memory range at the BIOS-e820 memory enumeration. If no SP memory is supported in the system, this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.

Currently, test_hmm only supports two different SP ranges of at least 256MB size. This could be specified in the kernel parameter variable efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x100000000 & 0x140000000 physical address. Ex. efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000

Private and coherent device mirror instances can be created in the same probe. This is done by passing the module parameters spm_addr_dev0 & spm_addr_dev1. In this case, it will create four instances of device_mirror. The first two correspond to private device type, the last two to coherent type. Then, they can be easily accessed from user space through /dev/hmm_mirror. Usually num_device 0 and 1 are for private, and 2 and 3 for coherent types. If no module parameters are passed, two instances of private type device_mirror will be created only.

Signed-off-by: Alex Sierra
---
 lib/test_hmm.c      | 247 ++++++++++++++++++++++++++++++++------------
 lib/test_hmm_uapi.h |  15 ++-
 2 files changed, 193 insertions(+), 69 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 9edeff52302e..7c641c5a9cfa 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -29,11 +29,22 @@

 #include "test_hmm_uapi.h"

-#define DMIRROR_NDEVICES		2
+#define DMIRROR_NDEVICES		4
 #define DMIRROR_RANGE_FAULT_TIMEOUT	1000
 #define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
 #define DEVMEM_CHUNKS_RESERVE		16

+/*
+ * For device_private pages, dpage is just a dummy struct page
+ * representing a piece of device memory. dmirror_devmem_alloc_page
+ * allocates a real system memory page as backing storage to fake a
+ * real device. zone_device_data points to that backing page. But
+ * for device_coherent memory, the struct page represents real
+ * physical CPU-accessible memory that we can use directly.
+ */
+#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
+			   (page)->zone_device_data : (page))
+
 static unsigned long spm_addr_dev0;
 module_param(spm_addr_dev0, long, 0644);
 MODULE_PARM_DESC(spm_addr_dev0,
@@ -122,6 +133,21 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce,
 	return 0;
 }

+static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
+{
+	return (mdevice->zone_device_type ==
+		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
+}
+
+static enum migrate_vma_direction
+	dmirror_select_device(struct dmirror *dmirror)
+{
+	return (dmirror->mdevice->zone_device_type ==
+		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
+		MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
+		MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+}
+
 static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
 {
 	vfree(bounce->ptr);
@@ -572,16 +598,19 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
 static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
 {
 	struct page *dpage = NULL;
-	struct page *rpage;
+	struct page *rpage = NULL;

 	/*
-	 * This is a fake device so we alloc real system memory to store
-	 * our device memory.
+	 * For ZONE_DEVICE private type, this is a fake device so we alloc real
+	 * system memory to store our device memory.
+	 * For ZONE_DEVICE coherent type we use the actual dpage to store the data
+	 * and ignore rpage.
	 */
-	rpage = alloc_page(GFP_HIGHUSER);
-	if (!rpage)
-		return NULL;
-
+	if (dmirror_is_private_zone(mdevice)) {
+		rpage = alloc_page(GFP_HIGHUSER);
+		if (!rpage)
+			return NULL;
+	}
 	spin_lock(&mdevice->lock);
 	if (mdevice->free_pages) {
@@ -601,7 +630,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
 	return dpage;

 error:
-	__free_page(rpage);
+	if (rpage)
+		__free_page(rpage);
 	return NULL;
 }

@@ -627,12 +657,15 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
 		 * unallocated pte_none() or read-only zero page.
 		 */
 		spage = migrate_pfn_to_page(*src);
+		WARN(spage && is_zone_device_page(spage),
+		     "page already in device spage pfn: 0x%lx\n",
+		     page_to_pfn(spage));

 		dpage = dmirror_devmem_alloc_page(mdevice);
 		if (!dpage)
 			continue;

-		rpage = dpage->zone_device_data;
+		rpage = BACKING_PAGE(dpage);
 		if (spage)
 			copy_highpage(rpage, spage);
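The BACKING_PAGE() macro is what lets the rest of the driver stay oblivious to the zone device type: everywhere that used to dereference dpage->zone_device_data can now ask for the backing page, and for coherent memory it gets the device page itself, which the CPU can touch directly. A sketch of the resulting access pattern, under the definitions in this patch (dmirror_copy_from_device is illustrative, not a function in the patch):

	/* Sketch: copying data out of a mirrored device page regardless of
	 * zone device type. For DEVICE_PRIVATE, BACKING_PAGE() returns the
	 * system-memory shadow page; for DEVICE_COHERENT it returns the
	 * device page itself.
	 */
	static void dmirror_copy_from_device(struct page *dpage, void *dst)
	{
		struct page *rpage = BACKING_PAGE(dpage);
		void *src = kmap_local_page(rpage);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_local(src);
	}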
[PATCH v3 09/10] tools: update hmm-test to support device coherent type
Test cases such as migrate_fault and migrate_multiple were modified to explicitly migrate from device to system memory, without the need for page faults, when using the device coherent type. The snapshot test case was updated to read the memory device type first and, based on that, check the proper returned results. A migrate_ping_pong test case was added to test explicit migration from device to system memory for both private and coherent zone types. Helpers to migrate from device to system memory and vice versa were also added.

Signed-off-by: Alex Sierra
---
v2:
Set FIXTURE_VARIANT to add multiple device types to the FIXTURE. This will run all the tests for each device type (private and coherent) in case both existed during hmm-test driver probe.
---
 tools/testing/selftests/vm/hmm-tests.c | 122 ++++++++++++++++++++-----
 1 file changed, 101 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..8eb81dfba4b3 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -44,6 +44,14 @@ struct hmm_buffer {
 	int		fd;
 	uint64_t	cpages;
 	uint64_t	faults;
+	int		zone_device_type;
+};
+
+enum {
+	HMM_PRIVATE_DEVICE_ONE,
+	HMM_PRIVATE_DEVICE_TWO,
+	HMM_COHERENCE_DEVICE_ONE,
+	HMM_COHERENCE_DEVICE_TWO,
 };

 #define TWOMEG		(1 << 21)
@@ -60,6 +68,21 @@ FIXTURE(hmm)
 	unsigned int	page_shift;
 };

+FIXTURE_VARIANT(hmm)
+{
+	int	device_number;
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
+{
+	.device_number = HMM_PRIVATE_DEVICE_ONE,
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
+{
+	.device_number = HMM_COHERENCE_DEVICE_ONE,
+};
+
 FIXTURE(hmm2)
 {
 	int		fd0;
@@ -68,6 +91,24 @@ FIXTURE(hmm2)
 	unsigned int	page_shift;
 };

+FIXTURE_VARIANT(hmm2)
+{
+	int	device_number0;
+	int	device_number1;
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
+{
+	.device_number0 = HMM_PRIVATE_DEVICE_ONE,
+	.device_number1 = HMM_PRIVATE_DEVICE_TWO,
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
+{
+	.device_number0 = HMM_COHERENCE_DEVICE_ONE,
+	.device_number1 = HMM_COHERENCE_DEVICE_TWO,
+};
+
 static int hmm_open(int unit)
 {
 	char pathname[HMM_PATH_MAX];
@@ -81,12 +122,19 @@ static int hmm_open(int unit)
 	return fd;
 }

+static bool hmm_is_coherent_type(int dev_num)
+{
+	return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
+}
+
 FIXTURE_SETUP(hmm)
 {
 	self->page_size = sysconf(_SC_PAGE_SIZE);
 	self->page_shift = ffs(self->page_size) - 1;

-	self->fd = hmm_open(0);
+	self->fd = hmm_open(variant->device_number);
+	if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
+		SKIP(exit(0), "DEVICE_COHERENT not available");
 	ASSERT_GE(self->fd, 0);
 }

@@ -95,9 +143,11 @@ FIXTURE_SETUP(hmm2)
 	self->page_size = sysconf(_SC_PAGE_SIZE);
 	self->page_shift = ffs(self->page_size) - 1;

-	self->fd0 = hmm_open(0);
+	self->fd0 = hmm_open(variant->device_number0);
+	if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
+		SKIP(exit(0), "DEVICE_COHERENT not available");
 	ASSERT_GE(self->fd0, 0);
-	self->fd1 = hmm_open(1);
+	self->fd1 = hmm_open(variant->device_number1);
 	ASSERT_GE(self->fd1, 0);
 }

@@ -144,6 +194,7 @@ static int hmm_dmirror_cmd(int fd,
 	}
 	buffer->cpages = cmd.cpages;
 	buffer->faults = cmd.faults;
+	buffer->zone_device_type = cmd.zone_device_type;

 	return 0;
 }
@@ -211,6 +262,20 @@ static void hmm_nanosleep(unsigned int n)
 	nanosleep(&t, NULL);
 }

+static int hmm_migrate_sys_to_dev(int fd,
+				  struct hmm_buffer *buffer,
+				  unsigned long npages)
+{
+	return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+static int hmm_migrate_dev_to_sys(int fd,
+				  struct hmm_buffer *buffer,
+				  unsigned long npages)
+{
+	return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
 /*
  * Simple NULL test of device open/close.
  */
@@ -875,7 +940,7 @@ TEST_F(hmm, migrate)
 		ptr[i] = i;

 	/* Migrate memory to device. */
-	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
 	ASSERT_EQ(ret, 0);
 	ASSERT_EQ(buffer->cpages, npages);

@@ -923,7 +988,7 @@ TEST_F(hmm, migrate_fault)
 		ptr[i] = i;

 	/* Migrate memory to device. */
-	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
 	ASSERT_EQ(ret, 0);
 	ASSERT_EQ(buffe
[PATCH v3 07/10] lib: test_hmm add module param for zone device type
In order to configure device coherent memory in test_hmm, two module parameters should be passed, corresponding to the SP start address of each of the two devices: spm_addr_dev0 & spm_addr_dev1. If no parameters are passed, the private device type is configured.

Signed-off-by: Alex Sierra
---
 lib/test_hmm.c      | 74 ++++++++++++++++++++++++++++++++-------------
 lib/test_hmm_uapi.h |  1 +
 2 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 97e48164d56a..9edeff52302e 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -34,6 +34,16 @@
 #define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
 #define DEVMEM_CHUNKS_RESERVE		16

+static unsigned long spm_addr_dev0;
+module_param(spm_addr_dev0, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev0,
+		"Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too");
+
+static unsigned long spm_addr_dev1;
+module_param(spm_addr_dev1, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev1,
+		"Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too");
+
 static const struct dev_pagemap_ops dmirror_devmem_ops;
 static const struct mmu_interval_notifier_ops dmirror_min_ops;
 static dev_t dmirror_dev;
@@ -452,29 +462,44 @@ static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
 	return ret;
 }

-static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
+static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
 				   struct page **ppage)
 {
 	struct dmirror_chunk *devmem;
-	struct resource *res;
+	struct resource *res = NULL;
 	unsigned long pfn;
 	unsigned long pfn_first;
 	unsigned long pfn_last;
 	void *ptr;
+	int ret = -ENOMEM;

 	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
 	if (!devmem)
-		return false;
+		return ret;

-	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
-				      "hmm_dmirror");
-	if (IS_ERR(res))
+	switch (mdevice->zone_device_type) {
+	case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
+		res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
+					      "hmm_dmirror");
+		if (IS_ERR_OR_NULL(res))
+			goto err_devmem;
+		devmem->pagemap.range.start = res->start;
+		devmem->pagemap.range.end = res->end;
+		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+		break;
+	case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
+		devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
+							spm_addr_dev0 :
+							spm_addr_dev1;
+		devmem->pagemap.range.end = devmem->pagemap.range.start +
+					    DEVMEM_CHUNK_SIZE - 1;
+		devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
+		break;
+	default:
+		ret = -EINVAL;
 		goto err_devmem;
+	}

-	mdevice->zone_device_type = HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
-	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
-	devmem->pagemap.range.start = res->start;
-	devmem->pagemap.range.end = res->end;
 	devmem->pagemap.nr_range = 1;
 	devmem->pagemap.ops = &dmirror_devmem_ops;
 	devmem->pagemap.owner = mdevice;
@@ -495,10 +520,14 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 		mdevice->devmem_capacity = new_capacity;
 		mdevice->devmem_chunks = new_chunks;
 	}
-
 	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
-	if (IS_ERR(ptr))
+	if (IS_ERR_OR_NULL(ptr)) {
+		if (ptr)
+			ret = PTR_ERR(ptr);
+		else
+			ret = -EFAULT;
 		goto err_release;
+	}

 	devmem->mdevice = mdevice;
 	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
@@ -527,15 +556,17 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 	}
 	spin_unlock(&mdevice->lock);

-	return true;
+	return 0;

 err_release:
 	mutex_unlock(&mdevice->devmem_lock);
-	release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
+	if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
+		release_mem_region(devmem->pagemap.range.start,
+				   range_len(&devmem->pagemap.range));
 err_devmem:
 	kfree(devmem);

-	return false;
+	return ret;
 }

 static struct page *dmirror_d
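One thing the parameter descriptions imply but the diff does not enforce is that spm_addr_dev0 and spm_addr_dev1 must be set together for coherent mode. A hedged sketch of a probe-time guard, shown only to make that constraint concrete (dmirror_check_spm_params is hypothetical, not part of the patch):

	/* Hypothetical guard: the MODULE_PARM_DESC text says both addresses
	 * must be set together, so refuse a half-configured coherent setup.
	 */
	static int dmirror_check_spm_params(void)
	{
		if (!!spm_addr_dev0 != !!spm_addr_dev1) {
			pr_err("test_hmm: spm_addr_dev0 and spm_addr_dev1 must be set together\n");
			return -EINVAL;
		}
		return 0;
	}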
[PATCH v3 05/10] drm/amdkfd: coherent type as sys mem on migration to ram
For VRAM-to-RAM migration, coherent device type memory has similar access semantics as system RAM from the CPU's point of view. The migrate flags select the source pages on the sender side; in the coherent type case, they should be set to MIGRATE_VMA_SELECT_DEVICE_COHERENT.

Signed-off-by: Alex Sierra
Reviewed-by: Felix Kuehling
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 9e36fe8aea0f..3e405f078ade 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -661,9 +661,12 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 	migrate.vma = vma;
 	migrate.start = start;
 	migrate.end = end;
-	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
 	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);

+	if (adev->gmc.xgmi.connected_to_cpu)
+		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+	else
+		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
 	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
 	size *= npages;
 	buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
--
2.32.0
[PATCH v3 04/10] drm/amdkfd: add SPM support for SVM
When the CPU is connected through XGMI, it has coherent access to the VRAM resource. In this case that resource is taken from a table in the device gmc aperture base. This resource is used along with the device type, which could be DEVICE_PRIVATE or DEVICE_COHERENT, to create the device page map region.

Signed-off-by: Alex Sierra
Reviewed-by: Felix Kuehling
---
v7:
Remove lookup_resource call, so an export symbol for this function is no longer required. Patch "kernel: resource: lookup_resource as exported symbol" dropped.
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 29 +++++++++++++++---------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index aeade32ec298..9e36fe8aea0f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -935,7 +935,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
 {
 	struct kfd_dev *kfddev = adev->kfd.dev;
 	struct dev_pagemap *pgmap;
-	struct resource *res;
+	struct resource *res = NULL;
 	unsigned long size;
 	void *r;

@@ -950,28 +950,34 @@ int svm_migrate_init(struct amdgpu_device *adev)
 	 * should remove reserved size
 	 */
 	size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
-	res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
-	if (IS_ERR(res))
-		return -ENOMEM;
+	if (adev->gmc.xgmi.connected_to_cpu) {
+		pgmap->range.start = adev->gmc.aper_base;
+		pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
+		pgmap->type = MEMORY_DEVICE_COHERENT;
+	} else {
+		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+		if (IS_ERR(res))
+			return -ENOMEM;
+		pgmap->range.start = res->start;
+		pgmap->range.end = res->end;
+		pgmap->type = MEMORY_DEVICE_PRIVATE;
+	}

-	pgmap->type = MEMORY_DEVICE_PRIVATE;
 	pgmap->nr_range = 1;
-	pgmap->range.start = res->start;
-	pgmap->range.end = res->end;
 	pgmap->ops = &svm_migrate_pgmap_ops;
 	pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
-	pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-
+	pgmap->flags = 0;
 	/* Device manager releases device-specific resources, memory region and
 	 * pgmap when driver disconnects from device.
 	 */
 	r = devm_memremap_pages(adev->dev, pgmap);
 	if (IS_ERR(r)) {
 		pr_err("failed to register HMM device memory\n");
-		/* Disable SVM support capability */
-		pgmap->type = 0;
-		devm_release_mem_region(adev->dev, res->start, resource_size(res));
+		if (pgmap->type == MEMORY_DEVICE_PRIVATE)
+			devm_release_mem_region(adev->dev, res->start,
+						res->end - res->start + 1);
+		/* Disable SVM support capability */
+		pgmap->type = 0;
 		return PTR_ERR(r);
 	}

@@ -984,3 +990,4 @@ int svm_migrate_init(struct amdgpu_device *adev)

 	return 0;
 }
+
--
2.32.0
[PATCH v3 02/10] mm: add device coherent vma selection for memory migration
This case is used to migrate pages from device memory back to system memory. Device coherent type memory is cache coherent from the device and CPU point of view.

Signed-off-by: Alex Sierra
---
v2:
Condition added for migrations from device coherent pages.
---
 include/linux/migrate.h | 1 +
 mm/migrate.c            | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index c8077e936691..e74bb0978f6f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -138,6 +138,7 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
 enum migrate_vma_direction {
 	MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
 	MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
+	MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2,
 };

 struct migrate_vma {
diff --git a/mm/migrate.c b/mm/migrate.c
index 91018880dc7f..0367f471211a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2340,8 +2340,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			if (is_writable_device_private_entry(entry))
 				mpfn |= MIGRATE_PFN_WRITE;
 		} else {
-			if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
-				goto next;
 			pfn = pte_pfn(pte);
 			if (is_zero_pfn(pfn)) {
 				mpfn = MIGRATE_PFN_MIGRATE;
@@ -2349,6 +2347,13 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 				goto next;
 			}
 			page = vm_normal_page(migrate->vma, addr, pte);
+			if (page && !is_zone_device_page(page) &&
+			    !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
+				goto next;
+			if (page && is_device_coherent_page(page) &&
+			    (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
+			     page->pgmap->owner != migrate->pgmap_owner))
+				goto next;
 			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
 			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 		}
--
2.32.0
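The collect logic above now has two skip conditions for present, non-device-private ptes. It may help to read them as a single predicate over (page, flags); a sketch equivalent in intent to the diff, not the literal kernel code (migrate_page_selected is a hypothetical name):

	/* Sketch of the selection rule after this patch:
	 *  - normal system page: only selected with MIGRATE_VMA_SELECT_SYSTEM
	 *  - device coherent page: only selected with
	 *    MIGRATE_VMA_SELECT_DEVICE_COHERENT and a matching pgmap owner
	 */
	static bool migrate_page_selected(struct page *page,
					  struct migrate_vma *migrate)
	{
		if (page && !is_zone_device_page(page))
			return migrate->flags & MIGRATE_VMA_SELECT_SYSTEM;
		if (page && is_device_coherent_page(page))
			return (migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) &&
			       page->pgmap->owner == migrate->pgmap_owner;
		return true;	/* e.g. the zero pfn, handled earlier by the caller */
	}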
[PATCH v3 01/10] mm: add zone device coherent type memory support
Device memory that is cache coherent from the device and CPU point of view. This is used on platforms that have an advanced system bus (like CAPI or CXL). Any page of a process can be migrated to such memory. However, no one should be allowed to pin such memory so that it can always be evicted.

Signed-off-by: Alex Sierra
---
 include/linux/memremap.h |  8 ++++++++
 include/linux/mm.h       | 16 ++++++++++++++++
 mm/memcontrol.c          |  6 +++---
 mm/memory-failure.c      |  8 ++++++--
 mm/memremap.c            |  5 ++++-
 mm/migrate.c             | 21 +++++++++++++--------
 6 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index c0e9d35889e8..ff4d398edf35 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -39,6 +39,13 @@ struct vmem_altmap {
  * A more complete discussion of unaddressable memory may be found in
  * include/linux/hmm.h and Documentation/vm/hmm.rst.
  *
+ * MEMORY_DEVICE_COHERENT:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is used on platforms that have an advanced system bus (like CAPI or CXL). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However no one
+ * should be allowed to pin such memory so that it can always be evicted.
+ *
  * MEMORY_DEVICE_FS_DAX:
  * Host memory that has similar access semantics as System RAM i.e. DMA
  * coherent and supports page pinning. In support of coordinating page
@@ -59,6 +66,7 @@ struct vmem_altmap {
 enum memory_type {
 	/* 0 is reserved to catch uninitialized type fields */
 	MEMORY_DEVICE_PRIVATE = 1,
+	MEMORY_DEVICE_COHERENT,
 	MEMORY_DEVICE_FS_DAX,
 	MEMORY_DEVICE_GENERIC,
 	MEMORY_DEVICE_PCI_P2PDMA,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..fcf96c0fc918 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1162,6 +1162,7 @@ static inline bool page_is_devmap_managed(struct page *page)
 		return false;
 	switch (page->pgmap->type) {
 	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_COHERENT:
 	case MEMORY_DEVICE_FS_DAX:
 		return true;
 	default:
@@ -1191,6 +1192,21 @@ static inline bool is_device_private_page(const struct page *page)
 		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }

+static inline bool is_device_coherent_page(const struct page *page)
+{
+	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+		is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_COHERENT;
+}
+
+static inline bool is_device_page(const struct page *page)
+{
+	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
+		is_zone_device_page(page) &&
+		(page->pgmap->type == MEMORY_DEVICE_PRIVATE ||
+		 page->pgmap->type == MEMORY_DEVICE_COHERENT);
+}
+
 static inline bool is_pci_p2pdma_page(const struct page *page)
 {
 	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6da5020a8656..d0bab0747c73 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5695,8 +5695,8 @@ static int mem_cgroup_move_account(struct page *page,
  *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
  *     target for charge migration. if @target is not NULL, the entry is stored
  *     in target->ent.
- *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is MEMORY_DEVICE_PRIVATE
- *     (so ZONE_DEVICE page and thus not on the lru).
+ *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is device memory and
+ *     thus not on the lru.
  *     For now we such page is charge like a regular page would be as for all
  *     intent and purposes it is just special memory taking the place of a
  *     regular page.
@@ -5730,7 +5730,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 		 */
 		if (page_memcg(page) == mc.from) {
 			ret = MC_TARGET_PAGE;
-			if (is_device_private_page(page))
+			if (is_device_page(page))
 				ret = MC_TARGET_DEVICE;
 			if (target)
 				target->page = page;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3e6449f2102a..4cf212e5f432 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1554,12 +1554,16 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		goto unlock;
 	}

-	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+	switch (pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_COHERENT:
 		/*
-		 * TODO: Handle HMM pages which may need coordination
+		 * TODO: Handle device pages which may need coordination
 		 * with devi
[PATCH v3 00/10] Add MEMORY_DEVICE_COHERENT for coherent device memory mapping
This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory owned by a device that can be mapped into CPU page tables like MEMORY_DEVICE_GENERIC and can also be migrated like MEMORY_DEVICE_PRIVATE.

Christoph, the suggestion to incorporate Ralph Campbell's refcount cleanup patch into our hardware page migration patchset originally came from you, but it proved impractical to do things in that order because the refcount cleanup introduced a bug with wide ranging structural implications. Instead, we amended Ralph's patch so that it could be applied after merging the migration work. As we saw from the recent discussion, merging the refcount work is going to take some time and cooperation between multiple development groups, while the migration work is ready now and is needed now. So we propose to merge this patchset first and continue to work with Ralph and others to merge the refcount cleanup separately, when it is ready.

This patch series is mostly self-contained except for a few places where it needs to update other subsystems to handle the new memory type. System stability and performance are not affected according to our ongoing testing, including xfstests.

How it works: The system BIOS advertises the GPU device memory (aka VRAM) as SPM (special purpose memory) in the UEFI system address map. The amdgpu driver registers the memory with devmap as MEMORY_DEVICE_COHERENT using devm_memremap_pages.

The initial user for this hardware page migration capability is the Frontier supercomputer project. This functionality is not AMD-specific. We expect other GPU vendors to find this functionality useful, and possibly other hardware types in the future.

Our test nodes in the lab are similar to the Frontier configuration, with 0.5 TB of system memory plus 256 GB of device memory split across 4 GPUs, all in a single coherent address space. Page migration is expected to improve application efficiency significantly. We will report empirical results as they become available.

We extended hmm_test to cover migration of MEMORY_DEVICE_COHERENT. This patch set builds on HMM and our SVM memory manager already merged in 5.15.

v2:
- test_hmm is now able to create private and coherent device mirror instances in the same driver probe. This adds more usability to the hmm test by not having to remove the kernel module for each device type test (private/coherent type). This is done by passing the module parameters spm_addr_dev0 & spm_addr_dev1. In this case, it will create four instances of device_mirror. The first two correspond to private device type, the last two to coherent type. Then, they can be easily accessed from user space through /dev/hmm_mirror. Usually num_device 0 and 1 are for private, and 2 and 3 for coherent types.
- Coherent device type pages at gup are now migrated back to system memory if they have been long term pinned (FOLL_LONGTERM). The reason is these pages could eventually interfere with their own device memory manager. A new hmm_gup_test has been added to the hmm-test to test this functionality. It makes use of the gup_test module to long term pin user pages that have been migrated to device memory first.
- Other patch corrections made by Felix, Alistair and Christoph.

v3:
- Based on the v2 feedback we got from Alistair, we've decided to remove the migration logic for FOLL_LONGTERM coherent device type pages at gup for now. Ideally, this should be done through the kernel mm, instead of calling the device driver to do it.
  Currently, there's no support for migrating device pages based on pfn, mainly because migrate_pages() relies on pages being LRU pages. Alistair mentioned he has started to work on adding this migrate device pages logic. For now, we fail on the get_user_pages call with FOLL_LONGTERM for DEVICE_COHERENT pages.
- Also, hmm_gup_test has been removed from hmm-test. We plan to include it again after this migration work is ready.
- Addressed Liam Howlett's feedback changes.

Alex Sierra (10):
  mm: add zone device coherent type memory support
  mm: add device coherent vma selection for memory migration
  mm/gup: fail get_user_pages for LONGTERM dev coherent type
  drm/amdkfd: add SPM support for SVM
  drm/amdkfd: coherent type as sys mem on migration to ram
  lib: test_hmm add ioctl to get zone device type
  lib: test_hmm add module param for zone device type
  lib: add support for device coherent type in test_hmm
  tools: update hmm-test to support device coherent type
  tools: update test_hmm script to support SP config

 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  34 ++-
 include/linux/memremap.h                 |   8 +
 include/linux/migrate.h                  |   1 +
 include/linux/mm.h                       |  16 ++
 lib/test_hmm.c                           | 333 +++++++++++++++++++----
 lib/test_hmm_uapi.h                      |  22 +-
 mm/gup.c                                 |   7 +
 mm/memcontrol.c                          |   6 +-
 mm/memory-failure.c                      |
[PATCH v3 03/10] mm/gup: fail get_user_pages for LONGTERM dev coherent type
Avoid long term pinning for coherent device type pages. This could interfere with their own device memory manager. For now, we are just returning an error for PIN_LONGTERM on coherent device type pages. Eventually, these types of pages will get migrated to system memory, once the device page migration support is added.

Signed-off-by: Alex Sierra
---
 mm/gup.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/mm/gup.c b/mm/gup.c
index 886d6148d3d0..9c8a075d862d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1720,6 +1720,12 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 		 * If we get a movable page, since we are going to be pinning
 		 * these entries, try to move them out if possible.
 		 */
+		if (is_device_page(head)) {
+			WARN_ON_ONCE(is_device_private_page(head));
+			ret = -EFAULT;
+			goto unpin_pages;
+		}
+
 		if (!is_pinnable_page(head)) {
 			if (PageHuge(head)) {
 				if (!isolate_huge_page(head, &movable_page_list))
@@ -1750,6 +1756,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 	if (list_empty(&movable_page_list) && !isolation_error_count)
 		return nr_pages;

+unpin_pages:
 	if (gup_flags & FOLL_PIN) {
 		unpin_user_pages(pages, nr_pages);
 	} else {
--
2.32.0
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 5:05 PM Daniel Vetter wrote:
>
> On Mon, Jan 10, 2022 at 10:30 PM Linus Torvalds wrote:
> >
> > On Thu, Jan 6, 2022 at 10:12 PM Dave Airlie wrote:
> > >
> > >   git://anongit.freedesktop.org/drm/drm tags/drm-next-2022-01-07
> >
> > Gaah. I merged things and it built cleanly, and I pushed it out.
> >
> > But then I actually *booted* it, and that's not pretty.
> >
> > It *works*, but it's almost unusable because of random scanline
> > flickering. I'm not sure how to explain it, but it's as if there
> > wasn't quite enough bandwidth on the scan-out, so you get these lines
> > of noise and/or shifted output. They are temporary - so the
> > framebuffer contents themselves is not damaged (although I don't know
> > how the compositor works - maybe the problem happens before scanout).
> >
> > This is on the same Radeon device:
> >
> >    49:00.0 VGA compatible controller: Advanced Micro Devices, Inc.
> > [AMD/ATI] Ellesmere [Radeon RX 470/480/570/570X/580/580X/590] (rev e7)
> >
> > with dual 4k monitors.
> >
> > Any idea?

Sounds like something related to watermarks. That said, we haven't really touched the display code for DCE11 cards in quite a while. Can you provide your dmesg output?

Alex

>
> Since Christian is mostly the compute/memory side, adding some display
> folks for this.
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Re: [Patch v4 03/24] drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs
On 2021-12-22 7:36 p.m., Rajneesh Bhardwaj wrote:

Checkpoint-Restore in userspace (CRIU) is a powerful tool that can snapshot a running process and later restore it on the same or a remote machine, but it expects processes that have a device file (e.g. GPU) associated with them to provide the necessary driver support to assist CRIU and its extensible plugin interface. Thus, in order to support the Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute Kernel driver needs to provide a set of new APIs that provide the necessary VRAM metadata and its contents to a userspace component (CRIU plugin) that can store it in the form of image files.

This introduces some new ioctls which will be used to checkpoint-restore any KFD-bound user process. KFD doesn't allow any arbitrary ioctl call unless it is called by the group leader process. Since these ioctls are expected to be called from a KFD CRIU plugin which has elevated ptrace privileges and the CAP_CHECKPOINT_RESTORE capability attached to the file descriptors, modify KFD to allow such calls.

(API redesigned by David Yat Sin)

Suggested-by: Felix Kuehling
Signed-off-by: David Yat Sin
Signed-off-by: Rajneesh Bhardwaj
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 94 +++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    | 65 +++++++++++++++-
 include/uapi/linux/kfd_ioctl.h           | 79 +++++++++++++++++++-
 3 files changed, 235 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 4bfc0c8ab764..1b863bd84c96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -33,6 +33,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/ptrace.h>
 #include <linux/dma-buf.h>
 #include <asm/processor.h>
 #include "kfd_priv.h"
@@ -1856,6 +1857,75 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 }
 #endif

+static int criu_checkpoint(struct file *filep,
+			   struct kfd_process *p,
+			   struct kfd_ioctl_criu_args *args)
+{
+	return 0;
+}
+
+static int criu_restore(struct file *filep,
+			struct kfd_process *p,
+			struct kfd_ioctl_criu_args *args)
+{
+	return 0;
+}
+
+static int criu_unpause(struct file *filep,
+			struct kfd_process *p,
+			struct kfd_ioctl_criu_args *args)
+{
+	return 0;
+}
+
+static int criu_resume(struct file *filep,
+		       struct kfd_process *p,
+		       struct kfd_ioctl_criu_args *args)
+{
+	return 0;
+}
+
+static int criu_process_info(struct file *filep,
+			     struct kfd_process *p,
+			     struct kfd_ioctl_criu_args *args)
+{
+	return 0;
+}
+
+static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_criu_args *args = data;
+	int ret;
+
+	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
+	switch (args->op) {
+	case KFD_CRIU_OP_PROCESS_INFO:
+		ret = criu_process_info(filep, p, args);
+		break;
+	case KFD_CRIU_OP_CHECKPOINT:
+		ret = criu_checkpoint(filep, p, args);
+		break;
+	case KFD_CRIU_OP_UNPAUSE:
+		ret = criu_unpause(filep, p, args);
+		break;
+	case KFD_CRIU_OP_RESTORE:
+		ret = criu_restore(filep, p, args);
+		break;
+	case KFD_CRIU_OP_RESUME:
+		ret = criu_resume(filep, p, args);
+		break;
+	default:
+		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
+
+	return ret;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1959,6 +2029,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {

 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
 			kfd_ioctl_set_xnack_mode, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
+			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
 };

 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
@@ -1973,6 +2046,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	char *kdata = NULL;
 	unsigned int usize, asize;
 	int retcode = -EINVAL;
+	bool ptrace_attached = false;

 	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
 		goto err_i1;
@@ -1998,7 +2072,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	 * processes need to create their own KFD device context.
 	 */
 	process = filep
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Mon, Jan 10, 2022 at 10:30 PM Linus Torvalds wrote:
>
> On Thu, Jan 6, 2022 at 10:12 PM Dave Airlie wrote:
> >
> >   git://anongit.freedesktop.org/drm/drm tags/drm-next-2022-01-07
>
> Gaah. I merged things and it built cleanly, and I pushed it out.
>
> But then I actually *booted* it, and that's not pretty.
>
> It *works*, but it's almost unusable because of random scanline
> flickering. I'm not sure how to explain it, but it's as if there
> wasn't quite enough bandwidth on the scan-out, so you get these lines
> of noise and/or shifted output. They are temporary - so the
> framebuffer contents themselves is not damaged (although I don't know
> how the compositor works - maybe the problem happens before scanout).
>
> This is on the same Radeon device:
>
>    49:00.0 VGA compatible controller: Advanced Micro Devices, Inc.
> [AMD/ATI] Ellesmere [Radeon RX 470/480/570/570X/580/580X/590] (rev e7)
>
> with dual 4k monitors.
>
> Any idea?

Since Christian is mostly the compute/memory side, adding some display folks for this.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
[PATCH 1/1] Add test for hsaKmtAvailableMemory available memory inquiry
Basic test for the new hsaKmtAvailableMemory library call. This is a standalone test, it does not modify any of the other tests, just to be on the safe side. More elaborate tests coming soon.

Change-Id: I738600d4b74cc5dba6b857e4c793f6b14b7d2283
Signed-off-by: Daniel Phillips
---
 tests/kfdtest/src/KFDMemoryTest.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp
index 9f62727..1f93928 100644
--- a/tests/kfdtest/src/KFDMemoryTest.cpp
+++ b/tests/kfdtest/src/KFDMemoryTest.cpp
@@ -595,6 +595,23 @@ TEST_F(KFDMemoryTest, MemoryAlloc) {
     TEST_END
 }

+// Basic test for hsaKmtAvailableMemory
+TEST_F(KFDMemoryTest, MemoryAllocAll) {
+    TEST_START(TESTPROFILE_RUNALL)
+
+    unsigned int* pBig = NULL;
+    unsigned int* pSmall = NULL;
+    m_MemoryFlags.ui32.NoNUMABind = 1;
+    HSAuint64 available;
+    EXPECT_SUCCESS(hsaKmtAvailableMemory(0 /* system */, &available));
+    EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, available, m_MemoryFlags, reinterpret_cast<void**>(&pBig)));
+    EXPECT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast<void**>(&pSmall)));
+    EXPECT_SUCCESS(hsaKmtFreeMemory(pBig, available));
+    EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast<void**>(&pSmall)));
+
+    TEST_END
+}
+
 TEST_F(KFDMemoryTest, AccessPPRMem) {
     TEST_START(TESTPROFILE_RUNALL)
--
2.34.1
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Thu, Jan 6, 2022 at 10:12 PM Dave Airlie wrote:
>
>   git://anongit.freedesktop.org/drm/drm tags/drm-next-2022-01-07

Gaah. I merged things and it built cleanly, and I pushed it out.

But then I actually *booted* it, and that's not pretty.

It *works*, but it's almost unusable because of random scanline flickering. I'm not sure how to explain it, but it's as if there wasn't quite enough bandwidth on the scan-out, so you get these lines of noise and/or shifted output. They are temporary - so the framebuffer contents themselves is not damaged (although I don't know how the compositor works - maybe the problem happens before scanout).

This is on the same Radeon device:

   49:00.0 VGA compatible controller: Advanced Micro Devices, Inc.
[AMD/ATI] Ellesmere [Radeon RX 470/480/570/570X/580/580X/590] (rev e7)

with dual 4k monitors.

Any idea?

            Linus
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
The pull request you sent on Fri, 7 Jan 2022 16:12:06 +1000: > git://anongit.freedesktop.org/drm/drm tags/drm-next-2022-01-07 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/8d0749b4f83bf4768ceae45ee6a79e6e7eddfc2a Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[PATCH 1/1] Add hsaKmtAvailableMemory available memory inquiry to libhsakmt
Add a library call to inquire memory available for allocation per node. Uses the AMDKFD_IOC_AVAILABLE_MEMORY ioctl available in KFD ioctl version 1.7.

Change-Id: Id770fc2261e9e076f2fbce7dcdac640a6354ddbe
---
 include/hsakmt.h          | 11 +++++++++++
 include/linux/kfd_ioctl.h | 18 ++++++++++++++++--
 src/memory.c              | 23 +++++++++++++++++++++++
 3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/include/hsakmt.h b/include/hsakmt.h
index ff2d023..abc617f 100644
--- a/include/hsakmt.h
+++ b/include/hsakmt.h
@@ -374,6 +374,17 @@ hsaKmtFreeMemory(
     HSAuint64   SizeInBytes    //IN
 );

+/**
+  Inquires memory available for allocation as a memory buffer
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAvailableMemory(
+    HSAuint32 Node,
+    HSAuint64 *AvailableBytes
+);
+
 /**
   Registers with KFD a memory buffer that may be accessed by the GPU
 */
diff --git a/include/linux/kfd_ioctl.h b/include/linux/kfd_ioctl.h
index 039b30b..a81ae37 100644
--- a/include/linux/kfd_ioctl.h
+++ b/include/linux/kfd_ioctl.h
@@ -32,9 +32,10 @@
  * - 1.4 - Indicate new SRAM EDC bit in device properties
  * - 1.5 - Add SVM API
  * - 1.6 - Query clear flags in SVM get_attr API
+ * - 1.7 - Add available_memory ioctl
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 6
+#define KFD_IOCTL_MINOR_VERSION 7

 /*
  * Debug revision change log
@@ -761,6 +762,16 @@ struct kfd_ioctl_free_memory_of_gpu_args {
 	__u64 handle;		/* to KFD */
 };

+/* Inquire available memory with kfd_ioctl_get_available_memory
+ *
+ * @available: memory available for alloc
+ */
+struct kfd_ioctl_get_available_memory_args {
+	__u64 available;	/* from KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
+};
+
 /* Map memory to one or more GPUs
  *
  * @handle:	memory handle returned by alloc
@@ -1240,8 +1251,11 @@ struct kfd_ioctl_set_xnack_mode_args {
 #define AMDKFD_IOC_SET_XNACK_MODE		\
 	AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)

+#define AMDKFD_IOC_AVAILABLE_MEMORY		\
+	AMDKFD_IOR(0x22, struct kfd_ioctl_get_available_memory_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x22
+#define AMDKFD_COMMAND_END		0x23

 /* non-upstream ioctls */
 #define AMDKFD_IOC_IPC_IMPORT_HANDLE		\
diff --git a/src/memory.c b/src/memory.c
index 6d2a4f4..b2cd759 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -199,6 +199,29 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
 	return fmm_release(MemoryAddress);
 }

+HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node, HSAuint64 *AvailableBytes)
+{
+	struct kfd_ioctl_get_available_memory_args args = {};
+	HSAKMT_STATUS result;
+
+	CHECK_KFD_OPEN();
+	CHECK_KFD_MINOR_VERSION(7);
+
+	pr_debug("[%s] node %d\n", __func__, Node);
+
+	result = validate_nodeid(Node, &args.gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS) {
+		pr_err("[%s] invalid node ID: %d\n", __func__, Node);
+		return result;
+	}
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
+		return HSAKMT_STATUS_ERROR;
+
+	*AvailableBytes = args.available;
+	return HSAKMT_STATUS_SUCCESS;
+}
+
 HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
 					     HSAuint64 MemorySizeInBytes)
 {
--
2.34.1
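A minimal caller of the new entry point, mirroring what the kfdtest in the companion patch does (a sketch; node 0 is assumed to be the system node as in the test, and the hsaKmtAllocMemory signature is taken from the existing kfdtest usage):

	/* Sketch: query available memory on node 0, allocate it all, free it. */
	#include "hsakmt.h"

	static int alloc_all_available(void)
	{
		HSAuint64 available;
		void *buf;
		HsaMemFlags flags = {0};

		if (hsaKmtAvailableMemory(0, &available) != HSAKMT_STATUS_SUCCESS)
			return -1;
		if (hsaKmtAllocMemory(0, available, flags, &buf) != HSAKMT_STATUS_SUCCESS)
			return -1;
		return hsaKmtFreeMemory(buf, available) == HSAKMT_STATUS_SUCCESS ? 0 : -1;
	}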
Re: [PATCH] drm/amdkfd: Check for null pointer after calling kmemdup
On 2022-01-05 10:56 a.m., Felix Kuehling wrote:

Am 2022-01-05 um 4:09 a.m. schrieb Jiasheng Jiang:

As the allocation may fail, kmemdup() may return a NULL pointer. Therefore, 'props2' should be checked in order to prevent a NULL pointer dereference.

Fixes: 3a87177eb141 ("drm/amdkfd: Add topology support for dGPUs")
Signed-off-by: Jiasheng Jiang

Reviewed-by: Felix Kuehling

I applied the patch to amd-staging-drm-next.

Regards,
  Felix

---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index c60e82697385..d15380c65c6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -410,6 +410,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
 			return -ENODEV;
 		/* same everything but the other direction */
 		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+		if (!props2)
+			return -ENOMEM;
+
 		props2->node_from = id_to;
 		props2->node_to = id_from;
 		props2->kobj = NULL;
Re: [PATCH 1/1] Add available memory ioctl for libhsakmt
[Public] This is missing your signed-off-by. Additionally, for UAPI changes, we need a link to the patches for the userspace component that will make use of it. Alex From: amd-gfx on behalf of Daniel Phillips Sent: Monday, January 10, 2022 3:54 PM To: amd-...@lists.freedesktop.org ; dri-devel@lists.freedesktop.org Cc: Phillips, Daniel Subject: [PATCH 1/1] Add available memory ioctl for libhsakmt From: Daniel Phillips Add an ioctl to inquire memory available for allocation by libhsakmt per node, allowing for space consumed by page translation tables. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 14 ++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c| 17 + include/uapi/linux/kfd_ioctl.h | 14 -- 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index fcbc8a9c9e06..64c6c36685d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 86a1a6c109d9..b7490a659173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available_memory; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available_memory = + adev->gmc.real_vram_size - + adev->kfd.vram_used - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return available_memory; +} + static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4bfc0c8ab764..5c2f6d97ff1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file *filep, return r; } +static int kfd_ioctl_get_available_memory(struct file *filep, +struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_available_memory_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + args->available = amdgpu_amdkfd_get_available_memory(dev->adev); + return 0; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..94a99add2432 100644 ---
a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Add available_memory ioctl */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version;/* from KFD */ @@ -98,6 +99,12 @@ struct kfd_ioctl_get_queue_wave_state_args { __u32 pad; }; +struct kfd_ioctl_get_available_memory_args { + __u64 available;/* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -742,7 +749,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_AVAILABLE_MEMORY\ + AMDKFD_IOR(0x22, struct kfd_ioctl_get_available_memory_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- 2.34.1
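For reference, the raw UAPI usage that the library call wraps looks roughly like the sketch below. The gpu_id would normally be taken from the KFD topology; here it is a caller-supplied placeholder, and error handling is minimal:

#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Sketch: query available memory straight from /dev/kfd */
static int query_available(uint32_t gpu_id, uint64_t *out)
{
	struct kfd_ioctl_get_available_memory_args args = { .gpu_id = gpu_id };
	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
	int ret = -1;

	if (fd < 0)
		return -1;
	if (!ioctl(fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args)) {
		*out = args.available;
		ret = 0;
	}
	close(fd);
	return ret;
}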
Re: [git pull] drm for 5.17-rc1 (pre-merge window pull)
On Thu, Jan 6, 2022 at 10:12 PM Dave Airlie wrote: > > nouveau_fence.c is the only conflict I've seen and I've taken the result from > our rerere cache in the merge above. It's non trivial, would be good to have > Christian confirm it as well. Thanks, that conflict really ended up being one that I would have done very differently without having had that pointer to your reference merge. And I would almost certainly have messed it up in the process. So what I did was to look at your merge resolution (or possibly Christian's? I don't know how you guys share your trees and the origin of that rerere), and tried to understand it, and basically recreate it. It's not exactly the same (different whitespace and variable lifetimes), but I think I got the gist of it. Thanks for the pointer, and hopefully I didn't mess it up _despite_ your merge showing me what I should aim for ;) Linus
[PATCH v10 5/5] drm/msm/dp: stop link training after link training 2 failed
Each DP link training sequence consists of link training 1 followed by link training 2. There is a maximum of 5 retries of DP link training before link training is declared failed. Link training must be stopped at the end of link training 2 if it failed, so that the next link training 1 can start fresh. This patch fixes link compliance test case 4.3.1.13 (Source Device Link Training EQ Fallback Test). Changes in v10: -- group into one series Fixes: 2e0adc765d88 ("drm/msm/dp: do not end dp link training until video is ready") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index f98df93..245e1b9 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1755,6 +1755,9 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) /* end with failure */ break; /* lane == 1 already */ } + + /* stop link training before start re training */ + dp_ctrl_clear_training_pattern(ctrl); } } -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project
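The hunk above only shows the tail of the retry loop; schematically, the flow in dp_ctrl_on_link() that the patch completes looks like the sketch below. This is illustrative pseudocode, not literal driver code, with the rate/lane fallback details elided:

	/* sketch: up to 5 attempts, each attempt = training 1 + training 2 */
	for (tries = 0; tries < maximum_retries; tries++) {
		rc = dp_ctrl_link_train_1(ctrl, &training_step); /* clock recovery */
		if (!rc)
			rc = dp_ctrl_link_train_2(ctrl, &training_step); /* EQ */
		if (!rc)
			break; /* link is up */

		/* fall back to a lower link rate or fewer lanes here */

		/* stop link training before starting re-training so the
		 * next link training 1 starts from a clean state */
		dp_ctrl_clear_training_pattern(ctrl);
	}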
[PATCH v10 4/5] drm/msm/dp: add support of tps4 (training pattern 4) for HBR3
From: Kuogee Hsieh Some DP sinks prefer to use tps4 instead of tps3 during training #2. This patch will use tps4 to perform link training #2 if the sink's DPCD supports it. Changes in V2: -- replace dp_catalog_ctrl_set_pattern() with dp_catalog_ctrl_set_pattern_state_bit() Changes in V3: -- change state_ctrl_bits type to u32 and pattern type to u8 Changes in V4: -- align } else if { and } else { Changes in v10: -- group into one series Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd --- drivers/gpu/drm/msm/dp/dp_catalog.c | 12 ++-- drivers/gpu/drm/msm/dp/dp_catalog.h | 2 +- drivers/gpu/drm/msm/dp/dp_ctrl.c| 17 - 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index 6ae9b29..64f0b26 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -456,19 +456,19 @@ void dp_catalog_ctrl_config_msa(struct dp_catalog *dp_catalog, dp_write_p0(catalog, MMSS_DP_DSC_DTO, 0x0); } -int dp_catalog_ctrl_set_pattern(struct dp_catalog *dp_catalog, - u32 pattern) +int dp_catalog_ctrl_set_pattern_state_bit(struct dp_catalog *dp_catalog, + u32 state_bit) { int bit, ret; u32 data; struct dp_catalog_private *catalog = container_of(dp_catalog, struct dp_catalog_private, dp_catalog); - bit = BIT(pattern - 1); - DRM_DEBUG_DP("hw: bit=%d train=%d\n", bit, pattern); + bit = BIT(state_bit - 1); + DRM_DEBUG_DP("hw: bit=%d train=%d\n", bit, state_bit); dp_catalog_ctrl_state_ctrl(dp_catalog, bit); - bit = BIT(pattern - 1) << DP_MAINLINK_READY_LINK_TRAINING_SHIFT; + bit = BIT(state_bit - 1) << DP_MAINLINK_READY_LINK_TRAINING_SHIFT; /* Poll for mainlink ready status */ ret = readx_poll_timeout(readl, catalog->io->dp_controller.link.base + @@ -476,7 +476,7 @@ int dp_catalog_ctrl_set_pattern(struct dp_catalog *dp_catalog, data, data & bit, POLLING_SLEEP_US, POLLING_TIMEOUT_US); if (ret < 0) { - DRM_ERROR("set pattern for link_train=%d failed\n", pattern); + DRM_ERROR("set state_bit for link_train=%d failed\n", state_bit); return ret; } return 0; diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h index 6965afa..7dea101 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.h +++ b/drivers/gpu/drm/msm/dp/dp_catalog.h @@ -94,7 +94,7 @@ void dp_catalog_ctrl_mainlink_ctrl(struct dp_catalog *dp_catalog, bool enable); void dp_catalog_ctrl_config_misc(struct dp_catalog *dp_catalog, u32 cc, u32 tb); void dp_catalog_ctrl_config_msa(struct dp_catalog *dp_catalog, u32 rate, u32 stream_rate_khz, bool fixed_nvid); -int dp_catalog_ctrl_set_pattern(struct dp_catalog *dp_catalog, u32 pattern); +int dp_catalog_ctrl_set_pattern_state_bit(struct dp_catalog *dp_catalog, u32 pattern); void dp_catalog_ctrl_reset(struct dp_catalog *dp_catalog); bool dp_catalog_ctrl_mainlink_ready(struct dp_catalog *dp_catalog); void dp_catalog_ctrl_enable_irq(struct dp_catalog *dp_catalog, bool enable); diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 9c80b49..f98df93 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1083,7 +1083,7 @@ static int dp_ctrl_link_train_1(struct dp_ctrl_private *ctrl, *training_step = DP_TRAINING_1; - ret = dp_catalog_ctrl_set_pattern(ctrl->catalog, DP_TRAINING_PATTERN_1); + ret = dp_catalog_ctrl_set_pattern_state_bit(ctrl->catalog, 1); if (ret) return ret; dp_ctrl_train_pattern_set(ctrl, DP_TRAINING_PATTERN_1 | @@ -1181,7 +1181,8 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, int
*training_step) { int tries = 0, ret = 0; - char pattern; + u8 pattern; + u32 state_ctrl_bit; int const maximum_retries = 5; u8 link_status[DP_LINK_STATUS_SIZE]; @@ -1189,12 +1190,18 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, *training_step = DP_TRAINING_2; - if (drm_dp_tps3_supported(ctrl->panel->dpcd)) + if (drm_dp_tps4_supported(ctrl->panel->dpcd)) { + pattern = DP_TRAINING_PATTERN_4; + state_ctrl_bit = 4; + } else if (drm_dp_tps3_supported(ctrl->panel->dpcd)) { pattern = DP_TRAINING_PATTERN_3; - else + state_ctrl_bit = 3; + } else { pattern = DP_TRAINING_PATTERN_2; + state_ctrl_bit = 2; + } - ret = dp_catalog_ctrl_set_pattern(ctrl->catalog, pattern); + ret = dp_catalog_ctrl_set_pattern_stat
[PATCH v10 3/5] drm/msm/dp: populate connector of struct dp_panel
DP CTS test case 4.2.2.6 has a valid edid with a bad checksum on purpose and expects the DP source to return the correct checksum. During the drm edid read, the correct edid checksum is calculated and stored at connector::real_edid_checksum. The problem is that struct dp_panel::connector is never assigned; instead the connector is stored in struct msm_dp::connector. When we run compliance testing test case 4.2.2.6, dp_panel_handle_sink_request() won't have a valid edid set in struct dp_panel::edid, so we'll try to use the connector's real_edid_checksum and hit a NULL pointer dereference error because the connector pointer is never assigned. Changes in V2: -- populate panel connector at msm_dp_modeset_init() instead of at dp_panel_read_sink_caps() Changes in V3: -- remove unhelpful kernel crash trace commit text -- remove renaming dp_display parameter to dp Changes in V4: -- add more details to commit text Changes in v10: -- group into one series Fixes: 7948fe12d47 ("drm/msm/dp: return correct edid checksum after corrupted edid checksum read") Signed-off-by: Kuogee Hsieh Reviewed-by: Bjorn Andersson Reviewed-by: Stephen Boyd --- drivers/gpu/drm/msm/dp/dp_display.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index c7f0423..e76e375 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1489,6 +1489,7 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, struct drm_encoder *encoder) { struct msm_drm_private *priv; + struct dp_display_private *dp_priv; int ret; if (WARN_ON(!encoder) || WARN_ON(!dp_display) || WARN_ON(!dev)) @@ -1497,6 +1498,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, priv = dev->dev_private; dp_display->drm_dev = dev; + dp_priv = container_of(dp_display, struct dp_display_private, dp_display); + ret = dp_display_request_irq(dp_display); if (ret) { DRM_ERROR("request_irq failed, ret=%d\n", ret); @@ -1514,6 +1517,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, return ret; } + dp_priv->panel->connector = dp_display->connector; + priv->connectors[priv->num_connectors++] = dp_display->connector; return 0; } -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project
[PATCH v10 2/5] drm/msm/dp: do not initialize phy until plugin interrupt received
In the current DP driver, regulators, clocks, irq and phy are grouped together within a function and not executed in a symmetric manner. This increases the difficulty of code maintenance and limits code scalability. This patch divides the driver's life cycle of operation into four states: resume (including booting up), dongle plugin, dongle unplug and suspend. Regulators, core clocks and irq are grouped together and enabled at resume (or boot up) so that the DP controller is armed and ready to receive HPD plugin interrupts. An HPD plugin interrupt is generated when a dongle plugs into the DUT (device under test). Once the HPD plugin interrupt is received, the DP controller will initialize the phy so that dpcd read/write will function and the following link training can proceed successfully. The DP phy will be disabled after the main link is torn down, at the end of the unplug HPD interrupt handler triggered by the dongle being unplugged from the DUT. Finally, regulators, core clocks and irq are disabled at the corresponding suspend. Changes in V2: -- removed unnecessary dp_ctrl NULL check -- removed unnecessary phy init_count and power_count DRM_DEBUG_DP logs -- remove flip parameter out of dp_ctrl_irq_enable() -- add fixes tag Changes in V3: -- call dp_display_host_phy_init() instead of dp_ctrl_phy_init() at dp_display_host_init() for eDP Changes in V4: -- rewording commit text to match this commit changes Changes in V5: -- rebase on top of msm-next branch Changes in V6: -- delete flip variable Changes in V7: -- dp_ctrl_irq_enable/disable() merged into dp_ctrl_reset_irq_ctrl() Changes in V8: -- add more detail comment regarding dp phy at dp_display_host_init() Changes in V9: -- remove set phy_initialized to false when -ECONNRESET detected Changes in v10: -- group into one series Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets") Signed-off-by: Kuogee Hsieh --- drivers/gpu/drm/msm/dp/dp_ctrl.c| 80 + drivers/gpu/drm/msm/dp/dp_ctrl.h| 8 ++-- drivers/gpu/drm/msm/dp/dp_display.c | 89 - 3 files changed, 94 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index c724cb0..9c80b49 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1365,60 +1365,44 @@ static int dp_ctrl_enable_stream_clocks(struct dp_ctrl_private *ctrl) return ret; } -int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset) +void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable) +{ + struct dp_ctrl_private *ctrl; + + ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + + dp_catalog_ctrl_reset(ctrl->catalog); + + if (enable) + dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); +} + +void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return -EINVAL; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - ctrl->dp_ctrl.orientation = flip; - - if (reset) - dp_catalog_ctrl_reset(ctrl->catalog); - - DRM_DEBUG_DP("flip=%d\n", flip); dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_init(phy); - dp_catalog_ctrl_enable_irq(ctrl->catalog, true); - - return 0; } -/** - * dp_ctrl_host_deinit() - Uninitialize DP controller - * @dp_ctrl: Display Port Driver data - * - * Perform required steps to uninitialize DP controller - * and its resources. 
- */ -void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl) +void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - dp_catalog_ctrl_enable_irq(ctrl->catalog, false); + dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_exit(phy); - - DRM_DEBUG_DP("Host deinitialized successfully\n"); } static bool dp_ctrl_use_fixed_nvid(struct dp_ctrl_private *ctrl) @@ -1488,7 +1472,10 @@ static int dp_ctrl_deinitialize_mainlink(struct dp_ctrl_private *ctrl) } phy_power_off(phy); + + /* aux channel down, reinit phy */ phy_exit(phy); + phy_init(phy); return 0; } @@ -1893,8 +1880,14 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl) return ret; } + DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + phy_power_off(phy);
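Schematically, the four states described in the commit message map onto the reworked helpers as in the sketch below. This shows intended sequencing only; the regulator, clock and irq handling lives in dp_display, and the function names are taken from the diff:

	/* resume (or boot): controller reset, irq armed, ready for HPD */
	dp_ctrl_reset_irq_ctrl(dp_ctrl, true);

	/* HPD plugin irq: bring up the phy so dpcd access and link
	 * training can proceed */
	dp_ctrl_phy_init(dp_ctrl);

	/* HPD unplug irq: main link torn down, phy released */
	dp_ctrl_phy_exit(dp_ctrl);

	/* suspend: disarm the controller */
	dp_ctrl_reset_irq_ctrl(dp_ctrl, false);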
[PATCH v10 1/5] drm/msm/dp: dp_link_parse_sink_count() return immediately if aux read failed
Check the aux read/write status at both dp_link_parse_sink_count() and dp_link_parse_sink_status_field() to avoid a long timeout delay if the dp aux read/write fails due to the cable being unplugged. Also make sure the dp controller has been initialized before starting dpcd reads and writes. Changes in V4: -- split this patch as stand alone patch Changes in v5: -- rebase on msm-next branch Changes in v6: -- add more details commit text Changes in v10: -- group into one series Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Tested-by: Stephen Boyd --- drivers/gpu/drm/msm/dp/dp_display.c | 12 +--- drivers/gpu/drm/msm/dp/dp_link.c| 19 ++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 3d61459..0766752 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -692,9 +692,15 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } - ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; + /* +* dp core (ahb/aux clks) must be initialized before +* irq_hpd be handled +*/ + if (dp->core_initialized) { + ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); + if (ret == -ECONNRESET) { /* cable unplugged */ + dp->core_initialized = false; + } } DRM_DEBUG_DP("hpd_state=%d\n", state); diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index a5bdfc5..d4d31e5 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -737,18 +737,25 @@ static int dp_link_parse_sink_count(struct dp_link *dp_link) return 0; } -static void dp_link_parse_sink_status_field(struct dp_link_private *link) +static int dp_link_parse_sink_status_field(struct dp_link_private *link) { int len = 0; link->prev_sink_count = link->dp_link.sink_count; - dp_link_parse_sink_count(&link->dp_link); + len = dp_link_parse_sink_count(&link->dp_link); + if (len < 0) { + DRM_ERROR("DP parse sink count failed\n"); + return len; + } len = drm_dp_dpcd_read_link_status(link->aux, link->link_status); - if (len < DP_LINK_STATUS_SIZE) + if (len < DP_LINK_STATUS_SIZE) { DRM_ERROR("DP link status read failed\n"); - dp_link_parse_request(link); + return len; + } + + return dp_link_parse_request(link); } /** @@ -1023,7 +1030,9 @@ int dp_link_process_request(struct dp_link *dp_link) dp_link_reset_data(link); - dp_link_parse_sink_status_field(link); + ret = dp_link_parse_sink_status_field(link); + if (ret) + return ret; if (link->request.test_requested == DP_TEST_LINK_EDID_READ) { dp_link->sink_request |= DP_TEST_LINK_EDID_READ; -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project
[PATCH v10 0/5] group dp driver related patches into one series
Group the 5 dp driver related patches below into one series. Kuogee Hsieh (5): drm/msm/dp: dp_link_parse_sink_count() return immediately if aux read failed drm/msm/dp: do not initialize phy until plugin interrupt received drm/msm/dp: populate connector of struct dp_panel drm/msm/dp: add support of tps4 (training pattern 4) for HBR3 drm/msm/dp: stop link training after link training 2 failed drivers/gpu/drm/msm/dp/dp_catalog.c | 12 ++--- drivers/gpu/drm/msm/dp/dp_catalog.h | 2 +- drivers/gpu/drm/msm/dp/dp_ctrl.c| 100 drivers/gpu/drm/msm/dp/dp_ctrl.h| 8 +-- drivers/gpu/drm/msm/dp/dp_display.c | 98 --- drivers/gpu/drm/msm/dp/dp_link.c| 19 +-- 6 files changed, 140 insertions(+), 99 deletions(-) -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project
[PATCH 1/1] Add available memory ioctl for libhsakmt
From: Daniel Phillips Add an ioctl to inquire memory available for allocation by libhsakmt per node, allowing for space consumed by page translation tables. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c| 14 ++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c| 17 + include/uapi/linux/kfd_ioctl.h | 14 -- 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index fcbc8a9c9e06..64c6c36685d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 86a1a6c109d9..b7490a659173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -190,6 +190,20 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available_memory; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available_memory = + adev->gmc.real_vram_size - + adev->kfd.vram_used - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return available_memory; +} + static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4bfc0c8ab764..5c2f6d97ff1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -486,6 +486,20 @@ static int kfd_ioctl_get_queue_wave_state(struct file *filep, return r; } +static int kfd_ioctl_get_available_memory(struct file *filep, +struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_available_memory_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + args->available = amdgpu_amdkfd_get_available_memory(dev->adev); + return 0; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1959,6 +1973,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..94a99add2432 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Add available_memory ioctl */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 
major_version;/* from KFD */ @@ -98,6 +99,12 @@ struct kfd_ioctl_get_queue_wave_state_args { __u32 pad; }; +struct kfd_ioctl_get_available_memory_args { + __u64 available;/* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -742,7 +749,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_AVAILABLE_MEMORY\ + AMDKFD_IOR(0x22, struct kfd_ioctl_get_available_memory_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- 2.34.1
Re: [PATCH 1/4] dt-bindings: backlight: qcom-wled: Add PM6150L compatible
On Wed, 29 Dec 2021 18:03:55 +0100, Luca Weiss wrote: > Document the compatible for the wled block found in PM6150L. > > Signed-off-by: Luca Weiss > --- > Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml | 1 + > 1 file changed, 1 insertion(+) > Acked-by: Rob Herring
Phyr Starter
TLDR: I want to introduce a new data type: struct phyr { phys_addr_t addr; size_t len; }; and use it to replace bio_vec as well as using it to replace the array of struct pages used by get_user_pages() and friends. --- There are two distinct problems I want to address: doing I/O to memory which does not have a struct page and efficiently doing I/O to large blobs of physically contiguous memory, regardless of whether it has a struct page. There are some other improvements which I regard as minor. There are many types of memory that one might want to do I/O to that do not have a struct page, some examples: - Memory on a graphics card (or other PCI card, but gfx seems to be the primary provider of DRAM on the PCI bus today) - DAX, or other pmem (there are some fake pages today, but this is mostly a workaround for the IO problem today) - Guest memory being accessed from the hypervisor (KVM needs to create struct pages to make this happen. Xen doesn't ...) All of these kinds of memories can be addressed by the CPU and so also by a bus master. That is, there is a physical address that the CPU can use which will address this memory, and there is a way to convert that to a DMA address which can be programmed into another device. There's no intent here to support memory which can be accessed by a complex scheme like writing an address to a control register and then accessing the memory through a FIFO; this is for memory which can be accessed by DMA and CPU loads and stores. For get_user_pages() and friends, we currently fill an array of struct pages, each one representing PAGE_SIZE bytes. For an application that is using 1GB hugepages, writing 2^18 entries is a significant overhead. It also makes drivers hard to write as they have to recoalesce the struct pages, even though the VM can tell it whether those 2^18 pages are contiguous. On the minor side, struct phyr can represent any mappable chunk of memory. A bio_vec is limited to 2^32 bytes, while on 64-bit machines a phyr can represent larger than 4GB. A phyr is the same size as a bio_vec on 64 bit (16 bytes), and the same size for 32-bit with PAE (12 bytes). It is smaller for 32-bit machines without PAE (8 bytes instead of 12). Finally, it may be possible to stop using scatterlist to describe the input to the DMA-mapping operation. We may be able to get struct scatterlist down to just dma_address and dma_length, with chaining handled through an enclosing struct. I would like to see phyr replace bio_vec everywhere it's currently used. I don't have time to do that work now because I'm busy with folios. If someone else wants to take that on, I shall cheer from the sidelines. What I do intend to do is: - Add an interface to gup.c to pin/unpin N phyrs - Add a sg_map_phyrs(). This will take an array of phyrs and allocate an sg for them - Whatever else I need to do to make one RDMA driver happy with this scheme At that point, I intend to stop and let others more familiar with this area of the kernel continue the conversion of drivers. P.S. If you've had the Prodigy song running through your head the whole time you've been reading this email ... I'm sorry / You're welcome. If people insist, we can rename this to phys_range or something boring, but I quite like the spelling of phyr with the pronunciation of "fire".
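To make the proposal concrete, here is a sketch of what a pin-and-map sequence might look like in a driver. get_user_phyrs() and sg_map_phyrs() are the proposed interfaces described above, not existing kernel API, and their exact signatures are guesses:

#include <linux/scatterlist.h>

struct phyr {
	phys_addr_t addr;
	size_t len;
};

static int example_pin_and_map(struct device *dev, unsigned long uaddr,
			       size_t size, struct sg_table *sgt)
{
	struct phyr phyrs[16];	/* small on-stack batch, as discussed */
	int n;

	/* Pin the user range; a single phyr may cover a whole 1GB
	 * hugepage instead of 2^18 struct page pointers. */
	n = get_user_phyrs(uaddr, size, phyrs, ARRAY_SIZE(phyrs));
	if (n < 0)
		return n;

	/* Build an sg table from the physical ranges for DMA mapping */
	return sg_map_phyrs(dev, sgt, phyrs, n);
}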
Re: [PATCH] drm/amd/display: invalid parameter check in dmub_hpd_callback
Applied. Thanks! Alex On Mon, Jan 10, 2022 at 11:34 AM Harry Wentland wrote: > > On 2022-01-09 13:42, José Expósito wrote: > > The function performs a check on the "adev" input parameter; however, it > > is used before the check. > > > > Initialize the "dev" variable after the sanity check to avoid a possible > > NULL pointer dereference. > > > > Fixes: e27c41d5b0681 ("drm/amd/display: Support for DMUB HPD interrupt > > handling") > > Addresses-Coverity-ID: 1493909 ("Null pointer dereference") > > Signed-off-by: José Expósito > > Reviewed-by: Harry Wentland > > Harry > > > --- > > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- > > 1 file changed, 2 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > index e727f1dd2a9a..7fbded7a6d9c 100644 > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > @@ -656,7 +656,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, > > struct dmub_notification *not > > struct drm_connector_list_iter iter; > > struct dc_link *link; > > uint8_t link_index = 0; > > - struct drm_device *dev = adev->dm.ddev; > > + struct drm_device *dev; > > > > if (adev == NULL) > > return; > > @@ -673,6 +673,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, > > struct dmub_notification *not > > > > link_index = notify->link_index; > > link = adev->dm.dc->links[link_index]; > > + dev = adev->dm.ddev; > > > > drm_connector_list_iter_begin(dev, &iter); > > drm_for_each_connector_iter(connector, &iter) { >
Re: [PATCH] drm/msm/dp: Add DisplayPort controller for SM8350
On Mon, 27 Dec 2021 20:59:34 -0800, Bjorn Andersson wrote: > The Qualcomm SM8350 platform comes with a single DisplayPort controller, > add support for this in the DisplayPort driver. > > Signed-off-by: Bjorn Andersson > --- > .../devicetree/bindings/display/msm/dp-controller.yaml| 1 + > drivers/gpu/drm/msm/dp/dp_display.c | 8 > 2 files changed, 9 insertions(+) > Acked-by: Rob Herring
Re: [PATCH 2/2] drm/panfrost: adjusted job affinity for dual core group GPUs
> Whether it's worth the effort depends on whether anyone really cares > about getting the full performance out of this particular GPU. > > At this stage I think the main UABI change would be to add the opposite > flag to kbase, (e.g. "PANFROST_JD_DOESNT_NEED_COHERENCY_ON_GPU"[1]) to > opt-in to allowing the job to run across all cores. > > The second change would be to allow compute jobs to be run on the second > core group, so another flag: PANFROST_RUN_ON_SECOND_CORE_GROUP. > > But clearly there's little point adding such flags until someone steps > up to do the Mesa work. I worry about the maintenance burden (both Mesa and kernel) of adding UABI only used by a piece of hardware none of us own, and only useful "sometimes" for that hardware. Doubly so for the second core group support; currently Mesa doesn't advertise any compute support on anything older than Mali T760 ... to the best of my knowledge, nobody has missed that support either... To be clear I am in favour of merging the patches needed for GLES2 to work on all Malis, possibly at a performance cost on these dual-core systems. That's a far cry from the level of support the DDK gave these chips back in the day ... of course, the DDK doesn't support them at all anymore, so Panfrost wins there by default! ;)
Re: [RFC PATCH] drm/panfrost: Handle IDVS_GROUP_SIZE feature
> > This feature adds an extra IDVS group size field to the JM_CONFIG > > register. In kbase, the value is configurable via the device tree; kbase > > uses 0xF as a default if no value is specified. Until we find a device > > demanding otherwise, let's always set the 0xF default on devices which > > support this feature mimicking kbase's behaviour. > > This is a performance thing - so I don't think it will break anything if > this is wrong, it just won't be optimal. Then interpret my remarks as hardcoding the default until we find a device where setting to something other than 0xF improves performance nontrivially. (Read: I am lazy and do not want to write dt-bindings for something nobody will ever use.) > > As JM_CONFIG is an undocumented register, it's not clear to me what > > happens if we fail to include this handling. Index-driven vertex shading > > already works on Bifrost boards with this feature without this handling. > > Perhaps this has performance implications? Patch untested for the > > moment, wanted to give Steven a chance to comment. > > As it's a performance thing you shouldn't see correctness issues with > not setting it. But 0xF seems to have been chosen as it gave the best > overall performance (although for individual test content this can > vary). AFAICT the performance impact isn't massive either. Good to know, will update the commit message accordingly. > Reviewed-by: Steven Price > > Since you've tagged this RFC I won't merge it now, but it looks correct > to me. Thanks for the review... I hope you like reviewing Panfrost patches because I have a Valhall bring-up series waiting o:) When I get a chance to uprev the kernel on my G52 board I'll see if I can benchmark the impact of this change; so far this is only compile-tested. Even if there's no impact, the patch should likely go in to stay consistent with kbase, but hopefully there's a win from this. At that point I'll send a v2 with your reviewed-by (and hopefully no changes other than the commit message) and we'll land that.
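For concreteness, the handling under discussion would look roughly like the sketch below in the driver's quirk setup. The field position and the feature test are assumptions modeled on kbase's JM_CONFIG handling, not documented register layout:

#define JM_IDVS_GROUP_SIZE_SHIFT	16	/* assumed, mirrors kbase */
#define JM_DEFAULT_IDVS_GROUP_SIZE	0xF	/* kbase's default */

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE))
		quirks |= JM_DEFAULT_IDVS_GROUP_SIZE <<
			  JM_IDVS_GROUP_SIZE_SHIFT;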
Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process
Hi Christian, I have reverted the change from amd-staging-drm-next as per the discussion. Thank you. Regards Rajneesh On 1/4/2022 1:08 PM, Felix Kuehling wrote: [+Adrian] Am 2021-12-23 um 2:05 a.m. schrieb Christian König: Am 22.12.21 um 21:53 schrieb Daniel Vetter: On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote: [SNIP] Still sounds funky. I think minimally we should have an ack from CRIU developers that this is officially the right way to solve this problem. I really don't want to have random one-off hacks that don't work across the board, for a problem where we (drm subsystem) really shouldn't be the only one with this problem. Where "this problem" means that the mmap space is per file description, and not per underlying inode or real device or whatever. That part sounds like a CRIU problem, and I expect CRIU folks want a consistent solution across the board for this. Hence please grab an ack from them. Unfortunately it's a KFD design problem. AMD used a single device node, then mmaped different objects from the same offset to different processes and expected it to work with the rest of the fs subsystem without churn. This may be true for mmaps in the KFD device, but not for mmaps in the DRM render nodes. So yes, this is indeed because the mmap space is per file descriptor for the use case here. No. This is a different problem. The problem has to do with the way that DRM manages mmap permissions. In order to be able to mmap an offset in the render node, there needs to be a BO that was created in the same render node. If you fork a process, it inherits the VMA. But KFD doesn't know anything about the inherited BOs from the parent process. Therefore those BOs don't get checkpointed and restored in the child process. When the CRIU checkpoint is restored, our CRIU plugin never creates a BO corresponding to the VMA in the child process' render node FD. We've also lost the relationship between the parent and child-process' render node FDs. After "fork" the render node FD points to the same struct file in parent and child. After restoring the CRIU checkpoint, they are separate struct files, created by separate "open" system calls. Therefore the mmap call that restores the VMA fails in the child process. At least for KFD, there is no point inheriting BOs from a child process, because the GPU has no way of accessing the BOs in the child process. The child process has no GPU address space, no user mode queues, no way to do anything with the GPU before it completely reinitializes its KFD context. We can work around this issue in user mode with madvise(..., MADV_DONTFORK). In fact we've already done this for some BOs to avoid a memory leak in the parent process while a child process exists. But it's slightly racy because there is a short time window where the VMA exists without the VM_DONTCOPY flag. A fork during that time window could still create a child process with an inherited VMA. Therefore a safer solution is to set the vm_flags in the VMA in the driver when the VMA is first created. Regards, Felix And thanks for pointing this out, this indeed makes the whole change extremely questionable. Regards, Christian. Cheers, Daniel
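The two approaches Felix describes, side by side, as an illustrative sketch (one userspace fragment, one kernel fragment; neither is the actual patch, and the function names are hypothetical):

/* user-mode workaround -- racy, since a fork() can land in the window
 * between mmap() and madvise() */
static void *map_nofork(int render_fd, size_t size, off_t offset)
{
	void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, render_fd, offset);

	if (buf != MAP_FAILED)
		madvise(buf, size, MADV_DONTFORK);
	return buf;
}

/* safer: mark the VMA when the driver first creates it, so there is no
 * window in which it can be inherited across fork() */
static int example_driver_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_DONTCOPY;
	/* remaining mapping setup elided */
	return 0;
}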
Re: [v2 1/3] dt-bindings: msm/dsi: Add 10nm dsi phy tuning properties
On Mon, Jan 10, 2022 at 05:06:03PM +0300, Dmitry Baryshkov wrote: > On Mon, 10 Jan 2022 at 15:56, Rajeev Nandan wrote: > > > > In most cases, the default values of DSI PHY tuning registers should be > > sufficient as they are fully optimized. However, in some cases where > > extreme board parasitics cause the eye shape to degrade, the override > > bits can be used to improve the signal quality. > > > > The general guidelines for DSI PHY tuning include: > > - High and moderate data rates may benefit from the drive strength and > > drive level tuning. > > - Drive strength tuning will affect the output impedance and may be used > > for matching optimization. > > - Drive level tuning will affect the output levels without affecting the > > impedance. > > > > The clock and data lanes have a calibration circuitry feature. The drive > > strength tuning can be done by adjusting rescode offset for hstop/hsbot, > > and the drive level tuning can be done by adjusting the LDO output level > > for the HSTX drive. > > > > Signed-off-by: Rajeev Nandan > > --- > > > > Changes in v2: > > - More details in the commit text (Stephen Boyd) > > - Use human understandable values (Stephen Boyd, Dmitry Baryshkov) > > - Do not take values that are going to be unused (Dmitry Baryshkov) > > > > .../bindings/display/msm/dsi-phy-10nm.yaml | 33 > > ++ > > 1 file changed, 33 insertions(+) > > > > diff --git > > a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > > b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > > index 4399715..d0eb8f6 100644 > > --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > > +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > > @@ -35,6 +35,35 @@ properties: > >Connected to DSI0_MIPI_DSI_PLL_VDDA0P9 pin for sc7180 target and > >connected to VDDA_MIPI_DSI_0_PLL_0P9 pin for sdm845 target > > Generic note: > I think these properties should be prefixed with "qcom," prefix. > > > > > + phy-rescode-offset-top: > > +$ref: /schemas/types.yaml#/definitions/uint8-array > > +minItems: 5 > > +maxItems: 5 > > +description: > > + Integer array of offset for pull-up legs rescode for all five lanes. > > + To offset the drive strength from the calibrated value in an > > increasing > > + or decreasing manner, use 6 bit two’s complement values. > > dtc should support negative values, google hints that <(-2)> should work. Yes, but the schema checks don't check negative values correctly yet. So you can use 'int8-array', but just don't use negative values in the examples. I'm working on changes that will fix this issue. What does 6-bit mean? 0x3f is negative? Just sign extend the values and specify the valid range instead: minimum: -32 maximum: 31 Rob
[PATCH v2] drm: bridge: nwl-dsi: Drop panel_bridge from nwl_dsi
The panel_bridge pointer is never used anywhere except where it is looked up in nwl_dsi_bridge_attach(). Drop it from the nwl_dsi structure. Reviewed-by: Guido Günther Signed-off-by: Jagan Teki --- Changes for v2: - collect Guido r-b Note: This patch is part of the devm_drm_of_get_bridge switching series; however, the child node support of devm_drm_of_get_bridge is still under review. So send it separately as it is not related to that API switch. https://patchwork.kernel.org/project/dri-devel/patch/20211210174819.2250178-1-ja...@amarulasolutions.com/ drivers/gpu/drm/bridge/nwl-dsi.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c index fc3ad9fab867..9282e61dfbf0 100644 --- a/drivers/gpu/drm/bridge/nwl-dsi.c +++ b/drivers/gpu/drm/bridge/nwl-dsi.c @@ -65,7 +65,6 @@ struct nwl_dsi_transfer { struct nwl_dsi { struct drm_bridge bridge; struct mipi_dsi_host dsi_host; - struct drm_bridge *panel_bridge; struct device *dev; struct phy *phy; union phy_configure_opts phy_cfg; @@ -924,13 +923,11 @@ static int nwl_dsi_bridge_attach(struct drm_bridge *bridge, if (IS_ERR(panel_bridge)) return PTR_ERR(panel_bridge); } - dsi->panel_bridge = panel_bridge; - if (!dsi->panel_bridge) + if (!panel_bridge) return -EPROBE_DEFER; - return drm_bridge_attach(bridge->encoder, dsi->panel_bridge, bridge, -flags); + return drm_bridge_attach(bridge->encoder, panel_bridge, bridge, flags); } static void nwl_dsi_bridge_detach(struct drm_bridge *bridge) -- 2.25.1
[PATCH v7 6/6] drm/i915: Use struct vma_resource instead of struct vma_snapshot
There is always a struct vma_resource guaranteed to be alive when we access a corresponding struct vma_snapshot. So ditch the latter and instead of allocating vma_snapshots, reference the already existing vma_resource. This requires a couple of extra members in struct vma_resource but that's a small price to pay for the simplification. v2: - Fix a missing include and declaration (kernel test robot ) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/Makefile | 1 - .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +- drivers/gpu/drm/i915/i915_gpu_error.c | 87 ++-- drivers/gpu/drm/i915/i915_request.c | 12 +- drivers/gpu/drm/i915/i915_request.h | 6 +- drivers/gpu/drm/i915/i915_vma.c | 16 +-- drivers/gpu/drm/i915/i915_vma_resource.c | 4 + drivers/gpu/drm/i915/i915_vma_resource.h | 28 +++- drivers/gpu/drm/i915/i915_vma_snapshot.c | 125 -- drivers/gpu/drm/i915/i915_vma_snapshot.h | 101 -- 11 files changed, 90 insertions(+), 314 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_vma_snapshot.c delete mode 100644 drivers/gpu/drm/i915/i915_vma_snapshot.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 98433ad74194..aa86ac33effc 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -175,7 +175,6 @@ i915-y += \ i915_ttm_buddy_manager.o \ i915_vma.o \ i915_vma_resource.o \ - i915_vma_snapshot.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3e359de0e460..cf283b5f6ffe 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -31,7 +31,6 @@ #include "i915_gem_ioctls.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_vma_snapshot.h" struct eb_vma { struct i915_vma *vma; @@ -1954,7 +1953,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; unsigned int i = count, j; - struct i915_vma_snapshot *vsnap; while (i--) { struct eb_vma *ev = &eb->vma[i]; @@ -1964,11 +1962,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb) if (!(flags & EXEC_OBJECT_CAPTURE)) continue; - vsnap = i915_vma_snapshot_alloc(GFP_KERNEL); - if (!vsnap) - continue; - - i915_vma_snapshot_init(vsnap, vma, "user"); for_each_batch_create_order(eb, j) { struct i915_capture_list *capture; @@ -1977,10 +1970,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb) continue; capture->next = eb->capture_lists[j]; - capture->vma_snapshot = i915_vma_snapshot_get(vsnap); + capture->vma_res = i915_vma_resource_get(vma->resource); eb->capture_lists[j] = capture; } - i915_vma_snapshot_put(vsnap); } } @@ -3283,9 +3275,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, * _onstack interface. 
*/ if (eb->batches[i]->vma) - i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot, - eb->batches[i]->vma, - "batch"); + eb->requests[i]->batch_res = + i915_vma_resource_get(eb->batches[i]->vma->resource); if (eb->batch_pool) { GEM_BUG_ON(intel_context_is_parallel(eb->context)); intel_gt_buffer_pool_mark_active(eb->batch_pool, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7b793a295475..baa346a49976 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1708,18 +1708,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, static void print_request_ring(struct drm_printer *m, struct i915_request *rq) { - struct i915_vma_snapshot *vsnap = &rq->batch_snapshot; + struct i915_vma_resource *vma_res = rq->batch_res; void *ring; int size; - if (!i915_vma_snapshot_present(vsnap)) - vsnap = NULL; - drm_printf(m, "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", rq->head, rq->postfix, rq->tail, - vsnap ? up
[PATCH v7 5/6] drm/i915: Asynchronous migration selftest
Add a selftest to exercise asynchronous migration and -unbinding. Extend the gem_migrate selftest to perform the migrations while depending on a spinner and a bound vma set up on the migrated buffer object. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 12 ++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 3 + .../drm/i915/gem/selftests/i915_gem_migrate.c | 192 -- 3 files changed, 192 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d87b508b59b1..1a9e1f940a7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -756,6 +756,18 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) return dma_fence_get(i915_gem_to_ttm(obj)->moving); } +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence) +{ + struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving; + + if (*moving == fence) + return; + + dma_fence_put(*moving); + *moving = dma_fence_get(fence); +} + /** * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any * @obj: The object whose moving fence to wait for. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc448f895ae8..02c37fe4a535 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -523,6 +523,9 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) struct dma_fence * i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence); + int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index ecb691c81d1e..d534141b2cf7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,8 +4,13 @@ */ #include "gt/intel_migrate.h" +#include "gt/intel_gpu_commands.h" #include "gem/i915_gem_ttm_move.h" +#include "i915_deps.h" + +#include "selftests/igt_spinner.h" + static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) { @@ -101,7 +106,8 @@ static int igt_same_create_migrate(void *arg) } static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, - struct drm_i915_gem_object *obj) + struct drm_i915_gem_object *obj, + struct i915_vma *vma) { int err; @@ -109,6 +115,24 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, if (err) return err; + if (vma) { + err = i915_vma_pin_ww(vma, ww, obj->base.size, 0, + 0UL | PIN_OFFSET_FIXED | + PIN_USER); + if (err) { + if (err != -EINTR && err != ERESTARTSYS && + err != -EDEADLK) + pr_err("Failed to pin vma.\n"); + return err; + } + + i915_vma_unpin(vma); + } + + /* +* Migration will implicitly unbind (asynchronously) any bound +* vmas. 
+*/ if (i915_gem_object_is_lmem(obj)) { err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); if (err) { @@ -149,11 +173,15 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, return err; } -static int igt_lmem_pages_migrate(void *arg) +static int __igt_lmem_pages_migrate(struct intel_gt *gt, + struct i915_address_space *vm, + struct i915_deps *deps, + struct igt_spinner *spin, + struct dma_fence *spin_fence) { - struct intel_gt *gt = arg; struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; + struct i915_vma *vma = NULL; struct i915_gem_ww_ctx ww; struct i915_request *rq; int err; @@ -165,6 +193,14 @@ static int igt_lmem_pages_migrate(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + if (vm) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + } + /* Initial GPU fill, sync, CPU i
[PATCH v7 4/6] drm/i915: Use vma resources for async unbinding
Implement async (non-blocking) unbinding by not syncing the vma before calling unbind on the vma_resource. Add the resulting unbind fence to the object's dma_resv from where it is picked up by the ttm migration code. Ideally these unbind fences should be coalesced with the migration blit fence to avoid stalling the migration blit waiting for unbind, as they can certainly go on in parallel, but since we don't yet have a reasonable data structure to use to coalesce fences and attach the resulting fence to a timeline, we defer that for now. Note that with async unbinding, even while the unbind waits for the preceding bind to complete before unbinding, the vma itself might have been destroyed in the process, clearing the vma pages. Therefore we can only allow async unbinding if we have a refcounted sg-list and keep a refcount on that for the vma resource pages to stay intact until binding occurs. If this condition is not met, a request for an async unbind is diverted to a sync unbind. v2: - Use a separate kmem_cache for vma resources for now to isolate their memory allocation and aid debugging. - Move the check for vm closed to the actual unbinding thread. Regardless of whether the vm is closed, we need the unbind fence to properly wait for capture. - Clear vma_res::vm on unbind and update its documentation. v4: - Take cache coloring into account when searching for vma resources pending unbind. (Matthew Auld) v5: - Fix timeout and error check in i915_vma_resource_bind_dep_await(). - Avoid taking a reference on the object for async binding if async unbind capable. - Fix braces around a single-line if statement. v6: - Fix up the cache coloring adjustment. (Kernel test robot ) - Don't allow async unbinding if the vma_res pages are not the same as the object pages. (Matthew Auld) v7: - s/unsigned long/u64/ in a number of places (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 11 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 4 + drivers/gpu/drm/i915/gt/intel_gtt.h | 3 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +- drivers/gpu/drm/i915/i915_module.c | 3 + drivers/gpu/drm/i915/i915_vma.c | 205 +-- drivers/gpu/drm/i915/i915_vma.h | 3 +- drivers/gpu/drm/i915/i915_vma_resource.c | 354 +-- drivers/gpu/drm/i915/i915_vma_resource.h | 48 +++ 11 files changed, 579 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 8653855d808b..1de306c03aaf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + /* +* Note: The async unbinding here will actually transform the +* blocking wait for unbind into a wait before finally submitting +* evict / migration blit and thus stall the migration timeline +* which may not be good for overall throughput. We should make +* sure we await the unbind fences *after* the migration blit +* instead of *before* as we currently do. 
+*/ + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | +I915_GEM_OBJECT_UNBIND_ASYNC); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index e49b6250c4b7..a1b2761bc16e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -142,7 +142,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) continue; if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index a94be0306464..46be4197b93f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -161,6 +161,9 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + /* Synchronize async unbinds. */ + i915_vma_resource_bind_dep_sync_all(vm); + vm->cleanup(vm); i915_address_space_fini(vm); @@ -189,6 +192,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->
[PATCH v7 3/6] drm/i915: Don't pin the object pages during pending vma binds
A pin-count is already held by vma->pages so taking an additional pin during async binds is not necessary. When we introduce async unbinding we have other means of keeping the object pages alive. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_vma.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 42fff9ddf096..29c770a764aa 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -306,10 +306,8 @@ static void __vma_release(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - if (vw->pinned) { - __i915_gem_object_unpin_pages(vw->pinned); + if (vw->pinned) i915_gem_object_put(vw->pinned); - } i915_vm_free_pt_stash(vw->vm, &vw->stash); i915_vm_put(vw->vm); @@ -478,7 +476,6 @@ int i915_vma_bind(struct i915_vma *vma, work->base.dma.error = 0; /* enable the queue_work() */ - __i915_gem_object_pin_pages(vma->obj); work->pinned = i915_gem_object_get(vma->obj); } else { if (vma->obj) { -- 2.31.1
[PATCH v7 2/6] drm/i915: Use the vma resource as argument for gtt binding / unbinding
When introducing asynchronous unbinding, the vma itself may no longer be alive when the actual binding or unbinding takes place. Update the gtt i915_vma_ops accordingly to take a struct i915_vma_resource instead of a struct i915_vma for the bind_vma() and unbind_vma() ops. Similarly change the insert_entries() op for struct i915_address_space. Replace a couple of i915_vma_snapshot members with their newly introduced i915_vma_resource counterparts, since they have the same lifetime. Also make sure to avoid changing the struct i915_vma_flags (in particular the bind flags) async. That should now only be done sync under the vm mutex. v2: - Update the vma_res::bound_flags when binding to the aliased ggtt v6: - Remove I915_VMA_ALLOC_BIT (Matthew Auld) - Change some members of struct i915_vma_resource from unsigned long to u64 (Matthew Auld) v7: - Fix vma resource size parameters to be u64 rather than unsigned long (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/display/intel_dpt.c | 27 ++--- .../gpu/drm/i915/gem/i915_gem_object_types.h | 27 + .../gpu/drm/i915/gem/selftests/huge_pages.c | 37 +++ drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 19 ++-- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 37 +++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 70 ++--- drivers/gpu/drm/i915/gt/intel_gtt.h | 16 +-- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 22 +++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 13 ++- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 6 +- drivers/gpu/drm/i915/i915_vma.c | 24 - drivers/gpu/drm/i915/i915_vma.h | 11 +-- drivers/gpu/drm/i915/i915_vma_resource.c | 9 +- drivers/gpu/drm/i915/i915_vma_resource.h | 99 ++- drivers/gpu/drm/i915/i915_vma_snapshot.c | 4 - drivers/gpu/drm/i915/i915_vma_snapshot.h | 8 -- drivers/gpu/drm/i915/i915_vma_types.h | 14 ++- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 64 drivers/gpu/drm/i915/selftests/mock_gtt.c | 12 +-- 22 files changed, 314 insertions(+), 214 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 8f674745e7e0..63a83d5f85a1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -48,7 +48,7 @@ static void dpt_insert_page(struct i915_address_space *vm, } static void dpt_insert_entries(struct i915_address_space *vm, - struct i915_vma *vma, + struct i915_vma_resource *vma_res, enum i915_cache_level level, u32 flags) { @@ -64,8 +64,8 @@ static void dpt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. 
*/ - i = vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, sgt_iter, vma->pages) + i = vma_res->start / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, sgt_iter, vma_res->bi.pages) gen8_set_pte(&base[i++], pte_encode | addr); } @@ -76,35 +76,38 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, -struct i915_vma *vma, +struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags) { - struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; + if (vma_res->bound_flags) + return; + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; - if (vma->vm->has_read_only && i915_gem_object_is_readonly(obj)) + if (vm->has_read_only && vma_res->bi.readonly) pte_flags |= PTE_READ_ONLY; - if (i915_gem_object_is_lmem(obj)) + if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, cache_level, pte_flags); - vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; /* * Without aliasing PPGTT there's no difference between * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally * upgrade to both bound if we bind either to avoid double-binding. */ - atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); + vma_res->bound_flags = I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; } -static void dpt_unbind_vma(struct i915_address_space *
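The essential property of the new interface is visible in the hunk above: the bind path consults only state owned by the resource, never the (possibly already dying) vma. Distilled into a standalone sketch with illustrative types, not the actual i915 structures:

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for the fields struct i915_vma_resource snapshots at bind
 * time, so PTE setup never has to dereference the vma or the object. */
struct example_vma_resource {
        uint64_t start;         /* replaces vma->node.start */
        const void *pages;      /* replaces vma->pages */
        uint32_t bound_flags;   /* replaces async updates of vma->flags */
        bool readonly;          /* captured from the object at bind time */
        bool lmem;              /* likewise */
};

static void example_bind(struct example_vma_resource *res, uint32_t pte_flags)
{
        if (res->bound_flags)
                return;         /* already bound, nothing to do */
        /* ... PTE writes derived solely from res->start / res->pages ... */
        res->bound_flags = pte_flags;
}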
[PATCH v7 1/6] drm/i915: Initial introduction of vma resources
Introduce vma resources, sort of similar to TTM resources, needed for asynchronous bind management. Initially we will use them to hold completion of unbinding when we capture data from a vma, but they will be used extensively in upcoming patches for asynchronous vma unbinding. v6: - Some documentation updates Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 2 +- drivers/gpu/drm/i915/i915_vma.c | 55 +++- drivers/gpu/drm/i915/i915_vma.h | 19 ++- drivers/gpu/drm/i915/i915_vma_resource.c | 124 ++ drivers/gpu/drm/i915/i915_vma_resource.h | 69 ++ drivers/gpu/drm/i915/i915_vma_snapshot.c | 15 +-- drivers/gpu/drm/i915/i915_vma_snapshot.h | 7 +- drivers/gpu/drm/i915/i915_vma_types.h | 5 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 99 -- 10 files changed, 333 insertions(+), 63 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_vma_resource.c create mode 100644 drivers/gpu/drm/i915/i915_vma_resource.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1b62b9f65196..98433ad74194 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -174,6 +174,7 @@ i915-y += \ i915_trace_points.o \ i915_ttm_buddy_manager.o \ i915_vma.o \ + i915_vma_resource.o \ i915_vma_snapshot.o \ intel_wopcm.o diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9e221ce42707..3e359de0e460 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1424,7 +1424,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, mutex_lock(&vma->vm->mutex); err = i915_vma_bind(target->vma, target->vma->obj->cache_level, - PIN_GLOBAL, NULL); + PIN_GLOBAL, NULL, NULL); mutex_unlock(&vma->vm->mutex); reloc_cache_remap(&eb->reloc_cache, ev->vma->obj); if (err) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 81a611b7d36f..05dcbc259b82 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -38,6 +38,7 @@ #include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" +#include "i915_vma_resource.h" static struct kmem_cache *slab_vmas; @@ -381,6 +382,8 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma) * @cache_level: mapping cache level * @flags: flags like global or local mapping * @work: preallocated worker for allocating and binding the PTE + * @vma_res: pointer to a preallocated vma resource. The resource is either + * consumed or freed. * * DMA addresses are taken from the scatter-gather table of this object (or of * this VMA in case of non-default GGTT views) and PTE entries set up. 
@@ -389,7 +392,8 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma) int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags, - struct i915_vma_work *work) + struct i915_vma_work *work, + struct i915_vma_resource *vma_res) { u32 bind_flags; u32 vma_flags; @@ -400,11 +404,15 @@ int i915_vma_bind(struct i915_vma *vma, if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start, vma->node.size, - vma->vm->total))) + vma->vm->total))) { + kfree(vma_res); return -ENODEV; + } - if (GEM_DEBUG_WARN_ON(!flags)) + if (GEM_DEBUG_WARN_ON(!flags)) { + kfree(vma_res); return -EINVAL; + } bind_flags = flags; bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; @@ -413,11 +421,21 @@ int i915_vma_bind(struct i915_vma *vma, vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; bind_flags &= ~vma_flags; - if (bind_flags == 0) + if (bind_flags == 0) { + kfree(vma_res); return 0; + } GEM_BUG_ON(!atomic_read(&vma->pages_count)); + if (vma->resource || !vma_res) { + /* Rebinding with an additional I915_VMA_*_BIND */ + GEM_WARN_ON(!vma_flags); + kfree(vma_res); + } else { + i915_vma_resource_init(vma_res); + vma->resource = vma_res; + } trace_i915_vma_bind(vma, bind_flags); if (work && bind_flags & vma->v
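The kerneldoc note that the resource is "either consumed or freed" is the ownership rule every early return in the hunk above enforces. The same pattern as a standalone sketch (illustrative names only, not the i915 functions):

#include <stdlib.h>

struct example_resource { int payload; };

static struct example_resource *stashed;        /* longer-lived owner */

/*
 * The caller preallocates (so allocation failure is handled where it
 * can still fail gracefully); the callee owns the pointer from then on,
 * freeing it on every early return and stashing it exactly once on
 * success, so ownership never leaks.
 */
static int example_bind(struct example_resource *res, unsigned int flags)
{
        if (!flags) {           /* early error path: free */
                free(res);
                return -1;
        }
        if (stashed) {          /* rebind: keep the existing resource */
                free(res);
                return 0;
        }
        stashed = res;          /* success: consumed */
        return 0;
}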
[PATCH v7 0/6] drm/i915: Asynchronous vma unbinding
This patch series introduces infrastructure for asynchronous vma unbinding. The single use-case enabled initially is buffer object migration, where we otherwise sync when unbinding vmas before migration. This in theory allows us to pipeline any number of migrations, but in practice the number is restricted by a sync wait when filling the migration context ring. We might want to look at that moving forward if needed. The other main use-case is to be able to pipeline vma evictions, for example with softpinning where a new vma wants to reuse the vm range of an already active vma. We can't support this just yet because we need dma_resv locking around vma eviction for that, which is under implementation.

Patch 1 introduces vma resources, initially for error capture purposes.
Patch 2 changes the vm backend interface to take vma resources rather than vmas.
Patch 3 removes an unneeded page pinning.
Patch 4 introduces the async unbinding itself.
Patch 5 introduces a selftest.
Finally, patch 6 realizes we have duplicated functionality and removes the vma snapshots.

v2:
-- Some kernel test robot reports addressed.
-- kmem cache for vma resources, see patch 4.
-- Various fixes all over the place. See separate commit messages.
v3:
-- Re-add a missing i915_vma_resource_put()
-- Remove a stray debug printout
v4:
-- Patch series split in two. This is the second part.
-- Take cache coloring into account when searching for vma_resources pending unbind. (Matthew Auld)
v5:
-- Add a selftest.
-- Remove page pinning while sync binding.
-- A couple of fixes in i915_vma_resource_bind_dep_await()
v6:
-- Some documentation updates
-- Remove I915_VMA_ALLOC_BIT (Matthew Auld)
-- Change some members of struct i915_vma_resource from unsigned long to u64 (Matthew Auld)
-- Fix up the cache coloring adjustment. (Kernel test robot )
-- Don't allow async unbinding if the vma_res pages are not the same as the object pages.
(Matthew Auld) v7: -- More s/unsigned long/u64/ changes (Matthew Auld) Thomas Hellström (6): drm/i915: Initial introduction of vma resources drm/i915: Use the vma resource as argument for gtt binding / unbinding drm/i915: Don't pin the object pages during pending vma binds drm/i915: Use vma resources for async unbinding drm/i915: Asynchronous migration selftest drm/i915: Use struct vma_resource instead of struct vma_snapshot drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/display/intel_dpt.c | 27 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 17 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 12 + drivers/gpu/drm/i915/gem/i915_gem_object.h| 3 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 27 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 11 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 37 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 192 +++- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 19 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 37 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 72 +-- drivers/gpu/drm/i915/gt/intel_gtt.c | 4 + drivers/gpu/drm/i915/gt/intel_gtt.h | 19 +- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 22 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 13 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +- drivers/gpu/drm/i915/i915_gpu_error.c | 87 ++-- drivers/gpu/drm/i915/i915_module.c| 3 + drivers/gpu/drm/i915/i915_request.c | 12 +- drivers/gpu/drm/i915/i915_request.h | 6 +- drivers/gpu/drm/i915/i915_vma.c | 241 +- drivers/gpu/drm/i915/i915_vma.h | 33 +- drivers/gpu/drm/i915/i915_vma_resource.c | 417 ++ drivers/gpu/drm/i915/i915_vma_resource.h | 234 ++ drivers/gpu/drm/i915/i915_vma_snapshot.c | 134 -- drivers/gpu/drm/i915/i915_vma_snapshot.h | 112 - drivers/gpu/drm/i915/i915_vma_types.h | 19 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 159 --- drivers/gpu/drm/i915/selftests/mock_gtt.c | 12 +- 34 files changed, 1421 insertions(+), 589 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_vma_resource.c create mode 100644 drivers/gpu/drm/i915/i915_vma_resource.h delete mode 100644 drivers/gpu/drm/i915/i915_vma_snapshot.c delete mode 100644 drivers/gpu/drm/i915/i915_vma_snapshot.h -- 2.31.1
Re: [RFC PATCH] drm/panfrost: Handle IDVS_GROUP_SIZE feature
On 09/01/2022 17:12, Alyssa Rosenzweig wrote: > The IDVS group size feature was missing. It is used on some Bifrost and > Valhall GPUs, and is the last kernel-relevant Bifrost feature we're > missing. > > This feature adds an extra IDVS group size field to the JM_CONFIG > register. In kbase, the value is configurable via the device tree; kbase > uses 0xF as a default if no value is specified. Until we find a device > demanding otherwise, let's always set the 0xF default on devices which > support this feature mimicking kbase's behaviour. This is a performance thing - so I don't think it will break anything if this is wrong, it just won't be optimal. > As JM_CONFIG is an undocumented register, it's not clear to me what > happens if we fail to include this handling. Index-driven vertex shading > already works on Bifrost boards with this feature without this handling. > Perhaps this has performance implications? Patch untested for the > moment, wanted to give Steven a chance to comment. As it's a performance thing you shouldn't see correctness issues with not setting it. But 0xF seems to have been chosen as it gave the best overall performance (although for individual test content this can vary). AFAICT the performance impact isn't massive either. > Applies on top of my feature clean up series which should go in first. > (That's pure cleaunp, this is a behaviour change RFC needing > discussion.) > > Signed-off-by: Alyssa Rosenzweig Reviewed-by: Steven Price Since you've tagged this RFC I won't merge it now, but it looks correct to me. Thanks, Steve > --- > drivers/gpu/drm/panfrost/panfrost_features.h | 3 +++ > drivers/gpu/drm/panfrost/panfrost_gpu.c | 3 +++ > drivers/gpu/drm/panfrost/panfrost_regs.h | 1 + > 3 files changed, 7 insertions(+) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h > b/drivers/gpu/drm/panfrost/panfrost_features.h > index 34f2bae1ec8c..36fadcf9634e 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_features.h > +++ b/drivers/gpu/drm/panfrost/panfrost_features.h > @@ -20,6 +20,7 @@ enum panfrost_hw_feature { > HW_FEATURE_AARCH64_MMU, > HW_FEATURE_TLS_HASHING, > HW_FEATURE_THREAD_GROUP_SPLIT, > + HW_FEATURE_IDVS_GROUP_SIZE, > HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, > }; > > @@ -74,6 +75,7 @@ enum panfrost_hw_feature { > BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ > BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ > BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ > + BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \ > BIT_ULL(HW_FEATURE_COHERENCY_REG)) > > #define hw_features_g76 (\ > @@ -87,6 +89,7 @@ enum panfrost_hw_feature { > BIT_ULL(HW_FEATURE_COHERENCY_REG) | \ > BIT_ULL(HW_FEATURE_AARCH64_MMU) | \ > BIT_ULL(HW_FEATURE_TLS_HASHING) | \ > + BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \ > BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) > > #define hw_features_g31 (\ > diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c > b/drivers/gpu/drm/panfrost/panfrost_gpu.c > index bbe628b306ee..50c8922694d7 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c > +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c > @@ -145,6 +145,9 @@ static void panfrost_gpu_init_quirks(struct > panfrost_device *pfdev) > quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << > JM_FORCE_COHERENCY_FEATURES_SHIFT; > > + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE)) > + quirks |= JM_DEFAULT_IDVS_GROUP_SIZE << > JM_IDVS_GROUP_SIZE_SHIFT; > + > if (quirks) > gpu_write(pfdev, GPU_JM_CONFIG, quirks); > > diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h > b/drivers/gpu/drm/panfrost/panfrost_regs.h > index 
6c5a11ef1ee8..16e776cc82ea 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_regs.h > +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h > @@ -208,6 +208,7 @@ > #define JM_MAX_JOB_THROTTLE_LIMIT0x3F > #define JM_FORCE_COHERENCY_FEATURES_SHIFT 2 > #define JM_IDVS_GROUP_SIZE_SHIFT 16 > +#define JM_DEFAULT_IDVS_GROUP_SIZE 0xF > #define JM_MAX_IDVS_GROUP_SIZE 0x3F > > >
Re: [v2 1/3] dt-bindings: msm/dsi: Add 10nm dsi phy tuning properties
On Mon, 10 Jan 2022 18:25:35 +0530, Rajeev Nandan wrote: > In most cases, the default values of DSI PHY tuning registers should be > sufficient as they are fully optimized. However, in some cases where > extreme board parasitics cause the eye shape to degrade, the override > bits can be used to improve the signal quality. > > The general guidelines for DSI PHY tuning include: > - High and moderate data rates may benefit from the drive strength and > drive level tuning. > - Drive strength tuning will affect the output impedance and may be used > for matching optimization. > - Drive level tuning will affect the output levels without affecting the > impedance. > > The clock and data lanes have a calibration circuitry feature. The drive > strength tuning can be done by adjusting rescode offset for hstop/hsbot, > and the drive level tuning can be done by adjusting the LDO output level > for the HSTX drive. > > Signed-off-by: Rajeev Nandan > --- > > Changes in v2: > - More details in the commit text (Stephen Boyd) > - Use human understandable values (Stephen Boyd, Dmitry Baryshkov) > - Do not take values that are going to be unused (Dmitry Baryshkov) > > .../bindings/display/msm/dsi-phy-10nm.yaml | 33 > ++ > 1 file changed, 33 insertions(+) > My bot found errors running 'make DT_CHECKER_FLAGS=-m dt_binding_check' on your patch (DT_CHECKER_FLAGS is new in v5.13): yamllint warnings/errors: ./Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml:63:54: [error] syntax error: mapping values are not allowed here (syntax) dtschema/dtc warnings/errors: ./Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml: mapping values are not allowed in this context in "", line 63, column 54 make[1]: *** Deleting file 'Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.example.dts' Traceback (most recent call last): File "/usr/local/bin/dt-extract-example", line 46, in binding = yaml.load(open(args.yamlfile, encoding='utf-8').read()) File "/usr/local/lib/python3.8/dist-packages/ruamel/yaml/main.py", line 434, in load return constructor.get_single_data() File "/usr/local/lib/python3.8/dist-packages/ruamel/yaml/constructor.py", line 119, in get_single_data node = self.composer.get_single_node() File "_ruamel_yaml.pyx", line 706, in _ruamel_yaml.CParser.get_single_node File "_ruamel_yaml.pyx", line 724, in _ruamel_yaml.CParser._compose_document File "_ruamel_yaml.pyx", line 775, in _ruamel_yaml.CParser._compose_node File "_ruamel_yaml.pyx", line 889, in _ruamel_yaml.CParser._compose_mapping_node File "_ruamel_yaml.pyx", line 775, in _ruamel_yaml.CParser._compose_node File "_ruamel_yaml.pyx", line 889, in _ruamel_yaml.CParser._compose_mapping_node File "_ruamel_yaml.pyx", line 775, in _ruamel_yaml.CParser._compose_node File "_ruamel_yaml.pyx", line 891, in _ruamel_yaml.CParser._compose_mapping_node File "_ruamel_yaml.pyx", line 904, in _ruamel_yaml.CParser._parse_next_event ruamel.yaml.scanner.ScannerError: mapping values are not allowed in this context in "", line 63, column 54 make[1]: *** [Documentation/devicetree/bindings/Makefile:25: Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.example.dts] Error 1 make[1]: *** Waiting for unfinished jobs /builds/robherring/linux-dt-review/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml: ignoring, error parsing file make: *** [Makefile:1413: dt_binding_check] Error 2 doc reference errors (make refcheckdocs): See https://patchwork.ozlabs.org/patch/1577891 This check can fail if there are any dependencies. 
The base for a patch series is generally the most recent rc1. If you already ran 'make dt_binding_check' and didn't see the above error(s), then make sure 'yamllint' is installed and dt-schema is up to date: pip3 install dtschema --upgrade Please check and re-submit.
Re: [git pull] drm fixes for 5.16-rc3
On Sun, Jan 9, 2022 at 11:38 PM Geert Uytterhoeven wrote: > > The commit that merged this branch made a seemingly innocent change to > the top Makefile: "Seemingly" innocent? Or something darker and more sinister, related to the unrelenting slaughter of flightless fowl? You be the judge. Linus
Re: [PATCH] drm/panfrost: Update create_bo flags comment
On 09/01/2022 16:37, Alyssa Rosenzweig wrote: > Update a comment stating create_bo took no flags, since it now takes a > bit mask of optional flags NOEXEC and HEAP. > > Signed-off-by: Alyssa Rosenzweig Reviewed-by: Steven Price I'll push this to drm-misc-next. Thanks, Steve > --- > include/uapi/drm/panfrost_drm.h | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h > index 061e700dd06c..9e40277d8185 100644 > --- a/include/uapi/drm/panfrost_drm.h > +++ b/include/uapi/drm/panfrost_drm.h > @@ -84,14 +84,14 @@ struct drm_panfrost_wait_bo { > __s64 timeout_ns; /* absolute */ > }; > > +/* Valid flags to pass to drm_panfrost_create_bo */ > #define PANFROST_BO_NOEXEC 1 > #define PANFROST_BO_HEAP 2 > > /** > * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. > * > - * There are currently no values for the flags argument, but it may be > - * used in a future extension. > + * The flags argument is a bit mask of PANFROST_BO_* flags. > */ > struct drm_panfrost_create_bo { > __u32 size; >
Re: [PATCH] drm/amd/display: invalid parameter check in dmub_hpd_callback
On 2022-01-09 13:42, José Expósito wrote: > The function performs a check on the "adev" input parameter, however, it > is used before the check. > > Initialize the "dev" variable after the sanity check to avoid a possible > NULL pointer dereference. > > Fixes: e27c41d5b0681 ("drm/amd/display: Support for DMUB HPD interrupt > handling") > Addresses-Coverity-ID: 1493909 ("Null pointer dereference") > Signed-off-by: José Expósito Reviewed-by: Harry Wentland Harry > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index e727f1dd2a9a..7fbded7a6d9c 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -656,7 +656,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct > dmub_notification *not > struct drm_connector_list_iter iter; > struct dc_link *link; > uint8_t link_index = 0; > - struct drm_device *dev = adev->dm.ddev; > + struct drm_device *dev; > > if (adev == NULL) > return; > @@ -673,6 +673,7 @@ void dmub_hpd_callback(struct amdgpu_device *adev, struct > dmub_notification *not > > link_index = notify->link_index; > link = adev->dm.dc->links[link_index]; > + dev = adev->dm.ddev; > > drm_connector_list_iter_begin(dev, &iter); > drm_for_each_connector_iter(connector, &iter) {
Re: [Intel-gfx] [PATCH 2/2] drm/mst: use DP_GET_SINK_COUNT() for sink count in ESI
On Tue, Jan 04, 2022 at 08:48:57PM +0200, Jani Nikula wrote: > Take bit 7 into account when reading sink count from > DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0. > > Signed-off-by: Jani Nikula > --- > drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c > b/drivers/gpu/drm/drm_dp_mst_topology.c > index f3d79eda94bb..ab4372e9fe43 100644 > --- a/drivers/gpu/drm/drm_dp_mst_topology.c > +++ b/drivers/gpu/drm/drm_dp_mst_topology.c > @@ -4196,7 +4196,7 @@ int drm_dp_mst_hpd_irq(struct drm_dp_mst_topology_mgr > *mgr, u8 *esi, bool *handl > int ret = 0; > int sc; > *handled = false; > - sc = esi[0] & 0x3f; > + sc = DP_GET_SINK_COUNT(esi[0]); I wouldn't mind a s/sc/sink_count/ as well. Reviewed-by: Ville Syrjälä > > if (sc != mgr->sink_count) { > mgr->sink_count = sc; > -- > 2.30.2 -- Ville Syrjälä Intel
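For readers without the header open: the SINK_COUNT field spreads a 7-bit count across bits 0-5 and bit 7, with bit 6 reserved for CP_READY, which is what the macro folds back together. A self-contained sketch of the decode (this mirrors my reading of drm_dp_helper.h; verify against your tree):

#include <stdint.h>

/* SINK_COUNT layout: count[5:0] in bits 0-5, count[6] in bit 7;
 * bit 6 (CP_READY) must not leak into the count. */
static inline uint8_t example_get_sink_count(uint8_t reg)
{
        return ((reg & 0x80) >> 1) | (reg & 0x3f);
}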
Re: [Intel-gfx] [PATCH 1/2] drm/dp: note that DPCD 0x2002-0x2003 match 0x200-0x201
On Tue, Jan 04, 2022 at 08:48:56PM +0200, Jani Nikula wrote: > DP_SINK_COUNT_ESI and DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 have the same > contents as DP_SINK_COUNT and DP_DEVICE_SERVICE_IRQ_VECTOR, > respectively. IIRC there was an oversight in the earlier spec revisions that showed bit 7 as reserved for one of the locations. But looks like that got fixed. Reviewed-by: Ville Syrjälä > > Signed-off-by: Jani Nikula > --- > include/drm/drm_dp_helper.h | 7 ++- > 1 file changed, 2 insertions(+), 5 deletions(-) > > diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h > index 30359e434c3f..98d020835b49 100644 > --- a/include/drm/drm_dp_helper.h > +++ b/include/drm/drm_dp_helper.h > @@ -1038,11 +1038,8 @@ struct drm_panel; > #define DP_SIDEBAND_MSG_UP_REQ_BASE 0x1600 /* 1.2 MST */ > > /* DPRX Event Status Indicator */ > -#define DP_SINK_COUNT_ESI0x2002 /* 1.2 */ > -/* 0-5 sink count */ > -# define DP_SINK_COUNT_CP_READY (1 << 6) > - > -#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 0x2003 /* 1.2 */ > +#define DP_SINK_COUNT_ESI 0x2002 /* same as 0x200 */ > +#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 0x2003 /* same as 0x201 */ > > #define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1 0x2004 /* 1.2 */ > # define DP_RX_GTC_MSTR_REQ_STATUS_CHANGE(1 << 0) > -- > 2.30.2 -- Ville Syrjälä Intel
Re: [PATCH 3/3] drm/atomic: Make private objs proper objects
On Fri, Dec 31, 2021 at 03:23:31PM +0200, Jani Nikula wrote: > On Wed, 12 Jul 2017, ville.syrj...@linux.intel.com wrote: > > From: Ville Syrjälä > > > > Make the atomic private object stuff less special by introducing proper > > base classes for the object and its state. Drivers can embed these in > > their own appropriate objects, after which these things will work > > exactly like the plane/crtc/connector states during atomic operations. > > > > v2: Reorder to not depend on drm_dynarray (Daniel) > > > > Cc: Dhinakaran Pandiyan > > Cc: Daniel Vetter > > Reviewed-by: Daniel Vetter #v1 > > Signed-off-by: Ville Syrjälä > > Stumbled upon an old commit > > commit a4370c777406c2810e37fafd166ccddecdb2a60c > Author: Ville Syrjälä > Date: Wed Jul 12 18:51:02 2017 +0300 > > drm/atomic: Make private objs proper objects > > which is this patch. > > > @@ -3050,8 +3043,7 @@ struct drm_dp_mst_topology_state > > *drm_atomic_get_mst_topology_state(struct drm_a > > struct drm_device *dev = mgr->dev; > > > > WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); > > - return drm_atomic_get_private_obj_state(state, mgr, > > - &mst_state_funcs); > > + return to_dp_mst_topology_state(drm_atomic_get_private_obj_state(state, > > &mgr->base)); > > } > > EXPORT_SYMBOL(drm_atomic_get_mst_topology_state); > > I don't think this combines well with... > > > diff --git a/include/drm/drm_dp_mst_helper.h > > b/include/drm/drm_dp_mst_helper.h > > index 177ab6f86855..d55abb75f29a 100644 > > --- a/include/drm/drm_dp_mst_helper.h > > +++ b/include/drm/drm_dp_mst_helper.h > > @@ -404,12 +404,17 @@ struct drm_dp_payload { > > int vcpi; > > }; > > > > +#define to_dp_mst_topology_state(x) container_of(x, struct > > drm_dp_mst_topology_state, base) > > ...this in case of error pointers that > drm_atomic_get_private_obj_state() may return. offsetof(base)==0 so should work in practice. -- Ville Syrjälä Intel
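Ville's "offsetof(base)==0" remark is doing more work than it looks like. A standalone illustration (simplified container_of, made-up types):

#include <stddef.h>

struct base_state { int dummy; };

struct topology_state {
        struct base_state base; /* at offset 0: the downcast is a no-op */
        int private_stuff;
};

#define container_of_(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static struct topology_state *to_topology(struct base_state *b)
{
        return container_of_(b, struct topology_state, base);
}

/*
 * If the inner call returns an error pointer (e.g. (void *)-EDEADLK),
 * the downcast subtracts offsetof(type, member) from it. With the
 * embedded base at offset 0 the subtraction is zero, so the error
 * encoding survives and IS_ERR() on the result still works; any
 * non-zero member offset would silently corrupt the error value.
 * That is exactly why "works in practice" depends on struct layout.
 */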
Re: [PATCH 1/2] drm: exynos: dsi: Convert to bridge driver
On Mon, 10 Jan 2022 at 16:35, Jagan Teki wrote: > > Hi Robert, > > On Mon, Jan 10, 2022 at 9:02 PM Robert Foss wrote: > > > > Hey Jagan, > > > > This is a mistake on my end, I must have been looking at reviewing > > this series and then accidentally included it with another batch of > > patches. Thank you for catching this. > > Thanks for the response. > > > > > I would suggest reverting these two patches[1][2]. Is that ok with you? > > May be I will revert 1/2. but 2/2 is valid. Please let me know, if you > have any concerns on reverting 1/2. Please go ahead!
Re: [git pull] drm final fixes for 5.16
On Fri, Jan 7, 2022 at 6:42 PM Linus Torvalds wrote: > > On Thu, Jan 6, 2022 at 7:23 PM Dave Airlie wrote: > > > > There is only the amdgpu runtime pm regression fix in here. > > Thanks, from a quick test it works for me - the backlight actually > does eventually go away. > > It does so only on the second time the monitors say "no signal, going > to power save", but that has been true before too. > > So I think there's still some confusion in this area, but it might be > elsewhere - who knows what Wayland and friends do. At least it doesn't > look like a regression to me any more. Well it's not a true fix, just a "go back to exact old behaviour, but limited to relevant gpus for amdgpu only" so that i915 doesn't regress. I think there's some more debug to do here and Alex/Harry&team can look at leisure now :-) Cheers, Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Re: [PATCH 1/2] drm: exynos: dsi: Convert to bridge driver
Hi Robert, On Mon, Jan 10, 2022 at 9:02 PM Robert Foss wrote: > > Hey Jagan, > > This is a mistake on my end, I must have been looking at reviewing > this series and then accidentally included it with another batch of > patches. Thank you for catching this. Thanks for the response. > > I would suggest reverting these two patches[1][2]. Is that ok with you? Maybe I will revert 1/2, but 2/2 is valid. Please let me know if you have any concerns about reverting 1/2. Thanks, Jagan.
Re: [PATCH 1/2] drm: exynos: dsi: Convert to bridge driver
Hey Jagan, This is a mistake on my end, I must have been looking at reviewing this series and then accidentally included it with another batch of patches. Thank you for catching this. I would suggest reverting these two patches[1][2]. Is that ok with you? [1] https://cgit.freedesktop.org/drm/drm-misc/commit/?id=92e794fab87af0793403d5e4a547f0be94a0e656 [2] https://cgit.freedesktop.org/drm/drm-misc/commit/?id=aee039e66035b66f0c587cc1b0dd32fb04c9a892 On Mon, 10 Jan 2022 at 12:17, Jagan Teki wrote: > > Hi Robert, > > On Mon, Nov 22, 2021 at 9:34 PM Marek Szyprowski > wrote: > > > > On 22.11.2021 16:07, Marek Szyprowski wrote: > > > On 22.11.2021 15:55, Jagan Teki wrote: > > >> On Mon, Nov 22, 2021 at 7:59 PM Jagan Teki > > >> wrote: > > >>> On Mon, Nov 22, 2021 at 7:51 PM Jagan Teki > > >>> wrote: > > On Mon, Nov 22, 2021 at 7:45 PM Marek Szyprowski > > wrote: > > > On 22.11.2021 08:06, Jagan Teki wrote: > > >> Some display panels would come up with a non-DSI output, those > > >> can have an option to connect the DSI host by means of interface > > >> bridge converter. > > >> > > >> This DSI to non-DSI interface bridge converter would requires > > >> DSI Host to handle drm bridge functionalities in order to DSI > > >> Host to Interface bridge. > > >> > > >> This patch convert the existing to a drm bridge driver with a > > >> built-in encoder support for compatibility with existing > > >> component drivers. > > >> > > >> Signed-off-by: Jagan Teki > > >> --- > > >> Note: > > >> Hi Marek Szyprowski, > > >> > > >> Please test this on Panel and Bridge hardware. > > > I don't have good news, t crashes: > > > > > > [drm] Exynos DRM: using 1380.decon device for DMA mapping > > > operations > > > exynos-drm exynos-drm: bound 1380.decon (ops decon_component_ops) > > > exynos-drm exynos-drm: bound 1388.decon (ops decon_component_ops) > > > exynos-drm exynos-drm: bound 1393.mic (ops > > > exynos_mic_component_ops) > > > [drm:drm_bridge_attach] *ERROR* failed to attach bridge > > > /soc@0/dsi@1390 to encoder TMDS-67: -22 > > > exynos-drm exynos-drm: failed to bind 1390.dsi (ops > > > exynos_dsi_component_ops): -22 > > > Internal error: synchronous external abort: 96000210 [#1] PREEMPT SMP > > > Modules linked in: > > > CPU: 2 PID: 74 Comm: kworker/u16:1 Not tainted 5.16.0-rc1+ #4141 > > > Hardware name: Samsung TM2E board (DT) > > > Workqueue: events_unbound deferred_probe_work_func > > > pstate: 8005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) > > > pc : decon_atomic_disable+0x58/0xd4 > > > lr : decon_atomic_disable+0x28/0xd4 > > > sp : 80001390b940 > > > x29: 80001390b940 x28: 80001259a000 x27: 27f39e80 > > > input: stmfts as > > > /devices/platform/soc@0/14ed.hsi2c/i2c-3/3-0049/input/input0 > > > x26: ffea x25: 25a40280 x24: 0001 > > > x23: 800011b55f98 x22: 315dc000 x21: 2695d100 > > > x20: 27e7a080 x19: 315e6000 x18: > > > x17: 645f736f6e797865 x16: 2073706f28206973 x15: 00028ee0 > > > x14: 0028 x13: 0001 x12: 0040 > > > x11: 23c18920 x10: 23c18922 x9 : 8000126352f0 > > > x8 : 23c00270 x7 : x6 : 23c00268 > > > x5 : 27e7a3a0 x4 : 0001 x3 : 27e7a080 > > > x2 : 0024 x1 : 800013bc8024 x0 : 246117c0 > > > Call trace: > > >decon_atomic_disable+0x58/0xd4 > > >decon_unbind+0x1c/0x3c > > >component_unbind+0x38/0x60 > > >component_bind_all+0x16c/0x25c > > >exynos_drm_bind+0x104/0x1bc > > >try_to_bring_up_master+0x164/0x1d0 > > >__component_add+0xa8/0x174 > > >component_add+0x14/0x20 > > >hdmi_probe+0x438/0x710 > > >platform_probe+0x68/0xe0 > > >really_probe.part.0+0x9c/0x31c > > 
>__driver_probe_device+0x98/0x144 > > >driver_probe_device+0xc8/0x160 > > >__device_attach_driver+0xb8/0x120 > > >bus_for_each_drv+0x78/0xd0 > > >__device_attach+0xd8/0x180 > > >device_initial_probe+0x14/0x20 > > >bus_probe_device+0x9c/0xa4 > > >deferred_probe_work_func+0x88/0xc4 > > >process_one_work+0x288/0x6f0 > > >worker_thread+0x74/0x470 > > >kthread+0x188/0x194 > > >ret_from_fork+0x10/0x20 > > > Code: 11002042 f9481c61 531e7442 8b020021 (88dffc21) > > > ---[ end trace d73aff585b108954 ]--- > > > Kernel panic - not syncing: synchronous external abort: Fatal > > > exception > > > SMP: stopping secondary CPUs > > > Kernel Offset: disabled > > > CPU features: 0x2,300071c2,0846 > > > Memory Limit: none > > > ---[ end Ke
Re: [PATCH v6 4/6] drm/i915: Use vma resources for async unbinding
On 1/10/22 14:21, Matthew Auld wrote: On 07/01/2022 14:23, Thomas Hellström wrote: Implement async (non-blocking) unbinding by not syncing the vma before calling unbind on the vma_resource. Add the resulting unbind fence to the object's dma_resv from where it is picked up by the ttm migration code. Ideally these unbind fences should be coalesced with the migration blit fence to avoid stalling the migration blit waiting for unbind, as they can certainly go on in parallel, but since we don't yet have a reasonable data structure to use to coalesce fences and attach the resulting fence to a timeline, we defer that for now. Note that with async unbinding, even while the unbind waits for the preceding bind to complete before unbinding, the vma itself might have been destroyed in the process, clearing the vma pages. Therefore we can only allow async unbinding if we have a refcounted sg-list and keep a refcount on that for the vma resource pages to stay intact until binding occurs. If this condition is not met, a request for an async unbind is diverted to a sync unbind. v2: - Use a separate kmem_cache for vma resources for now to isolate their memory allocation and aid debugging. - Move the check for vm closed to the actual unbinding thread. Regardless of whether the vm is closed, we need the unbind fence to properly wait for capture. - Clear vma_res::vm on unbind and update its documentation. v4: - Take cache coloring into account when searching for vma resources pending unbind. (Matthew Auld) v5: - Fix timeout and error check in i915_vma_resource_bind_dep_await(). - Avoid taking a reference on the object for async binding if async unbind capable. - Fix braces around a single-line if statement. v6: - Fix up the cache coloring adjustment. (Kernel test robot ) - Don't allow async unbinding if the vma_res pages are not the same as the object pages. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 11 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 4 + drivers/gpu/drm/i915/gt/intel_gtt.h | 3 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +- drivers/gpu/drm/i915/i915_module.c | 3 + drivers/gpu/drm/i915/i915_vma.c | 205 +-- drivers/gpu/drm/i915/i915_vma.h | 3 +- drivers/gpu/drm/i915/i915_vma_resource.c | 354 +-- drivers/gpu/drm/i915/i915_vma_resource.h | 48 +++ 11 files changed, 579 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 8653855d808b..1de306c03aaf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + /* + * Note: The async unbinding here will actually transform the + * blocking wait for unbind into a wait before finally submitting + * evict / migration blit and thus stall the migration timeline + * which may not be good for overall throughput. We should make + * sure we await the unbind fences *after* the migration blit + * instead of *before* as we currently do. 
+ */ + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_ASYNC); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index e49b6250c4b7..a1b2761bc16e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -142,7 +142,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) continue; if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index a94be0306464..46be4197b93f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -161,6 +161,9 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + /* Synchronize async unbinds. */ + i915_vma_resource_bind_dep_sync_all(vm); + vm->cleanup(vm); i915_address_space_fini(vm); @@ -189,6 +192,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); + vm->pending_unbind = RB_ROOT_CACHED; INIT_WORK(&vm->rele
Re: [PATCH v6 5/6] drm/i915: Asynchronous migration selftest
On 10/01/2022 14:36, Thomas Hellström wrote: On 1/10/22 14:59, Matthew Auld wrote: On 07/01/2022 14:23, Thomas Hellström wrote: Add a selftest to exercise asynchronous migration and -unbining. Extend the gem_migrate selftest to perform the migrations while depending on a spinner and a bound vma set up on the migrated buffer object. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 12 ++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 + .../drm/i915/gem/selftests/i915_gem_migrate.c | 192 -- 3 files changed, 192 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d87b508b59b1..1a9e1f940a7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -756,6 +756,18 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) return dma_fence_get(i915_gem_to_ttm(obj)->moving); } +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence) +{ + struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving; + + if (*moving == fence) + return; + + dma_fence_put(*moving); + *moving = dma_fence_get(fence); +} + /** * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any * @obj: The object whose moving fence to wait for. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f66d46882ea7..1d178236 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -524,6 +524,9 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) struct dma_fence * i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence); + int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index ecb691c81d1e..d534141b2cf7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,8 +4,13 @@ */ #include "gt/intel_migrate.h" +#include "gt/intel_gpu_commands.h" #include "gem/i915_gem_ttm_move.h" +#include "i915_deps.h" + +#include "selftests/igt_spinner.h" + static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) { @@ -101,7 +106,8 @@ static int igt_same_create_migrate(void *arg) } static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, - struct drm_i915_gem_object *obj) + struct drm_i915_gem_object *obj, + struct i915_vma *vma) { int err; @@ -109,6 +115,24 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, if (err) return err; + if (vma) { + err = i915_vma_pin_ww(vma, ww, obj->base.size, 0, + 0UL | PIN_OFFSET_FIXED | + PIN_USER); + if (err) { + if (err != -EINTR && err != ERESTARTSYS && + err != -EDEADLK) + pr_err("Failed to pin vma.\n"); + return err; + } + + i915_vma_unpin(vma); + } + + /* + * Migration will implicitly unbind (asynchronously) any bound + * vmas. 
+ */ if (i915_gem_object_is_lmem(obj)) { err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); if (err) { @@ -149,11 +173,15 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, return err; } -static int igt_lmem_pages_migrate(void *arg) +static int __igt_lmem_pages_migrate(struct intel_gt *gt, + struct i915_address_space *vm, + struct i915_deps *deps, + struct igt_spinner *spin, + struct dma_fence *spin_fence) { - struct intel_gt *gt = arg; struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; + struct i915_vma *vma = NULL; struct i915_gem_ww_ctx ww; struct i915_request *rq; int err; @@ -165,6 +193,14 @@ static int igt_lmem_pages_migrate(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + if (vm) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + } + /* Initial GPU fill, sync, CPU initialization. */ for_i915_gem_ww(&ww, err, true) { err = i915_gem_object_lock(obj, &ww); @@ -175,25 +211,23 @@ static int igt_lmem_pages_migrate(void *arg) if (err) continue; - err = intel_migrate_clear(>->migrate, &ww,
Re: [PATCH v6 5/6] drm/i915: Asynchronous migration selftest
On 1/10/22 14:59, Matthew Auld wrote: On 07/01/2022 14:23, Thomas Hellström wrote: Add a selftest to exercise asynchronous migration and -unbining. Extend the gem_migrate selftest to perform the migrations while depending on a spinner and a bound vma set up on the migrated buffer object. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 12 ++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 + .../drm/i915/gem/selftests/i915_gem_migrate.c | 192 -- 3 files changed, 192 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d87b508b59b1..1a9e1f940a7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -756,6 +756,18 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) return dma_fence_get(i915_gem_to_ttm(obj)->moving); } +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence) +{ + struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving; + + if (*moving == fence) + return; + + dma_fence_put(*moving); + *moving = dma_fence_get(fence); +} + /** * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any * @obj: The object whose moving fence to wait for. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f66d46882ea7..1d178236 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -524,6 +524,9 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) struct dma_fence * i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence); + int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index ecb691c81d1e..d534141b2cf7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,8 +4,13 @@ */ #include "gt/intel_migrate.h" +#include "gt/intel_gpu_commands.h" #include "gem/i915_gem_ttm_move.h" +#include "i915_deps.h" + +#include "selftests/igt_spinner.h" + static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) { @@ -101,7 +106,8 @@ static int igt_same_create_migrate(void *arg) } static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, - struct drm_i915_gem_object *obj) + struct drm_i915_gem_object *obj, + struct i915_vma *vma) { int err; @@ -109,6 +115,24 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, if (err) return err; + if (vma) { + err = i915_vma_pin_ww(vma, ww, obj->base.size, 0, + 0UL | PIN_OFFSET_FIXED | + PIN_USER); + if (err) { + if (err != -EINTR && err != ERESTARTSYS && + err != -EDEADLK) + pr_err("Failed to pin vma.\n"); + return err; + } + + i915_vma_unpin(vma); + } + + /* + * Migration will implicitly unbind (asynchronously) any bound + * vmas. 
+ */ if (i915_gem_object_is_lmem(obj)) { err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); if (err) { @@ -149,11 +173,15 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, return err; } -static int igt_lmem_pages_migrate(void *arg) +static int __igt_lmem_pages_migrate(struct intel_gt *gt, + struct i915_address_space *vm, + struct i915_deps *deps, + struct igt_spinner *spin, + struct dma_fence *spin_fence) { - struct intel_gt *gt = arg; struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; + struct i915_vma *vma = NULL; struct i915_gem_ww_ctx ww; struct i915_request *rq; int err; @@ -165,6 +193,14 @@ static int igt_lmem_pages_migrate(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + if (vm) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + } + /* Initial GPU fill, sync, CPU initialization. */ for_i915_gem_ww(&ww, err, true) { err = i915_gem_object_lock(obj, &ww); @@ -175,25 +211,23 @@ static int igt_lmem_pages_migrate(void *arg) if (err) continue; - err = intel_migrate_clear(>->migrate, &ww, NULL, + err = intel_migrate_clear(>->m
Re: [Intel-gfx] [PATCH v6 6/6] drm/i915: Use struct vma_resource instead of struct vma_snapshot
On Fri, 7 Jan 2022 at 14:24, Thomas Hellström wrote: > > There is always a struct vma_resource guaranteed to be alive when we > access a corresponding struct vma_snapshot. > > So ditch the latter and instead of allocating vma_snapshots, reference > the already existing vma_resource. > > This requires a couple of extra members in struct vma_resource but that's > a small price to pay for the simplification. > > v2: > - Fix a missing include and declaration (kernel test robot ) > > Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld
Re: [PATCH 2/2] drm/panfrost: adjusted job affinity for dual core group GPUs
On 24/12/2021 08:56, Alexey Sheplyakov wrote: > Hi, > > On 23.12.2021 18:11, Alyssa Rosenzweig wrote: >>> The kernel driver itself can't guess which jobs need a such a strict >>> affinity, so setting proper requirements is the responsibility of >>> the userspace (Mesa). However the userspace is not smart enough [yet]. >>> Therefore this patch applies the above affinity rule to all jobs on >>> dual core group GPUs. >> >> What does Mesa need to do for this to work "properly"? > > I don't know. > The blob restricts affinity of jobs with JD_REQ_COHERENT_GROUP requirement. > In theory jobs without such a requirement can run on any core, but in > practice all jobs in slots 0, 1 are assigned to core group 0 (with workloads > I've run - i.e. weston, firefox, glmark2, perhaps it's also SoC dependent). > So I've forced all jobs in slots 0, 1 to core group 0. Surprisingly this > (and memory attributes adjustment) appeared to be enough to get panfrost > working with T628 (on some SoCs). Without these patches GPU locks up in > a few seconds. Let me fill in a few details here. The T628 is pretty unique in that it has two core groups, i.e. more than one L2 cache. Previous designs (i.e. T604) didn't have enough cores to require a second core group, and later designs with sufficient cores have coherent L2 caches so act as a single core group (although the hardware has multiple L2s it only reports a single one as they act as if a single cache). Note that technically the T608, T658 and T678 also exist and have this problem - but I don't believe any products were produced with these (so unless you're in ARM with a very unusual FPGA they can be ignored). The blob/kbase handle this situation with a new flag JD_REQ_COHERENT_GROUP which specifies that the affinity of a job must land on a single (coherent) core group, and JD_REQ_SPECIFIC_COHERENT_GROUP which allows user space to target a specific group. In theory fragment shading can be performed over all cores (because a fragment shader job doesn't need coherency between threads), so doesn't need the JD_REQ_COHERENT_GROUP flag; vertex shading, however, must be run on the same core group as the tiler (which always lives in core group 0). Of course there are various 'tricks' that can happen even within a fragment shader which might require coherency. So the expected sequence is that vertex+tiling is restricted to core group 0, and fragment shading can be run over all cores. Although there can be issues with performance doing this naïvely, because the Job Manager doesn't necessarily share the GPU's cores fairly between vertex and fragment jobs. Also note that a cache flush is needed between running the vertex+tiling and the fragment job to ensure that the extra core group is coherent - this can be expensive, so it may not be worth using the second core group in some situations. I'm not sure what logic the Blob uses for that. Finally there's GPU compute (i.e. OpenCL): here coherency is usually required, but there's often more information about the amount of coherency needed. In this case it is possible to run different job chains on each core group. This is the only situation where slot 2 is used, and is the reason for the JD_REQ_SPECIFIC_COHERENT_GROUP flag. It's also a nightmare for scheduling as the hardware gets upset if the affinity masks for slot 1 and slot 2 overlap. >> What are the limitations of the approach implemented here? > > Suboptimal performance.
> > 1) There might be job chains which don't care about affinity >(I haven't seen any of these yet on systems I've got). You are effectively throwing away half the cores if everything is pinned to core group 0, so I'm pretty sure the Blob manages to run (some) fragment jobs without the COHERENT_GROUP flag. But equally this is a reasonable first step for the kernel driver - we can make the GPU look like ever other GPU by pretending the second core group doesn't exist. > 2) There might be dual core group GPUs which don't need such a strict > affinity. >(I haven't seen any dual core group T[78]xx GPUs yet. This doesn't mean > such > GPUs don't exist). They should all be a single core group (fully coherent L2s). >> If we need to extend it down the line with a UABI change, what would that >> look like? > > I have no idea. And I'm not sure if it's worth the effort (since most jobs > end up on core group 0 anyway). Whether it's worth the effort depends on whether anyone really cares about getting the full performance out of this particular GPU. At this stage I think the main UABI change would be to add the opposite flag to kbase, (e.g. "PANFROST_JD_DOESNT_NEED_COHERENCY_ON_GPU"[1]) to opt-in to allowing the job to run across all cores. The second change would be to allow compute jobs to be run on the second core group, so another flag: PANFROST_RUN_ON_SECOND_CORE_GROUP. But clearly there's little point adding such flags until someone steps up to do the Mesa work. Steve [1] Bike-shedding the
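To make the slot 0/1 restriction concrete, here is a standalone sketch of deriving a core-group-0 affinity mask. The even-split-across-L2s assumption is mine for illustration; the real group boundaries come from the GPU's topology registers, not from this arithmetic:

#include <stdint.h>

/*
 * Assume shader cores are numbered densely from bit 0 and split evenly
 * across L2 slices. On a T628 with 8 cores and 2 L2s this yields 0x0f:
 * the first (tiler-coherent) core group, which slots 0/1 get pinned to.
 */
static uint64_t example_core_group0_mask(uint64_t shader_present,
                                         unsigned int num_l2)
{
        unsigned int cores = __builtin_popcountll(shader_present);

        if (num_l2 < 2)
                return shader_present;  /* single coherent group */

        return shader_present & ((1ull << (cores / num_l2)) - 1);
}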
Re: [v2 2/3] drm/msm/dsi: Add dsi phy tuning configuration support
On Mon, 10 Jan 2022 at 15:56, Rajeev Nandan wrote: > > Add support for MSM DSI PHY tuning configuration. Current design is > to support drive strength and drive level/amplitude tuning for > 10nm PHY version, but this can be extended to other PHY versions. > > Signed-off-by: Rajeev Nandan > --- > > Changes in v2: > - New. > - Split into generic code and 10nm-specific part (Dmitry Baryshkov) > > drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 3 +++ > drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 16 > 2 files changed, 19 insertions(+) > > diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c > b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c > index 8c65ef6..ee3739d 100644 > --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c > +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c > @@ -739,6 +739,9 @@ static int dsi_phy_driver_probe(struct platform_device > *pdev) > } > } > > + if (phy->cfg->ops.tuning_cfg_init) > + phy->cfg->ops.tuning_cfg_init(phy); Please rename to parse_dt_properties() or something like that. > + > ret = dsi_phy_regulator_init(phy); > if (ret) > goto fail; > diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h > b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h > index b91303a..b559a2b 100644 > --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h > +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h > @@ -25,6 +25,7 @@ struct msm_dsi_phy_ops { > void (*save_pll_state)(struct msm_dsi_phy *phy); > int (*restore_pll_state)(struct msm_dsi_phy *phy); > bool (*set_continuous_clock)(struct msm_dsi_phy *phy, bool enable); > + void (*tuning_cfg_init)(struct msm_dsi_phy *phy); > }; > > struct msm_dsi_phy_cfg { > @@ -81,6 +82,20 @@ struct msm_dsi_dphy_timing { > #define DSI_PIXEL_PLL_CLK 1 > #define NUM_PROVIDED_CLKS 2 > > +#define DSI_LANE_MAX 5 > + > +/** > + * struct msm_dsi_phy_tuning_cfg - Holds PHY tuning config parameters. > + * @rescode_offset_top: Offset for pull-up legs rescode. > + * @rescode_offset_bot: Offset for pull-down legs rescode. > + * @vreg_ctrl: vreg ctrl to drive LDO level > + */ > +struct msm_dsi_phy_tuning_cfg { > + u8 rescode_offset_top[DSI_LANE_MAX]; > + u8 rescode_offset_bot[DSI_LANE_MAX]; > + u8 vreg_ctrl; > +}; How generic is this? In other words, you are adding a struct with the generic name to the generic structure. I'd expect that it would be common to several PHY generations. > + > struct msm_dsi_phy { > struct platform_device *pdev; > void __iomem *base; > @@ -98,6 +113,7 @@ struct msm_dsi_phy { > > struct msm_dsi_dphy_timing timing; > const struct msm_dsi_phy_cfg *cfg; > + struct msm_dsi_phy_tuning_cfg tuning_cfg; > > enum msm_dsi_phy_usecase usecase; > bool regulator_ldo_mode; > -- > 2.7.4 > -- With best wishes Dmitry
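For clarity, the rename asked for above amounts to this shape (a sketch with stand-in types, not the actual msm driver):

struct example_phy;     /* stand-in for struct msm_dsi_phy */

struct example_phy_ops {
        /* renamed from tuning_cfg_init: it parses DT properties at probe */
        void (*parse_dt_properties)(struct example_phy *phy);
};

static void example_probe(const struct example_phy_ops *ops,
                          struct example_phy *phy)
{
        if (ops->parse_dt_properties)   /* optional per PHY generation */
                ops->parse_dt_properties(phy);
}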
Re: [v2 1/3] dt-bindings: msm/dsi: Add 10nm dsi phy tuning properties
On Mon, 10 Jan 2022 at 15:56, Rajeev Nandan wrote: > > In most cases, the default values of DSI PHY tuning registers should be > sufficient as they are fully optimized. However, in some cases where > extreme board parasitics cause the eye shape to degrade, the override > bits can be used to improve the signal quality. > > The general guidelines for DSI PHY tuning include: > - High and moderate data rates may benefit from the drive strength and > drive level tuning. > - Drive strength tuning will affect the output impedance and may be used > for matching optimization. > - Drive level tuning will affect the output levels without affecting the > impedance. > > The clock and data lanes have a calibration circuitry feature. The drive > strength tuning can be done by adjusting rescode offset for hstop/hsbot, > and the drive level tuning can be done by adjusting the LDO output level > for the HSTX drive. > > Signed-off-by: Rajeev Nandan > --- > > Changes in v2: > - More details in the commit text (Stephen Boyd) > - Use human understandable values (Stephen Boyd, Dmitry Baryshkov) > - Do not take values that are going to be unused (Dmitry Baryshkov) > > .../bindings/display/msm/dsi-phy-10nm.yaml | 33 > ++ > 1 file changed, 33 insertions(+) > > diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > index 4399715..d0eb8f6 100644 > --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml > @@ -35,6 +35,35 @@ properties: >Connected to DSI0_MIPI_DSI_PLL_VDDA0P9 pin for sc7180 target and >connected to VDDA_MIPI_DSI_0_PLL_0P9 pin for sdm845 target Generic note: I think these properties should be prefixed with "qcom," prefix. > > + phy-rescode-offset-top: > +$ref: /schemas/types.yaml#/definitions/uint8-array > +minItems: 5 > +maxItems: 5 > +description: > + Integer array of offset for pull-up legs rescode for all five lanes. > + To offset the drive strength from the calibrated value in an increasing > + or decreasing manner, use 6 bit two’s complement values. dtc should support negative values, google hints that <(-2)> should work. > + > + phy-rescode-offset-bot: > +$ref: /schemas/types.yaml#/definitions/uint8-array > +minItems: 5 > +maxItems: 5 > +description: > + Integer array of offset for pull-down legs rescode for all five lanes. > + To offset the drive strength from the calibrated value in an increasing > + or decreasing manner, use 6 bit two’s complement values. > + > + phy-drive-ldo-level: > +$ref: /schemas/types.yaml#/definitions/uint8 > +minimum: 0 > +maximum: 7 > +description: > + The PHY LDO has an amplitude tuning feature to adjust the LDO output > + for the HSTX drive. To offset the drive level from the default value, > + supported levels are with the following mapping: > + 0 = 375mV, 1 = 400mV, 2 = 425mV, 3 = 450mV, 4 = 475mV, 5 = 500mV, > + 6 = 500mV, 7 = 500mV No encoding please. Specify the values in the dts and convert them into the register values in the driver. > + > required: >- compatible >- reg > @@ -64,5 +93,9 @@ examples: > clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, ><&rpmhcc RPMH_CXO_CLK>; > clock-names = "iface", "ref"; > + > + phy-resocde-offset-top = /bits/ 8 <0x0 0x0 0x0 0x0 0x0>; > + phy-rescode-offset-bot = /bits/ 8 <0x0 0x0 0x0 0x0 0x0>; > + phy-drive-ldo-level = /bits/ 8 <1>; -- With best wishes Dmitry
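On the two's complement point: if the binding carries plain signed integers as suggested, the driver-side encoding into the 6-bit field is just a truncating mask, since truncation to n bits is exactly two's complement encoding for in-range values. A minimal sketch (field width taken from the quoted description; the function name is illustrative):

#include <stdint.h>

/* Encode a signed rescode offset (e.g. -2 from the DT) into a 6-bit
 * two's complement register field; valid input range is [-32, 31]. */
static uint32_t example_rescode_offset_to_field(int32_t offset)
{
        return (uint32_t)offset & 0x3f; /* e.g. -2 -> 0x3e */
}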
Re: [PATCH v6 5/6] drm/i915: Asynchronous migration selftest
On 07/01/2022 14:23, Thomas Hellström wrote: Add a selftest to exercise asynchronous migration and -unbinding. Extend the gem_migrate selftest to perform the migrations while depending on a spinner and a bound vma set up on the migrated buffer object. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_object.c| 12 ++ drivers/gpu/drm/i915/gem/i915_gem_object.h| 3 + .../drm/i915/gem/selftests/i915_gem_migrate.c | 192 -- 3 files changed, 192 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d87b508b59b1..1a9e1f940a7d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -756,6 +756,18 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) return dma_fence_get(i915_gem_to_ttm(obj)->moving); } +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence) +{ + struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving; + + if (*moving == fence) + return; + + dma_fence_put(*moving); + *moving = dma_fence_get(fence); +} + /** * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any * @obj: The object whose moving fence to wait for. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f66d46882ea7..1d178236 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -524,6 +524,9 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) struct dma_fence * i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); +void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence *fence); + int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index ecb691c81d1e..d534141b2cf7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -4,8 +4,13 @@ */ #include "gt/intel_migrate.h" +#include "gt/intel_gpu_commands.h" #include "gem/i915_gem_ttm_move.h" +#include "i915_deps.h" + +#include "selftests/igt_spinner.h" + static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, bool fill) { @@ -101,7 +106,8 @@ static int igt_same_create_migrate(void *arg) } static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, - struct drm_i915_gem_object *obj) + struct drm_i915_gem_object *obj, + struct i915_vma *vma) { int err; @@ -109,6 +115,24 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, if (err) return err; + if (vma) { + err = i915_vma_pin_ww(vma, ww, obj->base.size, 0, + 0UL | PIN_OFFSET_FIXED | + PIN_USER); + if (err) { + if (err != -EINTR && err != -ERESTARTSYS && + err != -EDEADLK) + pr_err("Failed to pin vma.\n"); + return err; + } + + i915_vma_unpin(vma); + } + + /* +* Migration will implicitly unbind (asynchronously) any bound +* vmas.
+*/ if (i915_gem_object_is_lmem(obj)) { err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); if (err) { @@ -149,11 +173,15 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, return err; } -static int igt_lmem_pages_migrate(void *arg) +static int __igt_lmem_pages_migrate(struct intel_gt *gt, + struct i915_address_space *vm, + struct i915_deps *deps, + struct igt_spinner *spin, + struct dma_fence *spin_fence) { - struct intel_gt *gt = arg; struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; + struct i915_vma *vma = NULL; struct i915_gem_ww_ctx ww; struct i915_request *rq; int err; @@ -165,6 +193,14 @@ static int igt_lmem_pages_migrate(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); + if (vm) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } +
Re: [PATCH v6 4/6] drm/i915: Use vma resources for async unbinding
On 07/01/2022 14:23, Thomas Hellström wrote: Implement async (non-blocking) unbinding by not syncing the vma before calling unbind on the vma_resource. Add the resulting unbind fence to the object's dma_resv from where it is picked up by the ttm migration code. Ideally these unbind fences should be coalesced with the migration blit fence to avoid stalling the migration blit waiting for unbind, as they can certainly go on in parallel, but since we don't yet have a reasonable data structure to use to coalesce fences and attach the resulting fence to a timeline, we defer that for now. Note that with async unbinding, even while the unbind waits for the preceding bind to complete before unbinding, the vma itself might have been destroyed in the process, clearing the vma pages. Therefore we can only allow async unbinding if we have a refcounted sg-list and keep a refcount on that for the vma resource pages to stay intact until binding occurs. If this condition is not met, a request for an async unbind is diverted to a sync unbind. v2: - Use a separate kmem_cache for vma resources for now to isolate their memory allocation and aid debugging. - Move the check for vm closed to the actual unbinding thread. Regardless of whether the vm is closed, we need the unbind fence to properly wait for capture. - Clear vma_res::vm on unbind and update its documentation. v4: - Take cache coloring into account when searching for vma resources pending unbind. (Matthew Auld) v5: - Fix timeout and error check in i915_vma_resource_bind_dep_await(). - Avoid taking a reference on the object for async binding if async unbind capable. - Fix braces around a single-line if statement. v6: - Fix up the cache coloring adjustment. (Kernel test robot ) - Don't allow async unbinding if the vma_res pages are not the same as the object pages. Signed-off-by: Thomas Hellström --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 11 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 4 + drivers/gpu/drm/i915/gt/intel_gtt.h | 3 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +- drivers/gpu/drm/i915/i915_module.c | 3 + drivers/gpu/drm/i915/i915_vma.c | 205 +-- drivers/gpu/drm/i915/i915_vma.h | 3 +- drivers/gpu/drm/i915/i915_vma_resource.c | 354 +-- drivers/gpu/drm/i915/i915_vma_resource.h | 48 +++ 11 files changed, 579 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 8653855d808b..1de306c03aaf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + /* +* Note: The async unbinding here will actually transform the +* blocking wait for unbind into a wait before finally submitting +* evict / migration blit and thus stall the migration timeline +* which may not be good for overall throughput. We should make +* sure we await the unbind fences *after* the migration blit +* instead of *before* as we currently do. 
+*/ + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | +I915_GEM_OBJECT_UNBIND_ASYNC); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index e49b6250c4b7..a1b2761bc16e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -142,7 +142,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) continue; if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index a94be0306464..46be4197b93f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -161,6 +161,9 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + /* Synchronize async unbinds. */ + i915_vma_resource_bind_dep_sync_all(vm); + vm->cleanup(vm); i915_address_space_fini(vm); @@ -189,6 +192,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); +
[PATCH v4 5/7] drm/i915: Remove assert_object_held_shared
This duck tape workaround is no longer required, unbind and destroy are fixed to take the obj->resv mutex before destroying and obj->mm.lock has been removed, always requiring obj->resv as well. Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_object.h | 14 -- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 6 +++--- 5 files changed, 11 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d87b508b59b1..fd34b1a115c4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -550,7 +550,7 @@ bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) #ifdef CONFIG_LOCKDEP if (IS_DGFX(to_i915(obj->base.dev)) && i915_gem_object_evictable((void __force *)obj)) - assert_object_held_shared(obj); + assert_object_held(obj); #endif return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE; } @@ -569,7 +569,7 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) #ifdef CONFIG_LOCKDEP if (IS_DGFX(to_i915(obj->base.dev)) && i915_gem_object_evictable((void __force *)obj)) - assert_object_held_shared(obj); + assert_object_held(obj); #endif return obj->mem_flags & I915_BO_FLAG_IOMEM; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc448f895ae8..c1cdfaf2d1e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -157,20 +157,6 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) -/* - * If more than one potential simultaneous locker, assert held. - */ -static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj) -{ - /* -* Note mm list lookup is protected by -* kref_get_unless_zero(). 
-*/ - if (IS_ENABLED(CONFIG_LOCKDEP) && - kref_read(&obj->base.refcount) > 0) - assert_object_held(obj); -} - static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, bool intr) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 7d2211fbe548..a1a785068779 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -19,7 +19,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, bool shrinkable; int i; - assert_object_held_shared(obj); + assert_object_held(obj); if (i915_gem_object_is_volatile(obj)) obj->mm.madv = I915_MADV_DONTNEED; @@ -95,7 +95,7 @@ int i915_gem_object_get_pages(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); int err; - assert_object_held_shared(obj); + assert_object_held(obj); if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { drm_dbg(&i915->drm, @@ -122,7 +122,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) assert_object_held(obj); - assert_object_held_shared(obj); + assert_object_held(obj); if (unlikely(!i915_gem_object_has_pages(obj))) { GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); @@ -191,7 +191,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; - assert_object_held_shared(obj); + assert_object_held(obj); pages = fetch_and_zero(&obj->mm.pages); if (IS_ERR_OR_NULL(pages)) @@ -222,7 +222,7 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) return -EBUSY; /* May be called by shrinker from within get_pages() (on another bo) */ - assert_object_held_shared(obj); + assert_object_held(obj); i915_gem_object_release_mmap_offset(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 3cc01c30dd62..8a0da441225b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -109,7 +109,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) { struct page **pvec = NULL; - assert_object_held_shared(obj); + assert_object_held(obj); if (!--obj->userptr.page_ref) { pvec = obj->userptr.pvec; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i
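For context, a sketch of the locking contract the removal relies on: with the shared-lock special case gone, any path touching the page state is expected to hold the full dma_resv lock, which assert_object_held() (dma_resv_assert_held()) verifies under lockdep. Illustrative pattern, not code from the patch:

static int put_pages_locked(struct drm_i915_gem_object *obj)
{
	int err;

	i915_gem_object_lock(obj, NULL);	/* takes obj->base.resv */
	err = __i915_gem_object_put_pages(obj);	/* asserts the resv lock */
	i915_gem_object_unlock(obj);

	return err;
}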
[PATCH v4 6/7] drm/i915: Remove support for unlocked i915_vma unbind
Now that we require the object lock for all ops, some code handling race conditions can be removed. This is required to not take short-term pins inside execbuf. Signed-off-by: Maarten Lankhorst Acked-by: Niranjana Vishwanathapura --- drivers/gpu/drm/i915/i915_vma.c | 55 + 1 file changed, 8 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 11e10e0d628c..8859feb7d131 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -777,7 +777,6 @@ i915_vma_detach(struct i915_vma *vma) static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) { unsigned int bound; - bool pinned = true; bound = atomic_read(&vma->flags); do { @@ -787,34 +786,10 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) return false; - if (!(bound & I915_VMA_PIN_MASK)) - goto unpinned; - GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0); } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); return true; - -unpinned: - /* -* If pin_count==0, but we are bound, check under the lock to avoid -* racing with a concurrent i915_vma_unbind(). -*/ - mutex_lock(&vma->vm->mutex); - do { - if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { - pinned = false; - break; - } - - if (unlikely(flags & ~bound)) { - pinned = false; - break; - } - } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); - mutex_unlock(&vma->vm->mutex); - - return pinned; } static struct scatterlist * @@ -1153,7 +1128,6 @@ static int __i915_vma_get_pages(struct i915_vma *vma) { struct sg_table *pages; - int ret; /* * The vma->pages are only valid within the lifespan of the borrowed @@ -1186,18 +1160,16 @@ __i915_vma_get_pages(struct i915_vma *vma) break; } - ret = 0; if (IS_ERR(pages)) { - ret = PTR_ERR(pages); - pages = NULL; drm_err(&vma->vm->i915->drm, - "Failed to get pages for VMA view type %u (%d)!\n", - vma->ggtt_view.type, ret); + "Failed to get pages for VMA view type %u (%ld)!\n", + vma->ggtt_view.type, PTR_ERR(pages)); + return PTR_ERR(pages); } vma->pages = pages; - return ret; + return 0; } I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma) @@ -1229,25 +1201,14 @@ I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma) static void __vma_put_pages(struct i915_vma *vma, unsigned int count) { /* We allocate under vma_get_pages, so beware the shrinker */ - struct sg_table *pages = READ_ONCE(vma->pages); - GEM_BUG_ON(atomic_read(&vma->pages_count) < count); if (atomic_sub_return(count, &vma->pages_count) == 0) { - /* -* The atomic_sub_return is a read barrier for the READ_ONCE of -* vma->pages above. -* -* READ_ONCE is safe because this is either called from the same -* function (i915_vma_pin_ww), or guarded by vma->vm->mutex. -* -* TODO: We're leaving vma->pages dangling, until vma->obj->resv -* lock is required. -*/ - if (pages != vma->obj->mm.pages) { - sg_free_table(pages); - kfree(pages); + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); } + vma->pages = NULL; i915_gem_object_unpin_pages(vma->obj); } -- 2.34.1
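As a side note on the loop that survives in try_qad_pin(), a minimal analog of the atomic_try_cmpxchg() contract it depends on: on failure the helper reloads the old value from memory, so the bail-out checks always run against a fresh snapshot before the increment is retried. Names here are illustrative only:

static bool try_increment_if_set(atomic_t *v, int required_bits)
{
	int old = atomic_read(v);

	do {
		if (required_bits & ~old)	/* a required bit is missing */
			return false;
	} while (!atomic_try_cmpxchg(v, &old, old + 1));

	return true;
}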
[PATCH v4 7/7] drm/i915: Remove short-term pins from execbuf, v6.
Add a flag PIN_VALIDATE, to indicate we don't need to pin and only protected by the object lock. This removes the need to unpin, which is done by just releasing the lock. eb_reserve is slightly reworked for readability, but the same steps are still done: - First pass pins with NONBLOCK. - Second pass unbinds all objects first, then pins. - Third pass is only called when not all objects are softpinned, and unbinds all objects, then calls i915_gem_evict_vm(), then pins. Changes since v1: - Split out eb_reserve() into separate functions for readability. Changes since v2: - Make batch buffer mappable on platforms where only GGTT is available, to prevent moving the batch buffer during relocations. Changes since v3: - Preserve current behavior for batch buffer, instead be cautious when calling i915_gem_object_ggtt_pin_ww, and re-use the current batch vma if it's inside ggtt and map-and-fenceable. - Remove impossible condition check from eb_reserve. (Matt) Changes since v5: - Do not even temporarily pin, just call i915_gem_evict_vm() and mark all vma's as unpinned. Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Auld --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 220 +- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 1 - drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_vma.c | 24 +- 4 files changed, 128 insertions(+), 118 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index da35a143af36..cfff194d90e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -441,7 +441,7 @@ eb_pin_vma(struct i915_execbuffer *eb, else pin_flags = entry->offset & PIN_OFFSET_MASK; - pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; + pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE; if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)) pin_flags |= PIN_GLOBAL; @@ -459,17 +459,15 @@ eb_pin_vma(struct i915_execbuffer *eb, entry->pad_to_size, entry->alignment, eb_pin_flags(entry, ev->flags) | -PIN_USER | PIN_NOEVICT); +PIN_USER | PIN_NOEVICT | PIN_VALIDATE); if (unlikely(err)) return err; } if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { err = i915_vma_pin_fence(vma); - if (unlikely(err)) { - i915_vma_unpin(vma); + if (unlikely(err)) return err; - } if (vma->fence) ev->flags |= __EXEC_OBJECT_HAS_FENCE; @@ -485,13 +483,9 @@ eb_pin_vma(struct i915_execbuffer *eb, static inline void eb_unreserve_vma(struct eb_vma *ev) { - if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) - return; - if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) __i915_vma_unpin_fence(ev->vma); - __i915_vma_unpin(ev->vma); ev->flags &= ~__EXEC_OBJECT_RESERVED; } @@ -684,10 +678,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb, if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { err = i915_vma_pin_fence(vma); - if (unlikely(err)) { - i915_vma_unpin(vma); + if (unlikely(err)) return err; - } if (vma->fence) ev->flags |= __EXEC_OBJECT_HAS_FENCE; @@ -699,85 +691,95 @@ static int eb_reserve_vma(struct i915_execbuffer *eb, return 0; } -static int eb_reserve(struct i915_execbuffer *eb) +static bool eb_unbind(struct i915_execbuffer *eb, bool force) { const unsigned int count = eb->buffer_count; - unsigned int pin_flags = PIN_USER | PIN_NONBLOCK; + unsigned int i; struct list_head last; + bool unpinned = false; + + /* Resort *all* the objects into priority order */ + INIT_LIST_HEAD(&eb->unbound); + INIT_LIST_HEAD(&last); + + for (i = 0; i < count; i++) { + struct eb_vma *ev = 
&eb->vma[i]; + unsigned int flags = ev->flags; + + if (!force && flags & EXEC_OBJECT_PINNED && + flags & __EXEC_OBJECT_HAS_PIN) + continue; + + unpinned = true; + eb_unreserve_vma(ev); + + if (flags & EXEC_OBJECT_PINNED) + /* Pinned must have their slot */ + list_add(&ev->bind_link, &eb->unbound); + else if (flags & __EXEC_OBJECT_NEEDS_MAP) + /* Map require the lowest 256MiB (apert
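Since the eb_reserve() hunk is cut off above, here is a sketch of the three passes the changelog describes. eb_unbind() is taken from the patch; eb_reserve_vm() stands in for the per-object pinning loop and is hypothetical:

static int eb_reserve_sketch(struct i915_execbuffer *eb)
{
	unsigned int pass;
	int err;

	for (pass = 0; pass <= 2; pass++) {
		int pin_flags = PIN_USER | PIN_VALIDATE;

		if (pass == 0)
			pin_flags |= PIN_NONBLOCK;	/* first try: don't wait */

		if (pass >= 1)
			/* unbind, everything on the last pass, then re-pin */
			eb_unbind(eb, pass == 2);

		if (pass == 2) {
			/* too fragmented: evict the whole vm and retry */
			err = mutex_lock_interruptible(&eb->context->vm->mutex);
			if (!err) {
				err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
				mutex_unlock(&eb->context->vm->mutex);
			}
			if (err)
				return err;
		}

		err = eb_reserve_vm(eb, pin_flags);	/* hypothetical helper */
		if (err != -ENOSPC)
			return err;
	}

	return -ENOSPC;
}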
[PATCH v4 3/7] drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something
Because we will start to require the obj->resv lock for unbinding, ensure these shrinker functions also take the lock. This requires some function signature changes, to ensure that the ww context is passed around, but is mostly straightforward. Previously this was split up into several patches, but reworking should allow for easier bisection. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/gvt/aperture_gm.c| 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem_evict.c | 34 +++ drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 ++ drivers/gpu/drm/i915/i915_vgpu.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 9 ++--- .../gpu/drm/i915/selftests/i915_gem_evict.c | 17 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 14 11 files changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index ab6c4322dc08..e416e1f12d1a 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -504,7 +504,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); size = ggtt->vm.total - GUC_GGTT_TOP; - ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size, + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 15d63435ec4d..9c21b55b927b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1382,7 +1382,7 @@ static int evict_vma(void *data) complete(&arg->completion); mutex_lock(&vm->mutex); - err = i915_gem_evict_for_node(vm, &evict, 0); + err = i915_gem_evict_for_node(vm, NULL, &evict, 0); mutex_unlock(&vm->mutex); return err; diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 0d6d59871308..c08098a167e9 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -63,7 +63,7 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) mutex_lock(>->ggtt->vm.mutex); mmio_hw_access_pre(gt); - ret = i915_gem_gtt_insert(>->ggtt->vm, node, + ret = i915_gem_gtt_insert(>->ggtt->vm, NULL, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ef121ddef418..88eb203a0742 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1722,11 +1722,13 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, + struct i915_gem_ww_ctx *ww, u64 min_size, u64 alignment, unsigned long color, u64 start, u64 end, unsigned flags); int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, +struct i915_gem_ww_ctx *ww, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index bfd66f539fc1..f502a617b35c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -51,6 +51,7 @@ static int ggtt_flush(struct intel_gt *gt) static bool mark_free(struct drm_mm_scan *scan, + struct 
i915_gem_ww_ctx *ww, struct i915_vma *vma, unsigned int flags, struct list_head *unwind) @@ -58,6 +59,9 @@ mark_free(struct drm_mm_scan *scan, if (i915_vma_is_pinned(vma)) return false; + if (!i915_gem_object_trylock(vma->obj, ww)) + return false; + list_add(&vma->evict_link, unwind); return drm_mm_scan_add_block(scan, &vma->node); } @@ -98,6 +102,7 @@ static bool defer_evict(struct i915_vma *vma) */ int i915_gem_evict_something(struct i915_address_space *vm, +struct i915_gem_ww_ctx *ww, u64 min_size, u64 al
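A short usage note on the signature change: callers without a ww transaction pass NULL, as the converted call sites in the diff do, while callers inside one forward their context so the trylock can recognize objects they already hold. Sketch only:

/* No ww context (e.g. GGTT management paths): */
err = i915_gem_evict_something(&ggtt->vm, NULL, size, alignment,
			       color, start, end, flags);

/* Inside a ww transaction: */
err = i915_gem_evict_for_node(vm, ww, &node, flags);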
[PATCH v4 1/7] drm/i915: Call i915_gem_evict_vm in vm_fault_gtt to prevent new ENOSPC errors, v2.
Now that we cannot unbind or kill the currently locked object directly, because we're removing short term pinning, we may have to unbind the object from the gtt manually, using an i915_gem_evict_vm() call. Changes since v1: - Remove -ENOSPC warning, it can still happen with concurrent mmaps where we can't unbind the other mmap because of the lock held. This fixes the gem_mmap_gtt@cpuset tests. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 5ac2506f4ee8..4337f3c1400c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -358,8 +358,21 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); } - /* The entire mappable GGTT is pinned? Unexpected! */ - GEM_BUG_ON(vma == ERR_PTR(-ENOSPC)); + /* +* The entire mappable GGTT is pinned? Unexpected! +* Try to evict the object we locked too, as normally we skip it +* due to lack of short term pinning inside execbuf. +*/ + if (vma == ERR_PTR(-ENOSPC)) { + ret = mutex_lock_interruptible(&ggtt->vm.mutex); + if (!ret) { + ret = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); + } + if (ret) + goto err_reset; + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); + } } if (IS_ERR(vma)) { ret = PTR_ERR(vma); -- 2.34.1
[PATCH v4 2/7] drm/i915: Add locking to i915_gem_evict_vm()
i915_gem_evict_vm will need to be able to evict objects that are locked by the current ctx. By testing if the current context already locked the object, we can do this correctly. This allows us to evict the entire vm even if we already hold some objects' locks. Previously, this was spread over several commits, but it makes more sense to commit the changes to i915_gem_evict_vm separately from the changes to i915_gem_evict_something() and i915_gem_evict_for_node(). Signed-off-by: Maarten Lankhorst --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 2 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem_evict.c | 30 +-- drivers/gpu/drm/i915/i915_vma.c | 7 - .../gpu/drm/i915/selftests/i915_gem_evict.c | 10 +-- 6 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5ecc85b96a3d..da35a143af36 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -766,7 +766,7 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ mutex_lock(&eb->context->vm->mutex); - err = i915_gem_evict_vm(eb->context->vm); + err = i915_gem_evict_vm(eb->context->vm, &eb->ww); mutex_unlock(&eb->context->vm->mutex); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 4337f3c1400c..4afad1604a6a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -366,7 +366,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) { - ret = i915_gem_evict_vm(&ggtt->vm); + ret = i915_gem_evict_vm(&ggtt->vm, &ww); mutex_unlock(&ggtt->vm.mutex); } if (ret) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2f9336302e6c..ef121ddef418 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1729,7 +1729,8 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm, int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); -int i915_gem_evict_vm(struct i915_address_space *vm); +int i915_gem_evict_vm(struct i915_address_space *vm, + struct i915_gem_ww_ctx *ww); /* i915_gem_internal.c */ struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 2b73ddb11c66..bfd66f539fc1 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -367,7 +367,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. */ -int i915_gem_evict_vm(struct i915_address_space *vm) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) { int ret = 0; @@ -388,24 +388,50 @@ int i915_gem_evict_vm(struct i915_address_space *vm) do { struct i915_vma *vma, *vn; LIST_HEAD(eviction_list); + LIST_HEAD(locked_eviction_list); list_for_each_entry(vma, &vm->bound_list, vm_link) { if (i915_vma_is_pinned(vma)) continue; + /* +* If we already own the lock, trylock fails. In case the resv +* is shared among multiple objects, we still need the object ref. 
+*/ + if (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx)) { + __i915_vma_pin(vma); + list_add(&vma->evict_link, &locked_eviction_list); + continue; + } + + if (!i915_gem_object_trylock(vma->obj, ww)) + continue; + __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); } - if (list_empty(&eviction_list)) + if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
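Usage sketch for the new i915_gem_evict_vm() signature, following the execbuf call site in the diff: the caller holds vm->mutex as before and now forwards its ww context (or NULL), so objects that context already locked take the locked_eviction_list path instead of being skipped:

mutex_lock(&vm->mutex);
err = i915_gem_evict_vm(vm, &ww);	/* ww may be NULL */
mutex_unlock(&vm->mutex);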
[PATCH v4 0/7] drm/i915: Remove short term pins from execbuf by requiring lock to unbind.
Previously, short term pinning in execbuf was required because i915_vma was effectively independent from objects, and had its own refcount, locking, lifetime rules and pinning. This series removes the separate locking, by requiring vma->obj->resv to be held when pinning and unbinding. This will also be required for VM_BIND work. Some patches have already been merged, but this contains the remainder of the conversion. With the object lock required for both pinning and unbinding, holding it while trying to pin, for example in execbuf, is enough to prevent a concurrent unbind. This makes binding/unbinding similar to ttm_bo_validate()'s use, which just cares that an object is in a certain place, without pinning it in place. Having the VMA be part of the gem bo removes a lot of the vma refcounting, and makes i915_vma more a part of the bo, instead of its own floating object that just happens to be part of a bo. This is also required to make it more compatible with TTM, and with migration in general. For future work, it makes things a lot simpler and clearer. We want to end up with i915_vma just being a specific mapping of the BO, as is the case in other drivers. i915_vma->active removal is the next step there, and means that when the object is destroyed, the bindings are destroyed (after idle), instead of the object being destroyed when the bindings are idle. Maarten Lankhorst (7): drm/i915: Call i915_gem_evict_vm in vm_fault_gtt to prevent new ENOSPC errors, v2. drm/i915: Add locking to i915_gem_evict_vm() drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something drm/i915: Add i915_vma_unbind_unlocked, and take obj lock for i915_vma_unbind, v2. drm/i915: Remove assert_object_held_shared drm/i915: Remove support for unlocked i915_vma unbind drm/i915: Remove short-term pins from execbuf, v6. drivers/gpu/drm/i915/display/intel_fb_pin.c | 2 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 220 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +- drivers/gpu/drm/i915/gem/i915_gem_object.c| 4 +- drivers/gpu/drm/i915/gem/i915_gem_object.h| 14 -- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../i915/gem/selftests/i915_gem_client_blt.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 6 + drivers/gpu/drm/i915/gt/intel_ggtt.c | 51 +++- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 1 - drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/gvt/aperture_gm.c| 2 +- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem.c | 2 + drivers/gpu/drm/i915/i915_gem_evict.c | 64 - drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 4 + drivers/gpu/drm/i915/i915_vgpu.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 122 +- drivers/gpu/drm/i915/i915_vma.h | 1 + .../gpu/drm/i915/selftests/i915_gem_evict.c | 27 ++- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 36 +-- drivers/gpu/drm/i915/selftests/i915_vma.c | 8 +- 25 files changed, 361 insertions(+), 253 deletions(-) -- 2.34.1
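A sketch of the pin-under-lock pattern the series converges on, using the driver's existing ww helpers; PIN_VALIDATE is from patch 7, and the point is that keeping the object lock held is what now keeps the binding in place:

for_i915_gem_ww(&ww, err, true) {
	err = i915_gem_object_lock(obj, &ww);
	if (err)
		continue;	/* handles -EDEADLK backoff */

	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER | PIN_VALIDATE);
	if (err)
		continue;

	/* use the vma; it cannot be unbound while obj stays locked,
	 * and releasing the lock ends the "pin" - no unpin needed */
}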
[PATCH v4 4/7] drm/i915: Add i915_vma_unbind_unlocked, and take obj lock for i915_vma_unbind, v2.
We want to remove more members of i915_vma, which requires the locking to be held more often. Start requiring gem object lock for i915_vma_unbind, as it's one of the callers that may unpin pages. Some special care is needed when evicting, because the last reference to the object may be held by the VMA, so after __i915_vma_unbind, vma may be garbage, and we need to cache vma->obj before unlocking. Changes since v1: - Make trylock failing a WARN. (Matt) - Remove double i915_vma_wait_for_bind() (Matt) - Move atomic_set to right before mutex_unlock(), to make it more clear they belong together. (Matt) Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/display/intel_fb_pin.c | 2 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../i915/gem/selftests/i915_gem_client_blt.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c| 6 +++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 49 --- drivers/gpu/drm/i915/i915_gem.c | 2 + drivers/gpu/drm/i915/i915_vma.c | 27 +- drivers/gpu/drm/i915/i915_vma.h | 1 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 22 - drivers/gpu/drm/i915/selftests/i915_vma.c | 8 +-- 10 files changed, 95 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 31c15e5fca95..9c555f6d1958 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -47,7 +47,7 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb, goto err; if (i915_vma_misplaced(vma, 0, alignment, 0)) { - ret = i915_vma_unbind(vma); + ret = i915_vma_unbind_unlocked(vma); if (ret) { vma = ERR_PTR(ret); goto err; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 11f0aa65f8a3..b14c4e0a58d8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -647,7 +647,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) * pages. */ for (offset = 4096; offset < page_size; offset += 4096) { - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) goto out_unpin; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..c8ff8bf0986d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -318,7 +318,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr) int err; if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) { - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index f61356b72b1c..ba29767348be 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -166,7 +166,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, kunmap(p); out: + i915_gem_object_lock(obj, NULL); __i915_vma_put(vma); + i915_gem_object_unlock(obj); return err; } @@ -261,7 +263,9 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, if (err) return err; + i915_gem_object_lock(obj, NULL); __i915_vma_put(vma); + i915_gem_object_unlock(obj); if (igt_timeout(end_time, "%s: timed out after tiling=%d stride=%d\n", @@ -1352,7 +1356,9 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, * for other objects. 
Ergo we have to revoke the previous mmap PTE * access as it no longer points to the same object. */ + i915_gem_object_lock(obj, NULL); err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + i915_gem_object_unlock(obj); if (err) { pr_err("Failed to unbind object!\n"); goto out_unmap; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index e416e1f12d1a..e73d453a0d6b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -129,22 +129,49 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); +retry: + i915_gem_drain_free
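The hunk introducing the new helper is truncated above; given the commit message, its likely shape is a trivial wrapper taking the object lock around the now lock-requiring i915_vma_unbind(). A sketch, not verbatim from the patch:

int i915_vma_unbind_unlocked(struct i915_vma *vma)
{
	int err;

	i915_gem_object_lock(vma->obj, NULL);
	err = i915_vma_unbind(vma);
	i915_gem_object_unlock(vma->obj);

	return err;
}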
[PATCH] drm: bridge: chipone-icn6211: Drop unnecessary bridge type
Explicitly assigning a connector type to the bridge during bridge addition is optional. Bridges like the ICN6211 have a panel connected to them, and the panel driver already takes care of the associated connector type. Drop the assignment. Signed-off-by: Jagan Teki --- drivers/gpu/drm/bridge/chipone-icn6211.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index 23c34039ac48..c60170865b74 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -238,7 +238,6 @@ static int chipone_probe(struct mipi_dsi_device *dsi) return ret; icn->bridge.funcs = &chipone_bridge_funcs; - icn->bridge.type = DRM_MODE_CONNECTOR_DPI; icn->bridge.of_node = dev->of_node; drm_bridge_add(&icn->bridge); -- 2.25.1
Re: [PATCH v5 1/3] drm/privacy_screen: Add drvdata in drm_privacy_screen
Hi All, On 1/7/22 20:02, Rajat Jain wrote: > Allow a privacy screen provider to stash its private data pointer in the > drm_privacy_screen, and update the drm_privacy_screen_register() call to > accept that. Also introduce a *_get_drvdata() so that it can be retrieved > back when needed. > > This also touches the IBM Thinkpad platform driver, the only user of > privacy screen today, to pass NULL for now to the updated API. > > Signed-off-by: Rajat Jain > Reviewed-by: Hans de Goede I've pushed this series to drm-misc-next now. Regards, Hans > --- > v5: Same as v4 > v4: Added "Reviewed-by" from Hans > v3: Initial version. Came up due to review comments on v2 of other patches. > v2: No v2 > v1: No v1 > > drivers/gpu/drm/drm_privacy_screen.c| 5 - > drivers/platform/x86/thinkpad_acpi.c| 2 +- > include/drm/drm_privacy_screen_driver.h | 13 - > 3 files changed, 17 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/drm_privacy_screen.c > b/drivers/gpu/drm/drm_privacy_screen.c > index beaf99e9120a..03b149cc455b 100644 > --- a/drivers/gpu/drm/drm_privacy_screen.c > +++ b/drivers/gpu/drm/drm_privacy_screen.c > @@ -387,7 +387,8 @@ static void drm_privacy_screen_device_release(struct > device *dev) > * * An ERR_PTR(errno) on failure. > */ > struct drm_privacy_screen *drm_privacy_screen_register( > - struct device *parent, const struct drm_privacy_screen_ops *ops) > + struct device *parent, const struct drm_privacy_screen_ops *ops, > + void *data) > { > struct drm_privacy_screen *priv; > int ret; > @@ -404,6 +405,7 @@ struct drm_privacy_screen *drm_privacy_screen_register( > priv->dev.parent = parent; > priv->dev.release = drm_privacy_screen_device_release; > dev_set_name(&priv->dev, "privacy_screen-%s", dev_name(parent)); > + priv->drvdata = data; > priv->ops = ops; > > priv->ops->get_hw_state(priv); > @@ -439,6 +441,7 @@ void drm_privacy_screen_unregister(struct > drm_privacy_screen *priv) > mutex_unlock(&drm_privacy_screen_devs_lock); > > mutex_lock(&priv->lock); > + priv->drvdata = NULL; > priv->ops = NULL; > mutex_unlock(&priv->lock); > > diff --git a/drivers/platform/x86/thinkpad_acpi.c > b/drivers/platform/x86/thinkpad_acpi.c > index 341655d711ce..ccbfda2b0095 100644 > --- a/drivers/platform/x86/thinkpad_acpi.c > +++ b/drivers/platform/x86/thinkpad_acpi.c > @@ -9782,7 +9782,7 @@ static int tpacpi_lcdshadow_init(struct ibm_init_struct > *iibm) > return 0; > > lcdshadow_dev = drm_privacy_screen_register(&tpacpi_pdev->dev, > - &lcdshadow_ops); > + &lcdshadow_ops, NULL); > if (IS_ERR(lcdshadow_dev)) > return PTR_ERR(lcdshadow_dev); > > diff --git a/include/drm/drm_privacy_screen_driver.h > b/include/drm/drm_privacy_screen_driver.h > index 24591b607675..4ef246d5706f 100644 > --- a/include/drm/drm_privacy_screen_driver.h > +++ b/include/drm/drm_privacy_screen_driver.h > @@ -73,10 +73,21 @@ struct drm_privacy_screen { >* for more info. >*/ > enum drm_privacy_screen_status hw_state; > + /** > + * @drvdata: Private data owned by the privacy screen provider > + */ > + void *drvdata; > }; > > +static inline > +void *drm_privacy_screen_get_drvdata(struct drm_privacy_screen *priv) > +{ > + return priv->drvdata; > +} > + > struct drm_privacy_screen *drm_privacy_screen_register( > - struct device *parent, const struct drm_privacy_screen_ops *ops); > + struct device *parent, const struct drm_privacy_screen_ops *ops, > + void *data); > void drm_privacy_screen_unregister(struct drm_privacy_screen *priv); > > void drm_privacy_screen_call_notifier_chain(struct drm_privacy_screen *priv); >
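For reference, a usage sketch of the extended API from a hypothetical provider; struct my_privacy, the callback bodies and all "my_" names are made up, while the ops signatures follow drm_privacy_screen_driver.h:

struct my_privacy {
	/* provider-private state, e.g. a regmap or EC handle */
};

static int my_set_sw_state(struct drm_privacy_screen *priv,
			   enum drm_privacy_screen_status sw_state)
{
	struct my_privacy *my = drm_privacy_screen_get_drvdata(priv);

	/* program the hardware through "my", then mirror the state */
	priv->hw_state = priv->sw_state = sw_state;
	return 0;
}

static void my_get_hw_state(struct drm_privacy_screen *priv)
{
	/* read back from hardware via drm_privacy_screen_get_drvdata(priv) */
	priv->hw_state = PRIVACY_SCREEN_DISABLED;
}

static const struct drm_privacy_screen_ops my_ops = {
	.set_sw_state = my_set_sw_state,
	.get_hw_state = my_get_hw_state,
};

/* at probe time, with "my" allocated beforehand: */
screen = drm_privacy_screen_register(dev, &my_ops, my);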