Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pin_user_pages() calls for Direct I/O #17006

Merged
merged 1 commit into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions config/kernel-vfs-iov_iter.m4
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
])

ZFS_LINUX_TEST_SRC([iov_iter_get_pages2], [
#include <linux/uio.h>
],[
struct iov_iter iter = { 0 };
struct page **pages = NULL;
size_t maxsize = 4096;
unsigned maxpages = 1;
size_t start;
size_t ret __attribute__ ((unused));

ret = iov_iter_get_pages2(&iter, pages, maxsize, maxpages,
&start);
])

ZFS_LINUX_TEST_SRC([iter_is_ubuf], [
#include <linux/uio.h>
],[
Expand Down Expand Up @@ -64,6 +78,19 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
AC_MSG_RESULT(no)
])


dnl #
dnl # Kernel 6.0 changed iov_iter_get_pages() to iov_iter_get_pages2().
dnl #
AC_MSG_CHECKING([whether iov_iter_get_pages2() is available])
ZFS_LINUX_TEST_RESULT([iov_iter_get_pages2], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1,
[iov_iter_get_pages2() is available])
],[
AC_MSG_RESULT(no)
])

dnl #
dnl # Kernel 6.0 introduced the ITER_UBUF iov_iter type. iter_is_ubuf()
dnl # was also added to determine if the iov_iter is an ITER_UBUF.
Expand Down
10 changes: 10 additions & 0 deletions include/os/linux/spl/sys/uio.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ typedef enum zfs_uio_seg {
typedef struct {
struct page **pages; /* Mapped pages */
long npages; /* Number of mapped pages */
boolean_t pinned; /* Whether FOLL_PIN was used */
} zfs_uio_dio_t;

typedef struct zfs_uio {
Expand Down Expand Up @@ -199,4 +200,13 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
#define zfs_uio_iov_iter_type(iter) (iter)->type
#endif

#if defined(HAVE_ITER_IS_UBUF)
#define zfs_user_backed_iov_iter(iter) \
(iter_is_ubuf((iter)) || \
(zfs_uio_iov_iter_type((iter)) == ITER_IOVEC))
#else
#define zfs_user_backed_iov_iter(iter) \
(zfs_uio_iov_iter_type((iter)) == ITER_IOVEC)
#endif

#endif /* SPL_UIO_H */
64 changes: 40 additions & 24 deletions module/os/linux/zfs/zfs_uio.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,6 @@ zfs_uio_page_aligned(zfs_uio_t *uio)
return (aligned);
}


#if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64)
#define ZFS_MARKEED_PAGE 0x0
#define IS_ZFS_MARKED_PAGE(_p) 0
Expand Down Expand Up @@ -441,7 +440,6 @@ zfs_unmark_page(struct page *page)
}
#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */

#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
{
Expand Down Expand Up @@ -473,7 +471,6 @@ zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
}
}
}
#endif

void
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
Expand All @@ -482,21 +479,24 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
ASSERT(uio->uio_extflg & UIO_DIRECT);
ASSERT3P(uio->uio_dio.pages, !=, NULL);

if (uio->uio_dio.pinned) {
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
for (long i = 0; i < uio->uio_dio.npages; i++) {
struct page *p = uio->uio_dio.pages[i];
unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#endif
} else {
for (long i = 0; i < uio->uio_dio.npages; i++) {
struct page *p = uio->uio_dio.pages[i];

if (IS_ZFS_MARKED_PAGE(p)) {
zfs_unmark_page(p);
__free_page(p);
continue;
}
if (IS_ZFS_MARKED_PAGE(p)) {
zfs_unmark_page(p);
__free_page(p);
continue;
}

put_page(p);
put_page(p);
}
}
#endif

vmem_free(uio->uio_dio.pages,
uio->uio_dio.npages * sizeof (struct page *));
}
Expand All @@ -523,6 +523,7 @@ zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
if (len == 0)
return (0);

uio->uio_dio.pinned = B_TRUE;
#if defined(HAVE_ITER_IS_UBUF)
if (iter_is_ubuf(uio->uio_iter)) {
nr_pages = DIV_ROUND_UP(len, PAGE_SIZE);
Expand Down Expand Up @@ -569,8 +570,8 @@ zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)

return (0);
}
#endif

#else
static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
Expand All @@ -581,9 +582,15 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);

while (wanted) {
#if defined(HAVE_IOV_ITER_GET_PAGES2)
cnt = iov_iter_get_pages2(uio->uio_iter,
&uio->uio_dio.pages[uio->uio_dio.npages],
wanted, maxpages, &start);
#else
cnt = iov_iter_get_pages(uio->uio_iter,
&uio->uio_dio.pages[uio->uio_dio.npages],
wanted, maxpages, &start);
#endif
if (cnt < 0) {
iov_iter_revert(uio->uio_iter, rollback);
return (SET_ERROR(-cnt));
Expand All @@ -595,15 +602,19 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
rollback += cnt;
wanted -= cnt;
#if !defined(HAVE_IOV_ITER_GET_PAGES2)
/*
* iov_iter_get_pages2() advances the iov_iter on success.
*/
iov_iter_advance(uio->uio_iter, cnt);
#endif

}
ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
iov_iter_revert(uio->uio_iter, rollback);

return (0);
}
#endif /* HAVE_PIN_USER_PAGES_UNLOCKED */

/*
* This function pins user pages. In the event that the user pages were not
Expand All @@ -621,7 +632,10 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
if (uio->uio_segflg == UIO_ITER) {
uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
error = zfs_uio_pin_user_pages(uio, rw);
if (zfs_user_backed_iov_iter(uio->uio_iter))
error = zfs_uio_pin_user_pages(uio, rw);
else
error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
#else
error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
#endif
Expand All @@ -632,22 +646,24 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
ASSERT3S(uio->uio_dio.npages, >=, 0);

if (error) {
if (uio->uio_dio.pinned) {
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
for (long i = 0; i < uio->uio_dio.npages; i++)
put_page(uio->uio_dio.pages[i]);
unpin_user_pages(uio->uio_dio.pages,
uio->uio_dio.npages);
#endif
} else {
for (long i = 0; i < uio->uio_dio.npages; i++)
put_page(uio->uio_dio.pages[i]);
}

vmem_free(uio->uio_dio.pages, size);
return (error);
} else {
ASSERT3S(uio->uio_dio.npages, ==, npages);
}

#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
if (rw == UIO_WRITE)
if (rw == UIO_WRITE && !uio->uio_dio.pinned)
zfs_uio_dio_check_for_zero_page(uio);
#endif

uio->uio_extflg |= UIO_DIRECT;

Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos']
tags = ['functional', 'devices']

[tests/functional/direct:Linux]
tests = ['dio_write_verify']
tests = ['dio_loopback_dev', 'dio_write_verify']
tags = ['functional', 'direct']

[tests/functional/events:Linux]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1476,6 +1476,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/direct/dio_dedup.ksh \
functional/direct/dio_encryption.ksh \
functional/direct/dio_grow_block.ksh \
functional/direct/dio_loopback_dev.ksh \
functional/direct/dio_max_recordsize.ksh \
functional/direct/dio_mixed.ksh \
functional/direct/dio_mmap.ksh \
Expand Down
78 changes: 78 additions & 0 deletions tests/zfs-tests/tests/functional/direct/dio_loopback_dev.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2025 by Triad National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/direct/dio.cfg
. $STF_SUITE/tests/functional/direct/dio.kshlib

#
# DESCRIPTION:
# Verify Direct I/O reads work with loopback devices using direct=always.
#
# STRATEGY:
# 1. Create raidz zpool.
# 2. Create dataset with the direct dataset property set to always.
# 3. Create an empty file in dataset and setup loop device on it.
# 4. Read from loopback device.
#

verify_runnable "global"

function cleanup
{
if [[ -n $lofidev ]]; then
losetup -d $lofidev
fi
dio_cleanup
}

log_assert "Verify loopback devices with Direct I/O."

if ! is_linux; then
log_unsupported "This is just a check for Linux Direct I/O"
fi

log_onexit cleanup

# Create zpool
log_must truncate -s $MINVDEVSIZE $DIO_VDEVS
log_must create_pool $TESTPOOL1 "raidz" $DIO_VDEVS

# Creating dataset with direct=always
log_must eval "zfs create -o direct=always $TESTPOOL1/$TESTFS1"
mntpt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1)

# Getting a loopback device
lofidev=$(losetup -f)

# Create loopback device
log_must truncate -s 1M "$mntpt/temp_file"
log_must losetup $lofidev "$mntpt/temp_file"

# Read from looback device to make sure Direct I/O works with loopback device
log_must dd if=$lofidev of=/dev/null count=1 bs=4k

log_pass "Verified loopback devices for Direct I/O."
Loading