[PATCH V3 1/8] fs/ext4: Narrow scope of DAX check in setflags

2020-05-19 Thread ira . weiny
From: Ira Weiny 

When preventing DAX and journaling on an inode, use the effective DAX
check rather than the mount option.

This will be required to support per inode DAX flags.

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 
---
 fs/ext4/ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bfc1281fc4cb..5813e5e73eab 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -393,9 +393,9 @@ static int ext4_ioctl_setflags(struct inode *inode,
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
/*
 * Changes to the journaling mode can cause unsafe changes to
-* S_DAX if we are using the DAX mount option.
+* S_DAX if the inode is DAX
 */
-   if (test_opt(inode->i_sb, DAX)) {
+   if (IS_DAX(inode)) {
err = -EBUSY;
goto flags_out;
}
-- 
2.25.1



[PATCH V3 5/8] fs/ext4: Only change S_DAX on inode load

2020-05-19 Thread ira . weiny
From: Ira Weiny 

To prevent complications with in-memory inodes, we only set S_DAX on
inode load.  FS_XFLAG_DAX can be changed at any time and S_DAX will
change after inode eviction and reload.

Add init bool to ext4_set_inode_flags() to indicate if the inode is
being newly initialized.

Assert that S_DAX is not set on an inode which is just being loaded.

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 

---
Changes from V2:
Rework based on moving the encryption patch to the end.

Changes from RFC:
Change J_ASSERT() to WARN_ON_ONCE()
Fix bug which would clear S_DAX incorrectly
---
 fs/ext4/ext4.h   |  2 +-
 fs/ext4/ialloc.c |  2 +-
 fs/ext4/inode.c  | 13 ++---
 fs/ext4/ioctl.c  |  3 ++-
 fs/ext4/super.c  |  4 ++--
 fs/ext4/verity.c |  2 +-
 6 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1a3daf2d18ef..86a0994332ce 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2692,7 +2692,7 @@ extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
-extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4b8c9a9bdf0c..7941c140723f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1116,7 +1116,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct 
inode *dir,
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;
 
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, true);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d3a4c2ed7a1c..23e42a223235 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4419,11 +4419,13 @@ static bool ext4_should_enable_dax(struct inode *inode)
return false;
 }
 
-void ext4_set_inode_flags(struct inode *inode)
+void ext4_set_inode_flags(struct inode *inode, bool init)
 {
unsigned int flags = EXT4_I(inode)->i_flags;
unsigned int new_fl = 0;
 
+   WARN_ON_ONCE(IS_DAX(inode) && init);
+
if (flags & EXT4_SYNC_FL)
new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
@@ -4434,8 +4436,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
-   if (ext4_should_enable_dax(inode))
+
+   /* Because of the way inode_set_flags() works we must preserve S_DAX
+* here if already set. */
+   new_fl |= (inode->i_flags & S_DAX);
+   if (init && ext4_should_enable_dax(inode))
new_fl |= S_DAX;
+
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
@@ -4649,7 +4656,7 @@ struct inode *__ext4_iget(struct super_block *sb, 
unsigned long ino,
 * not initialized on a new filesystem. */
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, true);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5813e5e73eab..145083e8cd1e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -381,7 +381,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_clear_inode_flag(inode, i);
}
 
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, false);
+
inode->i_ctime = current_time(inode);
 
err = ext4_mark_iloc_dirty(handle, inode, );
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7b99c44d0a91..3cb9b48d3cc4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1348,7 +1348,7 @@ static int ext4_set_context(struct inode *inode, const 
void *ctx, size_t len,
 * Update inode->i_flags - S_ENCRYPTED will be enabled,
 * S_DAX may be disabled
 */
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, false);
}
return res;
}
@@ -1375,7 +1375,7 @@ static int ext4_set_context(struct inode *inode, const 
void *ctx, size_t len,
 * Update inode->i_flags - S_ENCRYPTED will be enabled,
 * S_DAX may be disabled
 */
-   ext4_set_inode_flags(inode);
+   ext4_set_inode_flags(inode, false);
res = 

[PATCH V3 6/8] fs/ext4: Make DAX mount option a tri-state

2020-05-19 Thread ira . weiny
From: Ira Weiny 

We add 'always', 'never', and 'inode' (default).  '-o dax' continues to
operate as before, which is equivalent to 'always'.  This new
functionality is limited to ext4 only.

Specifically we introduce a 2nd DAX mount flag EXT4_MOUNT2_DAX_NEVER and set
it and EXT4_MOUNT_DAX_ALWAYS appropriately for the mode.

We also force EXT4_MOUNT2_DAX_NEVER if !CONFIG_FS_DAX.

Finally, EXT4_MOUNT2_DAX_INODE is used solely to detect if the user
specified that option for printing.

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 

---
Changes from V1:
Fix up mounting options to only show an option if specified
Fix remount to prevent dax changes
Isolate behavior to ext4 only

Changes from RFC:
Combine remount check for DAX_NEVER with DAX_ALWAYS
Update ext4_should_enable_dax()
---
 fs/ext4/ext4.h  |  2 ++
 fs/ext4/inode.c |  2 ++
 fs/ext4/super.c | 67 +
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 86a0994332ce..6235440e4c39 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1168,6 +1168,8 @@ struct ext4_inode_info {
  blocks */
 #define EXT4_MOUNT2_HURD_COMPAT0x0004 /* Support 
HURD-castrated
  file systems */
+#define EXT4_MOUNT2_DAX_NEVER  0x0008 /* Do not allow Direct 
Access */
+#define EXT4_MOUNT2_DAX_INODE  0x0010 /* For printing options only 
*/
 
 #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM  0x0008 /* User explicitly
specified journal checksum */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 23e42a223235..140b1930e2f4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,6 +4400,8 @@ int ext4_get_inode_loc(struct inode *inode, struct 
ext4_iloc *iloc)
 
 static bool ext4_should_enable_dax(struct inode *inode)
 {
+   if (test_opt2(inode->i_sb, DAX_NEVER))
+   return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3cb9b48d3cc4..5ba65eb0e2ef 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1512,7 +1512,8 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
-   Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
+   Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
+   Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1579,6 +1580,9 @@ static const match_table_t tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
{Opt_dax, "dax"},
+   {Opt_dax_always, "dax=always"},
+   {Opt_dax_inode, "dax=inode"},
+   {Opt_dax_never, "dax=never"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
{Opt_warn_on_error, "warn_on_error"},
@@ -1726,6 +1730,7 @@ static int clear_qf_name(struct super_block *sb, int 
qtype)
 #define MOPT_NO_EXT3   0x0200
 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
 #define MOPT_STRING0x0400
+#define MOPT_SKIP  0x0800
 
 static const struct mount_opts {
int token;
@@ -1775,7 +1780,13 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
-   {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
+   {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
+   {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
+   MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+   {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
+   MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+   {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
+   MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -2084,13 +2095,32 @@ static int handle_mount_opt(struct super_block *sb, 
char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
 #endif
-   } else if (token == Opt_dax) {
+   } else if (token == Opt_dax || token == Opt_dax_always ||
+  token == Opt_dax_inode || token == Opt_dax_never) {
 #ifdef CONFIG_FS_DAX
-   ext4_msg(sb, KERN_WARNING,
-   "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
-   sbi->s_mount_opt |= m->mount_opt;
+   switch (token) {
+   case Opt_dax:
+   case Opt_dax_always:

[PATCH V3 4/8] fs/ext4: Update ext4_should_use_dax()

2020-05-19 Thread ira . weiny
From: Ira Weiny 

S_DAX should only be enabled when the underlying block device supports
dax.

Change ext4_should_use_dax() to check for device support prior to the
overriding mount option.

While we are at it change the function to ext4_should_enable_dax() as
this better reflects the ask as well as matches xfs.

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 

---
Changes from RFC
Change function name to 'should enable'
Clean up bool conversion
Reorder this for better bisect-ability
---
 fs/ext4/inode.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a10ff12194db..d3a4c2ed7a1c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4398,10 +4398,8 @@ int ext4_get_inode_loc(struct inode *inode, struct 
ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
 }
 
-static bool ext4_should_use_dax(struct inode *inode)
+static bool ext4_should_enable_dax(struct inode *inode)
 {
-   if (!test_opt(inode->i_sb, DAX_ALWAYS))
-   return false;
if (!S_ISREG(inode->i_mode))
return false;
if (ext4_should_journal_data(inode))
@@ -4412,7 +4410,13 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
-   return true;
+   if (!bdev_dax_supported(inode->i_sb->s_bdev,
+   inode->i_sb->s_blocksize))
+   return false;
+   if (test_opt(inode->i_sb, DAX_ALWAYS))
+   return true;
+
+   return false;
 }
 
 void ext4_set_inode_flags(struct inode *inode)
@@ -4430,7 +4434,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
-   if (ext4_should_use_dax(inode))
+   if (ext4_should_enable_dax(inode))
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
-- 
2.25.1



[PATCH V3 2/8] fs/ext4: Disallow verity if inode is DAX

2020-05-19 Thread ira . weiny
From: Ira Weiny 

Verity and DAX are incompatible.  Changing the DAX mode due to a verity
flag change is wrong without a corresponding address_space_operations
update.

Make the 2 options mutually exclusive by returning an error if DAX was
set first.

(Setting DAX is already disabled if Verity is set first.)

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 

---
Changes from V2:
Remove Section title 'Verity and DAX'

Changes:
remove WARN_ON_ONCE
Add documentation for DAX/Verity exclusivity
---
 Documentation/filesystems/ext4/verity.rst | 3 +++
 fs/ext4/verity.c  | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/Documentation/filesystems/ext4/verity.rst 
b/Documentation/filesystems/ext4/verity.rst
index 3e4c0ee0e068..e99ff3fd09f7 100644
--- a/Documentation/filesystems/ext4/verity.rst
+++ b/Documentation/filesystems/ext4/verity.rst
@@ -39,3 +39,6 @@ is encrypted as well as the data itself.
 
 Verity files cannot have blocks allocated past the end of the verity
 metadata.
+
+Verity and DAX are not compatible and attempts to set both of these flags
+on a file will fail.
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index dc5ec724d889..f05a09fb2ae4 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
handle_t *handle;
int err;
 
+   if (IS_DAX(inode))
+   return -EINVAL;
+
if (ext4_verity_in_progress(inode))
return -EBUSY;
 
-- 
2.25.1



[PATCH V3 8/8] Documentation/dax: Update DAX enablement for ext4

2020-05-19 Thread ira . weiny
From: Ira Weiny 

Update the document to reflect ext4 and xfs now behave the same.

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 

---
Changes from RFC:
Update with ext2 text...
---
 Documentation/filesystems/dax.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/dax.txt 
b/Documentation/filesystems/dax.txt
index 735fb4b54117..265c4f808dbf 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -25,7 +25,7 @@ size when creating the filesystem.
 Currently 3 filesystems support DAX: ext2, ext4 and xfs.  Enabling DAX on them
 is different.
 
-Enabling DAX on ext4 and ext2
+Enabling DAX on ext2
 -
 
 When mounting the filesystem, use the "-o dax" option on the command line or
@@ -33,8 +33,8 @@ add 'dax' to the options in /etc/fstab.  This works to enable 
DAX on all files
 within the filesystem.  It is equivalent to the '-o dax=always' behavior below.
 
 
-Enabling DAX on xfs

+Enabling DAX on xfs and ext4
+
 
 Summary
 ---
-- 
2.25.1



[PATCH V3 0/8] Enable ext4 support for per-file/directory DAX operations

2020-05-19 Thread ira . weiny
From: Ira Weiny 

Changes from V2:
Rework DAX exclusivity with verity and encryption based on feedback
from Eric

Enable the same per file DAX support in ext4 as was done for xfs.  This series
builds and depends on the V11 series for xfs.[1]

This passes the same xfstests test as XFS.

The only issue is that this modifies the old mount option parsing code rather
than waiting for the new parsing code to be finalized.

This series starts with 3 fixes which include making Verity and Encrypt truly
mutually exclusive from DAX.  I think these first 3 patches should be picked up
for 5.8 regardless of what is decided regarding the mount parsing.

[1] https://lore.kernel.org/lkml/20200428002142.404144-1-ira.we...@intel.com/

To: linux-kernel@vger.kernel.org
Cc: "Darrick J. Wong" 
Cc: Dan Williams 
Cc: Dave Chinner 
Cc: Christoph Hellwig 
Cc: "Theodore Y. Ts'o" 
Cc: Jan Kara 
Cc: linux-e...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-fsde...@vger.kernel.org


Ira Weiny (8):
  fs/ext4: Narrow scope of DAX check in setflags
  fs/ext4: Disallow verity if inode is DAX
  fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS
  fs/ext4: Update ext4_should_use_dax()
  fs/ext4: Only change S_DAX on inode load
  fs/ext4: Make DAX mount option a tri-state
  fs/ext4: Introduce DAX inode flag
  Documentation/dax: Update DAX enablement for ext4

 Documentation/filesystems/dax.txt |  6 +-
 Documentation/filesystems/ext4/verity.rst |  3 +
 fs/ext4/ext4.h| 22 +--
 fs/ext4/ialloc.c  |  2 +-
 fs/ext4/inode.c   | 25 +--
 fs/ext4/ioctl.c   | 41 ++--
 fs/ext4/super.c   | 80 ++-
 fs/ext4/verity.c  |  5 +-
 include/uapi/linux/fs.h   |  1 +
 9 files changed, 148 insertions(+), 37 deletions(-)

-- 
2.25.1



[PATCH V3 3/8] fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS

2020-05-19 Thread ira . weiny
From: Ira Weiny 

Rename EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS in preparation for the new
tri-state mount option, which then introduces EXT4_MOUNT_DAX_NEVER.

Reviewed-by: Jan Kara 
Signed-off-by: Ira Weiny 

---
Changes:
New patch
---
 fs/ext4/ext4.h  |  4 ++--
 fs/ext4/inode.c |  2 +-
 fs/ext4/super.c | 12 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 91eb4381cae5..1a3daf2d18ef 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1123,9 +1123,9 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_MINIX_DF0x00080 /* Mimics the Minix statfs */
 #define EXT4_MOUNT_NOLOAD  0x00100 /* Don't use existing journal*/
 #ifdef CONFIG_FS_DAX
-#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
+#define EXT4_MOUNT_DAX_ALWAYS  0x00200 /* Direct Access */
 #else
-#define EXT4_MOUNT_DAX 0
+#define EXT4_MOUNT_DAX_ALWAYS  0
 #endif
 #define EXT4_MOUNT_DATA_FLAGS  0x00C00 /* Mode for data writes: */
 #define EXT4_MOUNT_JOURNAL_DATA0x00400 /* Write data to 
journal */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a4aae6acdcb..a10ff12194db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4400,7 +4400,7 @@ int ext4_get_inode_loc(struct inode *inode, struct 
ext4_iloc *iloc)
 
 static bool ext4_should_use_dax(struct inode *inode)
 {
-   if (!test_opt(inode->i_sb, DAX))
+   if (!test_opt(inode->i_sb, DAX_ALWAYS))
return false;
if (!S_ISREG(inode->i_mode))
return false;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bf5fcb477f66..7b99c44d0a91 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1775,7 +1775,7 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
-   {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
+   {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -3982,7 +3982,7 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
 "both data=journal and dioread_nolock");
goto failed_mount;
}
-   if (test_opt(sb, DAX)) {
+   if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
 "both data=journal and dax");
goto failed_mount;
@@ -4092,7 +4092,7 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
goto failed_mount;
}
 
-   if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+   if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
@@ -5412,7 +5412,7 @@ static int ext4_remount(struct super_block *sb, int 
*flags, char *data)
err = -EINVAL;
goto restore_opts;
}
-   if (test_opt(sb, DAX)) {
+   if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
 "both data=journal and dax");
err = -EINVAL;
@@ -5433,10 +5433,10 @@ static int ext4_remount(struct super_block *sb, int 
*flags, char *data)
goto restore_opts;
}
 
-   if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
+   if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX_ALWAYS) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
-   sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
+   sbi->s_mount_opt ^= EXT4_MOUNT_DAX_ALWAYS;
}
 
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
-- 
2.25.1



[PATCH V3 7/8] fs/ext4: Introduce DAX inode flag

2020-05-19 Thread ira . weiny
From: Ira Weiny 

Add a flag to preserve FS_XFLAG_DAX in the ext4 inode.

Set the flag to be user visible and changeable.  Set the flag to be
inherited.  Allow applications to change the flag at any time, except
when VERITY or ENCRYPT is set.

Disallow setting VERITY or ENCRYPT if DAX is set.

Finally, on regular files, flag the inode to not be cached to facilitate
changing S_DAX on the next creation of the inode.

Signed-off-by: Ira Weiny 

---
Change from V2:
Add in making verity and DAX exclusive.
'Squash' in making encryption and DAX exclusive.
Add in EXT4_INODE_DAX flag definition to be compatible with
ext4_[set|test]_inode_flag() bit operations
Use ext4_[set|test]_inode_flag() bit operations to be consistent
with other code.

Change from V0:
Add FS_DAX_FL to include/uapi/linux/fs.h
to be consistent
Move ext4_dax_dontcache() to ext4_ioctl_setflags()
This ensures that it is only set when the flags are going to be
set and not if there is an error
Also this sets don't cache in the FS_IOC_SETFLAGS case

Change from RFC:
use new d_mark_dontcache()
Allow caching if ALWAYS/NEVER is set
Rebased to latest Linus master
Change flag to unused 0x0100
update ext4_should_enable_dax()
---
 fs/ext4/ext4.h  | 14 ++
 fs/ext4/inode.c |  2 +-
 fs/ext4/ioctl.c | 34 +-
 fs/ext4/super.c |  3 +++
 fs/ext4/verity.c|  2 +-
 include/uapi/linux/fs.h |  1 +
 6 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6235440e4c39..467c30a789b6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -415,13 +415,16 @@ struct flex_groups {
 #define EXT4_VERITY_FL 0x0010 /* Verity protected inode */
 #define EXT4_EA_INODE_FL   0x0020 /* Inode used for large EA */
 /* 0x0040 was formerly EXT4_EOFBLOCKS_FL */
+
+#define EXT4_DAX_FL0x0100 /* Inode is DAX */
+
 #define EXT4_INLINE_DATA_FL0x1000 /* Inode has inline data. */
 #define EXT4_PROJINHERIT_FL0x2000 /* Create with parents 
projid */
 #define EXT4_CASEFOLD_FL   0x4000 /* Casefolded file */
 #define EXT4_RESERVED_FL   0x8000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE   0x705BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE0x604BC0FF /* User modifiable 
flags */
+#define EXT4_FL_USER_VISIBLE   0x715BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE0x614BC0FF /* User modifiable 
flags */
 
 /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
 #define EXT4_FL_XFLAG_VISIBLE  (EXT4_SYNC_FL | \
@@ -429,14 +432,16 @@ struct flex_groups {
 EXT4_APPEND_FL | \
 EXT4_NODUMP_FL | \
 EXT4_NOATIME_FL | \
-EXT4_PROJINHERIT_FL)
+EXT4_PROJINHERIT_FL | \
+EXT4_DAX_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
   EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
   EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
   EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
-  EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
+  EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
+  EXT4_DAX_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL 
|\
@@ -488,6 +493,7 @@ enum {
EXT4_INODE_VERITY   = 20,   /* Verity protected inode */
EXT4_INODE_EA_INODE = 21,   /* Inode used for large EA */
 /* 22 was formerly EXT4_INODE_EOFBLOCKS */
+   EXT4_INODE_DAX  = 24,   /* Inode is DAX */
EXT4_INODE_INLINE_DATA  = 28,   /* Data in inode. */
EXT4_INODE_PROJINHERIT  = 29,   /* Create with parents projid */
EXT4_INODE_RESERVED = 31,   /* reserved for ext4 lib */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 140b1930e2f4..ae61db8b8bae 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4418,7 +4418,7 @@ static bool ext4_should_enable_dax(struct inode *inode)
if (test_opt(inode->i_sb, DAX_ALWAYS))
return true;
 
-   return false;
+   return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
 }
 
 void ext4_set_inode_flags(struct inode *inode, bool init)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 

Re: general protection fault in kobject_get (2)

2020-05-19 Thread Greg KH
On Tue, May 19, 2020 at 09:53:16PM -0700, syzbot wrote:
> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:d00f26b6 Merge git://git.kernel.org/pub/scm/linux/kernel/g..
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=1316343c10
> kernel config:  https://syzkaller.appspot.com/x/.config?x=26d0bd769afe1a2c
> dashboard link: https://syzkaller.appspot.com/bug?extid=407fd358a932bbf639c6
> compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
> 
> Unfortunately, I don't have any reproducer for this crash yet.
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+407fd358a932bbf63...@syzkaller.appspotmail.com
> 
> general protection fault, probably for non-canonical address 
> 0xdc13:  [#1] PREEMPT SMP KASAN
> KASAN: null-ptr-deref in range [0x0098-0x009f]
> CPU: 1 PID: 16682 Comm: syz-executor.3 Not tainted 5.7.0-rc4-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
> Google 01/01/2011
> RIP: 0010:kobject_get+0x30/0x150 lib/kobject.c:640
> Code: 53 e8 d4 7e c6 fd 4d 85 e4 0f 84 a2 00 00 00 e8 c6 7e c6 fd 49 8d 7c 24 
> 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48 89 fa 
> 83 e2 07 38 d0 7f 08 84 c0 0f 85 e7 00 00 00
> RSP: 0018:c9000772f240 EFLAGS: 00010203
> RAX: dc00 RBX: 85acfca0 RCX: c9000fc67000
> RDX: 0013 RSI: 83acadfa RDI: 009c
> RBP: 0060 R08: 8880a8dfa4c0 R09: ed100a03f403
> R10: 8880501fa017 R11: ed100a03f402 R12: 0060
> R13: c9000772f3c0 R14: 88805d1ec4e8 R15: 88805d1ec580
> FS:  7f1ebed26700() GS:8880ae70() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 004d88f0 CR3: a86c4000 CR4: 001406e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  get_device+0x20/0x30 drivers/base/core.c:2620
>  __ib_get_client_nl_info+0x1d4/0x2a0 drivers/infiniband/core/device.c:1863
>  ib_get_client_nl_info+0x30/0x180 drivers/infiniband/core/device.c:1883
>  nldev_get_chardev+0x52b/0xa40 drivers/infiniband/core/nldev.c:1625
>  rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline]
>  rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
>  rdma_nl_rcv+0x586/0x900 drivers/infiniband/core/netlink.c:259
>  netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
>  netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
>  netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
>  sock_sendmsg_nosec net/socket.c:652 [inline]
>  sock_sendmsg+0xcf/0x120 net/socket.c:672
>  sys_sendmsg+0x6e6/0x810 net/socket.c:2352
>  ___sys_sendmsg+0x100/0x170 net/socket.c:2406
>  __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439
>  do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
>  entry_SYSCALL_64_after_hwframe+0x49/0xb3
> RIP: 0033:0x45c829
> Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 
> 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 
> 83 db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7f1ebed25c78 EFLAGS: 0246 ORIG_RAX: 002e
> RAX: ffda RBX: 004ff720 RCX: 0045c829
> RDX:  RSI: 2200 RDI: 0003
> RBP: 0078bf00 R08:  R09: 
> R10:  R11: 0246 R12: 
> R13: 09ad R14: 004d5f10 R15: 7f1ebed266d4
> Modules linked in:
> ---[ end trace 239938a6c4c3c99f ]---
> RIP: 0010:kobject_get+0x30/0x150 lib/kobject.c:640
> Code: 53 e8 d4 7e c6 fd 4d 85 e4 0f 84 a2 00 00 00 e8 c6 7e c6 fd 49 8d 7c 24 
> 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48 89 fa 
> 83 e2 07 38 d0 7f 08 84 c0 0f 85 e7 00 00 00
> RSP: 0018:c9000772f240 EFLAGS: 00010203
> RAX: dc00 RBX: 85acfca0 RCX: c9000fc67000
> RDX: 0013 RSI: 83acadfa RDI: 009c
> RBP: 0060 R08: 8880a8dfa4c0 R09: ed100a03f403
> R10: 8880501fa017 R11: ed100a03f402 R12: 0060
> R13: c9000772f3c0 R14: 88805d1ec4e8 R15: 88805d1ec580
> FS:  7f1ebed26700() GS:8880ae70() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 0073fad4 CR3: a86c4000 CR4: 001406e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400

Looks like an IB/rdma issue, poke those developers please :)


Re: [RFC PATCH 0/8] Qualcomm Cloud AI 100 driver

2020-05-19 Thread Greg Kroah-Hartman
On Tue, May 19, 2020 at 10:11:35PM -0700, Bjorn Andersson wrote:
> On Tue 19 May 21:59 PDT 2020, Greg Kroah-Hartman wrote:
> 
> > On Tue, May 19, 2020 at 10:41:15PM +0200, Daniel Vetter wrote:
> > > > Ok, that's a decision you are going to have to push upward on, as we
> > > > really can't take this without a working, open, userspace.
> > > 
> > > Uh wut.
> > > 
> > > So the merge criteria for drivers/accel (atm still drivers/misc but I
> > > thought that was interim until more drivers showed up) isn't actually
> > > "totally-not-a-gpu accel driver without open source userspace".
> > > 
> > > Instead it's "totally-not-a-gpu accel driver without open source
> > > userspace" _and_ you have to be best buddies with Greg. Or at least
> > > not be on the naughty company list. Since for habanalabs all you
> > > wanted is a few test cases to exercise the ioctls. Not the entire
> > > userspace.
> > 
> > Habanalabs now has their full library opensourced that their tools use
> > directly, so that's not an argument anymore.
> > 
> > My primary point here is the copyright owner of this code, because of
> > that, I'm not going to objet to allowing this to be merged without open
> > userspace code.
> > 
> 
> So because it's copyright Linux Foundation you are going to accept it
> without user space, after all?

Huh, no, the exact opposite, sorry, drop the "not" in that above
sentence.  My bad.

greg k-h


Re: [PATCH v1 2/6] bus: mhi: core: Mark device inactive soon after host issues a shutdown

2020-05-19 Thread kbuild test robot
Hi Bhaumik,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200519]
[cannot apply to linus/master v5.7-rc6 v5.7-rc5 v5.7-rc4 v5.7-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Bhaumik-Bhatt/Bug-fixes-and-bootup-and-shutdown-improvements/20200520-083400
base:fb57b1fabcb28f358901b2df90abd2b48abc1ca8
config: riscv-allyesconfig (attached as .config)
compiler: riscv64-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>, old ones prefixed by <<):

drivers/bus/mhi/core/main.c: In function 'mhi_intvec_threaded_handler':
>> drivers/bus/mhi/core/main.c:397:8: error: implicit declaration of function 
>> 'mhi_is_active' [-Werror=implicit-function-declaration]
397 |   if (!mhi_is_active(mhi_cntrl)) {
|^
cc1: some warnings being treated as errors

vim +/mhi_is_active +397 drivers/bus/mhi/core/main.c

   371  
   372  irqreturn_t mhi_intvec_threaded_handler(int irq_number, void *priv)
   373  {
   374  struct mhi_controller *mhi_cntrl = priv;
   375  struct device *dev = _cntrl->mhi_dev->dev;
   376  enum mhi_state state = MHI_STATE_MAX;
   377  enum mhi_pm_state pm_state = 0;
   378  enum mhi_ee_type ee = 0;
   379  bool handle_rddm = false;
   380  
   381  write_lock_irq(_cntrl->pm_lock);
   382  if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
   383  write_unlock_irq(_cntrl->pm_lock);
   384  goto exit_intvec;
   385  }
   386  
   387  state = mhi_get_mhi_state(mhi_cntrl);
   388  ee = mhi_cntrl->ee;
   389  mhi_cntrl->ee = mhi_get_exec_env(mhi_cntrl);
   390  dev_dbg(dev, "local ee:%s device ee:%s dev_state:%s\n",
   391  TO_MHI_EXEC_STR(mhi_cntrl->ee), TO_MHI_EXEC_STR(ee),
   392  TO_MHI_STATE_STR(state));
   393  
   394   /* If device supports RDDM don't bother processing SYS error */
   395  if (mhi_cntrl->rddm_image) {
   396  /* host may be performing a device power down already */
 > 397  if (!mhi_is_active(mhi_cntrl)) {
   398  write_unlock_irq(_cntrl->pm_lock);
   399  goto exit_intvec;
   400  }
   401  
   402  if (mhi_cntrl->ee == MHI_EE_RDDM && mhi_cntrl->ee != 
ee) {
   403  /* prevent clients from queueing any more 
packets */
   404  pm_state = mhi_tryset_pm_state(mhi_cntrl,
   405 
MHI_PM_SYS_ERR_DETECT);
   406  if (pm_state == MHI_PM_SYS_ERR_DETECT)
   407  handle_rddm = true;
   408  }
   409  
   410  write_unlock_irq(&mhi_cntrl->pm_lock);
   411  
   412  if (handle_rddm) {
   413  dev_err(dev, "RDDM event occurred!\n");
   414  mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_EE_RDDM);
   415  wake_up_all(&mhi_cntrl->state_event);
   416  }
   417  goto exit_intvec;
   418  }
   419  
   420  if (state == MHI_STATE_SYS_ERR) {
   421  dev_dbg(dev, "System error detected\n");
   422  pm_state = mhi_tryset_pm_state(mhi_cntrl,
   423 MHI_PM_SYS_ERR_DETECT);
   424  }
   425  
   426  write_unlock_irq(&mhi_cntrl->pm_lock);
   427  
   428  if (pm_state == MHI_PM_SYS_ERR_DETECT) {
   429  wake_up_all(&mhi_cntrl->state_event);
   430  
   431  /* For fatal errors, we let controller decide next step 
*/
   432  if (MHI_IN_PBL(ee))
   433  mhi_cntrl->status_cb(mhi_cntrl, 
MHI_CB_FATAL_ERROR);
   434  else
   435  mhi_pm_sys_err_handler(mhi_cntrl);
   436  }
   437  
   438  exit_intvec:
   439  
   440  return IRQ_HANDLED;
   441  }
   442  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


[tip:x86/urgent] BUILD SUCCESS d7110a26e5905ec2fe3fc88bc6a538901accb72b

2020-05-19 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git  
x86/urgent
branch HEAD: d7110a26e5905ec2fe3fc88bc6a538901accb72b  x86/mmiotrace: Use 
cpumask_available() for cpumask_var_t variables

elapsed time: 486m

configs tested: 98
configs skipped: 74

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
sparcallyesconfig
mips allyesconfig
m68k allyesconfig
i386  allnoconfig
i386defconfig
i386  debian-10.3
i386 allyesconfig
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
xtensa  defconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips  allnoconfig
mips allmodconfig
pariscallnoconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a006-20200519
i386 randconfig-a005-20200519
i386 randconfig-a001-20200519
i386 randconfig-a003-20200519
i386 randconfig-a004-20200519
i386 randconfig-a002-20200519
x86_64   randconfig-a003-20200519
x86_64   randconfig-a005-20200519
x86_64   randconfig-a004-20200519
x86_64   randconfig-a006-20200519
x86_64   randconfig-a002-20200519
x86_64   randconfig-a001-20200519
i386 randconfig-a012-20200519
i386 randconfig-a014-20200519
i386 randconfig-a016-20200519
i386 randconfig-a011-20200519
i386 randconfig-a015-20200519
i386 randconfig-a013-20200519
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconfig
s390  allnoconfig
s390 allmodconfig
s390defconfig
x86_64  defconfig
sparc   defconfig
sparc64 defconfig
sparc64   allnoconfig
sparc64  allyesconfig
sparc64  allmodconfig
um   allmodconfig
umallnoconfig
um   allyesconfig
um  defconfig
x86_64   rhel
x86_64   rhel-7.6
x86_64rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64lkp
x86_64  fedora-25
x86_64  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[tip:perf/core] BUILD SUCCESS c50c75e9b87946499a62bffc021e95c87a1d57cd

2020-05-19 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git  
perf/core
branch HEAD: c50c75e9b87946499a62bffc021e95c87a1d57cd  perf/core: Replace 
zero-length array with flexible-array

elapsed time: 486m

configs tested: 98
configs skipped: 1

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
sparcallyesconfig
mips allyesconfig
m68k allyesconfig
i386  allnoconfig
i386defconfig
i386  debian-10.3
i386 allyesconfig
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
xtensa  defconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips  allnoconfig
mips allmodconfig
pariscallnoconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a006-20200519
i386 randconfig-a005-20200519
i386 randconfig-a001-20200519
i386 randconfig-a003-20200519
i386 randconfig-a004-20200519
i386 randconfig-a002-20200519
x86_64   randconfig-a003-20200519
x86_64   randconfig-a005-20200519
x86_64   randconfig-a004-20200519
x86_64   randconfig-a006-20200519
x86_64   randconfig-a002-20200519
x86_64   randconfig-a001-20200519
i386 randconfig-a012-20200519
i386 randconfig-a014-20200519
i386 randconfig-a016-20200519
i386 randconfig-a011-20200519
i386 randconfig-a015-20200519
i386 randconfig-a013-20200519
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconfig
s390  allnoconfig
s390 allmodconfig
s390defconfig
x86_64  defconfig
sparc   defconfig
sparc64 defconfig
sparc64   allnoconfig
sparc64  allyesconfig
sparc64  allmodconfig
um   allmodconfig
umallnoconfig
um   allyesconfig
um  defconfig
x86_64   rhel
x86_64   rhel-7.6
x86_64rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64lkp
x86_64  fedora-25
x86_64  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[tip:locking/core] BUILD SUCCESS db78538c75e49c09b002a2cd96a19ae0c39be771

2020-05-19 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git  
locking/core
branch HEAD: db78538c75e49c09b002a2cd96a19ae0c39be771  locking/lockdep: Replace 
zero-length array with flexible-array

elapsed time: 486m

configs tested: 98
configs skipped: 1

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
sparcallyesconfig
mips allyesconfig
m68k allyesconfig
i386  allnoconfig
i386 allyesconfig
i386defconfig
i386  debian-10.3
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
xtensa  defconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips  allnoconfig
mips allmodconfig
pariscallnoconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a006-20200519
i386 randconfig-a005-20200519
i386 randconfig-a001-20200519
i386 randconfig-a003-20200519
i386 randconfig-a004-20200519
i386 randconfig-a002-20200519
x86_64   randconfig-a003-20200519
x86_64   randconfig-a005-20200519
x86_64   randconfig-a004-20200519
x86_64   randconfig-a006-20200519
x86_64   randconfig-a002-20200519
x86_64   randconfig-a001-20200519
i386 randconfig-a012-20200519
i386 randconfig-a014-20200519
i386 randconfig-a016-20200519
i386 randconfig-a011-20200519
i386 randconfig-a015-20200519
i386 randconfig-a013-20200519
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconfig
s390  allnoconfig
s390 allmodconfig
s390defconfig
x86_64  defconfig
sparc   defconfig
sparc64 defconfig
sparc64   allnoconfig
sparc64  allyesconfig
sparc64  allmodconfig
umallnoconfig
um  defconfig
um   allmodconfig
um   allyesconfig
x86_64   rhel
x86_64   rhel-7.6
x86_64rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64lkp
x86_64  fedora-25
x86_64  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[tip:sched/core] BUILD SUCCESS d505b8af58912ae1e1a211fabc9995b19bd40828

2020-05-19 Thread kbuild test robot
   allnoconfig
i386 randconfig-a006-20200519
i386 randconfig-a005-20200519
i386 randconfig-a001-20200519
i386 randconfig-a003-20200519
i386 randconfig-a004-20200519
i386 randconfig-a002-20200519
x86_64   randconfig-a003-20200519
x86_64   randconfig-a005-20200519
x86_64   randconfig-a004-20200519
x86_64   randconfig-a006-20200519
x86_64   randconfig-a002-20200519
x86_64   randconfig-a001-20200519
i386 randconfig-a012-20200519
i386 randconfig-a014-20200519
i386 randconfig-a016-20200519
i386 randconfig-a011-20200519
i386 randconfig-a015-20200519
i386 randconfig-a013-20200519
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconfig
s390  allnoconfig
s390 allmodconfig
s390defconfig
x86_64  defconfig
sparc   defconfig
sparc64 defconfig
sparc64   allnoconfig
sparc64  allyesconfig
sparc64  allmodconfig
um   allmodconfig
umallnoconfig
um   allyesconfig
um  defconfig
x86_64   rhel
x86_64   rhel-7.6
x86_64rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64lkp
x86_64  fedora-25
x86_64  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[tip:sched/urgent] BUILD SUCCESS 39f23ce07b9355d05a64ae303ce20d1c4b92b957

2020-05-19 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git  
sched/urgent
branch HEAD: 39f23ce07b9355d05a64ae303ce20d1c4b92b957  sched/fair: Fix 
unthrottle_cfs_rq() for leaf_cfs_rq list

elapsed time: 486m

configs tested: 98
configs skipped: 1

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
sparcallyesconfig
mips allyesconfig
m68k allyesconfig
i386  allnoconfig
i386defconfig
i386  debian-10.3
i386 allyesconfig
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
xtensa  defconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips  allnoconfig
mips allmodconfig
pariscallnoconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a006-20200519
i386 randconfig-a005-20200519
i386 randconfig-a001-20200519
i386 randconfig-a003-20200519
i386 randconfig-a004-20200519
i386 randconfig-a002-20200519
x86_64   randconfig-a003-20200519
x86_64   randconfig-a005-20200519
x86_64   randconfig-a004-20200519
x86_64   randconfig-a006-20200519
x86_64   randconfig-a002-20200519
x86_64   randconfig-a001-20200519
i386 randconfig-a012-20200519
i386 randconfig-a014-20200519
i386 randconfig-a016-20200519
i386 randconfig-a011-20200519
i386 randconfig-a015-20200519
i386 randconfig-a013-20200519
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconfig
s390  allnoconfig
s390 allmodconfig
s390defconfig
x86_64  defconfig
sparc   defconfig
sparc64 defconfig
sparc64   allnoconfig
sparc64  allyesconfig
sparc64  allmodconfig
um   allmodconfig
umallnoconfig
um   allyesconfig
um  defconfig
x86_64   rhel
x86_64   rhel-7.6
x86_64rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64lkp
x86_64  fedora-25
x86_64  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [PATCH v2] /dev/mem: Revoke mappings when a driver claims the region

2020-05-19 Thread Greg KH
On Tue, May 19, 2020 at 11:27:02AM -0700, Dan Williams wrote:
> On Tue, May 19, 2020 at 5:11 AM Greg KH  wrote:
> >
> > On Tue, May 19, 2020 at 12:03:06AM -0700, Dan Williams wrote:
> > > Close the hole of holding a mapping over kernel driver takeover event of
> > > a given address range.
> > >
> > > Commit 90a545e98126 ("restrict /dev/mem to idle io memory ranges")
> > > introduced CONFIG_IO_STRICT_DEVMEM with the goal of protecting the
> > > kernel against scenarios where a /dev/mem user tramples memory that a
> > > kernel driver owns. However, this protection only prevents *new* read(),
> > > write() and mmap() requests. Established mappings prior to the driver
> > > calling request_mem_region() are left alone.
> > >
> > > Especially with persistent memory, and the core kernel metadata that is
> > > stored there, there are plentiful scenarios for a /dev/mem user to
> > > violate the expectations of the driver and cause amplified damage.
> > >
> > > Teach request_mem_region() to find and shoot down active /dev/mem
> > > mappings that it believes it has successfully claimed for the exclusive
> > > use of the driver. Effectively a driver call to request_mem_region()
> > > becomes a hole-punch on the /dev/mem device.
> > >
> > > The typical usage of unmap_mapping_range() is part of
> > > truncate_pagecache() to punch a hole in a file, but in this case the
> > > implementation is only doing the "first half" of a hole punch. Namely it
> > > is just evacuating current established mappings of the "hole", and it
> > > relies on the fact that /dev/mem establishes mappings in terms of
> > > absolute physical address offsets. Once existing mmap users are
> > > invalidated they can attempt to re-establish the mapping, or attempt to
> > > continue issuing read(2) / write(2) to the invalidated extent, but they
> > > will then be subject to the CONFIG_IO_STRICT_DEVMEM checking that can
> > > block those subsequent accesses.
> > >
> > > Cc: Arnd Bergmann 
> > > Cc: Ingo Molnar 
> > > Cc: Kees Cook 
> > > Cc: Russell King 
> > > Cc: Andrew Morton 
> > > Cc: Greg Kroah-Hartman 
> > > Fixes: 90a545e98126 ("restrict /dev/mem to idle io memory ranges")
> > > Signed-off-by: Dan Williams 
> > > ---
> > > Changes since v1 [1]:
> > >
> > > - updated the changelog to describe the usage of unmap_mapping_range().
> > >   No other logic changes:
> > >
> > > [1]: 
> > > http://lore.kernel.org/r/158662721802.1893045.12301414116114602646.st...@dwillia2-desk3.amr.corp.intel.com
> > >
> > > Greg, Andrew,
> > >
> > > I have a regression test for this case now. This was found by an
> > > intermittent data corruption scenario on pmem from a test tool using
> > > /dev/mem.
> >
> > Ick, why are test tools messing around in /dev/mem :)
> 
> Yeah, I'm all for useful tools, just not at the expense of kernel integrity.
> 
> > Anyway, this seems sane to me, want me to take it through my tree?
> 
> Yes please, seems to belong with the driver core.

Ok, will wait for a v3 to handle the issue that was just found in
review.

thanks,

greg k-h


Re: [PATCH 09/15] device core: Add ability to handle multiple dma offsets

2020-05-19 Thread Greg Kroah-Hartman
On Tue, May 19, 2020 at 04:34:07PM -0400, Jim Quinlan wrote:
> diff --git a/include/linux/device.h b/include/linux/device.h
> index ac8e37cd716a..6cd916860b5f 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -493,6 +493,8 @@ struct dev_links_info {
>   * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller
>   *   DMA limit than the device itself supports.
>   * @dma_pfn_offset: offset of DMA memory range relatively of RAM
> + * @dma_map: Like dma_pfn_offset but used when there are multiple
> + *   pfn offsets for multiple dma-ranges.
>   * @dma_parms:   A low level driver may set these to teach IOMMU code 
> about
>   *   segment limitations.
>   * @dma_pools:   Dma pools (if dma'ble device).
> @@ -578,7 +580,12 @@ struct device {
>allocations such descriptors. */
>   u64 bus_dma_limit;  /* upstream dma constraint */
>   unsigned long   dma_pfn_offset;
> -
> +#ifdef CONFIG_DMA_PFN_OFFSET_MAP
> + const void *dma_offset_map; /* Like dma_pfn_offset, but for
> +  * the unlikely case of multiple
> +  * offsets. If non-null, dma_pfn_offset
> +  * will be 0. */
> +#endif
>   struct device_dma_parameters *dma_parms;
>  
>   struct list_headdma_pools;  /* dma pools (if dma'ble) */

I'll defer to Christoph here, but I thought we were trying to get rid of
stuff like this from struct device, not add new things to it for dma
apis.  And why is it a void *?

thanks,

greg k-h


Re: [PATCH v4 2/4] kasan: record and print the free track

2020-05-19 Thread Walter Wu
> On Wed, May 20, 2020 at 6:03 AM Walter Wu  wrote:
> >
> > > On Tue, May 19, 2020 at 4:25 AM Walter Wu  
> > > wrote:
> > > >
> > > > Move free track from slub alloc meta-data to slub free meta-data in
> > > > order to make struct kasan_free_meta size is 16 bytes. It is a good
> > > > size because it is the minimal redzone size and a good number of
> > > > alignment.
> > > >
> > > > For free track in generic KASAN, we do the modification in struct
> > > > kasan_alloc_meta and kasan_free_meta:
> > > > - remove free track from kasan_alloc_meta.
> > > > - add free track into kasan_free_meta.
> > > >
> > > > [1]https://bugzilla.kernel.org/show_bug.cgi?id=198437
> > > >
> > > > Signed-off-by: Walter Wu 
> > > > Suggested-by: Dmitry Vyukov 
> > > > Cc: Andrey Ryabinin 
> > > > Cc: Dmitry Vyukov 
> > > > Cc: Alexander Potapenko 
> > > > ---
> > > >  mm/kasan/common.c  | 22 ++
> > > >  mm/kasan/generic.c | 18 ++
> > > >  mm/kasan/kasan.h   |  7 +++
> > > >  mm/kasan/report.c  | 20 
> > > >  mm/kasan/tags.c| 37 +
> > > >  5 files changed, 64 insertions(+), 40 deletions(-)
> > > >
> > > > diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> > > > index 8bc618289bb1..47b53912f322 100644
> > > > --- a/mm/kasan/common.c
> > > > +++ b/mm/kasan/common.c
> > > > @@ -51,7 +51,7 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags)
> > > > return stack_depot_save(entries, nr_entries, flags);
> > > >  }
> > > >
> > > > -static inline void set_track(struct kasan_track *track, gfp_t flags)
> > > > +void kasan_set_track(struct kasan_track *track, gfp_t flags)
> > > >  {
> > > > track->pid = current->pid;
> > > > track->stack = kasan_save_stack(flags);
> > > > @@ -299,24 +299,6 @@ struct kasan_free_meta *get_free_info(struct 
> > > > kmem_cache *cache,
> > > > return (void *)object + cache->kasan_info.free_meta_offset;
> > > >  }
> > > >
> > > > -
> > > > -static void kasan_set_free_info(struct kmem_cache *cache,
> > > > -   void *object, u8 tag)
> > > > -{
> > > > -   struct kasan_alloc_meta *alloc_meta;
> > > > -   u8 idx = 0;
> > > > -
> > > > -   alloc_meta = get_alloc_info(cache, object);
> > > > -
> > > > -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
> > > > -   idx = alloc_meta->free_track_idx;
> > > > -   alloc_meta->free_pointer_tag[idx] = tag;
> > > > -   alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
> > > > -#endif
> > > > -
> > > > > -   set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
> > > > -}
> > > > -
> > > >  void kasan_poison_slab(struct page *page)
> > > >  {
> > > > unsigned long i;
> > > > @@ -492,7 +474,7 @@ static void *__kasan_kmalloc(struct kmem_cache 
> > > > *cache, const void *object,
> > > > KASAN_KMALLOC_REDZONE);
> > > >
> > > > if (cache->flags & SLAB_KASAN)
> > > > > -   set_track(&get_alloc_info(cache, object)->alloc_track, 
> > > > flags);
> > > > > +   kasan_set_track(&get_alloc_info(cache, 
> > > > object)->alloc_track, flags);
> > > >
> > > > return set_tag(object, tag);
> > > >  }
> > > > diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
> > > > index 3372bdcaf92a..763d8a13e0ac 100644
> > > > --- a/mm/kasan/generic.c
> > > > +++ b/mm/kasan/generic.c
> > > > @@ -344,3 +344,21 @@ void kasan_record_aux_stack(void *addr)
> > > > alloc_info->aux_stack[1] = alloc_info->aux_stack[0];
> > > > alloc_info->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
> > > >  }
> > > > +
> > > > +void kasan_set_free_info(struct kmem_cache *cache,
> > > > +   void *object, u8 tag)
> > > > +{
> > > > +   struct kasan_free_meta *free_meta;
> > > > +
> > > > +   free_meta = get_free_info(cache, object);
> > > > > +   kasan_set_track(&free_meta->free_track, GFP_NOWAIT);
> > > > +}
> > > > +
> > > > +struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
> > > > +   void *object, u8 tag)
> > > > +{
> > > > +   struct kasan_free_meta *free_meta;
> > > > +
> > > > +   free_meta = get_free_info(cache, object);
> > > > > +   return &free_meta->free_track;
> > > > +}
> > > > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> > > > index a7391bc83070..ad897ec36545 100644
> > > > --- a/mm/kasan/kasan.h
> > > > +++ b/mm/kasan/kasan.h
> > > > @@ -127,6 +127,9 @@ struct kasan_free_meta {
> > > >  * Otherwise it might be used for the allocator freelist.
> > > >  */
> > > > struct qlist_node quarantine_link;
> > > > +#ifdef CONFIG_KASAN_GENERIC
> > > > +   struct kasan_track free_track;
> > > > +#endif
> > > >  };
> > > >
> > > >  struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
> > > > @@ -168,6 +171,10 @@ void kasan_report_invalid_free(void *object, 
> > > > unsigned long ip);
> > > >  struct page *kasan_addr_to_page(const void *addr);
> > > >
> > > >  

Re: [PATCH 5.6 000/192] 5.6.14-rc2 review

2020-05-19 Thread Greg Kroah-Hartman
On Tue, May 19, 2020 at 01:37:20PM -0600, shuah wrote:
> On 5/18/20 11:47 PM, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 5.6.14 release.
> > There are 192 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Thu, 21 May 2020 05:45:41 +0000.
> > Anything received after that time might be too late.
> > 
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.6.14-rc2.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-5.6.y
> > and the diffstat can be found below.
> > 
> > thanks,
> > 
> > greg k-h
> > 
> 
> Compiled and booted on my test system. No dmesg regressions.

Thanks for testing all of these and letting me know.

greg k-h


Re: [PATCH 5.6 000/192] 5.6.14-rc2 review

2020-05-19 Thread Greg Kroah-Hartman
On Tue, May 19, 2020 at 09:30:22AM -0700, Guenter Roeck wrote:
> On 5/18/20 10:47 PM, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 5.6.14 release.
> > There are 192 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Thu, 21 May 2020 05:45:41 +0000.
> > Anything received after that time might be too late.
> > 
> 
> Build results:
>   total: 155 pass: 155 fail: 0
> Qemu test results:
>   total: 431 pass: 431 fail: 0

Great, thanks for testing all of these and letting me know.

greg k-h


Re: [PATCH 06/12] xen-blkfront: add callbacks for PM suspend and hibernation

2020-05-19 Thread kbuild test robot
Hi Anchal,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.7-rc6]
[cannot apply to xen-tip/linux-next tip/irq/core tip/auto-latest next-20200519]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Anchal-Agarwal/Fix-PM-hibernation-in-Xen-guests/20200520-073211
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
03fb3acae4be8a6b680ffedb220a8b6c07260b40
config: x86_64-rhel (attached as .config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All error/warnings (new ones prefixed by >>, old ones prefixed by <<):

drivers/block/xen-blkfront.c: In function 'blkfront_freeze':
>> drivers/block/xen-blkfront.c:2699:30: warning: missing terminating " 
>> character
xenbus_dev_error(dev, err, "Hibernation Failed.
^
>> drivers/block/xen-blkfront.c:2699:30: error: missing terminating " character
xenbus_dev_error(dev, err, "Hibernation Failed.
^~~~
>> drivers/block/xen-blkfront.c:2700:4: error: 'The' undeclared (first use in 
>> this function)
The ring is still busy");
^~~
drivers/block/xen-blkfront.c:2700:4: note: each undeclared identifier is 
reported only once for each function it appears in
>> drivers/block/xen-blkfront.c:2700:8: error: expected ')' before 'ring'
The ring is still busy");
^~~~
drivers/block/xen-blkfront.c:2700:26: warning: missing terminating " character
The ring is still busy");
^
drivers/block/xen-blkfront.c:2700:26: error: missing terminating " character
The ring is still busy");
^~~
>> drivers/block/xen-blkfront.c:2704:2: error: expected ';' before '}' token
}
^

vim +2699 drivers/block/xen-blkfront.c

  2672  
  2673  static int blkfront_freeze(struct xenbus_device *dev)
  2674  {
  2675  unsigned int i;
  2676  struct blkfront_info *info = dev_get_drvdata(&dev->dev);
  2677  struct blkfront_ring_info *rinfo;
  2678  /* This would be reasonable timeout as used in 
xenbus_dev_shutdown() */
  2679  unsigned int timeout = 5 * HZ;
  2680  unsigned long flags;
  2681  int err = 0;
  2682  
  2683  info->connected = BLKIF_STATE_FREEZING;
  2684  
  2685  blk_mq_freeze_queue(info->rq);
  2686  blk_mq_quiesce_queue(info->rq);
  2687  
  2688  for_each_rinfo(info, rinfo, i) {
  2689  /* No more gnttab callback work. */
  2690  gnttab_cancel_free_callback(&rinfo->callback);
  2691  /* Flush gnttab callback work. Must be done with no locks 
held. */
  2692  flush_work(&rinfo->work);
  2693  }
  2694  
  2695  for_each_rinfo(info, rinfo, i) {
  2696  spin_lock_irqsave(&rinfo->ring_lock, flags);
  2697  if (RING_FULL(&rinfo->ring)
  2698  || RING_HAS_UNCONSUMED_RESPONSES(&rinfo->ring)) {
> 2699  xenbus_dev_error(dev, err, "Hibernation Failed.
> 2700  The ring is still busy");
  2701  info->connected = BLKIF_STATE_CONNECTED;
  2702  spin_unlock_irqrestore(&rinfo->ring_lock, flags);
  2703  return -EBUSY;
> 2704  }
  2705  spin_unlock_irqrestore(&rinfo->ring_lock, flags);
  2706  }
  2707  /* Kick the backend to disconnect */
  2708  xenbus_switch_state(dev, XenbusStateClosing);
  2709  
  2710  /*
  2711   * We don't want to move forward before the frontend is 
disconnected
  2712   * from the backend cleanly.
  2713   */
  2714  timeout = 
wait_for_completion_timeout(&info->wait_backend_disconnected,
  2715timeout);
  2716  if (!timeout) {
  2717  err = -EBUSY;
  2718  xenbus_dev_error(dev, err, "Freezing timed out;"
  2719   "the device may become inconsistent 
state");
  2720  }
  2721  
  2722  return err;
  2723  }
  2724  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH v2 12/15] ath10k: use new module_firmware_crashed()

2020-05-19 Thread Emmanuel Grumbach
Hi all,



Since I have been involved quite a bit in the firmware debugging
features in iwlwifi, I think I can give a few insights here.

But before this, we need to understand that there are several sources of issues:
1) the firmware may crash but the bus is still alive, you can still
use the bus to get the crash data
2) the bus is dead, when that happens, the firmware might even be in a
good condition, but since the bus is dead, you stop getting any
information about the firmware, and then, at some point, you get to
the conclusion that the firmware is dead. You can't get the crash data
that resides on the other side of the bus (you may have gathered data
in the DRAM directly, but that's a different thing), and you don't
have much recovery to do besides re-starting the PCI enumeration.

At Intel, we have seen both unfortunately. The bus issues are the ones
that are trickier obviously. Trickier to detect (because you just get
garbage from any request you issue on the bus), and trickier to
handle. One can argue that the kernel should *not* handle those and
leave this in userspace's hands. I guess it all depends on what component
you ship to your customer and what your customer asks from you  :).



>
> Hi Luis,
>
> On Tue, May 19, 2020 at 7:02 AM Luis Chamberlain  wrote:
> > On Mon, May 18, 2020 at 06:23:33PM -0700, Brian Norris wrote:
> > > On Sat, May 16, 2020 at 6:51 AM Johannes Berg  
> > > wrote:
> > > > In addition, look what we have in iwl_trans_pcie_removal_wk(). If we
> > > > detect that the device is really wedged enough that the only way we can
> > > > still try to recover is by completely unbinding the driver from it, then
> > > > we give userspace a uevent for that. I don't remember exactly how and
> > > > where that gets used (ChromeOS) though, but it'd be nice to have that
> > > > sort of thing as part of the infrastructure, in a sort of two-level
> > > > notification?
> > >
> > > 
> > > We use this on certain devices where we know the underlying hardware
> > > has design issues that may lead to device failure
> >
> > Ah, after reading below I see you meant for iwlwifi.
>
> Sorry, I was replying to Johannes, who I believe had his "we"="Intel"
> hat (as iwlwifi maintainer) on, and was pointing at
> iwl_trans_pcie_removal_wk().
>

This pcie_removal thing is for the bus dead thing. My 2) above.

> > If userspace can indeed grow to support this, that would be fantastic.
>
> Well, Chrome OS tailors its user space a bit more to the hardware (and
> kernel/drivers in use) than the average distro might. We already do
> this (for some values of "this") today. Is that "fantastic" to you? :D

I guess it can be fantastic if other vendors also suffer from this. Or
maybe that could be done as part of the PCI bus driver inside the
kernel?

>
> > > -- then when we see
> > > this sort of unrecoverable "firmware-death", we remove the
> > > device[*]+driver, force-reset the PCI device (SBR), and try to
> > > reload/reattach the driver. This all happens by way of a udev rule.
> >
> > So you've sprinkled your own udev event here as part of your kernel delta?
>
> No kernel delta -- the event is there already:
> iwl_trans_pcie_removal_wk()
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/wireless/intel/iwlwifi/pcie/trans.c?h=v5.6#n2027
>
> And you can see our udev rules and scripts, in all their ugly details
> here, if you really care:
> https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/master/net-wireless/iwlwifi_rescan/files/
>
> > > We
> > > also log this sort of stuff (and metrics around it) for bug reports
> > > and health statistics, since we really hope to not see this happen
> > > often.
> >
> > Assuming perfection is ideal but silly. So, what infrastructure do you
> > use for this sort of issue?
>
> We don't yet log firmware crashes generally, but for all our current
> crash reports (including WARN()), they go through this:
> https://chromium.googlesource.com/chromiumos/platform2/+/master/crash-reporter/README.md
>
> For example, look for "cut here" in:
> https://chromium.googlesource.com/chromiumos/platform2/+/master/crash-reporter/anomaly_detector.cc
>
> For other specific metrics (like counting "EVENT=INACCESSIBLE"), we
> use the Chrome UMA system:
> https://chromium.googlesource.com/chromiumos/platform2/+/master/metrics/README.md
>
> I don't imagine the "infrastructure" side of any of that would be
> useful to you, but maybe the client-side gathering can at least show
> you what we do.
>
> > > [*] "We" (user space) don't actually do this...it happens via the
> > > 'remove_when_gone' module parameter abomination found in iwlwifi.
> >
> > BTW is this likely a place on iwlwifi where the firmware likely crashed?
>
> iwl_trans_pcie_removal_wk() is triggered because HW accesses timed out
> in a way that is likely due to a dead PCIe endpoint. It's not directly
> a firmware crash, although there may be firmware crashes reported
> around the same time.


Re: [PATCH] perf evsel: Get group fd from CPU0 for system wide event

2020-05-19 Thread Jin, Yao

Hi Jiri,

On 5/18/2020 11:28 AM, Jin, Yao wrote:

Hi Jiri,

On 5/15/2020 4:33 PM, Jiri Olsa wrote:

On Fri, May 15, 2020 at 02:04:57PM +0800, Jin, Yao wrote:

SNIP


I think I get the root cause. That should be a serious bug in get_group_fd, 
access violation!

For a group mixed with system-wide event and per-core event and the group
leader is system-wide event, access violation will happen.

perf_evsel__alloc_fd allocates one FD member for system-wide event (only 
FD(evsel, 0, 0) is valid).

But for per core event, perf_evsel__alloc_fd allocates N FD members (N =
ncpus). For example, if ncpus is 8, FD(evsel, 0, 0) to FD(evsel, 7, 0) are
valid.

get_group_fd(struct evsel *evsel, int cpu, int thread)
{
 struct evsel *leader = evsel->leader;

 fd = FD(leader, cpu, thread);    /* access violation may happen here */
}

If leader is system-wide event, only the FD(leader, 0, 0) is valid.

When get_group_fd accesses FD(leader, 1, 0), access violation happens.

My fix is:

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 28683b0eb738..db05b8a1e1a8 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1440,6 +1440,9 @@ static int get_group_fd(struct evsel *evsel, int cpu, int 
thread)
 if (evsel__is_group_leader(evsel))
 return -1;

+   if (leader->core.system_wide && !evsel->core.system_wide)
+   return -2;


so this effectively stops grouping system_wide events with others,
and I think it's correct, how about events that differ in cpumask?



My understanding for the events that differ in cpumask is: if the leader's cpumask is not fully 
matched with the evsel's cpumask, then we stop the grouping. Is this understanding correct?


I have done some tests and get some conclusions:

1. If the group is mixed with core and uncore events, the system_wide checking 
can distinguish them.

2. If the group is mixed with core and uncore events and "-a" is specified, the system_wide for core 
event is also false. So system_wide checking can distinguish them too


3. In my test, the issue only occurs when we collect the metric which is mixed with uncore event and 
core event, so maybe checking the system_wide is OK.



should we perhaps ensure this before we call open? go through all
groups and check they are on the same cpus?



The issue doesn't happen most of the time (only for a metric consisting of an uncore event and a 
core event), so falling back to stop grouping if the open call fails looks reasonable.


Thanks
Jin Yao


thanks,
jirka



+
 /*
  * Leader must be already processed/open,
  * if not it's a bug.
@@ -1665,6 +1668,11 @@ static int evsel__open_cpu(struct evsel *evsel, struct 
perf_cpu_map *cpus,
 pid = perf_thread_map__pid(threads, thread);

 group_fd = get_group_fd(evsel, cpu, thread);
+   if (group_fd == -2) {
+   errno = EINVAL;
+   err = -EINVAL;
+   goto out_close;
+   }
  retry_open:
 test_attr__ready();

It enables the perf_evlist__reset_weak_group. And in the second_pass (in
__run_perf_stat), the events will be opened successfully.

I have tested OK for this fix on cascadelakex.

Thanks
Jin Yao





Is this fix OK?

Another thing is, do you think we need to rename "evsel->core.system_wide" to 
"evsel->core.has_cpumask"?


The "system_wide" name may be misleading.

evsel->core.system_wide = pmu ? pmu->is_uncore : false;

"pmu->is_uncore" is true if PMU has a "cpumask". But it's not just uncore PMU which has cpumask. 
Some other PMUs, e.g. cstate_pkg, also have cpumask. So for this case, "has_cpumask" should be better.


But I'm not sure if the change is OK for other case, e.g. PT, which also uses 
"evsel->core.system_wide".


Thanks
Jin Yao


Re: [PATCH v2] drm/exynos: Remove dev_err() on platform_get_irq() failure

2020-05-19 Thread Inki Dae
Hi Tamseel,

The same patch[1] has already been merged. So could you re-post this patch after rebasing 
it on top of the exynos-drm-next branch?
After the rebase, only the g2d part would still be valid.

Thanks,
Inki Dae

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos.git/commit/?h=exynos-drm-next=fdd79b0db1899f915f489e744a06846284fa3f1e

20. 5. 19. 오후 7:49에 Tamseel Shams 이(가) 쓴 글:
> platform_get_irq() will call dev_err() itself on failure,
> so there is no need for the driver to also do this.
> This is detected by coccinelle.
> 
> Also removing unnecessary curly braces around if () statement.
> 
> Signed-off-by: Tamseel Shams 
> ---
> Fixed review comment by j...@perches.com
> 
>  drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 +---
>  drivers/gpu/drm/exynos/exynos_drm_g2d.c | 1 -
>  drivers/gpu/drm/exynos/exynos_drm_rotator.c | 4 +---
>  drivers/gpu/drm/exynos/exynos_drm_scaler.c  | 4 +---
>  4 files changed, 3 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c 
> b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> index 902938d2568f..958e2c6a6702 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> @@ -1809,10 +1809,8 @@ static int exynos_dsi_probe(struct platform_device 
> *pdev)
>   }
>  
>   dsi->irq = platform_get_irq(pdev, 0);
> - if (dsi->irq < 0) {
> - dev_err(dev, "failed to request dsi irq resource\n");
> + if (dsi->irq < 0)
>   return dsi->irq;
> - }
>  
>   irq_set_status_flags(dsi->irq, IRQ_NOAUTOEN);
>   ret = devm_request_threaded_irq(dev, dsi->irq, NULL,
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c 
> b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> index fcee33a43aca..03be31427181 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
> @@ -1498,7 +1498,6 @@ static int g2d_probe(struct platform_device *pdev)
>  
>   g2d->irq = platform_get_irq(pdev, 0);
>   if (g2d->irq < 0) {
> - dev_err(dev, "failed to get irq\n");
>   ret = g2d->irq;
>   goto err_put_clk;
>   }
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c 
> b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
> index dafa87b82052..2d94afba031e 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
> @@ -293,10 +293,8 @@ static int rotator_probe(struct platform_device *pdev)
>   return PTR_ERR(rot->regs);
>  
>   irq = platform_get_irq(pdev, 0);
> - if (irq < 0) {
> - dev_err(dev, "failed to get irq\n");
> + if (irq < 0)
>   return irq;
> - }
>  
>   ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev),
>  rot);
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c 
> b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
> index 93c43c8d914e..ce1857138f89 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
> @@ -502,10 +502,8 @@ static int scaler_probe(struct platform_device *pdev)
>   return PTR_ERR(scaler->regs);
>  
>   irq = platform_get_irq(pdev, 0);
> - if (irq < 0) {
> - dev_err(dev, "failed to get irq\n");
> + if (irq < 0)
>   return irq;
> - }
>  
>   ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler,
>   IRQF_ONESHOT, "drm_scaler", scaler);
> 


Re: [RFC PATCH 0/8] Qualcomm Cloud AI 100 driver

2020-05-19 Thread Greg Kroah-Hartman
On Tue, May 19, 2020 at 12:26:01PM -0600, Jeffrey Hugo wrote:
> On 5/19/2020 12:12 PM, Greg Kroah-Hartman wrote:
> > > > Especially given the copyright owner of this code, that would be just
> > > > crazy and foolish to not have open userspace code as well.  Firmware
> > > > would also be wonderful as well, go poke your lawyers about derivative
> > > > work issues and the like for fun conversations :)
> > > 
> > > Those are the kind of conversations I try to avoid  :)
> > 
> > Sounds like you are going to now have to have them, have fun!
> 
> Honestly, I fail to see where you think there is a derivative work, so, I'm
> not really sure what discussions I need to revisit with our lawyers.

Given that we are not lawyers, why don't we leave those types of
discussions up to the lawyers, and not depend on people like me and you
for that?  :)

If your lawyers think that the code division is fine as-is, that's
great, I'd be glad to review it if they add their signed-off-by: on it
verifying that the api divide is approved by them.

thanks!

greg k-h


[PATCH v6 11/12] mmap locking API: convert mmap_sem API comments

2020-05-19 Thread Michel Lespinasse
Convert comments that reference old mmap_sem APIs to reference
corresponding new mmap locking APIs instead.

Signed-off-by: Michel Lespinasse 
---
 Documentation/vm/hmm.rst   |  6 +++---
 arch/alpha/mm/fault.c  |  2 +-
 arch/ia64/mm/fault.c   |  2 +-
 arch/m68k/mm/fault.c   |  2 +-
 arch/microblaze/mm/fault.c |  2 +-
 arch/mips/mm/fault.c   |  2 +-
 arch/nds32/mm/fault.c  |  2 +-
 arch/nios2/mm/fault.c  |  2 +-
 arch/openrisc/mm/fault.c   |  2 +-
 arch/parisc/mm/fault.c |  2 +-
 arch/riscv/mm/fault.c  |  2 +-
 arch/sh/mm/fault.c |  2 +-
 arch/sparc/mm/fault_32.c   |  2 +-
 arch/sparc/mm/fault_64.c   |  2 +-
 arch/xtensa/mm/fault.c |  2 +-
 drivers/android/binder_alloc.c |  4 ++--
 fs/hugetlbfs/inode.c   |  2 +-
 fs/userfaultfd.c   |  2 +-
 mm/filemap.c   |  2 +-
 mm/gup.c   | 12 ++--
 mm/huge_memory.c   |  4 ++--
 mm/khugepaged.c|  2 +-
 mm/ksm.c   |  2 +-
 mm/memory.c|  4 ++--
 mm/mempolicy.c |  2 +-
 mm/migrate.c   |  4 ++--
 mm/mmap.c  |  2 +-
 mm/oom_kill.c  |  8 
 net/ipv4/tcp.c |  2 +-
 29 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 4e3e9362afeb..046817505033 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -194,15 +194,15 @@ The usage pattern is::
 
  again:
   range.notifier_seq = mmu_interval_read_begin(_sub);
-  down_read(>mmap_sem);
+  mmap_read_lock(mm);
   ret = hmm_range_fault();
   if (ret) {
-  up_read(>mmap_sem);
+  mmap_read_unlock(mm);
   if (ret == -EBUSY)
  goto again;
   return ret;
   }
-  up_read(>mmap_sem);
+  mmap_read_unlock(mm);
 
   take_lock(driver->update);
   if (mmu_interval_read_retry(, range.notifier_seq) {
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 36efa778ee1a..c2303a8c2b9f 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -171,7 +171,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
 
-/* No need to up_read(>mmap_sem) as we would
+/* No need to mmap_read_unlock(mm) as we would
 * have already released it in __lock_page_or_retry
 * in mm/filemap.c.
 */
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 9b95050c2048..0f788992608a 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -169,7 +169,7 @@ ia64_do_page_fault (unsigned long address, unsigned long 
isr, struct pt_regs *re
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
 
-/* No need to up_read(>mmap_sem) as we would
+/* No need to mmap_read_unlock(mm) as we would
 * have already released it in __lock_page_or_retry
 * in mm/filemap.c.
 */
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 650acab0d77d..a94a814ad6ad 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -165,7 +165,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long 
address,
flags |= FAULT_FLAG_TRIED;
 
/*
-* No need to up_read(>mmap_sem) as we would
+* No need to mmap_read_unlock(mm) as we would
 * have already released it in __lock_page_or_retry
 * in mm/filemap.c.
 */
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index 9d7c423dea1d..ebf1ac50b291 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -239,7 +239,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long 
address,
flags |= FAULT_FLAG_TRIED;
 
/*
-* No need to up_read(>mmap_sem) as we would
+* No need to mmap_read_unlock(mm) as we would
 * have already released it in __lock_page_or_retry
 * in mm/filemap.c.
 */
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 9ef2dd39111e..01b168a90434 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -181,7 +181,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, 
unsigned long write,
flags |= FAULT_FLAG_TRIED;
 
/*
-* No need to up_read(>mmap_sem) as we would
+ 

[PATCH v6 09/12] mmap locking API: add mmap_assert_locked() and mmap_assert_write_locked()

2020-05-19 Thread Michel Lespinasse
Add new APIs to assert that mmap_sem is held.

Using this instead of rwsem_is_locked and lockdep_assert_held[_write]
makes the assertions more tolerant of future changes to the lock type.

Signed-off-by: Michel Lespinasse 
---
 arch/x86/events/core.c|  2 +-
 fs/userfaultfd.c  |  6 +++---
 include/linux/mmap_lock.h | 14 ++
 mm/gup.c  |  2 +-
 mm/hmm.c  |  2 +-
 mm/memory.c   |  2 +-
 mm/mmu_notifier.c |  6 +++---
 mm/pagewalk.c |  6 +++---
 mm/util.c |  2 +-
 9 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a619763e96e1..66559ac4f89e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2182,7 +2182,7 @@ static void x86_pmu_event_mapped(struct perf_event 
*event, struct mm_struct *mm)
 * For now, this can't happen because all callers hold mmap_sem
 * for write.  If this changes, we'll need a different solution.
 */
-   lockdep_assert_held_write(>mmap_sem);
+   mmap_assert_write_locked(mm);
 
if (atomic_inc_return(>context.perf_rdpmc_allowed) == 1)
on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 9c645eee1a59..12b492409040 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -234,7 +234,7 @@ static inline bool userfaultfd_huge_must_wait(struct 
userfaultfd_ctx *ctx,
pte_t *ptep, pte;
bool ret = true;
 
-   VM_BUG_ON(!rwsem_is_locked(>mmap_sem));
+   mmap_assert_locked(mm);
 
ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
 
@@ -286,7 +286,7 @@ static inline bool userfaultfd_must_wait(struct 
userfaultfd_ctx *ctx,
pte_t *pte;
bool ret = true;
 
-   VM_BUG_ON(!rwsem_is_locked(>mmap_sem));
+   mmap_assert_locked(mm);
 
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
@@ -405,7 +405,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned 
long reason)
 * Coredumping runs without mmap_sem so we can only check that
 * the mmap_sem is held, if PF_DUMPCORE was not set.
 */
-   WARN_ON_ONCE(!rwsem_is_locked(>mmap_sem));
+   mmap_assert_locked(mm);
 
ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index acac1bf5ecd2..43ef914e6468 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_MMAP_LOCK_H
 #define _LINUX_MMAP_LOCK_H
 
+#include 
+
 #define MMAP_LOCK_INITIALIZER(name) \
.mmap_sem = __RWSEM_INITIALIZER((name).mmap_sem),
 
@@ -73,4 +75,16 @@ static inline void mmap_read_unlock_non_owner(struct 
mm_struct *mm)
up_read_non_owner(>mmap_sem);
 }
 
+static inline void mmap_assert_locked(struct mm_struct *mm)
+{
+   lockdep_assert_held(&mm->mmap_sem);
+   VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
+}
+
+static inline void mmap_assert_write_locked(struct mm_struct *mm)
+{
+   lockdep_assert_held_write(&mm->mmap_sem);
+   VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
+}
+
 #endif /* _LINUX_MMAP_LOCK_H */
diff --git a/mm/gup.c b/mm/gup.c
index 631285295950..c1c0b37d0e8f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1405,7 +1405,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
VM_BUG_ON(end   & ~PAGE_MASK);
VM_BUG_ON_VMA(start < vma->vm_start, vma);
VM_BUG_ON_VMA(end   > vma->vm_end, vma);
-   VM_BUG_ON_MM(!rwsem_is_locked(>mmap_sem), mm);
+   mmap_assert_locked(mm);
 
gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
if (vma->vm_flags & VM_LOCKONFAULT)
diff --git a/mm/hmm.c b/mm/hmm.c
index 280585833adf..660a4bcf932a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -581,7 +581,7 @@ long hmm_range_fault(struct hmm_range *range)
struct mm_struct *mm = range->notifier->mm;
int ret;
 
-   lockdep_assert_held(>mmap_sem);
+   mmap_assert_locked(mm);
 
do {
/* If range is no longer valid force retry. */
diff --git a/mm/memory.c b/mm/memory.c
index e6dd3309c5a3..20f98ea8968e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1214,7 +1214,7 @@ static inline unsigned long zap_pud_range(struct 
mmu_gather *tlb,
next = pud_addr_end(addr, end);
if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
if (next - addr != HPAGE_PUD_SIZE) {
-   
VM_BUG_ON_VMA(!rwsem_is_locked(>mm->mmap_sem), vma);
+   mmap_assert_locked(tlb->mm);
split_huge_pud(vma, pud, addr);
} else if (zap_huge_pud(tlb, vma, pud, addr))
goto next;
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index cfd0a03bf5cc..24eb9d1ed0a7 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -609,7 

[PATCH v6 02/12] MMU notifier: use the new mmap locking API

2020-05-19 Thread Michel Lespinasse
This use is converted manually ahead of the next patch in the series,
as it requires including a new header which the automated conversion
would miss.

Signed-off-by: Michel Lespinasse 
Reviewed-by: Daniel Jordan 
Reviewed-by: Davidlohr Bueso 
Reviewed-by: Laurent Dufour 
Reviewed-by: Vlastimil Babka 
---
 include/linux/mmu_notifier.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 736f6918335e..2f462710a1a4 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -277,9 +278,9 @@ mmu_notifier_get(const struct mmu_notifier_ops *ops, struct 
mm_struct *mm)
 {
struct mmu_notifier *ret;
 
-   down_write(>mmap_sem);
+   mmap_write_lock(mm);
ret = mmu_notifier_get_locked(ops, mm);
-   up_write(>mmap_sem);
+   mmap_write_unlock(mm);
return ret;
 }
 void mmu_notifier_put(struct mmu_notifier *subscription);
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH v6 10/12] mmap locking API: rename mmap_sem to mmap_lock

2020-05-19 Thread Michel Lespinasse
Rename the mmap_sem field to mmap_lock. Any new uses of this lock
should now go through the new mmap locking api. The mmap_lock is
still implemented as a rwsem, though this could change in the future.

Signed-off-by: Michel Lespinasse 
Reviewed-by: Vlastimil Babka 
---
 arch/ia64/mm/fault.c  |  4 +--
 arch/x86/mm/fault.c   |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c |  2 +-
 include/linux/mm_types.h  |  2 +-
 include/linux/mmap_lock.h | 38 +--
 mm/memory.c   |  2 +-
 mm/mmap.c |  4 +--
 mm/mmu_notifier.c |  2 +-
 8 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 693f00b117e1..9b95050c2048 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -70,8 +70,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, 
struct pt_regs *re
mask = isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
 
-   /* mmap_sem is performance critical */
-   prefetchw(>mmap_sem);
+   /* mmap_lock is performance critical */
+   prefetchw(>mmap_lock);
 
/*
 * If we're in an interrupt or have no user context, we must not take 
the fault..
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 181f66b9049f..35f530f9dfc0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1522,7 +1522,7 @@ dotraplinkage void
 do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
unsigned long address)
 {
-   prefetchw(>mm->mmap_sem);
+   prefetchw(>mm->mmap_lock);
trace_page_fault_entries(regs, hw_error_code, address);
 
if (unlikely(kmmio_fault(regs, address)))
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c 
b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index dc9ef302f517..701f3995f621 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -661,7 +661,7 @@ static int etnaviv_gem_userptr_get_pages(struct 
etnaviv_gem_object *etnaviv_obj)
struct etnaviv_gem_userptr *userptr = _obj->userptr;
int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
 
-   might_lock_read(>mm->mmap_sem);
+   might_lock_read(>mm->mmap_lock);
 
if (userptr->mm != current->mm)
return -EPERM;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4aba6c0c2ba8..d13b90399c16 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -436,7 +436,7 @@ struct mm_struct {
spinlock_t page_table_lock; /* Protects page tables and some
 * counters
 */
-   struct rw_semaphore mmap_sem;
+   struct rw_semaphore mmap_lock;
 
struct list_head mmlist; /* List of maybe swapped mm's. These
  * are globally strung together off
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 43ef914e6468..b5bd86778cca 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -4,67 +4,67 @@
 #include 
 
 #define MMAP_LOCK_INITIALIZER(name) \
-   .mmap_sem = __RWSEM_INITIALIZER((name).mmap_sem),
+   .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
 
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
-   init_rwsem(>mmap_sem);
+   init_rwsem(>mmap_lock);
 }
 
 static inline void mmap_write_lock(struct mm_struct *mm)
 {
-   down_write(>mmap_sem);
+   down_write(>mmap_lock);
 }
 
 static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
 {
-   down_write_nested(>mmap_sem, subclass);
+   down_write_nested(>mmap_lock, subclass);
 }
 
 static inline int mmap_write_lock_killable(struct mm_struct *mm)
 {
-   return down_write_killable(>mmap_sem);
+   return down_write_killable(>mmap_lock);
 }
 
 static inline bool mmap_write_trylock(struct mm_struct *mm)
 {
-   return down_write_trylock(>mmap_sem) != 0;
+   return down_write_trylock(>mmap_lock) != 0;
 }
 
 static inline void mmap_write_unlock(struct mm_struct *mm)
 {
-   up_write(>mmap_sem);
+   up_write(>mmap_lock);
 }
 
 static inline void mmap_write_downgrade(struct mm_struct *mm)
 {
-   downgrade_write(>mmap_sem);
+   downgrade_write(>mmap_lock);
 }
 
 static inline void mmap_read_lock(struct mm_struct *mm)
 {
-   down_read(>mmap_sem);
+   down_read(>mmap_lock);
 }
 
 static inline int mmap_read_lock_killable(struct mm_struct *mm)
 {
-   return down_read_killable(>mmap_sem);
+   return down_read_killable(>mmap_lock);
 }
 
 static inline bool mmap_read_trylock(struct mm_struct *mm)
 {
-   return down_read_trylock(>mmap_sem) != 0;
+   return down_read_trylock(>mmap_lock) != 0;
 }
 
 static 

[PATCH v6 08/12] mmap locking API: add MMAP_LOCK_INITIALIZER

2020-05-19 Thread Michel Lespinasse
Define a new initializer for the mmap locking api.
Initially this just evaluates to __RWSEM_INITIALIZER as the API
is defined as wrappers around rwsem.

Signed-off-by: Michel Lespinasse 
Reviewed-by: Laurent Dufour 
Reviewed-by: Vlastimil Babka 
---
 arch/x86/kernel/tboot.c| 2 +-
 drivers/firmware/efi/efi.c | 2 +-
 include/linux/mmap_lock.h  | 3 +++
 mm/init-mm.c   | 2 +-
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index b89f6ac6a0c0..885058325c20 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -90,7 +90,7 @@ static struct mm_struct tboot_mm = {
.pgd= swapper_pg_dir,
.mm_users   = ATOMIC_INIT(2),
.mm_count   = ATOMIC_INIT(1),
-   .mmap_sem   = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+   MMAP_LOCK_INITIALIZER(init_mm)
.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
 };
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 911a2bd0f6b7..916313ec8acb 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -54,7 +54,7 @@ struct mm_struct efi_mm = {
.mm_rb  = RB_ROOT,
.mm_users   = ATOMIC_INIT(2),
.mm_count   = ATOMIC_INIT(1),
-   .mmap_sem   = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
+   MMAP_LOCK_INITIALIZER(efi_mm)
.page_table_lock= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
.cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index d1826ce42f00..acac1bf5ecd2 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,6 +1,9 @@
 #ifndef _LINUX_MMAP_LOCK_H
 #define _LINUX_MMAP_LOCK_H
 
+#define MMAP_LOCK_INITIALIZER(name) \
+   .mmap_sem = __RWSEM_INITIALIZER((name).mmap_sem),
+
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
init_rwsem(>mmap_sem);
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 19603302a77f..fe9c03d8e07b 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -31,7 +31,7 @@ struct mm_struct init_mm = {
.pgd= swapper_pg_dir,
.mm_users   = ATOMIC_INIT(2),
.mm_count   = ATOMIC_INIT(1),
-   .mmap_sem   = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+   MMAP_LOCK_INITIALIZER(init_mm)
.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.arg_lock   =  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH v6 07/12] mmap locking API: add mmap_read_trylock_non_owner()

2020-05-19 Thread Michel Lespinasse
Add a couple APIs used by kernel/bpf/stackmap.c only:
- mmap_read_trylock_non_owner()
- mmap_read_unlock_non_owner() (may be called from a work queue).

It's still not ideal that bpf/stackmap subverts the lock ownership
in this way. Thanks to Peter Zijlstra for suggesting this API as the
least-ugly way of addressing this in the short term.

Signed-off-by: Michel Lespinasse 
Reviewed-by: Daniel Jordan 
Reviewed-by: Vlastimil Babka 
---
 include/linux/mmap_lock.h | 14 ++
 kernel/bpf/stackmap.c | 17 +
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index a757cb30ae77..d1826ce42f00 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -56,4 +56,18 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
up_read(>mmap_sem);
 }
 
+static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
+{
+   if (down_read_trylock(&mm->mmap_sem)) {
+   rwsem_release(&mm->mmap_sem.dep_map, _RET_IP_);
+   return true;
+   }
+   return false;
+}
+
+static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
+{
+   up_read_non_owner(>mmap_sem);
+}
+
 #endif /* _LINUX_MMAP_LOCK_H */
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 11d41f0c7005..998968659892 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -33,7 +33,7 @@ struct bpf_stack_map {
 /* irq_work to run up_read() for build_id lookup in nmi context */
 struct stack_map_irq_work {
struct irq_work irq_work;
-   struct rw_semaphore *sem;
+   struct mm_struct *mm;
 };
 
 static void do_up_read(struct irq_work *entry)
@@ -44,8 +44,7 @@ static void do_up_read(struct irq_work *entry)
return;
 
work = container_of(entry, struct stack_map_irq_work, irq_work);
-   up_read_non_owner(work->sem);
-   work->sem = NULL;
+   mmap_read_unlock_non_owner(work->mm);
 }
 
 static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
@@ -317,7 +316,7 @@ static void stack_map_get_build_id_offset(struct 
bpf_stack_build_id *id_offs,
 * with build_id.
 */
if (!user || !current || !current->mm || irq_work_busy ||
-   mmap_read_trylock(current->mm) == 0) {
+   !mmap_read_trylock_non_owner(current->mm)) {
/* cannot access current->mm, fall back to ips */
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
@@ -342,16 +341,10 @@ static void stack_map_get_build_id_offset(struct 
bpf_stack_build_id *id_offs,
}
 
if (!work) {
-   mmap_read_unlock(current->mm);
+   mmap_read_unlock_non_owner(current->mm);
} else {
-   work->sem = >mm->mmap_sem;
+   work->mm = current->mm;
irq_work_queue(>irq_work);
-   /*
-* The irq_work will release the mmap_sem with
-* up_read_non_owner(). The rwsem_release() is called
-* here to release the lock from lockdep's perspective.
-*/
-   rwsem_release(>mm->mmap_sem.dep_map, _RET_IP_);
}
 }
 
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH v6 05/12] mmap locking API: convert mmap_sem call sites missed by coccinelle

2020-05-19 Thread Michel Lespinasse
Convert the last few remaining mmap_sem rwsem calls to use the new
mmap locking API. These were missed by coccinelle for some reason
(I think coccinelle does not support some of the preprocessor
constructs in these files ?)

Signed-off-by: Michel Lespinasse 
Reviewed-by: Daniel Jordan 
Reviewed-by: Laurent Dufour 
Reviewed-by: Vlastimil Babka 
---
 arch/mips/mm/fault.c   | 10 +-
 arch/riscv/mm/pageattr.c   |  4 ++--
 arch/x86/kvm/mmu/paging_tmpl.h |  8 
 fs/proc/base.c |  6 +++---
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index f8d62cd83b36..9ef2dd39111e 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -97,7 +97,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, 
unsigned long write,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
 retry:
-   down_read(>mmap_sem);
+   mmap_read_lock(mm);
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
@@ -190,7 +190,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, 
unsigned long write,
}
}
 
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
return;
 
 /*
@@ -198,7 +198,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, 
unsigned long write,
  * Fix it, but check if it's kernel or user first..
  */
 bad_area:
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
 
 bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
@@ -250,14 +250,14 @@ static void __kprobes __do_page_fault(struct pt_regs 
*regs, unsigned long write,
 * We ran out of memory, call the OOM killer, and return the userspace
 * (which will retry the fault, or kill us if we got oom-killed).
 */
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
if (!user_mode(regs))
goto no_context;
pagefault_out_of_memory();
return;
 
 do_sigbus:
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
 
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 728759eb530a..b9072c043222 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -117,10 +117,10 @@ static int __set_memory(unsigned long addr, int numpages, 
pgprot_t set_mask,
if (!numpages)
return 0;
 
-   down_read(_mm.mmap_sem);
+   mmap_read_lock(_mm);
ret =  walk_page_range_novma(_mm, start, end, _ops, NULL,
 );
-   up_read(_mm.mmap_sem);
+   mmap_read_unlock(_mm);
 
flush_tlb_kernel_range(start, end);
 
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 9bdf9b7d9a96..40e5bb67cc09 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -165,22 +165,22 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, 
struct kvm_mmu *mmu,
unsigned long pfn;
unsigned long paddr;
 
-   down_read(>mm->mmap_sem);
+   mmap_read_lock(current->mm);
vma = find_vma_intersection(current->mm, vaddr, vaddr + 
PAGE_SIZE);
if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
-   up_read(>mm->mmap_sem);
+   mmap_read_unlock(current->mm);
return -EFAULT;
}
pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
paddr = pfn << PAGE_SHIFT;
table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
if (!table) {
-   up_read(>mm->mmap_sem);
+   mmap_read_unlock(current->mm);
return -EFAULT;
}
ret = CMPXCHG([index], orig_pte, new_pte);
memunmap(table);
-   up_read(>mm->mmap_sem);
+   mmap_read_unlock(current->mm);
}
 
return (ret != orig_pte);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9a68032d8d73..a96377557db7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2314,7 +2314,7 @@ proc_map_files_readdir(struct file *file, struct 
dir_context *ctx)
if (!mm)
goto out_put_task;
 
-   ret = down_read_killable(>mmap_sem);
+   ret = mmap_read_lock_killable(mm);
if (ret) {
mmput(mm);
goto out_put_task;
@@ -2341,7 +2341,7 @@ proc_map_files_readdir(struct file *file, struct 
dir_context *ctx)
p = genradix_ptr_alloc(, nr_files++, GFP_KERNEL);
if (!p) {
ret = -ENOMEM;
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);
mmput(mm);
goto out_put_task;
}
@@ -2350,7 +2350,7 @@ 

[PATCH v6 00/12] Add a new mmap locking API wrapping mmap_sem calls

2020-05-19 Thread Michel Lespinasse
Reposting this patch series on top of v5.7-rc6. I think this is ready
for inclusion into the -mm tree; however there were some minor points
of feedback to address and also it was easier to regenerate a full
version after the v5.5 (only updating patches 09/10 and 10/10) caused
some confusion.


This patch series adds a new mmap locking API replacing the existing
mmap_sem lock and unlocks. Initially the API is just implemented in terms
of inlined rwsem calls, so it doesn't provide any new functionality.

There are two justifications for the new API:

- At first, it provides an easy hooking point to instrument mmap_sem
  locking latencies independently of any other rwsems.

- In the future, it may be a starting point for replacing the rwsem
  implementation with a different one, such as range locks. This is
  something that is being explored, even though there is no wide consensus
  about this possible direction yet.
  (see https://patchwork.kernel.org/cover/11401483/)


Changes since v5.5 of the patchset:

- Applied the changes on top of v5.7-rc6. This was a straight rebase
  except for the changes noted here.

- Re-generated the coccinelle changes (patch 04/12).

- Patch 08/12: use (name) in the MMAP_LOCK_INITIALIZER macro.

- Patch 09/12: use lockdep_assert_held() / lockdep_assert_held_write()
  so that mmap_assert_locked() and mmap_assert_write_locked() get better
  coverage when lockdep is enabled but CONFIG_DEBUG_VM is not.

- Added patches 11 and 12, converting comments that referenced mmap_sem
  rwsem calls or the mmap_sem lock itself, to reference the corresponding
  mmap locking APIs or the mmap_lock itself.


Changes since v5 of the patchset:

- Patch 09/10: Add both mmap_assert_locked() and mmap_assert_write_locked();
  convert some call sites that were using lockdep assertions to use these
  new APIs instead.


Changes since v4 of the patchset:

- Applied the changes on top of v5.7-rc2. This was a straight rebase
  except for changes noted here.

- Patch 01/10: renamed the mmap_write_downgrade API
  (as suggested by Davidlohr Bueso).

- Patch 05/10: added arch/riscv/mm/pageattr.c changes that had been
  previously missed, as found by the kbuild bot.

- Patch 06/10: use SINGLE_DEPTH_NESTING as suggested by Matthew Wilcox.

- Patch 08/10: change MMAP_LOCK_INITIALIZER definition
  as suggested by Matthew Wilcox.

- Patch 09/10: add mm_assert_locked API as suggested by Matthew Wilcox.


Changes since v3 of the patchset:

- The changes now apply on top of v5.7-rc1. This was a straight rebase
  except for changes noted here.

- Re-generated the coccinelle changes (patch 04/10).

- Patch 06/10: removed the mmap_write_unlock_nested API;
  mmap_write_lock_nested() calls now pair with the regular mmap_write_unlock()
  as was suggested by many people.

- Patch 07/10: removed the mmap_read_release API; this is replaced with
  mmap_read_trylock_non_owner() which pairs with mmap_read_unlock_non_owner()
  Thanks to Peter Zijlstra for the suggestion.


Changes since v2 of the patchset:

- Removed the mmap_is_locked API - v2 had removed all uses of it,
  but the actual function definition was still there unused.
  Thanks to Jason Gunthorpe for noticing the unused mmap_is_locked function.


Changes since v1 of the patchset:

- Manually convert drivers/dma-buf/dma-resv.c ahead of the automated
  coccinelle conversion as this file requires a new include statement.
  Thanks to Intel's kbuild test bot for finding the issue.

- In coccinelle automated conversion, apply a single coccinelle rule
  as suggested by Markus Elfring.

- In manual conversion of sites missed by coccinelle, fix an issue where
  I had used mm_read_unlock (from an older version of my patchset) instead
  of mmap_read_unlock in some arch/mips code.
  This was also identified by Intel's kbuild test bot.

- Do not add a new mmap_is_locked API, and use lockdep_assert_held instead.
  Thanks to Jason Gunthorpe and Matthew Wilcox for the suggestion.


The changes apply on top of v5.7-rc6.

I think these changes are ready for integration into the -mm tree now
(for integration into v5.8). The coccinelle part of the change is
relatively invasive, but can be skipped over on a file by file basis
if it causes any conflicts with other pending changes. The new mmap
locking API can interoperate with new code that is still using direct
rwsem calls, until the last patch in the series which renames mmap_sem
to enforce using the new API. Maybe that last patch could be delayed for
a bit, so that we'd get a chance to convert any new code that locks
mmap_sem in the -rc1 release before applying that last patch.


Michel Lespinasse (12):
  mmap locking API: initial implementation as rwsem wrappers
  MMU notifier: use the new mmap locking API
  DMA reservations: use the new mmap locking API
  mmap locking API: use coccinelle to convert mmap_sem rwsem call sites
  mmap locking API: convert mmap_sem call sites missed by coccinelle
  mmap locking API: convert nested write 

[PATCH v6 06/12] mmap locking API: convert nested write lock sites

2020-05-19 Thread Michel Lespinasse
Add API for nested write locks and convert the few call sites doing that.

Signed-off-by: Michel Lespinasse 
Reviewed-by: Daniel Jordan 
Reviewed-by: Laurent Dufour 
Reviewed-by: Vlastimil Babka 
---
 arch/um/include/asm/mmu_context.h | 3 ++-
 include/linux/mmap_lock.h | 5 +
 kernel/fork.c | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/um/include/asm/mmu_context.h 
b/arch/um/include/asm/mmu_context.h
index 62262c5c7785..17ddd4edf875 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -47,7 +48,7 @@ static inline void activate_mm(struct mm_struct *old, struct 
mm_struct *new)
 * when the new ->mm is used for the first time.
 */
__switch_mm(>context.id);
-   down_write_nested(>mmap_sem, 1);
+   mmap_write_lock_nested(new, SINGLE_DEPTH_NESTING);
uml_setup_stubs(new);
mmap_write_unlock(new);
 }
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 97ac53b66052..a757cb30ae77 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -11,6 +11,11 @@ static inline void mmap_write_lock(struct mm_struct *mm)
down_write(>mmap_sem);
 }
 
+static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
+{
+   down_write_nested(&mm->mmap_sem, subclass);
+}
+
 static inline int mmap_write_lock_killable(struct mm_struct *mm)
 {
return down_write_killable(>mmap_sem);
diff --git a/kernel/fork.c b/kernel/fork.c
index a4db6bc952c6..e702e84897fa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -499,7 +499,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
/*
 * Not linked in yet - no deadlock potential:
 */
-   down_write_nested(>mmap_sem, SINGLE_DEPTH_NESTING);
+   mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
 
/* No ordering required: file already has been exposed. */
RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH v6 03/12] DMA reservations: use the new mmap locking API

2020-05-19 Thread Michel Lespinasse
This use is converted manually ahead of the next patch in the series,
as it requires including a new header which the automated conversion
would miss.

Signed-off-by: Michel Lespinasse 
Reviewed-by: Daniel Jordan 
Reviewed-by: Laurent Dufour 
Reviewed-by: Vlastimil Babka 
---
 drivers/dma-buf/dma-resv.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 4264e64788c4..b45f8514dc82 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -34,6 +34,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 /**
@@ -109,7 +110,7 @@ static int __init dma_resv_lockdep(void)
 
dma_resv_init();
 
-   down_read(>mmap_sem);
+   mmap_read_lock(mm);
ww_acquire_init(, _ww_class);
ret = dma_resv_lock(, );
if (ret == -EDEADLK)
@@ -118,7 +119,7 @@ static int __init dma_resv_lockdep(void)
fs_reclaim_release(GFP_KERNEL);
ww_mutex_unlock();
ww_acquire_fini();
-   up_read(>mmap_sem);
+   mmap_read_unlock(mm);

mmput(mm);
 
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH v6 12/12] mmap locking API: convert mmap_sem comments

2020-05-19 Thread Michel Lespinasse
Convert comments that reference mmap_sem to reference mmap_lock instead.

Signed-off-by: Michel Lespinasse 
---
 .../admin-guide/mm/numa_memory_policy.rst | 10 ++---
 Documentation/admin-guide/mm/userfaultfd.rst  |  2 +-
 Documentation/filesystems/locking.rst |  2 +-
 Documentation/vm/transhuge.rst|  4 +-
 arch/arc/mm/fault.c   |  2 +-
 arch/arm/kernel/vdso.c|  2 +-
 arch/arm/mm/fault.c   |  2 +-
 arch/ia64/mm/fault.c  |  2 +-
 arch/microblaze/mm/fault.c|  2 +-
 arch/nds32/mm/fault.c |  2 +-
 arch/powerpc/include/asm/pkeys.h  |  2 +-
 arch/powerpc/kvm/book3s_hv_uvmem.c|  6 +--
 arch/powerpc/mm/book3s32/tlb.c|  2 +-
 arch/powerpc/mm/book3s64/hash_pgtable.c   |  4 +-
 arch/powerpc/mm/book3s64/subpage_prot.c   |  2 +-
 arch/powerpc/mm/fault.c   |  8 ++--
 arch/powerpc/mm/pgtable.c |  2 +-
 arch/powerpc/platforms/cell/spufs/file.c  |  6 +--
 arch/riscv/mm/fault.c |  2 +-
 arch/s390/kvm/priv.c  |  2 +-
 arch/s390/mm/fault.c  |  2 +-
 arch/s390/mm/gmap.c   | 32 +++
 arch/s390/mm/pgalloc.c|  2 +-
 arch/sh/mm/cache-sh4.c|  2 +-
 arch/sh/mm/fault.c|  2 +-
 arch/sparc/mm/fault_64.c  |  2 +-
 arch/um/kernel/skas/mmu.c |  2 +-
 arch/um/kernel/tlb.c  |  2 +-
 arch/unicore32/mm/fault.c |  2 +-
 arch/x86/events/core.c|  2 +-
 arch/x86/include/asm/mmu.h|  2 +-
 arch/x86/include/asm/pgtable-3level.h |  8 ++--
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  2 +-
 arch/x86/kernel/cpu/resctrl/rdtgroup.c|  6 +--
 arch/x86/kernel/ldt.c |  2 +-
 arch/x86/mm/fault.c   | 12 +++---
 drivers/char/mspec.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  6 +--
 drivers/gpu/drm/i915/i915_perf.c  |  2 +-
 drivers/gpu/drm/ttm/ttm_bo_vm.c   |  6 +--
 drivers/infiniband/core/uverbs_main.c |  2 +-
 drivers/infiniband/hw/hfi1/mmu_rb.c   |  2 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c |  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/sgi-gru/grufault.c   |  8 ++--
 drivers/oprofile/buffer_sync.c|  2 +-
 drivers/staging/android/ashmem.c  |  4 +-
 drivers/staging/comedi/comedi_fops.c  |  2 +-
 drivers/tty/vt/consolemap.c   |  2 +-
 drivers/xen/gntdev.c  |  2 +-
 fs/coredump.c |  4 +-
 fs/exec.c |  2 +-
 fs/ext2/file.c|  2 +-
 fs/ext4/super.c   |  6 +--
 fs/kernfs/file.c  |  4 +-
 fs/proc/base.c|  6 +--
 fs/proc/task_mmu.c|  6 +--
 fs/userfaultfd.c  | 18 -
 fs/xfs/xfs_file.c |  2 +-
 fs/xfs/xfs_inode.c| 14 +++
 fs/xfs/xfs_iops.c |  4 +-
 include/asm-generic/pgtable.h |  6 +--
 include/linux/fs.h|  4 +-
 include/linux/huge_mm.h   |  2 +-
 include/linux/mempolicy.h |  2 +-
 include/linux/mm.h| 10 ++---
 include/linux/mm_types.h  |  2 +-
 include/linux/mmu_notifier.h  |  8 ++--
 include/linux/pagemap.h   |  2 +-
 include/linux/rmap.h  |  2 +-
 include/linux/sched/mm.h  | 10 ++---
 kernel/acct.c |  2 +-
 kernel/cgroup/cpuset.c|  4 +-
 kernel/events/core.c  |  6 +--
 kernel/events/uprobes.c   |  4 +-
 kernel/exit.c |  2 +-
 kernel/relay.c|  2 +-
 kernel/sys.c  |  4 +-
 lib/test_lockup.c |  8 ++--
 mm/filemap.c  | 38 +-
 mm/frame_vector.c |  2 +-
 mm/gup.c  | 38 +-
 mm/huge_memory.c  |  4 +-
 mm/hugetlb.c  |  2 +-
 

[PATCH v6 01/12] mmap locking API: initial implementation as rwsem wrappers

2020-05-19 Thread Michel Lespinasse
This change wraps the existing mmap_sem related rwsem calls into a new
mmap locking API. There are two justifications for the new API:

- At first, it provides an easy hooking point to instrument mmap_sem
  locking latencies independently of any other rwsems.

- In the future, it may be a starting point for replacing the rwsem
  implementation with a different one, such as range locks.

Signed-off-by: Michel Lespinasse 
Reviewed-by: Daniel Jordan 
Reviewed-by: Davidlohr Bueso 
Reviewed-by: Laurent Dufour 
Reviewed-by: Vlastimil Babka 
---
 include/linux/mm.h|  1 +
 include/linux/mmap_lock.h | 54 +++
 2 files changed, 55 insertions(+)
 create mode 100644 include/linux/mmap_lock.h

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5a323422d783..051ec782bdbb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
new file mode 100644
index ..97ac53b66052
--- /dev/null
+++ b/include/linux/mmap_lock.h
@@ -0,0 +1,54 @@
+#ifndef _LINUX_MMAP_LOCK_H
+#define _LINUX_MMAP_LOCK_H
+
+static inline void mmap_init_lock(struct mm_struct *mm)
+{
+   init_rwsem(&mm->mmap_sem);
+}
+
+static inline void mmap_write_lock(struct mm_struct *mm)
+{
+   down_write(&mm->mmap_sem);
+}
+
+static inline int mmap_write_lock_killable(struct mm_struct *mm)
+{
+   return down_write_killable(&mm->mmap_sem);
+}
+
+static inline bool mmap_write_trylock(struct mm_struct *mm)
+{
+   return down_write_trylock(&mm->mmap_sem) != 0;
+}
+
+static inline void mmap_write_unlock(struct mm_struct *mm)
+{
+   up_write(&mm->mmap_sem);
+}
+
+static inline void mmap_write_downgrade(struct mm_struct *mm)
+{
+   downgrade_write(&mm->mmap_sem);
+}
+
+static inline void mmap_read_lock(struct mm_struct *mm)
+{
+   down_read(&mm->mmap_sem);
+}
+
+static inline int mmap_read_lock_killable(struct mm_struct *mm)
+{
+   return down_read_killable(&mm->mmap_sem);
+}
+
+static inline bool mmap_read_trylock(struct mm_struct *mm)
+{
+   return down_read_trylock(&mm->mmap_sem) != 0;
+}
+
+static inline void mmap_read_unlock(struct mm_struct *mm)
+{
+   up_read(&mm->mmap_sem);
+}
+
+#endif /* _LINUX_MMAP_LOCK_H */
-- 
2.26.2.761.g0e0b3e54be-goog



Re: [PATCH] s390/sclp_vt220: Fix console name to match device

2020-05-19 Thread Christian Borntraeger


On 19.05.20 20:16, Valentin Vidic wrote:
> Console name reported in /proc/consoles:
> 
>   ttyS1-W- (EC p  )4:65
> 
> does not match device name:
> 
>   crw--w1 root root4,  65 May 17 12:18 /dev/ttysclp0
> 
> so debian-installer gets confused and fails to start.
> 
> Signed-off-by: Valentin Vidic 
> Cc: sta...@vger.kernel.org

This is not as simple. ttyS1 is the console name and ttysclp0 is the tty 
name.
This has mostly historic reasons and it obviously causes problems.
But there is documentation out there that actually describes the use of 
console=ttyS1 console=ttyS0.
to have console output on both sclp consoles and there are probably scripts
using ttyS1.

I am wondering. The tty for ttyS0 is named sclp_line0. Does this work in LPAR?


> ---
>  drivers/s390/char/sclp_vt220.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
> index 3f9a6ef650fa..3c2ed6d01387 100644
> --- a/drivers/s390/char/sclp_vt220.c
> +++ b/drivers/s390/char/sclp_vt220.c
> @@ -35,8 +35,8 @@
>  #define SCLP_VT220_MINOR 65
>  #define SCLP_VT220_DRIVER_NAME   "sclp_vt220"
>  #define SCLP_VT220_DEVICE_NAME   "ttysclp"
> -#define SCLP_VT220_CONSOLE_NAME  "ttyS"
> -#define SCLP_VT220_CONSOLE_INDEX 1   /* console=ttyS1 */
> +#define SCLP_VT220_CONSOLE_NAME  "ttysclp"
> +#define SCLP_VT220_CONSOLE_INDEX 0   /* console=ttysclp0 */
>  
>  /* Representation of a single write request */
>  struct sclp_vt220_request {
> 


Re: [PATCH v4 00/15] virtio-mem: paravirtualized memory

2020-05-19 Thread teawater
Hi David,

Thanks for your work.
I tried this version with cloud-hypervisor master.  It worked very well.

Best,
Hui

> 2020年5月7日 22:01,David Hildenbrand  写道:
> 
> This series is based on v5.7-rc4. The patches are located at:
>https://github.com/davidhildenbrand/linux.git virtio-mem-v4
> 
> This is basically a resend of v3 [1], now based on v5.7-rc4 and retested.
> One patch was reshuffled and two ACKs I missed to add were added. The
> rebase did not require any modifications to patches.
> 
> Details about virtio-mem can be found in the cover letter of v2 [2]. A
> basic QEMU implementation was posted yesterday [3].
> 
> [1] https://lkml.kernel.org/r/20200507103119.11219-1-da...@redhat.com
> [2] https://lkml.kernel.org/r/20200311171422.10484-1-da...@redhat.com
> [3] https://lkml.kernel.org/r/20200506094948.76388-1-da...@redhat.com
> 
> v3 -> v4:
> - Move "MAINTAINERS: Add myself as virtio-mem maintainer" to #2
> - Add two ACKs from Andrew (in reply to v2)
> -- "mm: Allow to offline unmovable PageOffline() pages via ..."
> -- "mm/memory_hotplug: Introduce offline_and_remove_memory()"
> 
> v2 -> v3:
> - "virtio-mem: Paravirtualized memory hotplug"
> -- Include "linux/slab.h" to fix build issues
> -- Remember the "region_size", helpful for patch #11
> -- Minor simplification in virtio_mem_overlaps_range()
> -- Use notifier_from_errno() instead of notifier_to_errno() in notifier
> -- More reliable check for added memory when unloading the driver
> - "virtio-mem: Allow to specify an ACPI PXM as nid"
> -- Also print the nid
> - Added patch #11-#15
> 
> David Hildenbrand (15):
>  virtio-mem: Paravirtualized memory hotplug
>  MAINTAINERS: Add myself as virtio-mem maintainer
>  virtio-mem: Allow to specify an ACPI PXM as nid
>  virtio-mem: Paravirtualized memory hotunplug part 1
>  virtio-mem: Paravirtualized memory hotunplug part 2
>  mm: Allow to offline unmovable PageOffline() pages via
>MEM_GOING_OFFLINE
>  virtio-mem: Allow to offline partially unplugged memory blocks
>  mm/memory_hotplug: Introduce offline_and_remove_memory()
>  virtio-mem: Offline and remove completely unplugged memory blocks
>  virtio-mem: Better retry handling
>  virtio-mem: Add parent resource for all added "System RAM"
>  virtio-mem: Drop manual check for already present memory
>  virtio-mem: Unplug subblocks right-to-left
>  virtio-mem: Use -ETXTBSY as error code if the device is busy
>  virtio-mem: Try to unplug the complete online memory block first
> 
> MAINTAINERS |7 +
> drivers/acpi/numa/srat.c|1 +
> drivers/virtio/Kconfig  |   17 +
> drivers/virtio/Makefile |1 +
> drivers/virtio/virtio_mem.c | 1962 +++
> include/linux/memory_hotplug.h  |1 +
> include/linux/page-flags.h  |   10 +
> include/uapi/linux/virtio_ids.h |1 +
> include/uapi/linux/virtio_mem.h |  208 
> mm/memory_hotplug.c |   81 +-
> mm/page_alloc.c |   26 +
> mm/page_isolation.c |9 +
> 12 files changed, 2314 insertions(+), 10 deletions(-)
> create mode 100644 drivers/virtio/virtio_mem.c
> create mode 100644 include/uapi/linux/virtio_mem.h
> 
> -- 
> 2.25.3



Re: [PATCH v12 10/10] KVM: x86: Enable CET virtualization and advertise CET to userspace

2020-05-19 Thread Sean Christopherson
On Wed, May 06, 2020 at 04:21:09PM +0800, Yang Weijiang wrote:
> Set the feature bits so that CET capabilities can be seen in guest via
> CPUID enumeration. Add CR4.CET bit support in order to allow guest set CET
> master control bit(CR4.CET).
> 
> Signed-off-by: Yang Weijiang 
> ---
>  arch/x86/include/asm/kvm_host.h | 3 ++-
>  arch/x86/kvm/cpuid.c| 5 +++--
>  2 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f68c825e94ad..21f3c89d8c70 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -95,7 +95,8 @@
> | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | 
> X86_CR4_PCIDE \
> | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
> | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> -   | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
> +   | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
> +   | X86_CR4_CET))
>  
>  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
>  
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 984ab2b395b3..333a9e0d7cdf 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -344,7 +344,8 @@ void kvm_set_cpu_caps(void)
>   F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
>   F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
>   F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
> + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
> + F(SHSTK)
>   );
>   /* Set LA57 based on hardware capability. */
>   if (cpuid_ecx(7) & F(LA57))
> @@ -353,7 +354,7 @@ void kvm_set_cpu_caps(void)
>   kvm_cpu_cap_mask(CPUID_7_EDX,
>   F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
>   F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
> - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM)
> + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | F(IBT)

SHSTK and IBT need to be disabled in vmx_set_cpu_caps() if unrestricted guest
is disabled.  CET won't play nice with emulating arbitrary instructions, e.g.
KVM doesn't enforce ENDBR and doesn't keep SSP up-to-date (and no one is
advocating fully emulating CET).

Paolo also floated the idea of providing a reduced opcode set, e.g. only I/O,
MOV, and ALU instructions, but I don't think that needs to be done in the
initial CET enabling as it's more of a defense-in-depth than a functional
requirement.

No need to respin a new series just for this, it can wait until I've looked
through this version.

Original thread: 
https://lkml.kernel.org/r/20200515161919.29249-1-pbonz...@redhat.com

>   );
>  
>   /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
> -- 
> 2.17.2
> 


Re: [RFC V2] mm/vmstat: Add events for PMD based THP migration without split

2020-05-19 Thread John Hubbard

On 2020-05-19 20:32, Anshuman Khandual wrote:
...

How about not being quite so granular on the THP config options, and
just guarding these events with the overall CONFIG_TRANSPARENT_HUGEPAGE
option, instead of the sub-option CONFIG_ARCH_ENABLE_THP_MIGRATION?

I tentatively think it's harmless and not really misleading to have
/proc/vmstat showing this in all THP-enabled configurations:

thp_pmd_migration_success 0
thp_pmd_migration_failure 0

...if THP is enabled, and *whether or not* _THP_MIGRATION is enabled.
And this simplifies things a bit. Given how complicated the .config options can get,
I think simplifying would be nice.

However, I'm ready to be corrected on that, if it's a bad idea for
other API reasons perhaps.  Can anyone please comment?


There are no THP migration events to track unless it is enabled. Why
show these statistics (as 0) when it's not even possible? If the config
simplicity is the only intended rationale here, it might not be the
case either. These events and their tracking would still need to be
wrapped with CONFIG_TRANSPARENT_HUGEPAGE otherwise.

If your concern is more towards CONFIG_ARCH_ENABLE_THP_MIGRATION being
unsuitable or with complex dependencies, then that is something how THP
migration feature itself is implemented currently and adding VM events
does not address that. A possible patch in the future could solve
all these (together).

But sure, let's hear it for what others have to say on this.



Well, I don't want to hold up progress. If it's not very convincing to you,
let's just drop the idea. It was kind of weak. :)



+    THP_PMD_MIGRATION_SUCCESS,
+    THP_PMD_MIGRATION_FAILURE,
+#endif
   #endif
   #ifdef CONFIG_MEMORY_BALLOON
   BALLOON_INFLATE,
diff --git a/mm/migrate.c b/mm/migrate.c
index 7160c1556f79..5325700a3e90 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1170,6 +1170,18 @@ static int __unmap_and_move(struct page *page, struct 
page *newpage,
   #define ICE_noinline
   #endif
   +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline void thp_migration_success(bool success)



I think this should be named

     thp_pmd_migration_success()

, since that's what you're really counting. Or, you could
name the events THP_MIGRATION_SUCCESS|FAILURE. Either way,
just so the function name matches the events it's counting.


Makes sense but IMHO we should keep _pmd_ to be more specific.
Will change the name here as thp_pmd_migration_success().





+{
+    if (success)
+    count_vm_event(THP_PMD_MIGRATION_SUCCESS);
+    else
+    count_vm_event(THP_PMD_MIGRATION_FAILURE);
+}
+#else
+static inline void thp_migration_success(bool success) { }



This whole ifdef clause would disappear if my suggestion above is


We will have to protect these with CONFIG_TRANSPARENT_HUGEPAGE as
the events are still conditionally available.



Yes you are right, of course. And I even worked through that, but then
when I sat down to write a response my fingers typed v1 of my understanding
instead of v2. No one knows why. :) Sorry about the misinformation there.


accepted. However, if not, then I believe the convention for this
kind of situation is:

static inline void thp_migration_success(bool success)
{
}


AFAIK, we have examples both ways but will change if this is preferred.



Not worth worrying about, but I do recall a few recent code reviews that
all preferred the multi-line version, which is why I suggested it.

Anyway, either way, with the thp_pmd_migration_success() name change, you
can add:

Reviewed-by: John Hubbard 


thanks,
--
John Hubbard
NVIDIA


Re: seccomp feature development

2020-05-19 Thread Alexei Starovoitov
On Wed, May 20, 2020 at 11:20:45AM +1000, Aleksa Sarai wrote:
> 
> No it won't become copy_from_user(), nor will there be a TOCTOU race.
> 
> The idea is that seccomp will proactively copy the struct (and
> recursively any of the struct pointers inside) before the syscall runs
> -- as this is done by seccomp it doesn't require any copy_from_user()
> primitives in cBPF. We then run the cBPF filter on the copied struct,
> just like how cBPF programs currently operate on seccomp_data (how this
> would be exposed to the cBPF program as part of the seccomp ABI is the
> topic of discussion here).
> 
> Then, when the actual syscall code runs, the struct will have already
> been copied and the syscall won't copy it again.

Let's take bpf syscall as an example.
Are you suggesting that all of syscall logic of conditionally parsing
the arguments will be copy-pasted into seccomp-syscall infra, then
it will do copy_from_user() all the data and replace all aligned_u64
in "union bpf_attr" with kernel copied pointers instead of user pointers
and make all of bpf syscall's copy_from_user() actions to be conditional ?
If seccomp is on, use kernel pointers... if seccomp is off, do copy_from_user ?
And the same idea will be replicated for all syscalls?


Re: [PATCH v3 64/75] x86/sev-es: Cache CPUID results for improved performance

2020-05-19 Thread Sean Christopherson
On Tue, Apr 28, 2020 at 05:17:14PM +0200, Joerg Roedel wrote:
> From: Mike Stunes 
> 
> To avoid a future VMEXIT for a subsequent CPUID function, cache the
> results returned by CPUID into an xarray.
> 
>  [tl: coding standard changes, register zero extension]
> 
> Signed-off-by: Mike Stunes 
> Signed-off-by: Tom Lendacky 
> [ jroe...@suse.de: - Wrapped cache handling into vc_handle_cpuid_cached()
>- Used lower_32_bits() where applicable
>  - Moved cache_index out of struct es_em_ctxt ]
> Co-developed-by: Joerg Roedel 
> Signed-off-by: Joerg Roedel 
> ---

...

> +struct sev_es_cpuid_cache_entry {
> + unsigned long eax;
> + unsigned long ebx;
> + unsigned long ecx;
> + unsigned long edx;

Why are these unsigned longs?  CPUID returns 32-bit values, this wastes 16
bytes per entry.

> +};
> +
> +static struct xarray sev_es_cpuid_cache;
> +static bool __ro_after_init sev_es_cpuid_cache_initialized;
> +
>  /* For early boot hypervisor communication in SEV-ES enabled guests */
>  static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
>  
> @@ -463,6 +474,9 @@ void __init sev_es_init_vc_handling(void)
>   sev_es_setup_vc_stack(cpu);
>   }
>  
> + xa_init_flags(&sev_es_cpuid_cache, XA_FLAGS_LOCK_IRQ);
> + sev_es_cpuid_cache_initialized = true;
> +
>   init_vc_stack_names();
>  }
>  
> @@ -744,6 +758,91 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb,
>   return ret;
>  }
>  
> +static unsigned long sev_es_get_cpuid_cache_index(struct es_em_ctxt *ctxt)
> +{
> + unsigned long hi, lo;
> +
> + /* Don't attempt to cache until the xarray is initialized */
> + if (!sev_es_cpuid_cache_initialized)
> + return ULONG_MAX;
> +
> + lo = lower_32_bits(ctxt->regs->ax);
> +
> + /*
> +  * CPUID 0x000d requires both RCX and XCR0, so it can't be
> +  * cached.
> +  */
> + if (lo == 0x000d)
> + return ULONG_MAX;
> +
> + /*
> +  * Some callers of CPUID don't always set RCX to zero for CPUID
> +  * functions that don't require RCX, which can result in excessive
> +  * cached values, so RCX needs to be manually zeroed for use as part
> +  * of the cache index. Future CPUID values may need RCX, but since
> +  * they can't be known, they must not be cached.
> +  */
> + if (lo > 0x8020)
> + return ULONG_MAX;
> +
> + switch (lo) {
> + case 0x0007:

OSPKE may or may not be cached correctly depending on when
sev_es_cpuid_cache_initialized is set.

> + case 0x000b:
> + case 0x000f:
> + case 0x0010:
> + case 0x801d:
> + case 0x8020:
> + hi = ctxt->regs->cx << 32;
> + break;
> + default:
> + hi = 0;
> + }
> +
> + return hi | lo;

This needs to be way more restrictive on what is cached.  Unless I've
overlooked something, this lets userspace trigger arbitrary, unaccounted
kernel memory allocations.  E.g.

for (i = 0; i <= 0x8020; i++) {
for (j = 0; j <= 0xffffffff; j++) {
cpuid(i, j);
if (i != 7 || i != 0xb || i != 0xf || i != 0x10 ||
i != 0x801d || i != 0x8020)
break;
}
}

The whole cache on-demand approach seems like overkill.  The number of CPUID
leaves that are invoked after boot with any regularity can probably be counted
on one hand.   IIRC glibc invokes CPUID to gather TLB/cache info, XCR0-based
features, and one or two other leafs.  A statically sized global array that's
arbitrarily indexed a la x86_capability would be just as simple and more
performant.  It would also allow fancier things like emulating CPUID 0xD in
the guest if you want to go down that road.


Re: [RFC PATCH 0/8] Qualcomm Cloud AI 100 driver

2020-05-19 Thread Greg Kroah-Hartman
On Tue, May 19, 2020 at 10:41:15PM +0200, Daniel Vetter wrote:
> On Tue, May 19, 2020 at 07:41:20PM +0200, Greg Kroah-Hartman wrote:
> > On Tue, May 19, 2020 at 08:57:38AM -0600, Jeffrey Hugo wrote:
> > > On 5/18/2020 11:08 PM, Dave Airlie wrote:
> > > > On Fri, 15 May 2020 at 00:12, Jeffrey Hugo  wrote:
> > > > >
> > > > > Introduction:
> > > > > Qualcomm Cloud AI 100 is a PCIe adapter card which contains a 
> > > > > dedicated
> > > > > > SoC ASIC for the purpose of efficiently running Deep Learning inference
> > > > > workloads in a data center environment.
> > > > >
> > > > > > The official press release can be found at -
> > > > > https://www.qualcomm.com/news/releases/2019/04/09/qualcomm-brings-power-efficient-artificial-intelligence-inference
> > > > >
> > > > > > The official product website is -
> > > > > https://www.qualcomm.com/products/datacenter-artificial-intelligence
> > > > >
> > > > > > At the time of the official press release, numerous technology news 
> > > > > sites
> > > > > also covered the product.  Doing a search of your favorite site is 
> > > > > likely
> > > > > to find their coverage of it.
> > > > >
> > > > > It is our goal to have the kernel driver for the product fully 
> > > > > upstream.
> > > > > The purpose of this RFC is to start that process.  We are still doing
> > > > > development (see below), and thus not quite looking to gain 
> > > > > acceptance quite
> > > > > > yet, but now that we have a working driver we believe we are at the 
> > > > > stage
> > > > > where meaningful conversation with the community can occur.
> > > >
> > > >
> > > > Hi Jeffery,
> > > >
> > > > Just wondering what the userspace/testing plans for this driver.
> > > >
> > > > This introduces a new user facing API for a device without pointers to
> > > > users or tests for that API.
> > >
> > > We have daily internal testing, although I don't expect you to take my 
> > > word
> > > for that.
> > >
> > > I would like to get one of these devices into the hands of Linaro, so that
> > > it can be put into KernelCI.  Similar to other Qualcomm products. I'm 
> > > trying
> > > to convince the powers that be to make this happen.
> > >
> > > Regarding what the community could do on its own, everything but the Linux
> > > driver is considered proprietary - that includes the on device firmware 
> > > and
> > > the entire userspace stack.  This is a decision above my pay grade.
> >
> > Ok, that's a decision you are going to have to push upward on, as we
> > really can't take this without a working, open, userspace.
> 
> Uh wut.
> 
> So the merge criteria for drivers/accel (atm still drivers/misc but I
> thought that was interim until more drivers showed up) isn't actually
> "totally-not-a-gpu accel driver without open source userspace".
> 
> Instead it's "totally-not-a-gpu accel driver without open source
> userspace" _and_ you have to be best buddies with Greg. Or at least
> not be on the naughty company list. Since for habanalabs all you
> wanted is a few test cases to exercise the ioctls. Not the entire
> userspace.

Also, to be fair, I have changed my mind after seeing the mess of
complexity that these "ioctls for everyone!" type of pass-through
drivers are creating.  You were right, we need open
userspace code in order to be able to properly evaluate and figure out
whether what they are doing is right or not and be able to maintain things
over time correctly.

So I was wrong, and you were right, my apologies for my previous
stubbornness.

thanks,

greg k-h


Re: [PATCH v4 2/4] kasan: record and print the free track

2020-05-19 Thread Walter Wu
> On Wed, May 20, 2020 at 6:03 AM Walter Wu  wrote:
> >
> > > On Tue, May 19, 2020 at 4:25 AM Walter Wu  
> > > wrote:
> > > >
> > > > > Move free track from slub alloc meta-data to slub free meta-data so
> > > > > that the size of struct kasan_free_meta is 16 bytes. It is a good
> > > > > size because it is the minimal redzone size and a good alignment
> > > > > value.
> > > >
> > > > For free track in generic KASAN, we do the modification in struct
> > > > kasan_alloc_meta and kasan_free_meta:
> > > > - remove free track from kasan_alloc_meta.
> > > > - add free track into kasan_free_meta.
> > > >
> > > > [1]https://bugzilla.kernel.org/show_bug.cgi?id=198437
> > > >
> > > > Signed-off-by: Walter Wu 
> > > > Suggested-by: Dmitry Vyukov 
> > > > Cc: Andrey Ryabinin 
> > > > Cc: Dmitry Vyukov 
> > > > Cc: Alexander Potapenko 
> > > > ---
> > > >  mm/kasan/common.c  | 22 ++
> > > >  mm/kasan/generic.c | 18 ++
> > > >  mm/kasan/kasan.h   |  7 +++
> > > >  mm/kasan/report.c  | 20 
> > > >  mm/kasan/tags.c| 37 +
> > > >  5 files changed, 64 insertions(+), 40 deletions(-)
> > > >
> > > > diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> > > > index 8bc618289bb1..47b53912f322 100644
> > > > --- a/mm/kasan/common.c
> > > > +++ b/mm/kasan/common.c
> > > > @@ -51,7 +51,7 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags)
> > > > return stack_depot_save(entries, nr_entries, flags);
> > > >  }
> > > >
> > > > -static inline void set_track(struct kasan_track *track, gfp_t flags)
> > > > +void kasan_set_track(struct kasan_track *track, gfp_t flags)
> > > >  {
> > > > track->pid = current->pid;
> > > > track->stack = kasan_save_stack(flags);
> > > > @@ -299,24 +299,6 @@ struct kasan_free_meta *get_free_info(struct 
> > > > kmem_cache *cache,
> > > > return (void *)object + cache->kasan_info.free_meta_offset;
> > > >  }
> > > >
> > > > -
> > > > -static void kasan_set_free_info(struct kmem_cache *cache,
> > > > -   void *object, u8 tag)
> > > > -{
> > > > -   struct kasan_alloc_meta *alloc_meta;
> > > > -   u8 idx = 0;
> > > > -
> > > > -   alloc_meta = get_alloc_info(cache, object);
> > > > -
> > > > -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
> > > > -   idx = alloc_meta->free_track_idx;
> > > > -   alloc_meta->free_pointer_tag[idx] = tag;
> > > > -   alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
> > > > -#endif
> > > > -
> > > > > -   set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
> > > > -}
> > > > -
> > > >  void kasan_poison_slab(struct page *page)
> > > >  {
> > > > unsigned long i;
> > > > @@ -492,7 +474,7 @@ static void *__kasan_kmalloc(struct kmem_cache 
> > > > *cache, const void *object,
> > > > KASAN_KMALLOC_REDZONE);
> > > >
> > > > if (cache->flags & SLAB_KASAN)
> > > > -   set_track(_alloc_info(cache, object)->alloc_track, 
> > > > flags);
> > > > +   kasan_set_track(_alloc_info(cache, 
> > > > object)->alloc_track, flags);
> > > >
> > > > return set_tag(object, tag);
> > > >  }
> > > > diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
> > > > index 3372bdcaf92a..763d8a13e0ac 100644
> > > > --- a/mm/kasan/generic.c
> > > > +++ b/mm/kasan/generic.c
> > > > @@ -344,3 +344,21 @@ void kasan_record_aux_stack(void *addr)
> > > > alloc_info->aux_stack[1] = alloc_info->aux_stack[0];
> > > > alloc_info->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
> > > >  }
> > > > +
> > > > +void kasan_set_free_info(struct kmem_cache *cache,
> > > > +   void *object, u8 tag)
> > > > +{
> > > > +   struct kasan_free_meta *free_meta;
> > > > +
> > > > +   free_meta = get_free_info(cache, object);
> > > > > +   kasan_set_track(&free_meta->free_track, GFP_NOWAIT);
> > > > +}
> > > > +
> > > > +struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
> > > > +   void *object, u8 tag)
> > > > +{
> > > > +   struct kasan_free_meta *free_meta;
> > > > +
> > > > +   free_meta = get_free_info(cache, object);
> > > > > +   return &free_meta->free_track;
> > > > +}
> > > > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> > > > index a7391bc83070..ad897ec36545 100644
> > > > --- a/mm/kasan/kasan.h
> > > > +++ b/mm/kasan/kasan.h
> > > > @@ -127,6 +127,9 @@ struct kasan_free_meta {
> > > >  * Otherwise it might be used for the allocator freelist.
> > > >  */
> > > > struct qlist_node quarantine_link;
> > > > +#ifdef CONFIG_KASAN_GENERIC
> > > > +   struct kasan_track free_track;
> > > > +#endif
> > > >  };
> > > >
> > > >  struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
> > > > @@ -168,6 +171,10 @@ void kasan_report_invalid_free(void *object, 
> > > > unsigned long ip);
> > > >  struct page *kasan_addr_to_page(const void *addr);
> > > >
> > > >  

Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-05-19 Thread Ira Weiny
On Tue, May 19, 2020 at 12:42:15PM -0700, Guenter Roeck wrote:
> On Tue, May 19, 2020 at 11:40:32AM -0700, Ira Weiny wrote:
> > On Tue, May 19, 2020 at 09:54:22AM -0700, Guenter Roeck wrote:
> > > On Mon, May 18, 2020 at 11:48:43AM -0700, ira.we...@intel.com wrote:
> > > > From: Ira Weiny 
> > > > 
> > > > The kunmap_atomic clean up failed to remove one set of pagefault/preempt
> > > > enables when vaddr is not in the fixmap.
> > > > 
> > > > Fixes: bee2128a09e6 ("arch/kunmap_atomic: consolidate duplicate code")
> > > > Signed-off-by: Ira Weiny 
> > > 
> > > microblazeel works with this patch,
> > 
> > Awesome...  Andrew in my rush yesterday I should have put a reported by on 
> > the
> > patch for Guenter as well.
> > 
> > Sorry about that Guenter,
> 
> No worries.
> 
> > Ira
> > 
> > > as do the nosmp sparc32 boot tests,
> > > but sparc32 boot tests with SMP enabled still fail with lots of messages
> > > such as:
> > > 
> > > BUG: Bad page state in process swapper/0  pfn:006a1
> > > page:f0933420 refcount:0 mapcount:1 mapping:(ptrval) index:0x1
> > > flags: 0x0()
> > > raw:  0100 0122  0001   
> > > 
> > > page dumped because: nonzero mapcount
> > > Modules linked in:
> > > CPU: 0 PID: 1 Comm: swapper/0 Tainted: GB 
> > > 5.7.0-rc6-next-20200518-2-gb178d2d56f29 #1
> > > [f00e7ab8 :
> > > bad_page+0xa8/0x108 ]
> > > [f00e8b54 :
> > > free_pcppages_bulk+0x154/0x52c ]
> > > [f00ea024 :
> > > free_unref_page+0x54/0x6c ]
> > > [f00ed864 :
> > > free_reserved_area+0x58/0xec ]
> > > [f0527104 :
> > > kernel_init+0x14/0x110 ]
> > > [f000b77c :
> > > ret_from_kernel_thread+0xc/0x38 ]
> > > [ :
> > > 0x0 ]
> > > 
> > > Code path leading to that message is different but always the same
> > > from free_unref_page().

Actually it occurs to me that the patch consolidating kmap_prot is odd for
sparc 32 bit...

It's a long shot, but could you try reverting this patch?

4ea7d2419e3f kmap: consolidate kmap_prot definitions

Alternately I will need to figure out how to run the sparc on qemu here...

Thanks very much for all the testing though!  :-D

Ira

> > > 
> > > Still testing ppc images.
> > > 
> 
> ppc image tests are passing with this patch.
> 
> Guenter


[PATCH] MIPS: SGI-IP27: Remove not used includes and comment in ip27-timer.c

2020-05-19 Thread Tiezhu Yang
After commit 0ce5ebd24d25 ("mfd: ioc3: Add driver for SGI IOC3 chip"),
the related includes and comment about ioc3 are not used any more in
ip27-timer.c, remove them.

Signed-off-by: Tiezhu Yang 
---
 arch/mips/sgi-ip27/ip27-timer.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 11ffb3e..115b1d9 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -21,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -31,10 +30,6 @@
 
 #define TICK_SIZE (tick_nsec / 1000)
 
-/* Includes for ioc3_init().  */
-#include 
-#include 
-
 static int rt_next_event(unsigned long delta, struct clock_event_device *evt)
 {
unsigned int cpu = smp_processor_id();
-- 
2.1.0



Re: [RFC PATCH 0/8] Qualcomm Cloud AI 100 driver

2020-05-19 Thread Bjorn Andersson
On Tue 19 May 21:59 PDT 2020, Greg Kroah-Hartman wrote:

> On Tue, May 19, 2020 at 10:41:15PM +0200, Daniel Vetter wrote:
> > > Ok, that's a decision you are going to have to push upward on, as we
> > > really can't take this without a working, open, userspace.
> > 
> > Uh wut.
> > 
> > So the merge criteria for drivers/accel (atm still drivers/misc but I
> > thought that was interim until more drivers showed up) isn't actually
> > "totally-not-a-gpu accel driver without open source userspace".
> > 
> > Instead it's "totally-not-a-gpu accel driver without open source
> > userspace" _and_ you have to be best buddies with Greg. Or at least
> > not be on the naughty company list. Since for habanalabs all you
> > wanted is a few test cases to exercise the ioctls. Not the entire
> > userspace.
> 
> Habanalabs now has their full library opensourced that their tools use
> directly, so that's not an argument anymore.
> 
> My primary point here is the copyright owner of this code, because of
> that, I'm not going to object to allowing this to be merged without open
> userspace code.
> 

So because it's copyright Linux Foundation you are going to accept it
without user space, after all?

Regards,
Bjorn


Re: [PATCH 06/12] xen-blkfront: add callbacks for PM suspend and hibernation

2020-05-19 Thread kbuild test robot
Hi Anchal,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.7-rc6]
[cannot apply to xen-tip/linux-next tip/irq/core tip/auto-latest next-20200519]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Anchal-Agarwal/Fix-PM-hibernation-in-Xen-guests/20200520-073211
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
03fb3acae4be8a6b680ffedb220a8b6c07260b40
config: x86_64-randconfig-a016-20200519 (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project 
e6658079aca6d971b4e9d7137a3a2ecbc9c34aec)
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All error/warnings (new ones prefixed by >>, old ones prefixed by <<):

>> drivers/block/xen-blkfront.c:2699:30: warning: missing terminating '"' 
>> character [-Winvalid-pp-token]
xenbus_dev_error(dev, err, "Hibernation Failed.
^
>> drivers/block/xen-blkfront.c:2699:30: error: expected expression
drivers/block/xen-blkfront.c:2700:26: warning: missing terminating '"' 
character [-Winvalid-pp-token]
The ring is still busy");
^
>> drivers/block/xen-blkfront.c:2726:1: error: function definition is not 
>> allowed here
{
^
>> drivers/block/xen-blkfront.c:2762:10: error: use of undeclared identifier 
>> 'blkfront_restore'
.thaw = blkfront_restore,
^
drivers/block/xen-blkfront.c:2763:13: error: use of undeclared identifier 
'blkfront_restore'
.restore = blkfront_restore
^
drivers/block/xen-blkfront.c:2767:1: error: function definition is not allowed 
here
{
^
drivers/block/xen-blkfront.c:2800:1: error: function definition is not allowed 
here
{
^
drivers/block/xen-blkfront.c:2822:1: error: function definition is not allowed 
here
{
^
>> drivers/block/xen-blkfront.c:2863:13: error: use of undeclared identifier 
>> 'xlblk_init'
module_init(xlblk_init);
^
drivers/block/xen-blkfront.c:2867:1: error: function definition is not allowed 
here
{
^
>> drivers/block/xen-blkfront.c:2874:13: error: use of undeclared identifier 
>> 'xlblk_exit'
module_exit(xlblk_exit);
^
>> drivers/block/xen-blkfront.c:2880:24: error: expected '}'
MODULE_ALIAS("xenblk");
^
drivers/block/xen-blkfront.c:2674:1: note: to match this '{'
{
^
>> drivers/block/xen-blkfront.c:2738:45: warning: ISO C90 forbids mixing 
>> declarations and code [-Wdeclaration-after-statement]
static const struct block_device_operations xlvbd_block_fops =
^
3 warnings and 11 errors generated.

vim +2699 drivers/block/xen-blkfront.c

  2672  
  2673  static int blkfront_freeze(struct xenbus_device *dev)
  2674  {
  2675  unsigned int i;
  2676  struct blkfront_info *info = dev_get_drvdata(>dev);
  2677  struct blkfront_ring_info *rinfo;
  2678  /* This would be reasonable timeout as used in 
xenbus_dev_shutdown() */
  2679  unsigned int timeout = 5 * HZ;
  2680  unsigned long flags;
  2681  int err = 0;
  2682  
  2683  info->connected = BLKIF_STATE_FREEZING;
  2684  
  2685  blk_mq_freeze_queue(info->rq);
  2686  blk_mq_quiesce_queue(info->rq);
  2687  
  2688  for_each_rinfo(info, rinfo, i) {
  2689  /* No more gnttab callback work. */
  2690  gnttab_cancel_free_callback(>callback);
  2691  /* Flush gnttab callback work. Must be done with no locks 
held. */
  2692  flush_work(>work);
  2693  }
  2694  
  2695  for_each_rinfo(info, rinfo, i) {
  2696  spin_lock_irqsave(>ring_lock, flags);
  2697  if (RING_FULL(>ring)
  2698  || RING_HAS_UNCONSUMED_RESPONSES(>ring)) {
> 2699  xenbus_dev_error(dev, err, "Hibernation Failed.
  2700  The ring is still busy");
  2701  info->connected = BLKIF_STATE_CONNECTED;
  2702  spin_unlock_irqrestore(>ring_lock, flags);
  2703  return -EBUSY;
  2704  }
  2705  spin_unlock_irqrestore(>ring_lock, flags);
  2706  }
  2707  /* Kick the backend to disconnect */
  2708  xenbus_switch_state(dev, XenbusStat

linux-next boot error: BUG: Invalid wait context ]

2020-05-19 Thread syzbot
Hello,

syzbot found the following crash on:

HEAD commit:fb57b1fa Add linux-next specific files for 20200519
git tree:   linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=17c9196e10
kernel config:  https://syzkaller.appspot.com/x/.config?x=2522f758a3588c2d
dashboard link: https://syzkaller.appspot.com/bug?extid=08003d278f04ed0944e0
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+08003d278f04ed094...@syzkaller.appspotmail.com

=
[ BUG: Invalid wait context ]
5.7.0-rc6-next-20200519-syzkaller #0 Not tainted
-
swapper/1/0 is trying to lock:
8880ae737518 (>lock){..-.}-{3:3}, at: spin_lock 
include/linux/spinlock.h:353 [inline]
8880ae737518 (>lock){..-.}-{3:3}, at: __queue_work+0x2bf/0x1350 
kernel/workqueue.c:1448
other info that might help us debug this:
context-{2:2}
1 lock held by swapper/1/0:
 #0: 89bc0040 (rcu_read_lock){}-{1:3}, at: 
__queue_work+0x175/0x1350 kernel/workqueue.c:1411
stack backtrace:
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.7.0-rc6-next-20200519-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
 
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x18f/0x20d lib/dump_stack.c:118
 print_lock_invalid_wait_context kernel/locking/lockdep.c:3988 [inline]
 check_wait_context kernel/locking/lockdep.c:4049 [inline]
 __lock_acquire.cold+0x26c/0x458 kernel/locking/lockdep.c:4286
 lock_acquire+0x20e/0x960 kernel/locking/lockdep.c:4915
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
 spin_lock include/linux/spinlock.h:353 [inline]
 __queue_work+0x2bf/0x1350 kernel/workqueue.c:1448
 queue_work_on+0x18b/0x200 kernel/workqueue.c:1517
 tick_nohz_activate kernel/time/tick-sched.c:1244 [inline]
 tick_nohz_activate kernel/time/tick-sched.c:1237 [inline]
 tick_setup_sched_timer+0x20e/0x380 kernel/time/tick-sched.c:1378
 hrtimer_switch_to_hres kernel/time/hrtimer.c:739 [inline]
 hrtimer_run_queues+0x327/0x3e0 kernel/time/hrtimer.c:1746
 run_local_timers+0x49/0x130 kernel/time/timer.c:1798
 update_process_times+0x1e/0x60 kernel/time/timer.c:1725
 tick_periodic+0x79/0x170 kernel/time/tick-common.c:99
 tick_handle_periodic+0x41/0x130 kernel/time/tick-common.c:111
 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1080 [inline]
 smp_apic_timer_interrupt+0x1ad/0x6a0 arch/x86/kernel/apic/apic.c:1105
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:828
 
RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61
Code: cc cc cc cc cc cc cc cc cc cc cc cc e9 07 00 00 00 0f 00 2d 74 91 59 00 
f4 c3 66 90 e9 07 00 00 00 0f 00 2d 64 91 59 00 fb f4  cc 48 b8 00 00 00 00 
00 fc ff df 41 57 41 56 41 55 41 54 55 53
RSP: :c9d3fd50 EFLAGS: 0286 ORIG_RAX: ff13
RAX: 11369a43 RBX: 1920001a7fab RCX: 
RDX: dc00 RSI: 0006 RDI: 8880a95f0c0c
RBP: dc00 R08: 8880a95f0340 R09: 
R10:  R11:  R12: ed10152be068
R13: 0001 R14: 8aabeb08 R15: 
 arch_safe_halt arch/x86/include/asm/paravirt.h:150 [inline]
 default_idle+0x91/0x3d0 arch/x86/kernel/process.c:708
 cpuidle_idle_call kernel/sched/idle.c:154 [inline]
 do_idle+0x393/0x690 kernel/sched/idle.c:269
 cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:361
 start_secondary+0x2f8/0x410 arch/x86/kernel/smpboot.c:268
 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242
random: fast init done
random: 7 urandom warning(s) missed due to ratelimiting


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.


Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-05-19 Thread Ira Weiny
On Tue, May 19, 2020 at 12:42:15PM -0700, Guenter Roeck wrote:
> On Tue, May 19, 2020 at 11:40:32AM -0700, Ira Weiny wrote:
> > On Tue, May 19, 2020 at 09:54:22AM -0700, Guenter Roeck wrote:
> > > On Mon, May 18, 2020 at 11:48:43AM -0700, ira.we...@intel.com wrote:
> > > > From: Ira Weiny 
> > > > 
> > > > The kunmap_atomic clean up failed to remove one set of pagefault/preempt
> > > > enables when vaddr is not in the fixmap.
> > > > 
> > > > Fixes: bee2128a09e6 ("arch/kunmap_atomic: consolidate duplicate code")
> > > > Signed-off-by: Ira Weiny 
> > > 
> > > microblazeel works with this patch,
> > 
> > Awesome...  Andrew in my rush yesterday I should have put a reported by on 
> > the
> > patch for Guenter as well.
> > 
> > Sorry about that Guenter,
> 
> No worries.
> 
> > Ira
> > 
> > > as do the nosmp sparc32 boot tests,
> > > but sparc32 boot tests with SMP enabled still fail with lots of messages
> > > such as:
> > > 
> > > BUG: Bad page state in process swapper/0  pfn:006a1
> > > page:f0933420 refcount:0 mapcount:1 mapping:(ptrval) index:0x1
> > > flags: 0x0()
> > > raw:  0100 0122  0001   
> > > 
> > > page dumped because: nonzero mapcount
> > > Modules linked in:
> > > CPU: 0 PID: 1 Comm: swapper/0 Tainted: GB 
> > > 5.7.0-rc6-next-20200518-2-gb178d2d56f29 #1
> > > [f00e7ab8 :
> > > bad_page+0xa8/0x108 ]
> > > [f00e8b54 :
> > > free_pcppages_bulk+0x154/0x52c ]
> > > [f00ea024 :
> > > free_unref_page+0x54/0x6c ]
> > > [f00ed864 :
> > > free_reserved_area+0x58/0xec ]
> > > [f0527104 :
> > > kernel_init+0x14/0x110 ]
> > > [f000b77c :
> > > ret_from_kernel_thread+0xc/0x38 ]
> > > [ :
> > > 0x0 ]

I'm really not seeing how this is related to the kmap clean up.

But just to make sure, I'm trying to run your environment for sparc and am
having less luck than with microblaze.

Could you give me the command which is failing above?

Ira

> > > 
> > > Code path leading to that message is different but always the same
> > > from free_unref_page().
> > > 
> > > Still testing ppc images.
> > > 
> 
> ppc image tests are passing with this patch.
> 
> Guenter


Re: [RFC PATCH v3 2/2] CPPC: add support for SW BOOST

2020-05-19 Thread Viresh Kumar
On 19-05-20, 19:41, Xiongfeng Wang wrote:
> To add SW BOOST support for CPPC, we need to get the max frequency of
> boost mode and non-boost mode. ACPI spec 6.2 section 8.4.7.1 describes
> the following two CPC registers.
> 
> "Highest performance is the absolute maximum performance an individual
> processor may reach, assuming ideal conditions. This performance level
> may not be sustainable for long durations, and may only be achievable if
> other platform components are in a specific state; for example, it may
> require other processors be in an idle state.
> 
> Nominal Performance is the maximum sustained performance level of the
> processor, assuming ideal operating conditions. In absence of an
> external constraint (power, thermal, etc.) this is the performance level
> the platform is expected to be able to maintain continuously. All
> processors are expected to be able to sustain their nominal performance
> state simultaneously."
> 
> To add SW BOOST support for CPPC, we can use Highest Performance as the
> max performance in boost mode and Nominal Performance as the max
> performance in non-boost mode. If the Highest Performance is greater
> than the Nominal Performance, we assume SW BOOST is supported.
> 
> The current CPPC driver does not support SW BOOST and uses 'Highest
> Performance' as the max performance the CPU can achieve. 'Nominal
> Performance' is used to convert 'performance' to 'frequency'. That
> means, if firmware enables boost and provides a value for Highest
> Performance which is greater than Nominal Performance, the boost feature
> is enabled by default.
> 
> Because SW BOOST is disabled by default, after this patch the boost
> feature is disabled by default even if boost is enabled by firmware.
> 
> Signed-off-by: Xiongfeng Wang 
> ---
>  drivers/cpufreq/cppc_cpufreq.c | 39 +--
>  1 file changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index bda0b24..792ed9e 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -37,6 +37,7 @@
>   * requested etc.
>   */
>  static struct cppc_cpudata **all_cpu_data;
> +static bool boost_supported;
>  
>  struct cppc_workaround_oem_info {
>   char oem_id[ACPI_OEM_ID_SIZE + 1];
> @@ -310,7 +311,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy 
> *policy)
>* Section 8.4.7.1.1.5 of ACPI 6.1 spec)
>*/
>   policy->min = cppc_cpufreq_perf_to_khz(cpu, 
> cpu->perf_caps.lowest_nonlinear_perf);
> - policy->max = cppc_cpufreq_perf_to_khz(cpu, 
> cpu->perf_caps.highest_perf);
> + policy->max = cppc_cpufreq_perf_to_khz(cpu, 
> cpu->perf_caps.nominal_perf);
>  
>   /*
>* Set cpuinfo.min_freq to Lowest to make the full range of performance
> @@ -318,7 +319,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy 
> *policy)
>* nonlinear perf
>*/
>   policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu, 
> cpu->perf_caps.lowest_perf);
> - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, 
> cpu->perf_caps.highest_perf);
> + policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, 
> cpu->perf_caps.nominal_perf);
>  
>   policy->transition_delay_us = 
> cppc_cpufreq_get_transition_delay_us(cpu_num);
>   policy->shared_type = cpu->shared_type;
> @@ -343,6 +344,13 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy 
> *policy)
>  
>   cpu->cur_policy = policy;
>  
> + /*
> +  * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
> +  * is supported.
> +  */
> + if (cpu->perf_caps.highest_perf > cpu->perf_caps.nominal_perf)
> + boost_supported = true;
> +
>   /* Set policy->cur to max now. The governors will adjust later. */
>   policy->cur = cppc_cpufreq_perf_to_khz(cpu,
>   cpu->perf_caps.highest_perf);
> @@ -410,6 +418,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int 
> cpunum)
>   return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1);
>  }
>  
> +static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
> +{
> + struct cppc_cpudata *cpudata;
> + int ret = 0;

No need to initialize this.

> +
> + if (!boost_supported) {
> + pr_err("BOOST not supported by CPU or firmware\n");
> + return -EINVAL;
> + }
> +
> + cpudata = all_cpu_data[policy->cpu];
> + if (state)
> + policy->max = cppc_cpufreq_perf_to_khz(cpudata,
> + cpudata->perf_caps.highest_perf);
> + else
> + policy->max = cppc_cpufreq_perf_to_khz(cpudata,
> + cpudata->perf_caps.nominal_perf);
> + policy->cpuinfo.max_freq = policy->max;
> +
> + ret = freq_qos_update_request(policy->max_freq_req, policy->max);
> + if (ret < 0)
> + return ret;
> +
> + 

Re: [PATCH 2/2] Add a new sysctl knob: unprivileged_userfaultfd_user_mode_only

2020-05-19 Thread Andrea Arcangeli
Hello everyone,

On Fri, May 08, 2020 at 12:54:03PM -0400, Michael S. Tsirkin wrote:
> On Fri, May 08, 2020 at 12:52:34PM -0400, Michael S. Tsirkin wrote:
> > On Wed, Apr 22, 2020 at 05:26:32PM -0700, Daniel Colascione wrote:
> > > This sysctl can be set to either zero or one. When zero (the default)
> > > the system lets all users call userfaultfd with or without
> > > UFFD_USER_MODE_ONLY, modulo other access controls. When
> > > unprivileged_userfaultfd_user_mode_only is set to one, users without
> > > CAP_SYS_PTRACE must pass UFFD_USER_MODE_ONLY to userfaultfd or the API
> > > will fail with EPERM. This facility allows administrators to reduce
> > > the likelihood that an attacker with access to userfaultfd can delay
> > > faulting kernel code to widen timing windows for other exploits.
> > > 
> > > Signed-off-by: Daniel Colascione 
> > 
> > The approach taken looks like a hard-coded security policy.
> > For example, it won't be possible to set the sysctl knob
> > > in question on any system running kvm. So this is
> > no good for any general purpose system.
> > 
> > What's wrong with using a security policy for this instead?
> 
> In fact I see the original thread already mentions selinux,
> so it's just a question of making this controllable by
> selinux.

I agree it'd be preferable if it was not hardcoded, but then this
patchset is also much simpler than the previous controlling it through
selinux..

I was thinking, an alternative policy that could control it without
hard-coding it, is a seccomp-bpf filter, then you can drop 2/2 as
well, not just 1/6-4/6.

If you keep only 1/2, can't seccomp-bpf enforce userfaultfd to be
always called with flags==0x1 without requiring extra modifications in
the kernel?

Can't you get the feature parity with the CAP_SYS_PTRACE capability
too, if you don't wrap those tasks with the ptrace capability under
that seccomp filter?

As far as I can tell, it's unprecedented to create a flag for a
syscall API, with the only purpose of implementing a seccomp-bpf
filter verifying such flag is set, but then if you want to control it
with LSM it's even more complex than doing it with seccomp-bpf, and it
requires more kernel code too. We could always add 2/2 later, such
possibility won't disappear, in fact we could also add 1/6-4/6 later
too if that is not enough.

If we could begin by merging only 1/2 from this new series and be done
with the kernel changes, because we offload the rest of the work to
the kernel eBPF JIT, I think it'd be ideal.

Thanks,
Andrea



Re: [RFC PATCH v3 1/2] cpufreq: change '.set_boost' to act on only one policy

2020-05-19 Thread Viresh Kumar
On 19-05-20, 19:41, Xiongfeng Wang wrote:
> Macro 'for_each_active_policy()' is defined internally. To avoid some
> cpufreq driver needing this macro to iterate over all the policies in
> '.set_boost' callback, we redefine '.set_boost' to act on only one
> policy and pass the policy as an argument.
> 'cpufreq_boost_trigger_state()' iterates over all the policies to set
> boost for the system. This is preparation for adding SW BOOST support
> for CPPC.
> 
> Signed-off-by: Xiongfeng Wang 
> ---
>  drivers/cpufreq/acpi-cpufreq.c |  4 ++--
>  drivers/cpufreq/cpufreq.c  | 53 
> +-
>  include/linux/cpufreq.h|  2 +-
>  3 files changed, 30 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 289e8ce..b0a9eb5 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -126,7 +126,7 @@ static void boost_set_msr_each(void *p_en)
>   boost_set_msr(enable);
>  }
>  
> -static int set_boost(int val)
> +static int set_boost(struct cpufreq_policy *policy, int val)
>  {
>   get_online_cpus();
>   on_each_cpu(boost_set_msr_each, (void *)(long)val, 1);

I think (Rafael can confirm), that you need to update this as well. You don't
need to run for each cpu now, but for each CPU in the policy.

> @@ -162,7 +162,7 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, 
> const char *buf,
>   if (ret || val > 1)
>   return -EINVAL;
>  
> - set_boost(val);
> + set_boost(policy, val);
>  
>   return count;
>  }
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index d03f250..d0d86b1 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -2532,34 +2532,29 @@ void cpufreq_update_limits(unsigned int cpu)
>  /*
>   *   BOOST*
>   */
> -static int cpufreq_boost_set_sw(int state)
> +static int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state)
>  {
> - struct cpufreq_policy *policy;
> -
> - for_each_active_policy(policy) {
> - int ret;
> -
> - if (!policy->freq_table)
> - return -ENXIO;
> + int ret;
>  
> - ret = cpufreq_frequency_table_cpuinfo(policy,
> -   policy->freq_table);
> - if (ret) {
> - pr_err("%s: Policy frequency update failed\n",
> -__func__);
> - return ret;
> - }
> + if (!policy->freq_table)
> + return -ENXIO;
>  
> - ret = freq_qos_update_request(policy->max_freq_req, 
> policy->max);
> - if (ret < 0)
> - return ret;
> + ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table);
> + if (ret) {
> + pr_err("%s: Policy frequency update failed\n", __func__);
> + return ret;
>   }
>  
> + ret = freq_qos_update_request(policy->max_freq_req, policy->max);
> + if (ret < 0)
> + return ret;
> +
>   return 0;
>  }
>  
>  int cpufreq_boost_trigger_state(int state)
>  {
> + struct cpufreq_policy *policy;
>   unsigned long flags;
>   int ret = 0;
>  
> @@ -2570,16 +2565,22 @@ int cpufreq_boost_trigger_state(int state)
>   cpufreq_driver->boost_enabled = state;
>   write_unlock_irqrestore(_driver_lock, flags);
>  
> - ret = cpufreq_driver->set_boost(state);
> - if (ret) {
> - write_lock_irqsave(_driver_lock, flags);
> - cpufreq_driver->boost_enabled = !state;
> - write_unlock_irqrestore(_driver_lock, flags);
> -
> - pr_err("%s: Cannot %s BOOST\n",
> -__func__, state ? "enable" : "disable");
> + for_each_active_policy(policy) {
> + ret = cpufreq_driver->set_boost(policy, state);
> + if (ret)
> + goto err_reset_state;
>   }
>  
> + return 0;
> +
> +err_reset_state:
> + write_lock_irqsave(_driver_lock, flags);
> + cpufreq_driver->boost_enabled = !state;
> + write_unlock_irqrestore(_driver_lock, flags);
> +
> + pr_err("%s: Cannot %s BOOST\n",
> +__func__, state ? "enable" : "disable");
> +
>   return ret;
>  }
>  
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 67d5950..3494f67 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -367,7 +367,7 @@ struct cpufreq_driver {
>  
>   /* platform specific boost support code */
>   boolboost_enabled;
> - int (*set_boost)(int state);
> + int (*set_boost)(struct cpufreq_policy *policy, int state);
>  };
>  
>  /* flags */
> -- 
> 1.7.12.4

-- 

Re: [RFC PATCH 2/2] init: Allow multi-line output of kernel command line

2020-05-19 Thread Joe Perches
On Wed, 2020-05-20 at 13:41 +0900, Sergey Senozhatsky wrote:
> On (20/05/19 12:42), Joe Perches wrote:
> > +static void __init print_cmdline(char *line)
> > +{
> > +#ifdef CONFIG_PRINTK
> > +   const char *prefix = "Kernel command line";
> > +   size_t len = strlen(line);
> > +
> > +   while (len > PRINTK_LOG_LINE_MAX) {
> > +   char *pos = line;
> > +   char *last_pos = pos + PRINTK_LOG_LINE_MAX - 1;
> > +   char saved_char;
> > +   /* Find last space char within the maximum line length */
> > +   while ((pos = memchr(pos, ' ', len - (pos - line))) &&
> > +  (pos - line) < PRINTK_LOG_LINE_MAX - 1) {
> 
> Don't you need to also count in the 'prefix' length?

yup.

> > +   last_pos = pos;
> > +   }
> > +   saved_char = line[last_pos - line];
> > +   line[last_pos - line] = 0;
> > +   pr_notice("%s: %s\n", prefix, line);
> > +   prefix = "Kernel command line (continued)";
> > +   line[last_pos - line] = saved_char;
> > +   len -= pos - line;
> > +   line += pos - line;
> > +   }
> > +
> > +   pr_notice("%s: %s\n", prefix, line);
> > +#endif
> > +}
> 
> I like this in general. And I agree that we better handle this
> externally, on the printk() caller side, so that printk() will
> still have sane limits and won't print a 1G string for example.
> 
> I wonder if we need to export PRINTK_LOG_LINE_MAX.

I think a #define works well enough.

>  Maybe we can
> use here something rather random and much shorter instead. E.g.
> 256 chars. Hmm. How 

min(some_max like 132/256, PRINTK_LOG_LINE_MAX)

would work.

> many crash/monitoring tools can get confused
> by multiple "Kernel command line" prefixes?

I doubt any as it's an init only function.




Re: [PATCH] MIPS: SGI-IP27: Remove duplicated include in ip27-timer.c

2020-05-19 Thread Tiezhu Yang

On 05/20/2020 12:03 AM, Thomas Bogendoerfer wrote:

On Tue, May 19, 2020 at 08:28:11PM +0800, Tiezhu Yang wrote:

After commit 9d0aaf98dc24 ("MIPS: SGI-IP27: Move all shared IP27
declarations to ip27-common.h"), ip27-common.h is included more
than once in ip27-timer.c, remove it.

Signed-off-by: Tiezhu Yang 

applied to mips-next. I only removed the second #include. If you
want to clean this up further the includes and comment about
ioc3_init() could be removed as well.


OK, thank you. I will do it later.

Thanks,
Tiezhu Yang



Thomas.





Re: [RFC PATCH 0/8] Qualcomm Cloud AI 100 driver

2020-05-19 Thread Greg Kroah-Hartman
On Tue, May 19, 2020 at 10:41:15PM +0200, Daniel Vetter wrote:
> > Ok, that's a decision you are going to have to push upward on, as we
> > really can't take this without a working, open, userspace.
> 
> Uh wut.
> 
> So the merge criteria for drivers/accel (atm still drivers/misc but I
> thought that was interim until more drivers showed up) isn't actually
> "totally-not-a-gpu accel driver without open source userspace".
> 
> Instead it's "totally-not-a-gpu accel driver without open source
> userspace" _and_ you have to be best buddies with Greg. Or at least
> not be on the naughty company list. Since for habanalabs all you
> wanted is a few test cases to exercise the ioctls. Not the entire
> userspace.

Habanalabs now has their full library opensourced that their tools use
directly, so that's not an argument anymore.

My primary point here is the copyright owner of this code, because of
that, I'm not going to object to allowing this to be merged without open
userspace code.

thanks,

greg k-h


Re: [PATCH net-next v1 1/2] ethtool: provide UAPI for PHY Signal Quality Index (SQI)

2020-05-19 Thread Oleksij Rempel
On Tue, May 19, 2020 at 04:03:48PM +0200, Andrew Lunn wrote:
> > --- a/net/ethtool/common.c
> > +++ b/net/ethtool/common.c
> > @@ -310,6 +310,16 @@ int __ethtool_get_link(struct net_device *dev)
> > return netif_running(dev) && dev->ethtool_ops->get_link(dev);
> >  }
> >  
> > +int __ethtool_get_sqi(struct net_device *dev)
> > +{
> > +   struct phy_device *phydev = dev->phydev;
> > +
> > +   if (!phydev->drv->get_sqi)
> > +   return -EOPNOTSUPP;
> > +
> > +   return phydev->drv->get_sqi(phydev);
> > +}
> > +
> 
> You are only providing access via netlink ethtool? There is no ioctl
> method to get this.

ack

> If so, i wonder if common.c is the correct place
> for this, or if it should be moved into linkstate.c. You can then drop
> the __.

ok

-- 
Pengutronix e.K.   | |
Steuerwalder Str. 21   | http://www.pengutronix.de/  |
31137 Hildesheim, Germany  | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: PGP signature


Re: [PATCH net-next v1 1/2] ethtool: provide UAPI for PHY Signal Quality Index (SQI)

2020-05-19 Thread Oleksij Rempel
On Tue, May 19, 2020 at 03:26:30PM +0200, Andrew Lunn wrote:
> On Tue, May 19, 2020 at 09:51:59AM +0200, Oleksij Rempel wrote:
> > Signal Quality Index is a mandatory value required by "OPEN Alliance
> > SIG" for the 100Base-T1 PHYs [1]. This indicator can be used for cable
> > integrity diagnostic and investigating other noise sources and
> > is implemented by at least two vendors: NXP[2] and TI[3].
> 
> Hi Oleksij
> 
> With a multi part patch set, please always include a cover note,
> describing what the patchset as a whole does.

ok

> > +int __ethtool_get_sqi(struct net_device *dev)
> > +{
> > +   struct phy_device *phydev = dev->phydev;
> > +
> > +   if (!phydev->drv->get_sqi)
> > +   return -EOPNOTSUPP;
> > +
> > +   return phydev->drv->get_sqi(phydev);
> > +}
> 
> You are not doing any locking here, which you should. Due to modules
> vs built in, it can be a bit tricky getting this right. Take a look at
> how ethtool ioctl.c uses phy_ethtool_get_stats() and that inline
> function itself.

ok.

-- 
Pengutronix e.K.   | |
Steuerwalder Str. 21   | http://www.pengutronix.de/  |
31137 Hildesheim, Germany  | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: PGP signature


general protection fault in kobject_get (2)

2020-05-19 Thread syzbot
Hello,

syzbot found the following crash on:

HEAD commit:d00f26b6 Merge git://git.kernel.org/pub/scm/linux/kernel/g..
git tree:   net-next
console output: https://syzkaller.appspot.com/x/log.txt?x=1316343c10
kernel config:  https://syzkaller.appspot.com/x/.config?x=26d0bd769afe1a2c
dashboard link: https://syzkaller.appspot.com/bug?extid=407fd358a932bbf639c6
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)

Unfortunately, I don't have any reproducer for this crash yet.

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+407fd358a932bbf63...@syzkaller.appspotmail.com

general protection fault, probably for non-canonical address 
0xdc13:  [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0098-0x009f]
CPU: 1 PID: 16682 Comm: syz-executor.3 Not tainted 5.7.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:kobject_get+0x30/0x150 lib/kobject.c:640
Code: 53 e8 d4 7e c6 fd 4d 85 e4 0f 84 a2 00 00 00 e8 c6 7e c6 fd 49 8d 7c 24 
3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48 89 fa 83 
e2 07 38 d0 7f 08 84 c0 0f 85 e7 00 00 00
RSP: 0018:c9000772f240 EFLAGS: 00010203
RAX: dc00 RBX: 85acfca0 RCX: c9000fc67000
RDX: 0013 RSI: 83acadfa RDI: 009c
RBP: 0060 R08: 8880a8dfa4c0 R09: ed100a03f403
R10: 8880501fa017 R11: ed100a03f402 R12: 0060
R13: c9000772f3c0 R14: 88805d1ec4e8 R15: 88805d1ec580
FS:  7f1ebed26700() GS:8880ae70() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 004d88f0 CR3: a86c4000 CR4: 001406e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 get_device+0x20/0x30 drivers/base/core.c:2620
 __ib_get_client_nl_info+0x1d4/0x2a0 drivers/infiniband/core/device.c:1863
 ib_get_client_nl_info+0x30/0x180 drivers/infiniband/core/device.c:1883
 nldev_get_chardev+0x52b/0xa40 drivers/infiniband/core/nldev.c:1625
 rdma_nl_rcv_msg drivers/infiniband/core/netlink.c:195 [inline]
 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
 rdma_nl_rcv+0x586/0x900 drivers/infiniband/core/netlink.c:259
 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
 netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
 sock_sendmsg_nosec net/socket.c:652 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:672
 sys_sendmsg+0x6e6/0x810 net/socket.c:2352
 ___sys_sendmsg+0x100/0x170 net/socket.c:2406
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439
 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
 entry_SYSCALL_64_after_hwframe+0x49/0xb3
RIP: 0033:0x45c829
Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 
89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 
db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:7f1ebed25c78 EFLAGS: 0246 ORIG_RAX: 002e
RAX: ffda RBX: 004ff720 RCX: 0045c829
RDX:  RSI: 2200 RDI: 0003
RBP: 0078bf00 R08:  R09: 
R10:  R11: 0246 R12: 
R13: 09ad R14: 004d5f10 R15: 7f1ebed266d4
Modules linked in:
---[ end trace 239938a6c4c3c99f ]---
RIP: 0010:kobject_get+0x30/0x150 lib/kobject.c:640
Code: 53 e8 d4 7e c6 fd 4d 85 e4 0f 84 a2 00 00 00 e8 c6 7e c6 fd 49 8d 7c 24 
3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48 89 fa 83 
e2 07 38 d0 7f 08 84 c0 0f 85 e7 00 00 00
RSP: 0018:c9000772f240 EFLAGS: 00010203
RAX: dc00 RBX: 85acfca0 RCX: c9000fc67000
RDX: 0013 RSI: 83acadfa RDI: 009c
RBP: 0060 R08: 8880a8dfa4c0 R09: ed100a03f403
R10: 8880501fa017 R11: ed100a03f402 R12: 0060
R13: c9000772f3c0 R14: 88805d1ec4e8 R15: 88805d1ec580
FS:  7f1ebed26700() GS:8880ae70() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 0073fad4 CR3: a86c4000 CR4: 001406e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.


Re: [PATCH] mailbox: imx: Disable the clock on devm_mbox_controller_register() failure

2020-05-19 Thread Oleksij Rempel
On Wed, May 20, 2020 at 12:22:46AM -0300, Fabio Estevam wrote:
> devm_mbox_controller_register() may fail, and in the case of failure the
> priv->clk clock that was previously enabled, should be disabled.
> 
> Fixes: 2bb7005696e2 ("mailbox: Add support for i.MX messaging unit")
> Signed-off-by: Fabio Estevam 

Acked-by: Oleksij Rempel 

> ---
>  drivers/mailbox/imx-mailbox.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c
> index 7906624a731c..3f7c4548c18f 100644
> --- a/drivers/mailbox/imx-mailbox.c
> +++ b/drivers/mailbox/imx-mailbox.c
> @@ -508,7 +508,13 @@ static int imx_mu_probe(struct platform_device *pdev)
>  
>   platform_set_drvdata(pdev, priv);
>  
> - return devm_mbox_controller_register(dev, >mbox);
> + ret = devm_mbox_controller_register(dev, >mbox);
> + if (ret) {
> + clk_disable_unprepare(priv->clk);
> + return ret;
> + }
> +
> + return 0;
>  }
>  
>  static int imx_mu_remove(struct platform_device *pdev)
> -- 
> 2.17.1
> 
> 

-- 
Pengutronix e.K.   | |
Steuerwalder Str. 21   | http://www.pengutronix.de/  |
31137 Hildesheim, Germany  | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: PGP signature


Re: [RFC][PATCH 5/5] thermal: int340x: Use new device interface

2020-05-19 Thread Amit Kucheria
On Mon, May 4, 2020 at 11:47 PM Srinivas Pandruvada
 wrote:
>
> Use the new framework to send notifications for:
> - Setting temperature threshold for notification to avoid polling
> - Send THERMAL_TRIP_REACHED event on reaching threshold
> - Send THERMAL_TRIP_UPDATE when firmware changes the existing trip
> temperature

I am a little confused here. I would've expected the thermal core to
send the THERMAL_TRIP_* notifications, not platform drivers. Why
shouldn't this be done in thermal core?

>
> Signed-off-by: Srinivas Pandruvada 
> ---
>  .../intel/int340x_thermal/int3403_thermal.c   |  3 ++
>  .../int340x_thermal/int340x_thermal_zone.c| 29 +++
>  .../int340x_thermal/int340x_thermal_zone.h|  7 +
>  .../processor_thermal_device.c|  1 +
>  4 files changed, 40 insertions(+)
>
> diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c 
> b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
> index f86cbb125e2f..77c014a113a4 100644
> --- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
> +++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
> @@ -63,15 +63,18 @@ static void int3403_notify(acpi_handle handle,
>
> switch (event) {
> case INT3403_PERF_CHANGED_EVENT:
> +   int340x_thermal_send_user_event(obj->int340x_zone, 
> THERMAL_PERF_CHANGED, 0);
> break;
> case INT3403_THERMAL_EVENT:
> int340x_thermal_zone_device_update(obj->int340x_zone,
>THERMAL_TRIP_VIOLATED);
> +   int340x_thermal_send_user_event(obj->int340x_zone, 
> THERMAL_TRIP_REACHED, 0);
> break;
> case INT3403_PERF_TRIP_POINT_CHANGED:
> int340x_thermal_read_trips(obj->int340x_zone);
> int340x_thermal_zone_device_update(obj->int340x_zone,
>THERMAL_TRIP_CHANGED);
> +   int340x_thermal_send_user_event(obj->int340x_zone, 
> THERMAL_TRIP_UPDATE, 0);
> break;
> default:
> dev_err(>pdev->dev, "Unsupported event [0x%x]\n", 
> event);
> diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c 
> b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
> index 432213272f1e..9568a2db7afd 100644
> --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
> +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
> @@ -146,12 +146,41 @@ static int int340x_thermal_get_trip_hyst(struct 
> thermal_zone_device *zone,
> return 0;
>  }
>
> +static int int340x_thermal_get_thres_low(struct thermal_zone_device *zone, 
> int *temp)
> +{
> +   struct int34x_thermal_zone *d = zone->devdata;
> +
> +   *temp = d->aux_trips[0];
> +
> +   return 0;
> +}
> +
> +static int int340x_thermal_set_thres_low(struct thermal_zone_device *zone, 
> int temp)
> +{
> +   struct int34x_thermal_zone *d = zone->devdata;
> +   acpi_status status;
> +
> +   if (d->override_ops && d->override_ops->set_trip_temp)
> +   return d->override_ops->set_trip_temp(zone, 0, temp);
> +
> +   status = acpi_execute_simple_method(d->adev->handle, "PAT0",
> +   millicelsius_to_deci_kelvin(temp));
> +   if (ACPI_FAILURE(status))
> +   return -EIO;
> +
> +   d->aux_trips[0] = temp;
> +
> +   return 0;
> +}
> +
>  static struct thermal_zone_device_ops int340x_thermal_zone_ops = {
> .get_temp   = int340x_thermal_get_zone_temp,
> .get_trip_temp  = int340x_thermal_get_trip_temp,
> .get_trip_type  = int340x_thermal_get_trip_type,
> .set_trip_temp  = int340x_thermal_set_trip_temp,
> .get_trip_hyst =  int340x_thermal_get_trip_hyst,
> +   .set_temp_thres_low = int340x_thermal_set_thres_low,
> +   .get_temp_thres_low = int340x_thermal_get_thres_low,
>  };
>
>  static int int340x_thermal_get_trip_config(acpi_handle handle, char *name,
> diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h 
> b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
> index 3b4971df1b33..142027e4955f 100644
> --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
> +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
> @@ -58,4 +58,11 @@ static inline void int340x_thermal_zone_device_update(
> thermal_zone_device_update(tzone->zone, event);
>  }
>
> +static inline void int340x_thermal_send_user_event(
> +   struct int34x_thermal_zone *tzone,
> +   enum thermal_device_events event,
> +   u64 data)
> +{
> +   thermal_dev_send_event(tzone->zone->id, event, data);
> +}
>  #endif
> diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c 
> 

Re: [PATCH v1] usb: musb: dsps: set MUSB_DA8XX quirk for AM335x

2020-05-19 Thread Oleksij Rempel
On Tue, May 19, 2020 at 05:18:51PM -0500, Bin Liu wrote:
> Hi,
> 
> On Fri, Mar 27, 2020 at 06:38:49AM +0100, Oleksij Rempel wrote:
> > Beagle Bone Black has different memory corruptions if kernel is
> > configured with USB_TI_CPPI41_DMA=y. This issue is reproducible with
> > ath9k-htc driver (ar9271 based wifi usb controller):
> > 
> > root@AccessBox:~ iw dev wlan0 set monitor  fcsfail otherbss
> > root@AccessBox:~ ip l s dev wlan0 up
> > kmemleak: Cannot insert 0xda577e40 into the object search tree (overlaps 
> > existing)
> > CPU: 0 PID: 176 Comm: ip Not tainted 5.5.0 #7
> > Hardware name: Generic AM33XX (Flattened Device Tree)
> > [] (unwind_backtrace) from [] (show_stack+0x18/0x1c)
> > [] (show_stack) from [] (dump_stack+0x84/0x98)
> > [] (dump_stack) from [] (create_object+0x2f8/0x324)
> > [] (create_object) from [] 
> > (kmem_cache_alloc+0x1a8/0x39c)
> > [] (kmem_cache_alloc) from [] (__alloc_skb+0x60/0x174)
> > [] (__alloc_skb) from [] (ath9k_wmi_cmd+0x50/0x184 
> > [ath9k_htc])
> > [] (ath9k_wmi_cmd [ath9k_htc]) from [] 
> > (ath9k_regwrite_multi+0x54/0x84 [ath9k_htc])
> > [] (ath9k_regwrite_multi [ath9k_htc]) from [] 
> > (ath9k_regwrite+0xf0/0xfc [ath9k_htc])
> > [] (ath9k_regwrite [ath9k_htc]) from [] 
> > (ar5008_hw_process_ini+0x280/0x6c0 [ath9k_hw])
> > [] (ar5008_hw_process_ini [ath9k_hw]) from [] 
> > (ath9k_hw_reset+0x270/0x1458 [ath9k_hw])
> > [] (ath9k_hw_reset [ath9k_hw]) from [] 
> > (ath9k_htc_start+0xb0/0x22c [ath9k_htc])
> > [] (ath9k_htc_start [ath9k_htc]) from [] 
> > (drv_start+0x4c/0x1e8 [mac80211])
> > [] (drv_start [mac80211]) from [] 
> > (ieee80211_do_open+0x480/0x954 [mac80211])
> > [] (ieee80211_do_open [mac80211]) from [] 
> > (__dev_open+0xdc/0x160)
> > [] (__dev_open) from [] (__dev_change_flags+0x1a4/0x204)
> > [] (__dev_change_flags) from [] 
> > (dev_change_flags+0x20/0x50)
> > [] (dev_change_flags) from [] (do_setlink+0x2ac/0x978)
> > 
> > After applying this patch, the system is running in monitor mode without
> > noticeable issues.
> > 
> > Suggested-by: Michael Grzeschik 
> > Signed-off-by: Oleksij Rempel 
> > ---
> >  drivers/usb/musb/musb_dsps.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
> > index 88923175f71e..c01f9e9e69f5 100644
> > --- a/drivers/usb/musb/musb_dsps.c
> > +++ b/drivers/usb/musb/musb_dsps.c
> > @@ -690,7 +690,7 @@ static void dsps_dma_controller_resume(struct dsps_glue 
> > *glue) {}
> >  #endif /* CONFIG_USB_TI_CPPI41_DMA */
> >  
> >  static struct musb_platform_ops dsps_ops = {
> > -   .quirks = MUSB_DMA_CPPI41 | MUSB_INDEXED_EP,
> > +   .quirks = MUSB_DMA_CPPI41 | MUSB_INDEXED_EP | MUSB_DA8XX,
> 
> The MUSB_DA8XX flag cannot be simply applied to MUSB_DSPS, at least the
> teardown and autoreq register offsets are different as show in
> cppi41_dma_controller_create().

ok

> Do you understand what exactly caused the issue?

No.

Disabling DMA support "solves" this issue as well.

Besides, with DMA support, there remains one more crash with different symptoms.
I can workaround it by disabling CPU Freq governor, or setting it to 
performance.

> The kernel trace above doesn't provide enough information.

Do you have any suggestions how to instrument the kernel to get needed
information? Or, should I try to capture USB traffic before the crash? 

If it helps, ath9k_htc is a usb wifi adapter. It generates a lot of
USB traffic on multiple endpoints. Bulk with data packets and Interrupt
with register accesses, LED blinking... etc.

Regards,
Oleksij
-- 
Pengutronix e.K.   | |
Steuerwalder Str. 21   | http://www.pengutronix.de/  |
31137 Hildesheim, Germany  | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: PGP signature


mmotm 2020-05-19-21-47 uploaded

2020-05-19 Thread Andrew Morton
The mm-of-the-moment snapshot 2020-05-19-21-47 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (5.x
or 5.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE--mm-dd-hh-mm-ss.  Both contain the string -mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

https://github.com/hnaz/linux-mm

The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is also available at

https://github.com/hnaz/linux-mm



This mmotm tree contains the following patches against 5.7-rc6:
(patches marked "*" will be included in linux-next)

  origin.patch
* checkpatch-test-git_dir-changes.patch
* proc-kpageflags-prevent-an-integer-overflow-in-stable_page_flags.patch
* proc-kpageflags-do-not-use-uninitialized-struct-pages.patch
* kcov-cleanup-debug-messages.patch
* kcov-fix-potential-use-after-free-in-kcov_remote_start.patch
* kcov-move-t-kcov-assignments-into-kcov_start-stop.patch
* kcov-move-t-kcov_sequence-assignment.patch
* kcov-use-t-kcov_mode-as-enabled-indicator.patch
* kcov-collect-coverage-from-interrupts.patch
* usb-core-kcov-collect-coverage-from-usb-complete-callback.patch
* memcg-optimize-memorynuma_stat-like-memorystat.patch
* lib-lzo-fix-ambiguous-encoding-bug-in-lzo-rle.patch
* device-dax-dont-leak-kernel-memory-to-user-space-after-unloading-kmem.patch
* x86-bitops-fix-build-regression.patch
* mm-compaction-avoid-vm_bug_onpageslab-in-page_mapcount.patch
* rapidio-fix-an-error-in-get_user_pages_fast-error-handling.patch
* selftests-vm-gitignore-add-mremap_dontunmap.patch
* selftests-vm-write_to_hugetlbfsc-fix-unused-variable-warning.patch
* kasan-disable-branch-tracing-for-core-runtime.patch
* sh-include-linux-time_typesh-for-sockios.patch
* maintainers-update-email-address-for-naoya-horiguchi.patch
* scripts-support-compiled-source-improved-precise.patch
* scripts-add-a-intermediate-file-for-make-gtags.patch
* squashfs-migrate-from-ll_rw_block-usage-to-bio.patch
* squashfs-migrate-from-ll_rw_block-usage-to-bio-fix.patch
* ocfs2-add-missing-annotation-for-dlm_empty_lockres.patch
* ocfs2-mount-shared-volume-without-ha-stack.patch
* drivers-tty-serial-sh-scic-suppress-uninitialized-var-warning.patch
* ramfs-support-o_tmpfile.patch
* vfs-track-per-sb-writeback-errors-and-report-them-to-syncfs.patch
* buffer-record-blockdev-write-errors-in-super_block-that-it-backs.patch
* kernel-watchdog-flush-all-printk-nmi-buffers-when-hardlockup-detected.patch
  mm.patch
* usercopy-mark-dma-kmalloc-caches-as-usercopy-caches.patch
* mm-slub-fix-corrupted-freechain-in-deactivate_slab.patch
* mm-slub-fix-corrupted-freechain-in-deactivate_slab-fix.patch
* slub-remove-userspace-notifier-for-cache-add-remove.patch
* slub-remove-kmalloc-under-list_lock-from-list_slab_objects.patch
* mm-slub-fix-stack-overruns-with-slub_stats.patch
* mm-slub-add-panic_on_error-to-the-debug-facilities-fix.patch
* mm-dump_page-do-not-crash-with-invalid-mapping-pointer.patch
* mm-move-readahead-prototypes-from-mmh.patch
* mm-return-void-from-various-readahead-functions.patch
* mm-ignore-return-value-of-readpages.patch
* mm-move-readahead-nr_pages-check-into-read_pages.patch
* mm-add-new-readahead_control-api.patch
* mm-use-readahead_control-to-pass-arguments.patch
* mm-rename-various-offset-parameters-to-index.patch
* mm-rename-readahead-loop-variable-to-i.patch
* mm-remove-page_offset-from-readahead-loop.patch
* mm-put-readahead-pages-in-cache-earlier.patch
* mm-add-readahead-address-space-operation.patch
* mm-move-end_index-check-out-of-readahead-loop.patch
* mm-add-page_cache_readahead_unbounded.patch
* mm-document-why-we-dont-set-pagereadahead.patch
* mm-use-memalloc_nofs_save-in-readahead-path.patch
* fs-convert-mpage_readpages-to-mpage_readahead.patch
* btrfs-convert-from-readpages-to-readahead.patch
* erofs-convert-uncompressed-files-from-readpages-to-readahead.patch
* erofs-convert-compressed-files-from-readpages-to-readahead.patch
* 

Re: [PATCH V2] powerpc/perf: Add support for outputting extended regs in perf intr_regs

2020-05-19 Thread Madhavan Srinivasan




On 5/19/20 11:45 AM, Athira Rajeev wrote:

From: Anju T Sudhakar 

Add support for perf extended register capability in powerpc.
The capability flag PERF_PMU_CAP_EXTENDED_REGS is used to indicate the
PMUs which support extended registers. The generic code defines the mask
of extended registers as 0 for unsupported architectures.

Patch adds extended regs support for power9 platform by
exposing MMCR0, MMCR1 and MMCR2 registers.

REG_RESERVED mask needs update to include extended regs.
`PERF_REG_EXTENDED_MASK`, contains mask value of the supported registers,
is defined at runtime in the kernel based on platform since the supported
registers may differ from one processor version to another and hence the
MASK value.

Perf tools side uses extended mask to display the platform
supported register names (with -I? option) to the user and also
send this mask to the kernel to capture the extended registers
in each sample. Hence decide the mask value based on the processor
version.

with patch
--

available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11
r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26
r27 r28 r29 r30 r31 nip msr orig_r3 ctr link xer ccr softe
trap dar dsisr sier mmcra mmcr0 mmcr1 mmcr2

PERF_RECORD_SAMPLE(IP, 0x1): 4784/4784: 0 period: 1 addr: 0
... intr regs: mask 0x ABI 64-bit
 r00xc012b77c
 r10xc03fe5e03930
 r20xc1b0e000
 r30xc03fdcddf800
 r40xc03fc788
 r50x9c422724be
 r60xc03fe5e03908
 r70xff63bddc8706
 r80x9e4
 r90x0
 r10   0x1
 r11   0x0
 r12   0xc01299c0
 r13   0xc03c4800
 r14   0x0
 r15   0x7fffdd8b8b00
 r16   0x0
 r17   0x7fffdd8be6b8
 r18   0x7e7076607730
 r19   0x2f
 r20   0xc0001fc26c68
 r21   0xc0002041e4227e00
 r22   0xc0002018fb60
 r23   0x1
 r24   0xc03ffec4d900
 r25   0x8000
 r26   0x0
 r27   0x1
 r28   0x1
 r29   0xc1be1260
 r30   0x6008010
 r31   0xc03ffebb7218
 nip   0xc012b910
 msr   0x90009033
 orig_r3 0xc012b86c
 ctr   0xc01299c0
 link  0xc012b77c
 xer   0x0
 ccr   0x2800
 softe 0x1
 trap  0xf00
 dar   0x0
 dsisr 0x800
 sier  0x0
 mmcra 0x800
 mmcr0 0x82008090
 mmcr1 0x1e00
 mmcr2 0x0
  ... thread: perf:4784

Signed-off-by: Anju T Sudhakar 
[Defined PERF_REG_EXTENDED_MASK at run time to add support for different 
platforms ]
Signed-off-by: Athira Rajeev 
---
Changes from v1 -> v2

- `PERF_REG_EXTENDED_MASK` is defined at runtime in the kernel
based on platform. This will give flexibility in using extended
regs for all processor versions where the supported registers may differ.
- removed PERF_REG_EXTENDED_MASK from the perf tools side. Based on the
processor version(from PVR value), tool side will return the appropriate
extended mask
- Since tool changes can handle without a "PERF_REG_EXTENDED_MASK" macro,
dropped patch to set NO_AUXTRACE.
- Addressed review comments from Ravi Bangoria for V1

---

  arch/powerpc/include/asm/perf_event_server.h|  8 
  arch/powerpc/include/uapi/asm/perf_regs.h   | 14 ++-
  arch/powerpc/perf/core-book3s.c |  1 +
  arch/powerpc/perf/perf_regs.c   | 34 ++--
  arch/powerpc/perf/power9-pmu.c  |  6 +++
  tools/arch/powerpc/include/uapi/asm/perf_regs.h | 14 ++-
  tools/perf/arch/powerpc/include/perf_regs.h |  5 ++-
  tools/perf/arch/powerpc/util/perf_regs.c| 54 +
  8 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h 
b/arch/powerpc/include/asm/perf_event_server.h
index 3e9703f..1458e1a 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -15,6 +15,9 @@
  #define MAX_EVENT_ALTERNATIVES8
  #define MAX_LIMITED_HWCOUNTERS2

+extern u64 mask_var;
+#define PERF_REG_EXTENDED_MASK  mask_var
+
  struct perf_event;

  /*
@@ -55,6 +58,11 @@ struct power_pmu {
int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
+   /*
+* set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
+* the pmu supports extended perf regs capability
+*/
+   int capabilities;
  };

  /*
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
index f599064..485b1d5 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -48,6 +48,18 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
-   PERF_REG_POWERPC_MAX,
+   /* Extended registers */
+   

Re: [PATCH v4 2/4] kasan: record and print the free track

2020-05-19 Thread Dmitry Vyukov
On Wed, May 20, 2020 at 6:03 AM Walter Wu  wrote:
>
> > On Tue, May 19, 2020 at 4:25 AM Walter Wu  wrote:
> > >
> > > Move free track from slub alloc meta-data to slub free meta-data in
> > > order to make struct kasan_free_meta size is 16 bytes. It is a good
> > > size because it is the minimal redzone size and a good number of
> > > alignment.
> > >
> > > For free track in generic KASAN, we do the modification in struct
> > > kasan_alloc_meta and kasan_free_meta:
> > > - remove free track from kasan_alloc_meta.
> > > - add free track into kasan_free_meta.
> > >
> > > [1]https://bugzilla.kernel.org/show_bug.cgi?id=198437
> > >
> > > Signed-off-by: Walter Wu 
> > > Suggested-by: Dmitry Vyukov 
> > > Cc: Andrey Ryabinin 
> > > Cc: Dmitry Vyukov 
> > > Cc: Alexander Potapenko 
> > > ---
> > >  mm/kasan/common.c  | 22 ++
> > >  mm/kasan/generic.c | 18 ++
> > >  mm/kasan/kasan.h   |  7 +++
> > >  mm/kasan/report.c  | 20 
> > >  mm/kasan/tags.c| 37 +
> > >  5 files changed, 64 insertions(+), 40 deletions(-)
> > >
> > > diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> > > index 8bc618289bb1..47b53912f322 100644
> > > --- a/mm/kasan/common.c
> > > +++ b/mm/kasan/common.c
> > > @@ -51,7 +51,7 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags)
> > > return stack_depot_save(entries, nr_entries, flags);
> > >  }
> > >
> > > -static inline void set_track(struct kasan_track *track, gfp_t flags)
> > > +void kasan_set_track(struct kasan_track *track, gfp_t flags)
> > >  {
> > > track->pid = current->pid;
> > > track->stack = kasan_save_stack(flags);
> > > @@ -299,24 +299,6 @@ struct kasan_free_meta *get_free_info(struct 
> > > kmem_cache *cache,
> > > return (void *)object + cache->kasan_info.free_meta_offset;
> > >  }
> > >
> > > -
> > > -static void kasan_set_free_info(struct kmem_cache *cache,
> > > -   void *object, u8 tag)
> > > -{
> > > -   struct kasan_alloc_meta *alloc_meta;
> > > -   u8 idx = 0;
> > > -
> > > -   alloc_meta = get_alloc_info(cache, object);
> > > -
> > > -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
> > > -   idx = alloc_meta->free_track_idx;
> > > -   alloc_meta->free_pointer_tag[idx] = tag;
> > > -   alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
> > > -#endif
> > > -
> > > > -   set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
> > > -}
> > > -
> > >  void kasan_poison_slab(struct page *page)
> > >  {
> > > unsigned long i;
> > > @@ -492,7 +474,7 @@ static void *__kasan_kmalloc(struct kmem_cache 
> > > *cache, const void *object,
> > > KASAN_KMALLOC_REDZONE);
> > >
> > > if (cache->flags & SLAB_KASAN)
> > > > -   set_track(&get_alloc_info(cache, object)->alloc_track, 
> > > > flags);
> > > > +   kasan_set_track(&get_alloc_info(cache, 
> > > > object)->alloc_track, flags);
> > >
> > > return set_tag(object, tag);
> > >  }
> > > diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
> > > index 3372bdcaf92a..763d8a13e0ac 100644
> > > --- a/mm/kasan/generic.c
> > > +++ b/mm/kasan/generic.c
> > > @@ -344,3 +344,21 @@ void kasan_record_aux_stack(void *addr)
> > > alloc_info->aux_stack[1] = alloc_info->aux_stack[0];
> > > alloc_info->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
> > >  }
> > > +
> > > +void kasan_set_free_info(struct kmem_cache *cache,
> > > +   void *object, u8 tag)
> > > +{
> > > +   struct kasan_free_meta *free_meta;
> > > +
> > > +   free_meta = get_free_info(cache, object);
> > > > +   kasan_set_track(&free_meta->free_track, GFP_NOWAIT);
> > > +}
> > > +
> > > +struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
> > > +   void *object, u8 tag)
> > > +{
> > > +   struct kasan_free_meta *free_meta;
> > > +
> > > +   free_meta = get_free_info(cache, object);
> > > > +   return &free_meta->free_track;
> > > +}
> > > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> > > index a7391bc83070..ad897ec36545 100644
> > > --- a/mm/kasan/kasan.h
> > > +++ b/mm/kasan/kasan.h
> > > @@ -127,6 +127,9 @@ struct kasan_free_meta {
> > >  * Otherwise it might be used for the allocator freelist.
> > >  */
> > > struct qlist_node quarantine_link;
> > > +#ifdef CONFIG_KASAN_GENERIC
> > > +   struct kasan_track free_track;
> > > +#endif
> > >  };
> > >
> > >  struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
> > > @@ -168,6 +171,10 @@ void kasan_report_invalid_free(void *object, 
> > > unsigned long ip);
> > >  struct page *kasan_addr_to_page(const void *addr);
> > >
> > >  depot_stack_handle_t kasan_save_stack(gfp_t flags);
> > > +void kasan_set_track(struct kasan_track *track, gfp_t flags);
> > > +void kasan_set_free_info(struct kmem_cache *cache, void *object, u8 tag);
> > > +struct kasan_track *kasan_get_free_track(struct kmem_cache 

Re: [RFC][PATCH 1/5] thermal: Add support for /dev/thermal_notify

2020-05-19 Thread Amit Kucheria
On Mon, May 4, 2020 at 11:47 PM Srinivas Pandruvada
 wrote:
>
> This change adds an optional feature to add a new device entry
> /dev/thermal_notify.
>
> When config CONFIG_THERMAL_USER_EVENT_INTERFACE is selected, this new
> device entry is created.
>
> Thermal core or any thermal driver can use thermal_dev_send_event() interface

Do you have any particular use case in mind where a platform driver
will use this interface to send platform-specific events?

IMO, we should probably try to keep this restricted to messages from
thermal core if we are to have any hope of having a standard library
in userspace capable of parsing these thermal events.

> to send events. Each user event follows a standard format:
> - zone_id
> - event_id
> - event_data
> - reserved for future, currently 0s
>
> User space can basically:
> fd = open ("/dev/thermal_notify")
> In a loop
> read (fd)
> read and process event
>
> or
> fd = open ("/dev/thermal_notify")
> Set the fd as non-blocking
> In a loop
> Use poll() and wait
> read and process event
>
> There are predefined events added to thermal.h. Based on need they can
> be extended.
>
> Signed-off-by: Srinivas Pandruvada 
> ---
>  drivers/thermal/Kconfig  |   9 ++
>  drivers/thermal/Makefile |   3 +
>  drivers/thermal/thermal_dev_if.c | 195 +++
>  include/linux/thermal.h  |  24 
>  4 files changed, 231 insertions(+)
>  create mode 100644 drivers/thermal/thermal_dev_if.c
>
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index 91af271e9bb0..27d05d62458e 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -78,6 +78,15 @@ config THERMAL_WRITABLE_TRIPS
>   Say 'Y' here if you would like to allow userspace tools to
>   change trip temperatures.
>
> +config THERMAL_USER_EVENT_INTERFACE
> +   bool "Allow user space to read thermal events from a dev file"
> +   help
> + This option allows a user space program to read thermal events
> + via /dev/thermal_notify file.
> +
> + Say 'Y' here if you would like to allow userspace programs to
> + read thermal events.
> +
>  choice
> prompt "Default Thermal governor"
> default THERMAL_DEFAULT_GOV_STEP_WISE
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 8c8ed7b79915..8f65832d755a 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -11,6 +11,9 @@ thermal_sys-y += thermal_core.o 
> thermal_sysfs.o \
>  thermal_sys-$(CONFIG_THERMAL_HWMON)+= thermal_hwmon.o
>  thermal_sys-$(CONFIG_THERMAL_OF)   += of-thermal.o
>
> +# Thermal user space events
> +obj-$(CONFIG_THERMAL_USER_EVENT_INTERFACE) += thermal_dev_if.o
> +
>  # governors
>  thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE)   += fair_share.o
>  thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG)+= gov_bang_bang.o
> diff --git a/drivers/thermal/thermal_dev_if.c 
> b/drivers/thermal/thermal_dev_if.c
> new file mode 100644
> index ..763bfe9eef9d
> --- /dev/null
> +++ b/drivers/thermal/thermal_dev_if.c
> @@ -0,0 +1,195 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Thermal device file interface
> + * Copyright (c) 2020, Intel Corporation.
> + * All rights reserved.
> + *
> + * Author: Srinivas Pandruvada 
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define THERMAL_DEV_FIFO_SIZE  1024
> +
> +struct thermal_chdev_sample {
> +   int zone_id;
> +   int event;
> +   u64 event_data;
> +   u64 reserved;
> +};
> +
> +struct thermal_chdev {
> +   struct miscdevice therm_dev;
> +   struct kfifo data_fifo;
> +   unsigned long misc_opened;
> +   wait_queue_head_t wait;
> +};
> +
> +static DEFINE_MUTEX(thermal_chdev_mutex);
> +static struct thermal_chdev *thermal_chdev;
> +
> +static int thermal_chdev_open(struct inode *inode, struct file *file)
> +{
> +   struct thermal_chdev *chdev;
> +
> +   chdev = container_of(file->private_data, struct thermal_chdev, 
> therm_dev);
> +
> +   /* We essentially have single reader and writer */
> +   if (test_and_set_bit(0, &chdev->misc_opened))
> +   return -EBUSY;
> +
> +   return stream_open(inode, file);
> +}
> +
> +static int thermal_chdev_release(struct inode *inode, struct file *file)
> +{
> +   struct thermal_chdev *chdev;
> +
> +   chdev = container_of(file->private_data, struct thermal_chdev, 
> therm_dev);
> +
> +   clear_bit(0, &chdev->misc_opened);
> +
> +   return 0;
> +}
> +
> +static __poll_t thermal_chdev_poll(struct file *file, struct 
> poll_table_struct *wait)
> +{
> +   struct thermal_chdev *chdev;
> +   __poll_t mask = 0;
> +
> +   chdev = container_of(file->private_data, struct thermal_chdev, 
> therm_dev);
> +
> +   

[PATCH] m68k/mac: Don't call via_flush_cache() on Mac IIfx

2020-05-19 Thread Finn Thain
There is no VIA2 chip on the Mac IIfx, so don't call via_flush_cache().
This avoids a boot crash which appeared in v5.4.

printk: console [ttyS0] enabled
printk: bootconsole [debug0] disabled
printk: bootconsole [debug0] disabled
Calibrating delay loop... 9.61 BogoMIPS (lpj=48064)
pid_max: default: 32768 minimum: 301
Mount-cache hash table entries: 1024 (order: 0, 4096 bytes, linear)
Mountpoint-cache hash table entries: 1024 (order: 0, 4096 bytes, linear)
devtmpfs: initialized
random: get_random_u32 called from bucket_table_alloc.isra.27+0x68/0x194 with 
crng_init=0
clocksource: jiffies: mask: 0x max_cycles: 0x, max_idle_ns: 
1911260446275 ns
futex hash table entries: 256 (order: -1, 3072 bytes, linear)
NET: Registered protocol family 16
Data read fault at 0x in Super Data (pc=0x8a6a)
BAD KERNEL BUSERR
Oops: 
Modules linked in:
PC: [<8a6a>] via_flush_cache+0x12/0x2c
SR: 2700  SP: 01c1fe3c  a2: 01c24000
d0: 1119d1: 000cd2: 00012000d3: 000f
d4: 01c06840d5: 00033b92a0: a1: 
Process swapper (pid: 1, task=01c24000)
Frame format=B ssw=0755 isc=0200 isb=fff7 daddr= dobuf=01c1fed0
baddr=8a6e dibuf=004e ver=f
Stack from 01c1fec4:
01c1fed0 7d7e 00010080 01c1fedc 792e 0001 01c1fef4 6b40
01c8 0004 0006 0003 01c1ff1c 004a545e 004ff200 0004
 0003 01c06840 00033b92 004a5410 004b6c88 01c1ff84 21e2
0073 0003 01c06840 00033b92 0038507a 004bb094 004b6ca8 004b6c88
004b6ca4 004b6c88 21ae 00020002  01c0685d  01c1ffb4
0049f938 00409c85 01c06840 0045bd40 0073 0002 0002 
Call Trace: [<7d7e>] mac_cache_card_flush+0x12/0x1c
 [<00010080>] fix_dnrm+0x2/0x18
 [<792e>] cache_push+0x46/0x5a
 [<6b40>] arch_dma_prep_coherent+0x60/0x6e
 [<0004>] switched_to_dl+0x76/0xd0
 [<004a545e>] dma_atomic_pool_init+0x4e/0x188
 [<0004>] switched_to_dl+0x76/0xd0
 [<00033b92>] parse_args+0x0/0x370
 [<004a5410>] dma_atomic_pool_init+0x0/0x188
 [<21e2>] do_one_initcall+0x34/0x1be
 [<00033b92>] parse_args+0x0/0x370
 [<0038507a>] strcpy+0x0/0x1e
 [<21ae>] do_one_initcall+0x0/0x1be
 [<00020002>] do_proc_dointvec_conv+0x54/0x74
 [<0049f938>] kernel_init_freeable+0x126/0x190
 [<0049f94c>] kernel_init_freeable+0x13a/0x190
 [<004a5410>] dma_atomic_pool_init+0x0/0x188
 [<00041798>] complete+0x0/0x3c
 [<000b9b0c>] kfree+0x0/0x20a
 [<0038df98>] schedule+0x0/0xd0
 [<0038d604>] kernel_init+0x0/0xda
 [<0038d610>] kernel_init+0xc/0xda
 [<0038d604>] kernel_init+0x0/0xda
 [<2d38>] ret_from_kernel_thread+0xc/0x14
Code:  2079 0048 10da 2279 0048 10c8 d3c8 <1011> 0200 fff7 1280 d1f9 0048 
10c8 1010  0008 1080 4e5e 4e75 4e56  2039
Disabling lock debugging due to kernel taint
Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b

Thanks to Stan Johnson for capturing the console log and running git
bisect.

Git bisect said commit 8e3a68fb55e0 ("dma-mapping: make
dma_atomic_pool_init self-contained") is the first "bad" commit. I don't
know why. Perhaps mach_l2_flush first became reachable with that commit.

Cc: Joshua Thompson 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-and-tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 arch/m68k/include/asm/mac_via.h |  1 +
 arch/m68k/mac/config.c  | 21 ++---
 arch/m68k/mac/via.c |  6 +-
 3 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h
index de1470c4d829..1149251ea58d 100644
--- a/arch/m68k/include/asm/mac_via.h
+++ b/arch/m68k/include/asm/mac_via.h
@@ -257,6 +257,7 @@ extern int rbv_present,via_alt_mapping;
 
 struct irq_desc;
 
+extern void via_l2_flush(int writeback);
 extern void via_register_interrupts(void);
 extern void via_irq_enable(int);
 extern void via_irq_disable(int);
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 611f73bfc87c..d0126ab01360 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -59,7 +59,6 @@ extern void iop_preinit(void);
 extern void iop_init(void);
 extern void via_init(void);
 extern void via_init_clock(irq_handler_t func);
-extern void via_flush_cache(void);
 extern void oss_init(void);
 extern void psc_init(void);
 extern void baboon_init(void);
@@ -130,21 +129,6 @@ int __init mac_parse_bootinfo(const struct bi_record 
*record)
return unknown;
 }
 
-/*
- * Flip into 24bit mode for an instant - flushes the L2 cache card. We
- * have to disable interrupts for this. Our IRQ handlers will crap
- * themselves if they take an IRQ in 24bit mode!
- */
-
-static void mac_cache_card_flush(int writeback)
-{
-   unsigned long flags;
-
-   local_irq_save(flags);
-   via_flush_cache();
-   local_irq_restore(flags);
-}
-
 void __init config_mac(void)
 {
if (!MACH_IS_MAC)
@@ -175,9 +159,8 @@ void __init 

Re: [RFC PATCH 2/2] init: Allow multi-line output of kernel command line

2020-05-19 Thread Sergey Senozhatsky
On (20/05/19 12:42), Joe Perches wrote:
> +static void __init print_cmdline(char *line)
> +{
> +#ifdef CONFIG_PRINTK
> + const char *prefix = "Kernel command line";
> + size_t len = strlen(line);
> +
> + while (len > PRINTK_LOG_LINE_MAX) {
> + char *pos = line;
> + char *last_pos = pos + PRINTK_LOG_LINE_MAX - 1;
> + char saved_char;
> + /* Find last space char within the maximum line length */
> + while ((pos = memchr(pos, ' ', len - (pos - line))) &&
> +(pos - line) < PRINTK_LOG_LINE_MAX - 1) {

Don't you need to also count in the 'prefix' length?

> + last_pos = pos;
> + }
> + saved_char = line[last_pos - line];
> + line[last_pos - line] = 0;
> + pr_notice("%s: %s\n", prefix, line);
> + prefix = "Kernel command line (continued)";
> + line[last_pos - line] = saved_char;
> + len -= pos - line;
> + line += pos - line;
> + }
> +
> + pr_notice("%s: %s\n", prefix, line);
> +#endif
> +}

I like this in general. And I agree that we better handle this
externally, on the printk() caller side, so that printk() will
still have sane limits and won't print a 1G string for example.

I wonder if we need to export PRINTK_LOG_LINE_MAX. Maybe we can
use here something rather random and much shorter instead. E.g.
256 chars. Hmm. How many crash/monitoring tools can get confused
by multiple "Kernel command line" prefixes?

-ss


[PATCH] m68k/mac: Remove misleading comment

2020-05-19 Thread Finn Thain
This code path was tested on a Quadra 950 a long time ago and the
comment isn't needed.

Cc: Joshua Thompson 
Signed-off-by: Finn Thain 
---
 arch/m68k/mac/iop.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index 100e5112fd9e..d99c7ea08d8c 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -299,7 +299,6 @@ void __init iop_init(void)
 
 /*
  * Register the interrupt handler for the IOPs.
- * TODO: might be wrong for non-OSS machines. Anyone?
  */
 
 void __init iop_register_interrupts(void)
-- 
2.26.2



[PATCH] m68k/mac: Avoid stuck ISM IOP interrupt on Quadra 900/950

2020-05-19 Thread Finn Thain
On a Quadra 900/950, the ISM IOP IRQ output pin is connected to an
edge-triggered input on VIA2. It is theoretically possible that this
signal could fail to produce the expected VIA2 interrupt.

The two IOP interrupt flags can be asserted in any order but the logic
in iop_ism_irq() does not allow for that. In particular, INT0 can be
asserted right after INT0 is checked and before INT1 is cleared.

Such an interrupt would produce no new edge and VIA2 would detect no
further interrupts from the IOP. Avoid this by looping over the INT0/1
handlers so an edge can be produced.

Cc: Joshua Thompson 
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 arch/m68k/mac/iop.c | 50 +
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index 9bfa17015768..100e5112fd9e 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -566,36 +566,42 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id)
uint iop_num = (uint) dev_id;
volatile struct mac_iop *iop = iop_base[iop_num];
int i,state;
+   u8 events = iop->status_ctrl & (IOP_INT0 | IOP_INT1);
 
iop_pr_debug("status %02X\n", iop->status_ctrl);
 
-   /* INT0 indicates a state change on an outgoing message channel */
-
-   if (iop->status_ctrl & IOP_INT0) {
-   iop->status_ctrl = IOP_INT0 | IOP_RUN | IOP_AUTOINC;
-   iop_pr_debug("new status %02X, send states", iop->status_ctrl);
-   for (i = 0 ; i < NUM_IOP_CHAN  ; i++) {
-   state = iop_readb(iop, IOP_ADDR_SEND_STATE + i);
-   iop_pr_cont(" %02X", state);
-   if (state == IOP_MSG_COMPLETE) {
-   iop_handle_send(iop_num, i);
+   do {
+   /* INT0 indicates state change on an outgoing message channel */
+   if (events & IOP_INT0) {
+   iop->status_ctrl = IOP_INT0 | IOP_RUN | IOP_AUTOINC;
+   iop_pr_debug("new status %02X, send states",
+iop->status_ctrl);
+   for (i = 0; i < NUM_IOP_CHAN; i++) {
+   state = iop_readb(iop, IOP_ADDR_SEND_STATE + i);
+   iop_pr_cont(" %02X", state);
+   if (state == IOP_MSG_COMPLETE)
+   iop_handle_send(iop_num, i);
}
+   iop_pr_cont("\n");
}
-   iop_pr_cont("\n");
-   }
 
-   if (iop->status_ctrl & IOP_INT1) {  /* INT1 for incoming msgs */
-   iop->status_ctrl = IOP_INT1 | IOP_RUN | IOP_AUTOINC;
-   iop_pr_debug("new status %02X, recv states", iop->status_ctrl);
-   for (i = 0 ; i < NUM_IOP_CHAN ; i++) {
-   state = iop_readb(iop, IOP_ADDR_RECV_STATE + i);
-   iop_pr_cont(" %02X", state);
-   if (state == IOP_MSG_NEW) {
-   iop_handle_recv(iop_num, i);
+   /* INT1 for incoming messages */
+   if (events & IOP_INT1) {
+   iop->status_ctrl = IOP_INT1 | IOP_RUN | IOP_AUTOINC;
+   iop_pr_debug("new status %02X, recv states",
+iop->status_ctrl);
+   for (i = 0; i < NUM_IOP_CHAN; i++) {
+   state = iop_readb(iop, IOP_ADDR_RECV_STATE + i);
+   iop_pr_cont(" %02X", state);
+   if (state == IOP_MSG_NEW)
+   iop_handle_recv(iop_num, i);
}
+   iop_pr_cont("\n");
}
-   iop_pr_cont("\n");
-   }
+
+   events = iop->status_ctrl & (IOP_INT0 | IOP_INT1);
+   } while (events);
+
return IRQ_HANDLED;
 }
 
-- 
2.26.2



Re: [RFC][PATCH 4/5] thermal: Add support for setting polling interval

2020-05-19 Thread Amit Kucheria
On Mon, May 4, 2020 at 11:47 PM Srinivas Pandruvada
 wrote:
>
> Add new attribute in the thermal sysfs for setting temperature sampling
> interval when CONFIG_THERMAL_USER_EVENT_INTERFACE is defined. The default
> value is 0, which means no polling.
>
> At this interval user space will get an event THERMAL_TEMP_SAMPLE with
> temperature sample. This reuses existing polling mechanism when polling
> or passive delay is specified during zone registry. To avoid interference
> with passive and polling delay, this new polling attribute can't be used
> for those zones.

Why should the kernel periodically emit events for userspace when the
userspace is perfectly capable of deciding how frequently it wants to
poll a file for changes?

>
> Signed-off-by: Srinivas Pandruvada 
> ---
>  drivers/thermal/thermal_core.c  |  7 +++
>  drivers/thermal/thermal_sysfs.c | 36 +++--
>  include/linux/thermal.h |  1 +
>  3 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 14770d882d42..17cd799b0073 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -313,6 +313,8 @@ static void monitor_thermal_zone(struct 
> thermal_zone_device *tz)
> thermal_zone_device_set_polling(tz, tz->passive_delay);
> else if (tz->polling_delay)
> thermal_zone_device_set_polling(tz, tz->polling_delay);
> +   else if (tz->temp_polling_delay)
> +   thermal_zone_device_set_polling(tz, tz->temp_polling_delay);
> else
> thermal_zone_device_set_polling(tz, 0);
>
> @@ -446,6 +448,11 @@ static void update_temperature(struct 
> thermal_zone_device *tz)
> tz->temperature = temp;
> mutex_unlock(&tz->lock);
>
> +   if (tz->temp_polling_delay) {
> +   thermal_dev_send_event(tz->id, THERMAL_TEMP_SAMPLE, temp);
> +   monitor_thermal_zone(tz);
> +   }
> +
> trace_thermal_temperature(tz);
> if (tz->last_temperature == THERMAL_TEMP_INVALID)
> dev_dbg(&tz->device, "last_temperature N/A, 
> current_temperature=%d\n",
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index aa85424c3ac4..0df7997993fe 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -248,6 +248,36 @@ create_thres_attr(temp_thres_low);
>  create_thres_attr(temp_thres_high);
>  create_thres_attr(temp_thres_hyst);
>
> +static ssize_t
> +temp_polling_delay_store(struct device *dev, struct device_attribute *attr,
> +  const char *buf, size_t count)
> +{
> +   struct thermal_zone_device *tz = to_thermal_zone(dev);
> +   int val;
> +
> +   if (kstrtoint(buf, 10, &val))
> +   return -EINVAL;
> +
> +   if (val && val < 1000)
> +   return -EINVAL;
> +
> +   tz->temp_polling_delay = val;
> +   thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> +
> +   return count;
> +}
> +
> +static ssize_t
> +temp_polling_delay_show(struct device *dev, struct device_attribute *attr,
> +char *buf)
> +{
> +   struct thermal_zone_device *tz = to_thermal_zone(dev);
> +
> +   return sprintf(buf, "%d\n", tz->temp_polling_delay);
> +}
> +
> +static DEVICE_ATTR_RW(temp_polling_delay);
> +
>  static int create_user_events_attrs(struct thermal_zone_device *tz)
>  {
> struct attribute **attrs;
> @@ -260,8 +290,8 @@ static int create_user_events_attrs(struct 
> thermal_zone_device *tz)
> if (tz->ops->get_temp_thres_high)
> ++index;
>
> -   /* One additional space for NULL */
> -   attrs = kcalloc(index + 1, sizeof(*attrs), GFP_KERNEL);
> +   /* One additional space for NULL and temp_polling_delay */
> +   attrs = kcalloc(index + 2, sizeof(*attrs), GFP_KERNEL);
> if (!attrs)
> return -ENOMEM;
>
> @@ -312,6 +342,8 @@ static int create_user_events_attrs(struct 
> thermal_zone_device *tz)
> attrs[index] = &tz->threshold_attrs[index].attr.attr;
> ++index;
> }
> +   if (!tz->polling_delay && !tz->passive_delay)
> +   attrs[index++] = &dev_attr_temp_polling_delay.attr;
> attrs[index] = NULL;
> tz->threshold_attribute_group.attrs = attrs;
>
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index ee9d79ace7ce..0ec4bd8c9c5c 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -216,6 +216,7 @@ struct thermal_zone_device {
> enum thermal_notify_event notify_event;
> struct attribute_group threshold_attribute_group;
> struct thermal_attr *threshold_attrs;
> +   int temp_polling_delay;
>  };
>
>  /**
> --
> 2.25.4
>


Re: [PATCHv2 4/5] Input: EXC3000: Add support to query model and fw_version

2020-05-19 Thread kbuild test robot
Hi Sebastian,

I love your patch! Yet something to improve:

[auto build test ERROR on input/next]
[also build test ERROR on v5.7-rc6 next-20200519]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Sebastian-Reichel/EXC3000-Updates/20200520-023207
base:   https://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git next
config: ia64-randconfig-r023-20200519 (attached as .config)
compiler: ia64-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=ia64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All error/warnings (new ones prefixed by >>, old ones prefixed by <<):

In file included from include/linux/kobject.h:20,
from include/linux/device.h:17,
from drivers/input/touchscreen/exc3000.c:11:
>> drivers/input/touchscreen/exc3000.c:252:23: error: initialization of 
>> 'ssize_t (*)(struct device *, struct device_attribute *, char *)' {aka 'long 
>> int (*)(struct device *, struct device_attribute *, char *)'} from 
>> incompatible pointer type 'int (*)(struct device *, struct device_attribute 
>> *, char *)' [-Werror=incompatible-pointer-types]
252 | static DEVICE_ATTR_RO(fw_version);
|   ^~
include/linux/sysfs.h:117:10: note: in definition of macro '__ATTR_RO'
117 |  .show = _name##_show,   |  ^
>> drivers/input/touchscreen/exc3000.c:252:8: note: in expansion of macro 
>> 'DEVICE_ATTR_RO'
252 | static DEVICE_ATTR_RO(fw_version);
|^~
drivers/input/touchscreen/exc3000.c:252:23: note: (near initialization for 
'dev_attr_fw_version.show')
252 | static DEVICE_ATTR_RO(fw_version);
|   ^~
include/linux/sysfs.h:117:10: note: in definition of macro '__ATTR_RO'
117 |  .show = _name##_show,   |  ^
>> drivers/input/touchscreen/exc3000.c:252:8: note: in expansion of macro 
>> 'DEVICE_ATTR_RO'
252 | static DEVICE_ATTR_RO(fw_version);
|^~
cc1: some warnings being treated as errors

vim +252 drivers/input/touchscreen/exc3000.c

   222  
   223  static int fw_version_show(struct device *dev,
   224 struct device_attribute *attr, char *buf)
   225  {
   226  struct exc3000_data *data = dev_get_drvdata(dev);
   227  static const u8 request[68] = {
   228  0x67, 0x00, 0x42, 0x00, 0x03, 0x01, 'D', 0x00
   229  };
   230  struct i2c_client *client = data->client;
   231  int err;
   232  
   233  mutex_lock(>query_lock);
   234  
   235  data->query_result = -ETIMEDOUT;
   236  reinit_completion(>wait_event);
   237  
   238  err = i2c_master_send(client, request, sizeof(request));
   239  if (err < 0) {
   240  mutex_unlock(>query_lock);
   241  return err;
   242  }
   243  
   244  wait_for_completion_interruptible_timeout(>wait_event, 
1*HZ);
   245  mutex_unlock(>query_lock);
   246  
   247  if (data->query_result < 0)
   248  return data->query_result;
   249  
   250  return sprintf(buf, "%s\n", data->fw_version);
   251  }
 > 252  static DEVICE_ATTR_RO(fw_version);
   253  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH v1] driver core: Fix handling of SYNC_STATE_ONLY + STATELESS device links

2020-05-19 Thread Saravana Kannan
On Tue, May 19, 2020 at 9:36 PM Saravana Kannan  wrote:
>
> Commit 21c27f06587d ("driver core: Fix SYNC_STATE_ONLY device link
> implementation") didn't completely fix STATELESS + SYNC_STATE_ONLY
> handling.
>
> What looks like an optimization in that commit is actually a bug that
> causes an if condition to always take the else path. This prevents
> reordering of devices in the dpm_list when a DL_FLAG_STATELESS device
> link is create on top of an existing DL_FLAG_SYNC_STATE_ONLY device
> link.
>
> Fixes: 21c27f06587d ("driver core: Fix SYNC_STATE_ONLY device link 
> implementation")
> Signed-off-by: Saravana Kannan 
> ---
> Sigh... device links are tricky and hard! Sorry about the endless fixes :(
> Also, how was this not caught by the compiler as a warning?
>
> -Saravana
>
>  drivers/base/core.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 83a3e0b62ce3..dfd4e94ef790 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -543,12 +543,14 @@ struct device_link *device_link_add(struct device 
> *consumer,
>
> if (flags & DL_FLAG_STATELESS) {
> kref_get(>kref);
> -   link->flags |= DL_FLAG_STATELESS;
> if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
> -   !(link->flags & DL_FLAG_STATELESS))
> +   !(link->flags & DL_FLAG_STATELESS)) {
> +   link->flags |= DL_FLAG_STATELESS;
> goto reorder;
> -   else
> +   } else {
> +   link->flags |= DL_FLAG_STATELESS;
> goto out;
> +   }
> }
>
> /*

Forgot to add sta...@vger.kernel.org. Doing that now.

-Saravana


Re: [PATCH v1 01/25] net: core: device_rename: Use rwsem instead of a seqcount

2020-05-19 Thread Stephen Hemminger
On Tue, 19 May 2020 20:18:19 -0700
Eric Dumazet  wrote:

> On 5/19/20 7:57 PM, David Miller wrote:
> > From: Thomas Gleixner 
> > Date: Wed, 20 May 2020 01:42:30 +0200
> >   
> >> Stephen Hemminger  writes:  
> >>> On Wed, 20 May 2020 00:23:48 +0200
> >>> Thomas Gleixner  wrote:  
>  No. We did not. -ENOTESTCASE  
> >>>
> >>> Please try, it isn't that hard..
> >>>
> >>> # time for ((i=0;i<1000;i++)); do ip li add dev dummy$i type dummy; done
> >>>
> >>> real  0m17.002s
> >>> user  0m1.064s
> >>> sys   0m0.375s  
> >>
> >> And that solves the incorrectness of the current code in which way?  
> > 
> > You mentioned that there wasn't a test case, he gave you one to try.
> >   
> 
> I do not think this would ever use device rename, nor netdev_get_name()
> 
> None of this stuff is fast path really.
> 
> # time for ((i=1;i<1000;i++)); do ip li add dev dummy$i type dummy; done
> 
> real  0m1.127s
> user  0m0.270s
> sys   0m1.039s

You're right, it is a weak test, and most of the overhead is in the syscall
and all the netlink events that happen.

It does end up looking up the new name, so it would exercise that.
A better test is to use the %d syntax, or create 1000 dummies and then rename every one.

This is more of a stress test
# for ((i=0;i<1000;i++)); do echo link add dev dummy%d type dummy; done | time 
ip -batch -
0.00user 0.29system 0:02.11elapsed 13%CPU (0avgtext+0avgdata 2544maxresident)k
0inputs+0outputs (0major+148minor)pagefaults 0swaps

# for ((i=999;i>=0;i--)); do echo link set dummy$i name dummy$((i+1)); done | 
time ip -batch -
0.00user 0.26system 0:54.98elapsed 0%CPU (0avgtext+0avgdata 2508maxresident)k
0inputs+0outputs (0major+145minor)pagefaults 0swaps



[PATCH v1] driver core: Fix handling of SYNC_STATE_ONLY + STATELESS device links

2020-05-19 Thread Saravana Kannan
Commit 21c27f06587d ("driver core: Fix SYNC_STATE_ONLY device link
implementation") didn't completely fix STATELESS + SYNC_STATE_ONLY
handling.

What looks like an optimization in that commit is actually a bug that
causes an if condition to always take the else path. This prevents
reordering of devices in the dpm_list when a DL_FLAG_STATELESS device
link is created on top of an existing DL_FLAG_SYNC_STATE_ONLY device
link.

Fixes: 21c27f06587d ("driver core: Fix SYNC_STATE_ONLY device link 
implementation")
Signed-off-by: Saravana Kannan 
---
Sigh... device links are tricky and hard! Sorry about the endless fixes :(
Also, how was this not caught by the compiler as a warning?

-Saravana

 drivers/base/core.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 83a3e0b62ce3..dfd4e94ef790 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -543,12 +543,14 @@ struct device_link *device_link_add(struct device 
*consumer,
 
if (flags & DL_FLAG_STATELESS) {
kref_get(>kref);
-   link->flags |= DL_FLAG_STATELESS;
if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
-   !(link->flags & DL_FLAG_STATELESS))
+   !(link->flags & DL_FLAG_STATELESS)) {
+   link->flags |= DL_FLAG_STATELESS;
goto reorder;
-   else
+   } else {
+   link->flags |= DL_FLAG_STATELESS;
goto out;
+   }
}
 
/*
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH v6 1/5] perf stat: Fix wrong per-thread runtime stat for interval mode

2020-05-19 Thread Jin Yao
root@kbl-ppc:~# perf stat --per-thread -e cycles,instructions -I1000 
--interval-count 2
 1.004171683 perf-3696  8,747,311  cycles
...
 1.004171683 perf-3696691,730  instructions 
 #0.08  insn per cycle
...
 2.006490373 perf-3696  1,749,936  cycles
...
 2.006490373 perf-3696  1,484,582  instructions 
 #0.28  insn per cycle
...

Let's see interval 2.006490373

perf-3696  1,749,936  cycles
perf-3696  1,484,582  instructions  #0.28  insn 
per cycle

insn per cycle = 1,484,582 / 1,749,936 = 0.85.
But now it's 0.28, that's not correct.

stat_config.stats[] records the per-thread runtime stat. But for interval
mode, it should be reset for each interval.

So now, with this patch,

root@kbl-ppc:~# perf stat --per-thread -e cycles,instructions -I1000 
--interval-count 2
 1.005818121 perf-8633  9,898,045  cycles
...
 1.005818121 perf-8633693,298  instructions 
 #0.07  insn per cycle
...
 2.007863743 perf-8633  1,551,619  cycles
...
 2.007863743 perf-8633  1,317,514  instructions 
 #0.85  insn per cycle
...

Let's check interval 2.007863743.

insn per cycle = 1,317,514 / 1,551,619 = 0.85. It's correct.

This patch creates runtime_stat_reset, places it next to
runtime_stat_new/runtime_stat_delete and moves all runtime_stat
functions before process_interval.

 v4:
 ---
 Create runtime_stat_reset.

Fixes: commit 14e72a21c783 ("perf stat: Update or print per-thread stats")
Signed-off-by: Jin Yao 
---
 tools/perf/builtin-stat.c | 70 +++
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e0c1ad23c768..f3b3a59ac7d2 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -351,6 +351,46 @@ static void read_counters(struct timespec *rs)
}
 }
 
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+   int i;
+
+   config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+   if (!config->stats)
+   return -1;
+
+   config->stats_num = nthreads;
+
+   for (i = 0; i < nthreads; i++)
+   runtime_stat__init(>stats[i]);
+
+   return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+   int i;
+
+   if (!config->stats)
+   return;
+
+   for (i = 0; i < config->stats_num; i++)
+   runtime_stat__exit(>stats[i]);
+
+   zfree(>stats);
+}
+
+static void runtime_stat_reset(struct perf_stat_config *config)
+{
+   int i;
+
+   if (!config->stats)
+   return;
+
+   for (i = 0; i < config->stats_num; i++)
+   perf_stat__reset_shadow_per_stat(>stats[i]);
+}
+
 static void process_interval(void)
 {
struct timespec ts, rs;
@@ -359,6 +399,7 @@ static void process_interval(void)
diff_timespec(, , _time);
 
perf_stat__reset_shadow_per_stat(_stat);
+   runtime_stat_reset(_config);
read_counters();
 
if (STAT_RECORD) {
@@ -1737,35 +1778,6 @@ int process_cpu_map_event(struct perf_session *session,
return set_maps(st);
 }
 
-static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
-{
-   int i;
-
-   config->stats = calloc(nthreads, sizeof(struct runtime_stat));
-   if (!config->stats)
-   return -1;
-
-   config->stats_num = nthreads;
-
-   for (i = 0; i < nthreads; i++)
-   runtime_stat__init(>stats[i]);
-
-   return 0;
-}
-
-static void runtime_stat_delete(struct perf_stat_config *config)
-{
-   int i;
-
-   if (!config->stats)
-   return;
-
-   for (i = 0; i < config->stats_num; i++)
-   runtime_stat__exit(>stats[i]);
-
-   zfree(>stats);
-}
-
 static const char * const stat_report_usage[] = {
"perf stat report []",
NULL,
-- 
2.17.1



[PATCH v6 0/5] perf stat: Support overall statistics for interval mode

2020-05-19 Thread Jin Yao
Currently perf-stat supports printing counts at a regular interval (-I),
but it's not very easy for the user to get the overall statistics.

With this patchset, it supports reporting a summary at the end of the
interval output.

For example,

 root@kbl-ppc:~# perf stat -e cycles -I1000 --interval-count 2
 #   time counts unit events
  1.000412064  2,281,114  cycles
  2.001383658  2,547,880  cycles

  Performance counter stats for 'system wide':

  4,828,994  cycles

2.002860349 seconds time elapsed

 root@kbl-ppc:~# perf stat -e cycles,instructions -I1000 --interval-count 2
 #   time counts unit events
  1.000389902  1,536,093  cycles
  1.000389902420,226  instructions  #0.27  
insn per cycle
  2.001433453  2,213,952  cycles
  2.001433453735,465  instructions  #0.33  
insn per cycle

  Performance counter stats for 'system wide':

  3,750,045  cycles
  1,155,691  instructions  #0.31  insn per cycle

2.003023361 seconds time elapsed

 root@kbl-ppc:~# perf stat -M CPI,IPC -I1000 --interval-count 2
 #   time counts unit events
  1.000435121905,303  inst_retired.any  #  2.9 
CPI
  1.000435121  2,663,333  cycles
  1.000435121914,702  inst_retired.any  #  0.3 
IPC
  1.000435121  2,676,559  cpu_clk_unhalted.thread
  2.001615941  1,951,092  inst_retired.any  #  1.8 
CPI
  2.001615941  3,551,357  cycles
  2.001615941  1,950,837  inst_retired.any  #  0.5 
IPC
  2.001615941  3,551,044  cpu_clk_unhalted.thread

  Performance counter stats for 'system wide':

  2,856,395  inst_retired.any  #  2.2 CPI
  6,214,690  cycles
  2,865,539  inst_retired.any  #  0.5 IPC
  6,227,603  cpu_clk_unhalted.thread

2.003403078 seconds time elapsed

 v6:
 ---
 1. Add comments in perf_evlist__save_aggr_prev_raw_counts.
 2. Move init_stats(_nsecs_stats) under interval condition check.

 Following patches are changed in v6.
perf stat: Save aggr value to first member of prev_raw_counts
perf stat: Report summary for interval mode

 v5:
 ---
 1. Create new patch "perf stat: Save aggr value to first member
of prev_raw_counts".

 2. Call perf_evlist__save_aggr_prev_raw_counts to save aggr value
to first member of prev_raw_counts for AGGR_GLOBAL. Then next,
perf_stat_process_counter can create aggr values from per cpu
values.

 Following patches are impacted in v5:
perf stat: Copy counts from prev_raw_counts to evsel->counts
perf stat: Save aggr value to first member of prev_raw_counts
perf stat: Report summary for interval mode

 v4:
 ---
 1. Create runtime_stat_reset.

 2. Zero the aggr in perf_counts__reset and use it to reset
prev_raw_counts.

 3. Move affinity setup and read_counter_cpu to a new function
read_affinity_counters. It's only called when stat_config.summary
is not set.

 v3:
 ---
 1. 'perf stat: Fix wrong per-thread runtime stat for interval mode'
is a new patch which fixes an existing issue found in test.

 2. We use the prev_raw_counts for summary counts. Drop the summary_counts in 
v2.

 3. Fix some issues.

 v2:
 ---
 Rebase to perf/core branch

Jin Yao (5):
  perf stat: Fix wrong per-thread runtime stat for interval mode
  perf counts: Reset prev_raw_counts counts
  perf stat: Copy counts from prev_raw_counts to evsel->counts
  perf stat: Save aggr value to first member of prev_raw_counts
  perf stat: Report summary for interval mode

 tools/perf/builtin-stat.c | 101 +-
 tools/perf/util/counts.c  |   4 +-
 tools/perf/util/counts.h  |   1 +
 tools/perf/util/stat.c|  53 +---
 tools/perf/util/stat.h|   3 ++
 5 files changed, 122 insertions(+), 40 deletions(-)

-- 
2.17.1



[PATCH v6 5/5] perf stat: Report summary for interval mode

2020-05-19 Thread Jin Yao
Currently perf-stat supports printing counts at a regular interval (-I),
but it's not very easy for the user to get the overall statistics.

The patch uses 'evsel->prev_raw_counts' to get counts for summary.
Copy the counts to 'evsel->counts' after printing the interval results.
Next, we just follow the non-interval processing.

Let's see some examples,

 root@kbl-ppc:~# perf stat -e cycles -I1000 --interval-count 2
 #   time counts unit events
  1.000412064  2,281,114  cycles
  2.001383658  2,547,880  cycles

  Performance counter stats for 'system wide':

  4,828,994  cycles

2.002860349 seconds time elapsed

 root@kbl-ppc:~# perf stat -e cycles,instructions -I1000 --interval-count 2
 #   time counts unit events
  1.000389902  1,536,093  cycles
  1.000389902420,226  instructions  #0.27  
insn per cycle
  2.001433453  2,213,952  cycles
  2.001433453735,465  instructions  #0.33  
insn per cycle

  Performance counter stats for 'system wide':

  3,750,045  cycles
  1,155,691  instructions  #0.31  insn per cycle

2.003023361 seconds time elapsed

 root@kbl-ppc:~# perf stat -M CPI,IPC -I1000 --interval-count 2
 #   time counts unit events
  1.000435121905,303  inst_retired.any  #  2.9 
CPI
  1.000435121  2,663,333  cycles
  1.000435121914,702  inst_retired.any  #  0.3 
IPC
  1.000435121  2,676,559  cpu_clk_unhalted.thread
  2.001615941  1,951,092  inst_retired.any  #  1.8 
CPI
  2.001615941  3,551,357  cycles
  2.001615941  1,950,837  inst_retired.any  #  0.5 
IPC
  2.001615941  3,551,044  cpu_clk_unhalted.thread

  Performance counter stats for 'system wide':

  2,856,395  inst_retired.any  #  2.2 CPI
  6,214,690  cycles
  2,865,539  inst_retired.any  #  0.5 IPC
  6,227,603  cpu_clk_unhalted.thread

2.003403078 seconds time elapsed

 v6:
 ---
 Move init_stats(_nsecs_stats) under interval condition check.
 walltime_nsecs_stats.val holds the last value so we just need to
 init the other fields of stats.

 v5:
 ---
 Call perf_evlist__save_aggr_prev_raw_counts to save aggr value
 to first member of prev_raw_counts for AGGR_GLOBAL. Then next,
 perf_stat_process_counter can create aggr values from per cpu
 values.

 v4:
 ---
 Move affinity setup and read_counter_cpu to a new function
 read_affinity_counters. It's only called when stat_config.summary
 is not set.

 v3:
 ---
 Use evsel->prev_raw_counts for summary counts

 v2:
 ---
 Rebase to perf/core branch

Signed-off-by: Jin Yao 
---
 tools/perf/builtin-stat.c | 31 +++
 tools/perf/util/stat.c|  2 +-
 tools/perf/util/stat.h|  1 +
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f3b3a59ac7d2..2486c79f0f34 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -314,14 +314,14 @@ static int read_counter_cpu(struct evsel *counter, struct 
timespec *rs, int cpu)
return 0;
 }
 
-static void read_counters(struct timespec *rs)
+static int read_affinity_counters(struct timespec *rs)
 {
struct evsel *counter;
struct affinity affinity;
int i, ncpus, cpu;
 
if (affinity__setup() < 0)
-   return;
+   return -1;
 
ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
if (!target__has_cpu() || target__has_per_thread())
@@ -341,6 +341,15 @@ static void read_counters(struct timespec *rs)
}
}
affinity__cleanup();
+   return 0;
+}
+
+static void read_counters(struct timespec *rs)
+{
+   struct evsel *counter;
+
+   if (!stat_config.summary && (read_affinity_counters(rs) < 0))
+   return;
 
evlist__for_each_entry(evsel_list, counter) {
if (counter->err)
@@ -763,7 +772,21 @@ static int __run_perf_stat(int argc, const char **argv, 
int run_idx)
if (stat_config.walltime_run_table)
stat_config.walltime_run[run_idx] = t1 - t0;
 
-   update_stats(_nsecs_stats, t1 - t0);
+   if (interval) {
+   stat_config.interval = 0;
+   stat_config.summary = true;
+   init_stats(_nsecs_stats);
+   update_stats(_nsecs_stats, t1 - t0);
+
+   if (stat_config.aggr_mode == AGGR_GLOBAL)
+   perf_evlist__save_aggr_prev_raw_counts(evsel_list);
+
+   perf_evlist__copy_prev_raw_counts(evsel_list);
+   perf_evlist__reset_prev_raw_counts(evsel_list);
+   

[PATCH v6 4/5] perf stat: Save aggr value to first member of prev_raw_counts

2020-05-19 Thread Jin Yao
To collect the overall statistics for interval mode, we copy the
counts from evsel->prev_raw_counts to evsel->counts.

For AGGR_GLOBAL mode, perf_stat_process_counter creates the aggr
values from the per cpu values, but the per cpu values are 0, so the
calculated aggr values will always be 0.

This patch uses a trick that saves the previous aggr value to
the first member of perf_counts, so that the aggr calculation in
process_counter_values can work correctly for AGGR_GLOBAL.

 v6:
 ---
 Add comments in perf_evlist__save_aggr_prev_raw_counts.

Signed-off-by: Jin Yao 
---
 tools/perf/util/stat.c | 20 
 tools/perf/util/stat.h |  1 +
 2 files changed, 21 insertions(+)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index aadc723ce871..d23109c9bee9 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -249,6 +249,26 @@ void perf_evlist__copy_prev_raw_counts(struct evlist 
*evlist)
perf_evsel__copy_prev_raw_counts(evsel);
 }
 
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+{
+   struct evsel *evsel;
+
+   /*
+* To collect the overall statistics for interval mode,
+* we copy the counts from evsel->prev_raw_counts to
+* evsel->counts. The perf_stat_process_counter creates
+* aggr values from per cpu values, but the per cpu values
+* are 0 for AGGR_GLOBAL. So we use a trick that saves the
+* previous aggr value to the first member of perf_counts,
+* then aggr calculation in process_counter_values can work
+* correctly.
+*/
+   evlist__for_each_entry(evlist, evsel) {
+   *perf_counts(evsel->prev_raw_counts, 0, 0) =
+   evsel->prev_raw_counts->aggr;
+   }
+}
+
 static void zero_per_pkg(struct evsel *counter)
 {
if (counter->per_pkg_mask)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 62cf72c71869..18ead55756cc 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -199,6 +199,7 @@ void perf_evlist__free_stats(struct evlist *evlist);
 void perf_evlist__reset_stats(struct evlist *evlist);
 void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
 void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
  struct evsel *counter);
-- 
2.17.1



[PATCH v6 3/5] perf stat: Copy counts from prev_raw_counts to evsel->counts

2020-05-19 Thread Jin Yao
It would be useful to support the overall statistics for perf-stat
interval mode. For example, report the summary at the end of
"perf-stat -I" output.

But since perf-stat supports many aggregation modes, such as
--per-thread, --per-socket, -M, etc., we need a solution which
doesn't add much complexity.

The idea is to use 'evsel->prev_raw_counts' which is updated in
each interval and it's saved with the latest counts. Before reporting
the summary, we copy the counts from evsel->prev_raw_counts to
evsel->counts, and next we just follow non-interval processing.

 v5:
 ---
 Don't save the previous aggr value to the member of [cpu0,thread0]
 in perf_counts. Originally that was a trick because the
 perf_stat_process_counter would create aggr values from per cpu
 values. But we don't need to do that all the time. We will
 handle it in next patch.

 v4:
 ---
 Change the commit message.
 No functional change.

Signed-off-by: Jin Yao 
---
 tools/perf/util/stat.c | 24 
 tools/perf/util/stat.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index e397815f0dfb..aadc723ce871 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -225,6 +225,30 @@ void perf_evlist__reset_prev_raw_counts(struct evlist 
*evlist)
evsel__reset_prev_raw_counts(evsel);
 }
 
+static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
+{
+   int ncpus = evsel__nr_cpus(evsel);
+   int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+   for (int thread = 0; thread < nthreads; thread++) {
+   for (int cpu = 0; cpu < ncpus; cpu++) {
+   *perf_counts(evsel->counts, cpu, thread) =
+   *perf_counts(evsel->prev_raw_counts, cpu,
+thread);
+   }
+   }
+
+   evsel->counts->aggr = evsel->prev_raw_counts->aggr;
+}
+
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
+{
+   struct evsel *evsel;
+
+   evlist__for_each_entry(evlist, evsel)
+   perf_evsel__copy_prev_raw_counts(evsel);
+}
+
 static void zero_per_pkg(struct evsel *counter)
 {
if (counter->per_pkg_mask)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index b4fdfaa7f2c0..62cf72c71869 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -198,6 +198,7 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool 
alloc_raw);
 void perf_evlist__free_stats(struct evlist *evlist);
 void perf_evlist__reset_stats(struct evlist *evlist);
 void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
  struct evsel *counter);
-- 
2.17.1



[PATCH v6 2/5] perf counts: Reset prev_raw_counts counts

2020-05-19 Thread Jin Yao
When we want to reset the evsel->prev_raw_counts, zeroing the aggr
is not enough, we need to reset the perf_counts too.

The perf_counts__reset zeros the perf_counts, and it should zero
the aggr too. This patch changes perf_counts__reset to non-static,
and calls it in evsel__reset_prev_raw_counts to reset the
prev_raw_counts.

 v4:
 ---
 Zeroing the aggr in perf_counts__reset and use it to reset
 prev_raw_counts.

Signed-off-by: Jin Yao 
---
 tools/perf/util/counts.c | 4 +++-
 tools/perf/util/counts.h | 1 +
 tools/perf/util/stat.c   | 7 ++-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 615c9f3e95cb..582f3aeaf5e4 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
 #include 
+#include 
 #include "evsel.h"
 #include "counts.h"
 #include 
@@ -42,10 +43,11 @@ void perf_counts__delete(struct perf_counts *counts)
}
 }
 
-static void perf_counts__reset(struct perf_counts *counts)
+void perf_counts__reset(struct perf_counts *counts)
 {
xyarray__reset(counts->loaded);
xyarray__reset(counts->values);
+   memset(>aggr, 0, sizeof(struct perf_counts_values));
 }
 
 void evsel__reset_counts(struct evsel *evsel)
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 8f556c6d98fa..7ff36bf6d644 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -37,6 +37,7 @@ perf_counts__set_loaded(struct perf_counts *counts, int cpu, 
int thread, bool lo
 
 struct perf_counts *perf_counts__new(int ncpus, int nthreads);
 void perf_counts__delete(struct perf_counts *counts);
+void perf_counts__reset(struct perf_counts *counts);
 
 void evsel__reset_counts(struct evsel *evsel);
 int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index f4a44df9b221..e397815f0dfb 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -163,11 +163,8 @@ static void evsel__free_prev_raw_counts(struct evsel 
*evsel)
 
 static void evsel__reset_prev_raw_counts(struct evsel *evsel)
 {
-   if (evsel->prev_raw_counts) {
-   evsel->prev_raw_counts->aggr.val = 0;
-   evsel->prev_raw_counts->aggr.ena = 0;
-   evsel->prev_raw_counts->aggr.run = 0;
-   }
+   if (evsel->prev_raw_counts)
+   perf_counts__reset(evsel->prev_raw_counts);
 }
 
 static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
-- 
2.17.1



Re: [RFC][PATCH 3/5] thermal: Add support for setting notification thresholds

2020-05-19 Thread Amit Kucheria
On Tue, May 19, 2020 at 5:10 AM Srinivas Pandruvada
 wrote:
>
> On Mon, 2020-05-18 at 18:37 +0200, Daniel Lezcano wrote:
> > On 04/05/2020 20:16, Srinivas Pandruvada wrote:
> > > Add new attributes in thermal sysfs when a thermal driver provides
> > > callbacks for them and CONFIG_THERMAL_USER_EVENT_INTERFACE is
> > > defined.
> > >
> > > These attributes allow user space to stop polling for temperature.
> > >
> > > These attributes are:
> > > - temp_thres_low: Specify a notification temperature for a low
> > > temperature threshold event.
> > > temp_thres_high: Specify a notification temperature for a high
> > > temperature threshold event.
> > > temp_thres_hyst: Specify a change in temperature to send
> > > notification
> > > again.
> > >
> > > This is implemented by adding additional sysfs attribute group. The
> > > changes in this patch are trivial to add new attributes in thermal
> > > sysfs as done for other attributes.
> >
> > Isn't it duplicate with the trip point?
> A trip point is where an in-kernel governor takes some action. This is
> not same as a notification temperature. For example at trip point
> configured by ACPI at 85C, the thermal governor may start aggressive
> throttling.
> But a user space can set a notification threshold at 80C and start some
> active controls like activate some fan to reduce the impact of passive
> control on performance.

Then what is the use of thermal trip type "ACTIVE" ?

> We need a way to distinguish between temperature notification threshold
> and actual trip point. Changing a trip point means that user wants
> kernel to throttle at temperature.


Re: [PATCH v1 4/4] of: platform: Batch fwnode parsing when adding all top level devices

2020-05-19 Thread Marek Szyprowski
Hi Saravana,

On 19.05.2020 20:02, Saravana Kannan wrote:
> On Tue, May 19, 2020 at 3:32 AM Marek Szyprowski
>  wrote:
>> On 19.05.2020 09:11, Marek Szyprowski wrote:
>>> On 19.05.2020 08:48, Saravana Kannan wrote:
 On Mon, May 18, 2020 at 11:25 PM Marek Szyprowski
  wrote:
> On 15.05.2020 07:35, Saravana Kannan wrote:
>> The fw_devlink_pause() and fw_devlink_resume() APIs allow batching the
>> parsing of the device tree nodes when a lot of devices are added. This
>> will significantly cut down parsing time (as much a 1 second on some
>> systems). So, use them when adding devices for all the top level
>> device
>> tree nodes in a system.
>>
>> Signed-off-by: Saravana Kannan 
> This patch recently landed in linux-next 20200518. Sadly, it causes
> regression on Samsung Exynos5433-based TM2e board:
>
> ...
>
> Both issues, the lack of DMA for SPI device and Synchronous abort in
> I2S
> probe are new after applying this patch. I'm trying to investigate
> which
> resources are missing and why. The latter issue means typically that
> the
> registers for the given device has been accessed without enabling the
> needed clocks or power domains.
 Did you try this copy-pasta fix that I sent later?
 https://lore.kernel.org/lkml/20200517173453.157703-1-sarava...@google.com/


 Not every system would need it (my test setup didn't), but it helps
 some cases.

 If that fix doesn't help, then some tips for debugging the failing
 drivers.
 What this pause/resume patch effectively (not explicitly) does is:
 1. Doesn't immediately probe the devices as they are added in
 of_platform_default_populate_init()
 2. Adds them in order to the deferred probe list.
 3. Then kicks off deferred probe on them in the order they were added.

 These drivers are just not handling -EPROBE_DEFER correctly or
 assuming probe order and that's causing these issues.

 So, we can either fix that or you can try adding some code to flush
 the deferred probe workqueue at the end of fw_devlink_resume().

 Let me know how it goes.
>>> So far it looks that your patch revealed a hidden issue in exynos5433
>>> clocks configuration, because adding clk_ignore_unused parameter to
>>> kernel command line fixes the boot. I'm still investigating it, so
>>> probable you can ignore my regression report. I will let you know asap
>>> I finish checking it.
>>>
>> Okay, I confirm that the issue is in the Exynos I2S driver and
>> Exynos5433 clock provider. I've posted a quick workaround. I'm sorry for
>> the noise, your patch is fine.
> Thanks for debugging and finding the real issue. I tried finding your
> patches, but couldn't. Can you point me to a lore.kernel.org link? I'm
> just curious to see what the issue was.

https://lore.kernel.org/linux-samsung-soc/f67db8c1-453b-4c70-67b9-59762ac34...@kernel.org/T/#t

It looks that one more clock has to be enabled to properly read init 
configuration. So far it worked, because that device was probed much 
earlier, before the unused clocks are turned off. Your patch changed the 
probe order, so that device is probed later.

> I'm guessing you didn't need to pick up this one?
> https://lore.kernel.org/lkml/20200517173453.157703-1-sarava...@google.com/

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland



Re: [PATCH] printk/kdb: Redirect printk messages into kdb in any context

2020-05-19 Thread Sergey Senozhatsky
On (20/05/18 11:21), Petr Mladek wrote:
[..]
> > > Is this guaranteed that we never execute this path from NMI?
> 
> Good question!
> 
> > Absolutely not.
> > 
> > The execution context for kdb is pretty much unique... we are running a
> > debug mode with all CPUs parked in a holding loop with interrupts
> > disabled. One CPU is at an unknown exception state and the others are
> > either handling an IRQ or NMI depending on architecture[1].
> 
> This is similar to the situation in panic() when other CPUs are
> stopped. It is more safe when the CPUs are stopped using IRQ.
> There is higher danger of a deadlock when NMI is used.
> 
> bust_spinlock() is used in panic() to increase the chance to go over
> the deadlock and actually see the messages. It is not enough when
> more locks are used by the console (VT/TTY is good example). And
> it is not guaranteed that the console will still work after
> the hack is disabled by bust_spinlocks(0).

Good point. It's not guaranteed to help, but bust_spinlocks() does
help in general: many serial drivers check oops_in_progress and
use a deadlock-safe approach when taking the port lock. I don't see
bust_spinlocks() being used in kdb, so it's probably better to start
doing so (along with general for_each_console() loop improvements,
like checking whether the console is enabled/available/etc).

[..]
> > > If so, can this please be added to the commit message? A more
> > > detailed commit message will help a lot.
> 
> What about?
> 
> "KDB has to get messages on consoles even when the system is stopped.
> It uses kdb_printf() internally and calls console drivers on its own.
> 
> It uses a hack to reuse an existing code. It sets "kdb_trap_printk"
> global variable to redirect even the normal printk() into the
> kdb_printf() variant.
> 
> The variable "kdb_trap_printk" is checked in printk_default() and
> it is ignored when printk is redirected to printk_safe in NMI context.
> Solve this by moving the check into printk_func().
> 
> It is obvious that it is not fully safe. But it does not make things
> worse. The console drivers are already called in this context by
> kdb_printf() direct calls."

This looks more informative indeed. Thanks!

-ss


Re: linux-next: manual merge of the rcu tree with the powerpc tree

2020-05-19 Thread Michael Ellerman
Stephen Rothwell  writes:
> Hi all,
>
> Today's linux-next merge of the rcu tree got a conflict in:
>
>   arch/powerpc/kernel/traps.c
>
> between commit:
>
>   116ac378bb3f ("powerpc/64s: machine check interrupt update NMI accounting")
>
> from the powerpc tree and commit:
>
>   187416eeb388 ("hardirq/nmi: Allow nested nmi_enter()")
>
> from the rcu tree.
>
> I fixed it up (I used the powerpc tree version for now) and can carry the
> fix as necessary.

OK, I guess that works for now, we'll have to clean it up later once
both trees are merged upstream.

I created an issue to track it:
  https://github.com/linuxppc/issues/issues/298

cheers


[PATCH v3 1/2] arm64: dts: add qe node to ls1043ardb

2020-05-19 Thread Qiang Zhao
From: Zhao Qiang 

Add qe node to fsl-ls1043a.dtsi and fsl-ls1043a-rdb.dts

Signed-off-by: Zhao Qiang 
---
v2:
 - add commit msg and drop a new blank line

v3:
 - Keep labeling node sort alphabetically
 - remove unused device_type
 - use GIC_SPI and IRQ_TYPE_LEVEL_HIGH
 - use "arm64: dts:" format for subject

 arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts | 16 ++
 arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi| 65 +++
 2 files changed, 81 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts 
b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
index dde50c8..44d9343 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
@@ -176,3 +176,19 @@
};
};
 };
+
+ {
+   ucc_hdlc: ucc@2000 {
+   compatible = "fsl,ucc-hdlc";
+   rx-clock-name = "clk8";
+   tx-clock-name = "clk9";
+   fsl,rx-sync-clock = "rsync_pin";
+   fsl,tx-sync-clock = "tsync_pin";
+   fsl,tx-timeslot-mask = <0xfffe>;
+   fsl,rx-timeslot-mask = <0xfffe>;
+   fsl,tdm-framer-type = "e1";
+   fsl,tdm-id = <0>;
+   fsl,siram-entry-id = <0>;
+   fsl,tdm-interface;
+   };
+};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi 
b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
index c084c7a4..3b641bd 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
@@ -525,6 +525,71 @@
#interrupt-cells = <2>;
};
 
+   uqe: uqe@240 {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   compatible = "fsl,qe", "simple-bus";
+   ranges = <0x0 0x0 0x240 0x4>;
+   reg = <0x0 0x240 0x0 0x480>;
+   brg-frequency = <1>;
+   bus-frequency = <2>;
+   fsl,qe-num-riscs = <1>;
+   fsl,qe-num-snums = <28>;
+
+   qeic: qeic@80 {
+   compatible = "fsl,qe-ic";
+   reg = <0x80 0x80>;
+   #address-cells = <0>;
+   interrupt-controller;
+   #interrupt-cells = <1>;
+   interrupts = ,
+;
+   };
+
+   si1: si@700 {
+   #address-cells = <1>;
+   #size-cells = <0>;
+   compatible = "fsl,ls1043-qe-si",
+   "fsl,t1040-qe-si";
+   reg = <0x700 0x80>;
+   };
+
+   siram1: siram@1000 {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   compatible = "fsl,ls1043-qe-siram",
+   "fsl,t1040-qe-siram";
+   reg = <0x1000 0x800>;
+   };
+
+   ucc@2000 {
+   cell-index = <1>;
+   reg = <0x2000 0x200>;
+   interrupts = <32>;
+   interrupt-parent = <>;
+   };
+
+   ucc@2200 {
+   cell-index = <3>;
+   reg = <0x2200 0x200>;
+   interrupts = <34>;
+   interrupt-parent = <>;
+   };
+
+   muram@1 {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   compatible = "fsl,qe-muram", "fsl,cpm-muram";
+   ranges = <0x0 0x1 0x6000>;
+
+   data-only@0 {
+   compatible = "fsl,qe-muram-data",
+   "fsl,cpm-muram-data";
+   reg = <0x0 0x6000>;
+   };
+   };
+   };
+
lpuart0: serial@295 {
compatible = "fsl,ls1021a-lpuart";
reg = <0x0 0x295 0x0 0x1000>;
-- 
2.7.4



[PATCH v3 2/2] arm64: dts: Add ds26522 node to dts to ls1043ardb

2020-05-19 Thread Qiang Zhao
From: Zhao Qiang 

Add ds26522 node to fsl-ls1043a-rdb.dts

Signed-off-by: Zhao Qiang 
---
v3:
 - use "arm64: dts:" format for subject
 
arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts 
b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
index 44d9343..1cb265f 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts
@@ -94,6 +94,22 @@
reg = <0>;
spi-max-frequency = <100>; /* input clock */
};
+
+   slic@2 {
+   compatible = "maxim,ds26522";
+   reg = <2>;
+   spi-max-frequency = <200>;
+   fsl,spi-cs-sck-delay = <100>;
+   fsl,spi-sck-cs-delay = <50>;
+   };
+
+   slic@3 {
+   compatible = "maxim,ds26522";
+   reg = <3>;
+   spi-max-frequency = <200>;
+   fsl,spi-cs-sck-delay = <100>;
+   fsl,spi-sck-cs-delay = <50>;
+   };
 };
 
  {
-- 
2.7.4



Re: [PATCH 2/2] Add a new sysctl knob: unprivileged_userfaultfd_user_mode_only

2020-05-19 Thread Andrea Arcangeli
Hello Jonathan and everyone,

On Thu, May 07, 2020 at 01:15:03PM -0600, Jonathan Corbet wrote:
> On Wed, 6 May 2020 15:38:16 -0400
> Peter Xu  wrote:
> 
> > If this is going to be added... I am thinking whether it should be easier to
> > add another value for unprivileged_userfaultfd, rather than a new sysctl. 
> > E.g.:
> > 
> >   "0": unprivileged userfaultfd forbidden
> >   "1": unprivileged userfaultfd allowed (both user/kernel faults)
> >   "2": unprivileged userfaultfd allowed (only user faults)
> > 
> > Because after all unprivileged_userfaultfd_user_mode_only will be 
> > meaningless
> > (iiuc) if unprivileged_userfaultfd=0.  The default value will also be the 
> > same
> > as before ("1") 
> It occurs to me to wonder whether this interface should also let an admin
> block *privileged* user from handling kernel-space faults?  In a
> secure-boot/lockdown setting, this could be a hardening measure that keeps
> a (somewhat) restricted root user from expanding their privilege...?

That's a good question. In my view if as root in lockdown mode you can
still run the swapon syscall and setup nfs or other network devices
and load userland fuse filesystems or cuse chardev in userland, even
if you prevent userfaultfd from blocking kernel faults, kernel faults
can still be blocked by other means.

That in fact tends to be true also as non root (so regardless of
lockdown settings) since luser can generally load fuse filesystems.
There is no fundamental integrity breakage or privilege escalation
originating in userfaultfd.

The only concern here is about this: "after a new use-after-free is
discovered in some other part of the kernel (not related to
userfaultfd), how easy it is to turn the use-after-free from a mere
DoS to a more concerning privilege escalation?". userfaultfd might
facilitate the exploitation, but even if you remove userfaultfd from
the equation, there's still no guarantee a use-after-free won't
materialize as a privilege escalation by other means.

So to express it in another way: unless lockdown (no matter in which
mode) is a weak probabilistic based feature and in turn it cannot
provide any guarantee to begin with, userfaultfd sysctl set to 0|1|2
can't possibly make any difference to it.

The best mitigation for those kinds of exploits remains to randomize
all kernel memory allocations, so even if the attacker can block the
fault, when it's unblocked it'll pick another page, not the one that
the attacker can predict it will use, so the attacker needs to repeat
the race many more times and hopefully it'll DoS and destabilize the
kernel before it can reproduce a privilege escalation. We got many of
those randomization features in the current kernel and it's probably
more important to enable those than to worry about this sysctl value.

One way to have peace of mind against all use-after-free bugs, regardless
of this sysctl value, is to run each pod in a KVM instance; that's
safer than disabling syscalls or kernel features.

The default seccomp profiles of podman already block userfaultfd too,
so there's no need of virt to get extra safety if you use containers:
containers need to explicitly opt-in to enable userfaultfd through the
OCI schema seccomp object. If userfaultfd is being explicitly
whitelisted in the OCI schema of the container, well then you know
there is a good reason for it. As a matter of fact some things are
only possible to achieve with userfaultfd fully enabled.

The big value uffd brings compared to trapping sigsegv is precisely to
be able to handle kernel faults transparently. sigsegv can't do that
because every syscall would return 1) an inconsistent retval and 2) no
fault address along with the retval.

The possible future uffd userland users could be: dropping JVM dirty
bit, redis snapshot using pthread_create() instead of fork(),
distributed shared memory on pmem, new malloc() implementation never
taking mmap_sem for writing in the kernel and never modifying any vma
to allocate and free anon memory, etc.. I don't think any of them
would work with the sysctl set to "2".

The next kernel feature in uffd land that I was discussing with Peter,
is an async uffd event model to further optimize the replacement of
soft-dirty (which uffd already provides in O(1) instead of O(N)), so
the wrprotect fault won't have to block anymore until the uffd async
queue overflows. That also is unlikely to work with the sysctl set to
"2" without adding extra constraints that soft-dirty doesn't currently
have.

It would also be possible to implement the value "2" to work like
/proc/sys/kernel/unprivileged_bpf_disabled, so when you set it to "1"
as root, you can't set it to "2" or "0" and when you set it to "2" you
can't set it to "0", but personally I think it's unnecessary.

Thanks,
Andrea



Re: [PATCH v4 2/4] kasan: record and print the free track

2020-05-19 Thread Walter Wu
> On Tue, May 19, 2020 at 4:25 AM Walter Wu  wrote:
> >
> > Move free track from slub alloc meta-data to slub free meta-data in
> > order to make struct kasan_free_meta size is 16 bytes. It is a good
> > size because it is the minimal redzone size and a good number of
> > alignment.
> >
> > For free track in generic KASAN, we do the modification in struct
> > kasan_alloc_meta and kasan_free_meta:
> > - remove free track from kasan_alloc_meta.
> > - add free track into kasan_free_meta.
> >
> > [1]https://bugzilla.kernel.org/show_bug.cgi?id=198437
> >
> > Signed-off-by: Walter Wu 
> > Suggested-by: Dmitry Vyukov 
> > Cc: Andrey Ryabinin 
> > Cc: Dmitry Vyukov 
> > Cc: Alexander Potapenko 
> > ---
> >  mm/kasan/common.c  | 22 ++
> >  mm/kasan/generic.c | 18 ++
> >  mm/kasan/kasan.h   |  7 +++
> >  mm/kasan/report.c  | 20 
> >  mm/kasan/tags.c| 37 +
> >  5 files changed, 64 insertions(+), 40 deletions(-)
> >
> > diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> > index 8bc618289bb1..47b53912f322 100644
> > --- a/mm/kasan/common.c
> > +++ b/mm/kasan/common.c
> > @@ -51,7 +51,7 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags)
> > return stack_depot_save(entries, nr_entries, flags);
> >  }
> >
> > -static inline void set_track(struct kasan_track *track, gfp_t flags)
> > +void kasan_set_track(struct kasan_track *track, gfp_t flags)
> >  {
> > track->pid = current->pid;
> > track->stack = kasan_save_stack(flags);
> > @@ -299,24 +299,6 @@ struct kasan_free_meta *get_free_info(struct 
> > kmem_cache *cache,
> > return (void *)object + cache->kasan_info.free_meta_offset;
> >  }
> >
> > -
> > -static void kasan_set_free_info(struct kmem_cache *cache,
> > -   void *object, u8 tag)
> > -{
> > -   struct kasan_alloc_meta *alloc_meta;
> > -   u8 idx = 0;
> > -
> > -   alloc_meta = get_alloc_info(cache, object);
> > -
> > -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
> > -   idx = alloc_meta->free_track_idx;
> > -   alloc_meta->free_pointer_tag[idx] = tag;
> > -   alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
> > -#endif
> > -
> > -   set_track(_meta->free_track[idx], GFP_NOWAIT);
> > -}
> > -
> >  void kasan_poison_slab(struct page *page)
> >  {
> > unsigned long i;
> > @@ -492,7 +474,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, 
> > const void *object,
> > KASAN_KMALLOC_REDZONE);
> >
> > if (cache->flags & SLAB_KASAN)
> > -   set_track(_alloc_info(cache, object)->alloc_track, 
> > flags);
> > +   kasan_set_track(_alloc_info(cache, 
> > object)->alloc_track, flags);
> >
> > return set_tag(object, tag);
> >  }
> > diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
> > index 3372bdcaf92a..763d8a13e0ac 100644
> > --- a/mm/kasan/generic.c
> > +++ b/mm/kasan/generic.c
> > @@ -344,3 +344,21 @@ void kasan_record_aux_stack(void *addr)
> > alloc_info->aux_stack[1] = alloc_info->aux_stack[0];
> > alloc_info->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
> >  }
> > +
> > +void kasan_set_free_info(struct kmem_cache *cache,
> > +   void *object, u8 tag)
> > +{
> > +   struct kasan_free_meta *free_meta;
> > +
> > +   free_meta = get_free_info(cache, object);
> > +   kasan_set_track(_meta->free_track, GFP_NOWAIT);
> > +}
> > +
> > +struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
> > +   void *object, u8 tag)
> > +{
> > +   struct kasan_free_meta *free_meta;
> > +
> > +   free_meta = get_free_info(cache, object);
> > +   return _meta->free_track;
> > +}
> > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> > index a7391bc83070..ad897ec36545 100644
> > --- a/mm/kasan/kasan.h
> > +++ b/mm/kasan/kasan.h
> > @@ -127,6 +127,9 @@ struct kasan_free_meta {
> >  * Otherwise it might be used for the allocator freelist.
> >  */
> > struct qlist_node quarantine_link;
> > +#ifdef CONFIG_KASAN_GENERIC
> > +   struct kasan_track free_track;
> > +#endif
> >  };
> >
> >  struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
> > @@ -168,6 +171,10 @@ void kasan_report_invalid_free(void *object, unsigned 
> > long ip);
> >  struct page *kasan_addr_to_page(const void *addr);
> >
> >  depot_stack_handle_t kasan_save_stack(gfp_t flags);
> > +void kasan_set_track(struct kasan_track *track, gfp_t flags);
> > +void kasan_set_free_info(struct kmem_cache *cache, void *object, u8 tag);
> > +struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
> > +   void *object, u8 tag);
> >
> >  #if defined(CONFIG_KASAN_GENERIC) && \
> > (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
> > diff --git a/mm/kasan/report.c b/mm/kasan/report.c
> > index 6f8f2bf8f53b..96d2657fe70f 100644
> > --- a/mm/kasan/report.c
> > 

Re: [PATCH v4 0/4] cleaning up the sysctls table (hung_task watchdog)

2020-05-19 Thread Xiaoming Ni

On 2020/5/20 11:31, Andrew Morton wrote:

On Tue, 19 May 2020 11:31:07 +0800 Xiaoming Ni  wrote:


Kernel/sysctl.c


eek!



  fs/proc/proc_sysctl.c|   2 +-
  include/linux/sched/sysctl.h |  14 +--
  include/linux/sysctl.h   |  13 ++-
  kernel/hung_task.c   |  77 +++-
  kernel/sysctl.c  | 214 +++
  kernel/watchdog.c| 101 
  6 files changed, 224 insertions(+), 197 deletions(-)


Here's what we presently have happening in linux-next's kernel/sysctl.c:

  sysctl.c | 3109 
++-
  1 file changed, 1521 insertions(+), 1588 deletions(-)


So this is not a good time for your patch!

Can I suggest that you set the idea aside and take a look after 5.8-rc1
is released?



OK, I will make a v5 patch based on 5.8-rc1 once it is released,
and add more sysctl table cleanup.

Thanks
Xiaoming Ni




Re: [PATCH 00/12] Add cpufreq and cci devfreq for mt8183, and SVS support

2020-05-19 Thread Chanwoo Choi
Hi Andrew,

Could you tell me the base commit of these patches?
When I tried to apply them to v5.7-rc1 for testing,
a merge conflict occurred.

Thanks,
Chanwoo Choi

On 5/20/20 12:42 PM, Andrew-sh.Cheng wrote:
> MT8183 supports CPU DVFS and CCI DVFS, and LITTLE cpus and CCI are in the 
> same voltage domain.
> So, this series is to add drivers to handle the voltage coupling between CPU 
> and CCI DVFS.
> 
> For SVS support, need OPP_EVENT_ADJUST_VOLTAGE and corresponding reaction.
> 
> Change since v5:
>   - Changing dt-binding format to yaml.
>   - Extending current devfreq passive_governor instead of create a new 
> one.
>   - Resend depending patches of Sravana Kannan base on kernel-5.7
> 
> 
> Andrew-sh.Cheng (6):
>   cpufreq: mediatek: add clock and regulator enable for intermediate
> clock
>   dt-bindings: devfreq: add compatible for mt8183 cci devfreq
>   devfreq: add mediatek cci devfreq
>   opp: Modify opp API, dev_pm_opp_get_freq(), find freq in opp, even it
> is disabled
>   cpufreq: mediatek: add opp notification for SVS support
>   devfreq: mediatek: cci devfreq register opp notification for SVS
> support
> 
> Saravana Kannan (6):
>   OPP: Allow required-opps even if the device doesn't have power-domains
>   OPP: Add function to look up required OPP's for a given OPP
>   OPP: Improve required-opps linking
>   PM / devfreq: Cache OPP table reference in devfreq
>   PM / devfreq: Add required OPPs support to passive governor
>   PM / devfreq: Add cpu based scaling support to passive_governor
> 
>  .../devicetree/bindings/devfreq/mt8183-cci.yaml|  51 
>  drivers/cpufreq/mediatek-cpufreq.c | 122 -
>  drivers/devfreq/Kconfig|  12 +
>  drivers/devfreq/Makefile   |   1 +
>  drivers/devfreq/devfreq.c  |   6 +
>  drivers/devfreq/governor_passive.c | 298 
> +++--
>  drivers/devfreq/mt8183-cci-devfreq.c   | 233 
>  drivers/opp/core.c |  85 +-
>  drivers/opp/of.c   | 108 
>  drivers/opp/opp.h  |   5 +
>  include/linux/devfreq.h|  42 ++-
>  include/linux/pm_opp.h |  11 +
>  12 files changed, 874 insertions(+), 100 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/devfreq/mt8183-cci.yaml
>  create mode 100644 drivers/devfreq/mt8183-cci-devfreq.c
> 


Endless soft-lockups for compiling workload since next-20200519

2020-05-19 Thread Qian Cai
Just a heads up. Repeatedly compiling kernels for a while triggers
endless soft-lockups since next-20200519 on both x86_64 and powerpc.
The .config files are at:

https://github.com/cailca/linux-mm

I did first try to revert the linux-next commit 68cd9f4e7238
("tick/nohz: Narrow down noise while setting current task's tick
dependency"), but it did not help.

== x86_64 ==
[ 1167.993773][C1] WARNING: CPU: 1 PID: 0 at kernel/smp.c:127
flush_smp_call_function_queue+0x1fa/0x2e0
[ 1168.00][C1] Modules linked in: nls_iso8859_1 nls_cp437 vfat
fat kvm_amd ses kvm enclosure dax_pmem irqbypass dax_pmem_core efivars
acpi_cpufreq efivarfs ip_tables x_tables xfs sd_mod smartpqi
scsi_transport_sas tg3 mlx5_core libphy firmware_class dm_mirror
dm_region_hash dm_log dm_mod
[ 1168.029492][C1] CPU: 1 PID: 0 Comm: swapper/1 Not tainted
5.7.0-rc6-next-20200519 #1
[ 1168.037665][C1] Hardware name: HPE ProLiant DL385
Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019
[ 1168.046978][C1] RIP: 0010:flush_smp_call_function_queue+0x1fa/0x2e0
[ 1168.053658][C1] Code: 01 0f 87 c9 12 00 00 83 e3 01 0f 85 cc fe
ff ff 48 c7 c7 c0 55 a9 8f c6 05 f6 86 cd 01 01 e8 de 09 ea ff 0f 0b
e9 b2 fe ff ff <0f> 0b e9 52 ff ff ff 0f 0b e9 f2 fe ff ff 65 44 8b 25
10 52 3f 71
[ 1168.073262][C1] RSP: 0018:c9178918 EFLAGS: 00010046
[ 1168.079253][C1] RAX:  RBX: 430c58f8
RCX: 8ec26083
[ 1168.087156][C1] RDX: 0003 RSI: dc00
RDI: 430c58f8
[ 1168.095054][C1] RBP: c91789a8 R08: ed1108618cec
R09: ed1108618cec
[ 1168.102964][C1] R10: 430c675b R11: 
R12: 430c58e0
[ 1168.110866][C1] R13: 8eb30c40 R14: 430c5880
R15: 430c58e0
[ 1168.118767][C1] FS:  ()
GS:4308() knlGS:
[ 1168.127628][C1] CS:  0010 DS:  ES:  CR0: 80050033
[ 1168.134129][C1] CR2: 55b169604560 CR3: 000d08a14000
CR4: 003406e0
[ 1168.142026][C1] Call Trace:
[ 1168.145206][C1]  
[ 1168.147957][C1]  ? smp_call_on_cpu_callback+0xd0/0xd0
[ 1168.153421][C1]  ? rcu_read_lock_sched_held+0xac/0xe0
[ 1168.158880][C1]  ? rcu_read_lock_bh_held+0xc0/0xc0
[ 1168.164076][C1]  generic_smp_call_function_single_interrupt+0x13/0x2b
[ 1168.170938][C1]  smp_call_function_single_interrupt+0x157/0x4e0
[ 1168.177278][C1]  ? smp_call_function_interrupt+0x4e0/0x4e0
[ 1168.183172][C1]  ? interrupt_entry+0xe4/0xf0
[ 1168.187846][C1]  ? trace_hardirqs_off_caller+0x8d/0x1f0
[ 1168.193478][C1]  ? trace_hardirqs_on_caller+0x1f0/0x1f0
[ 1168.199116][C1]  ? _nohz_idle_balance+0x221/0x360
[ 1168.204228][C1]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 1168.209690][C1]  call_function_single_interrupt+0xf/0x20
[ 1168.215415][C1] RIP: 0010:_raw_spin_unlock_irqrestore+0x46/0x50
[ 1168.221747][C1] Code: 8d 5e ff 4c 89 e7 e8 a9 35 5f ff f6 c7 02
75 13 53 9d e8 fd c0 6f ff 65 ff 0d 4e ab a6 70 5b 41 5c 5d c3 e8 dc
c2 6f ff 53 9d  eb 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 65 ff
05 2b ab a6
[ 1168.241353][C1] RSP: 0018:c9178bd0 EFLAGS: 0246
ORIG_RAX: ff04
[ 1168.249700][C1] RAX:  RBX: 0246
RCX: 8eba0740
[ 1168.257602][C1] RDX: 0007 RSI: dc00
RDI: 888214f5c8e4
[ 1168.265503][C1] RBP: c9178be0 R08: fbfff2120216
R09: 
[ 1168.273400][C1] R10:  R11: 
R12: 43145880
[ 1168.281300][C1] R13: 90b2db80 R14: 0002
R15: 0001000164cb
[ 1168.289218][C1]  ? call_function_single_interrupt+0xa/0x20
[ 1168.295117][C1]  ? lockdep_hardirqs_on+0x1b0/0x2c0
[ 1168.300319][C1]  _nohz_idle_balance+0x221/0x360
[ 1168.305256][C1]  run_rebalance_domains+0x16c/0x2e0
[ 1168.310452][C1]  __do_softirq+0x1ca/0x96a
[ 1168.314861][C1]  ? __irqentry_text_end+0x1fa9e7/0x1fa9e7
[ 1168.320579][C1]  ? hrtimer_reprogram+0x170/0x170
[ 1168.325608][C1]  ? __bpf_trace_preemptirq_template+0x100/0x100
[ 1168.331856][C1]  ? lapic_next_event+0x3c/0x50
[ 1168.336617][C1]  ? clockevents_program_event+0xfc/0x180
[ 1168.342249][C1]  ? check_flags.part.28+0x86/0x220
[ 1168.347355][C1]  ? trace_hardirqs_off+0x8d/0x1f0
[ 1168.352374][C1]  ? __bpf_trace_preemptirq_template+0x100/0x100
[ 1168.358620][C1]  ? rcu_read_lock_sched_held+0xac/0xe0
[ 1168.364077][C1]  ? rcu_read_lock_bh_held+0xc0/0xc0
[ 1168.369282][C1]  irq_exit+0xd6/0xf0
[ 1168.373168][C1]  smp_apic_timer_interrupt+0x215/0x560
[ 1168.378628][C1]  ? smp_call_function_single_interrupt+0x4e0/0x4e0
[ 1168.385137][C1]  ? smp_call_function_interrupt+0x4e0/0x4e0
[ 1168.391031][C1]  ? interrupt_entry+0xe4/0xf0
[ 1168.395705][C1]  ? trace_hardirqs_off_caller+0x8d/0x1f0
[ 1168.401336][C1]  ? trace_hardirqs

[tip:x86/entry 23/80] arch/x86/entry/common.c:234:24: warning: no previous prototype for function 'prepare_exit_to_usermode'

2020-05-19 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/entry
head:   095b7a3e7745e6fb7cf0a1c09967c4f43e76f8f4
commit: aa9712e07f82a5458f2f16c100c491d736240d60 [23/80] x86/entry/common: 
Protect against instrumentation
config: x86_64-allyesconfig (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project 
e6658079aca6d971b4e9d7137a3a2ecbc9c34aec)
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
git checkout aa9712e07f82a5458f2f16c100c491d736240d60
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kbuild test robot 

All warnings (new ones prefixed by >>, old ones prefixed by <<):

>> arch/x86/entry/common.c:234:24: warning: no previous prototype for function 
>> 'prepare_exit_to_usermode' [-Wmissing-prototypes]
__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
^
arch/x86/entry/common.c:234:19: note: declare 'static' if the function is not 
intended to be used outside of this translation unit
__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
^
static
>> arch/x86/entry/common.c:296:24: warning: no previous prototype for function 
>> 'syscall_return_slowpath' [-Wmissing-prototypes]
__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
^
arch/x86/entry/common.c:296:19: note: declare 'static' if the function is not 
intended to be used outside of this translation unit
__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
^
static
2 warnings generated.

vim +/prepare_exit_to_usermode +234 arch/x86/entry/common.c

   233  
 > 234  __visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
   235  {
   236  instrumentation_begin();
   237  __prepare_exit_to_usermode(regs);
   238  instrumentation_end();
   239  exit_to_user_mode();
   240  }
   241  
   242  #define SYSCALL_EXIT_WORK_FLAGS \
   243  (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |  \
   244   _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
   245  
   246  static void syscall_slow_exit_work(struct pt_regs *regs, u32 
cached_flags)
   247  {
   248  bool step;
   249  
   250  audit_syscall_exit(regs);
   251  
   252  if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
   253  trace_sys_exit(regs, regs->ax);
   254  
   255  /*
   256   * If TIF_SYSCALL_EMU is set, we only get here because of
   257   * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
   258   * We already reported this syscall instruction in
   259   * syscall_trace_enter().
   260   */
   261  step = unlikely(
   262  (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
   263  == _TIF_SINGLESTEP);
   264  if (step || cached_flags & _TIF_SYSCALL_TRACE)
   265  tracehook_report_syscall_exit(regs, step);
   266  }
   267  
   268  static void __syscall_return_slowpath(struct pt_regs *regs)
   269  {
   270  struct thread_info *ti = current_thread_info();
   271  u32 cached_flags = READ_ONCE(ti->flags);
   272  
   273  CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
   274  
   275  if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
   276  WARN(irqs_disabled(), "syscall %ld left IRQs disabled", 
regs->orig_ax))
   277  local_irq_enable();
   278  
   279  rseq_syscall(regs);
   280  
   281  /*
   282   * First do one-time work.  If these work items are enabled, we
   283   * want to run them exactly once per syscall exit with IRQs on.
   284   */
   285  if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
   286  syscall_slow_exit_work(regs, cached_flags);
   287  
   288  local_irq_disable();
   289  __prepare_exit_to_usermode(regs);
   290  }
   291  
   292  /*
   293   * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
   294   * state such that we can immediately switch to user mode.
   295   */
 > 296  __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
   297  {
   298  instrumentation_begin();
   299  __syscall_return_slowpath(regs);
   300  instrumentation_end();
   301  exit_to_user_mode();
   302  }
   303  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


[PATCH v11] arm64: dts: qcom: sc7180: Add WCN3990 WLAN module device node

2020-05-19 Thread Rakesh Pillai
Add device node for the ath10k SNOC platform driver probe
and add resources required for WCN3990 on sc7180 soc.

Signed-off-by: Rakesh Pillai 
---
Changes from v10:
- Corrected the position of wifi node, as per address
- Removed the wlan_fw_mem from reserved memory, since
  it's already added as reserved memory in the board DT file.
---
 arch/arm64/boot/dts/qcom/sc7180-idp.dts |  7 +++
 arch/arm64/boot/dts/qcom/sc7180.dtsi| 22 ++
 2 files changed, 29 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180-idp.dts 
b/arch/arm64/boot/dts/qcom/sc7180-idp.dts
index 4e9149d..38b102e 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-idp.dts
+++ b/arch/arm64/boot/dts/qcom/sc7180-idp.dts
@@ -389,6 +389,13 @@
};
 };
 
+ {
+   status = "okay";
+   wifi-firmware {
+   iommus = <_smmu 0xc2 0x1>;
+   };
+};
+
 /* PINCTRL - additions to nodes defined in sc7180.dtsi */
 
 _clk {
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 6b12c60..da79f8f 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -2811,6 +2811,28 @@
 
#freq-domain-cells = <1>;
};
+
+   wifi: wifi@1880 {
+   compatible = "qcom,wcn3990-wifi";
+   reg = <0 0x1880 0 0x80>;
+   reg-names = "membase";
+   iommus = <_smmu 0xc0 0x1>;
+   interrupts =
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   memory-region = <_mem>;
+   status = "disabled";
+   };
};
 
thermal-zones {
-- 
2.7.4


[PATCH v2 4/4] driver core: Add waiting_for_supplier sysfs file for devices

2020-05-19 Thread Saravana Kannan
This would be useful to check if a device is not probing because it's
waiting for a supplier to be added and then linked to before it can
probe.

To reduce sysfs clutter, this file is added only if it can ever be 1.
So, if fw_devlink is disabled or set to permissive, this file is not
added. Also, this file is removed once the device probes as it's no
longer relevant.

Signed-off-by: Saravana Kannan 
---
 .../sysfs-devices-waiting_for_supplier| 17 
 drivers/base/core.c   | 26 +++
 2 files changed, 43 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-waiting_for_supplier

diff --git a/Documentation/ABI/testing/sysfs-devices-waiting_for_supplier 
b/Documentation/ABI/testing/sysfs-devices-waiting_for_supplier
new file mode 100644
index ..59d073d20db6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-waiting_for_supplier
@@ -0,0 +1,17 @@
+What:  /sys/devices/.../waiting_for_supplier
+Date:  May 2020
+Contact:   Saravana Kannan 
+Description:
+   The /sys/devices/.../waiting_for_supplier attribute is only
+   present when fw_devlink kernel command line option is enabled
+   and is set to something stricter than "permissive".  It is
+   removed once a device probes successfully (because the
+   information is no longer relevant). The number read from it (0
+   or 1) reflects whether the device is waiting for one or more
+   suppliers to be added and then linked to using device links
+   before the device can probe.
+
+   A value of 0 means the device is not waiting for any suppliers
+   to be added before it can probe.  A value of 1 means the device
+   is waiting for one or more suppliers to be added before it can
+   probe.
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3304ea1a2604..83a3e0b62ce3 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1031,6 +1031,22 @@ static void device_link_drop_managed(struct device_link 
*link)
kref_put(>kref, __device_link_del);
 }
 
+static ssize_t waiting_for_supplier_show(struct device *dev,
+struct device_attribute *attr,
+char *buf)
+{
+   bool val;
+
+   device_lock(dev);
+   mutex_lock(_lock);
+   val = !list_empty(>links.needs_suppliers)
+ && dev->links.need_for_probe;
+   mutex_unlock(_lock);
+   device_unlock(dev);
+   return sprintf(buf, "%u\n", val);
+}
+static DEVICE_ATTR_RO(waiting_for_supplier);
+
 /**
  * device_links_driver_bound - Update device links after probing its driver.
  * @dev: Device to update the links for.
@@ -1055,6 +1071,7 @@ void device_links_driver_bound(struct device *dev)
mutex_lock(_lock);
list_del_init(>links.needs_suppliers);
mutex_unlock(_lock);
+   device_remove_file(dev, _attr_waiting_for_supplier);
 
device_links_write_lock();
 
@@ -2124,8 +2141,16 @@ static int device_add_attrs(struct device *dev)
goto err_remove_dev_groups;
}
 
+   if (fw_devlink_flags && !fw_devlink_is_permissive()) {
+   error = device_create_file(dev, &dev_attr_waiting_for_supplier);
+   if (error)
+   goto err_remove_dev_online;
+   }
+
return 0;
 
+ err_remove_dev_online:
+   device_remove_file(dev, &dev_attr_online);
  err_remove_dev_groups:
device_remove_groups(dev, dev->groups);
  err_remove_type_groups:
@@ -2143,6 +2168,7 @@ static void device_remove_attrs(struct device *dev)
struct class *class = dev->class;
const struct device_type *type = dev->type;
 
+   device_remove_file(dev, &dev_attr_waiting_for_supplier);
device_remove_file(dev, &dev_attr_online);
device_remove_groups(dev, dev->groups);
 
-- 
2.26.2.761.g0e0b3e54be-goog



[PATCH v2 2/4] driver core: Expose device link details in sysfs

2020-05-19 Thread Saravana Kannan
It's helpful to be able to look at device link details from sysfs, so
expose them there.

Say device-A is supplier of device-B. These are the additional files
this patch would create:

/sys/class/devlink/device-A:device-B/
auto_remove_on
consumer/ -> .../device-B/
runtime_pm
status
supplier/ -> .../device-A/
sync_state_only

/sys/devices/.../device-A/
consumer:device-B/ -> /sys/class/devlink/device-A:device-B/

/sys/devices/.../device-B/
supplier:device-A/ -> /sys/class/devlink/device-A:device-B/

That way:
To get a list of all the device links in the system:
ls /sys/class/devlink/

To get the consumer names and links of a device:
ls -d /sys/devices/.../device-X/consumer:*

To get the supplier names and links of a device:
ls -d /sys/devices/.../device-X/supplier:*

Signed-off-by: Saravana Kannan 
---
 drivers/base/core.c| 211 +++--
 include/linux/device.h |  58 +--
 2 files changed, 233 insertions(+), 36 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 6dbee5885abb..3304ea1a2604 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -235,6 +235,186 @@ void device_pm_move_to_tail(struct device *dev)
device_links_read_unlock(idx);
 }
 
+#define to_devlink(dev) container_of((dev), struct device_link, link_dev)
+
+static ssize_t status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+   char *status;
+
+   switch (to_devlink(dev)->status) {
+   case DL_STATE_NONE:
+   status = "not tracked"; break;
+   case DL_STATE_DORMANT:
+   status = "dormant"; break;
+   case DL_STATE_AVAILABLE:
+   status = "available"; break;
+   case DL_STATE_CONSUMER_PROBE:
+   status = "consumer probing"; break;
+   case DL_STATE_ACTIVE:
+   status = "active"; break;
+   case DL_STATE_SUPPLIER_UNBIND:
+   status = "supplier unbinding"; break;
+   default:
+   status = "unknown"; break;
+   }
+   return sprintf(buf, "%s\n", status);
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t auto_remove_on_show(struct device *dev,
+  struct device_attribute *attr, char *buf)
+{
+   struct device_link *link = to_devlink(dev);
+   char *str;
+
+   if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
+   str = "supplier unbind";
+   else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER)
+   str = "consumer unbind";
+   else
+   str = "never";
+
+   return sprintf(buf, "%s\n", str);
+}
+static DEVICE_ATTR_RO(auto_remove_on);
+
+static ssize_t runtime_pm_show(struct device *dev,
+  struct device_attribute *attr, char *buf)
+{
+   struct device_link *link = to_devlink(dev);
+
+   return sprintf(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME));
+}
+static DEVICE_ATTR_RO(runtime_pm);
+
+static ssize_t sync_state_only_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct device_link *link = to_devlink(dev);
+
+   return sprintf(buf, "%d\n", !!(link->flags & DL_FLAG_SYNC_STATE_ONLY));
+}
+static DEVICE_ATTR_RO(sync_state_only);
+
+static struct attribute *devlink_attrs[] = {
+   &dev_attr_status.attr,
+   &dev_attr_auto_remove_on.attr,
+   &dev_attr_runtime_pm.attr,
+   &dev_attr_sync_state_only.attr,
+   NULL,
+};
+ATTRIBUTE_GROUPS(devlink);
+
+static void devlink_dev_release(struct device *dev)
+{
+   kfree(to_devlink(dev));
+}
+
+static struct class devlink_class = {
+   .name = "devlink",
+   .owner = THIS_MODULE,
+   .dev_groups = devlink_groups,
+   .dev_release = devlink_dev_release,
+};
+
+static int devlink_add_symlinks(struct device *dev,
+   struct class_interface *class_intf)
+{
+   int ret;
+   size_t len;
+   struct device_link *link = to_devlink(dev);
+   struct device *sup = link->supplier;
+   struct device *con = link->consumer;
+   char *buf;
+
+   len = max(strlen(dev_name(sup)), strlen(dev_name(con)));
+   len += strlen("supplier:") + 1;
+   buf = kzalloc(len, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier");
+   if (ret)
+   goto out;
+
+   ret = sysfs_create_link(&link->link_dev.kobj, &con->kobj, "consumer");
+   if (ret)
+   goto err_con;
+
+   snprintf(buf, len, "consumer:%s", dev_name(con));
+   ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf);
+   if (ret)
+   goto err_con_dev;
+
+   snprintf(buf, len, "supplier:%s", dev_name(sup));
+   ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf);
+   if (ret)
+   goto err_sup_dev;
+
+   goto out;
+
+err_sup_dev:
+   

[PATCH v2 1/4] driver core: Remove unnecessary is_fwnode_dev variable in device_add()

2020-05-19 Thread Saravana Kannan
That variable is no longer necessary. Remove it and also fix a minor
typo in comments.

Signed-off-by: Saravana Kannan 
---
 drivers/base/core.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index f804e561e0a2..6dbee5885abb 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2543,7 +2543,6 @@ int device_add(struct device *dev)
struct class_interface *class_intf;
int error = -EINVAL;
struct kobject *glue_dir = NULL;
-   bool is_fwnode_dev = false;
 
dev = get_device(dev);
if (!dev)
@@ -2641,11 +2640,6 @@ int device_add(struct device *dev)
 
kobject_uevent(&dev->kobj, KOBJ_ADD);
 
-   if (dev->fwnode && !dev->fwnode->dev) {
-   dev->fwnode->dev = dev;
-   is_fwnode_dev = true;
-   }
-
/*
 * Check if any of the other devices (consumers) have been waiting for
 * this device (supplier) to be added so that they can create a device
@@ -2654,12 +2648,14 @@ int device_add(struct device *dev)
 * This needs to happen after device_pm_add() because device_link_add()
 * requires the supplier be registered before it's called.
 *
-* But this also needs to happe before bus_probe_device() to make sure
+* But this also needs to happen before bus_probe_device() to make sure
 * waiting consumers can link to it before the driver is bound to the
 * device and the driver sync_state callback is called for this device.
 */
-   if (is_fwnode_dev)
+   if (dev->fwnode && !dev->fwnode->dev) {
+   dev->fwnode->dev = dev;
fw_devlink_link_device(dev);
+   }
 
bus_probe_device(dev);
if (parent)
-- 
2.26.2.761.g0e0b3e54be-goog



  1   2   3   4   5   6   7   8   9   10   >