http://markmail.org/message/s6j2hfcdrtihwd3f

This patch adds the module smi_detector under drivers/misc
 
Code from Jon Masters with small changes from Luis Goncalves and
documentation from Clark Williams.
 
---
diff -uNp a/drivers/misc/Makefile b/drivers/misc/Makefile
--- a/drivers/misc/Makefile    2009-01-09 13:25:31.000000000 -0200
+++ b/drivers/misc/Makefile    2009-01-15 10:55:58.000000000 -0200
@@ -18,3 +18,5 @@ obj-$(CONFIG_SONY_LAPTOP)    += sony-laptop
 obj-$(CONFIG_THINKPAD_ACPI)    += thinkpad_acpi.o
 obj-$(CONFIG_FUJITSU_LAPTOP)    += fujitsu-laptop.o
 obj-$(CONFIG_EEPROM_93CX6)    += eeprom_93cx6.o
+obj-m                += smi_detector.o
+
diff -uNp a/drivers/misc/smi_detector.c b/drivers/misc/smi_detector.c
--- a/drivers/misc/smi_detector.c    1969-12-31 21:00:00.000000000 -0300
+++ b/drivers/misc/smi_detector.c    2009-01-16 09:59:14.000000000 -0200
@@ -0,0 +1,242 @@
+/*
+ * A simple SMI detector. Use this module to detect large system latencies
+ * introduced by the presence of vendor BIOS SMI (System Management Interrupts)
+ * somehow gone awry. We do this by hogging all of the CPU(s) for configurable
+ * time intervals, looking to see if something stole time from us. Therefore,
+ * obviously, you should NEVER use this module in a production environment.
+ *
+ * Copyright (C) 2008 Jon Masters, Red Hat, Inc. <j...@redhat.com>
+ *
+ * Licensed under the GNU General Public License, version 2.0.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/stop_machine.h>
+#include <linux/time.h>
+#include <linux/hrtimer.h>
+#include <linux/kthread.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+#define smi_version "0.2.1"
+#define SMI_BANNER "SMI Detector: "
+#define DEFAULT_MS_PER_SAMPLE 1
+#define DEFAULT_MS_SEP_SAMPLE 5000
+#define DEFAULT_SMI_THRESHOLD 1
+
+MODULE_AUTHOR("Jon Masters <j...@redhat.com>");
+MODULE_DESCRIPTION("A simple SMI detector");
+MODULE_LICENSE("GPL");
+
+static int debug = 0;
+static int enabled = 0;
+static int threshold = 0;
+
+module_param(debug, int, 0);
+module_param(enabled, int, 0);
+module_param(threshold, int, 0);
+
+struct task_struct *smi_kthread = NULL;
+
+struct smdata_struct {
+
+    u64    last_sample;
+    u64    max_sample;
+    u64    smi_count;
+    u64    threshold;
+    ktime_t last_spike;
+    u64    frequency;
+
+    atomic_t pending;
+    wait_queue_head_t wq;
+
+} smdata;
+
+struct dentry *smi_debug_dir = NULL;            /* SMI debugfs directory */
+struct dentry *smi_debug_max = NULL;            /* maximum TSC delta */
+struct dentry *smi_debug_smi = NULL;            /* SMI detect count */
+struct dentry *smi_debug_sample_ms = NULL;      /* sample size ms */
+struct dentry *smi_debug_interval_ms = NULL;    /* interval size ms */
+struct dentry *smi_debug_sample = NULL;        /* raw SMI samples (us) */
+struct dentry *smi_debug_threshold = NULL;    /* latency threshold (us) */
+struct dentry *smi_debug_frequency_us = NULL;    /* avg smi spike interval (us) */
+
+u32 smi_sample_ms   = DEFAULT_MS_PER_SAMPLE;    /* sample size ms */
+u32 smi_interval_ms = DEFAULT_MS_SEP_SAMPLE;    /* interval size ms */
+
+/*
+ * smi_get_sample -    Used to repeatedly capture the CPU TSC (or similar),
+ *                      looking for potential SMIs. Called under stop_machine.
+ */
+static int smi_get_sample(void *data)
+{
+    ktime_t start, t1, t2, spike;
+    s64 diff, total = 0;
+    u64 sample = 0;
+    struct smdata_struct *smi_data = (struct smdata_struct *)data;
+
+    start = ktime_get(); /* start timestamp */
+
+    do {
+
+        t1 = ktime_get();
+        t2 = ktime_get();
+
+        total = ktime_to_us(ktime_sub(t2, start));
+
+        diff = ktime_to_us(ktime_sub(t2, t1));
+        if (diff < 0) {
+            printk(KERN_ERR SMI_BANNER "time running backwards\n");
+            return 1;
+        }
+        if (diff > sample)
+            sample = diff; /* only want highest value per sample */
+
+        if (diff > smi_data->threshold)
+            spike = t1;
+
+    } while (total <= USEC_PER_MSEC*smi_sample_ms);
+
+    smi_data->last_sample = sample;
+
+    if (sample > smi_data->threshold) {
+        u64 tmp;
+
+        smi_data->smi_count++;
+        tmp = ktime_to_us(ktime_sub(spike, smi_data->last_spike));
+
+        if (smi_data->smi_count > 2)
+            smi_data->frequency = (smi_data->frequency + tmp) / 2;
+        else 
+            if (smi_data->smi_count == 2)
+                smi_data->frequency = tmp;
+
+        smi_data->last_spike = spike;
+    }
+
+    atomic_set(&smi_data->pending,1);
+
+    if (sample > smi_data->max_sample)
+        smi_data->max_sample = sample;

+    return 0;
+}
+
+/*
+ * smi_kthread_fn - Used to periodically sample the CPU TSC via smi_get_sample.
+ *                  We use stop_machine, which intentionally introduces latency.
+ */
+
+static int smi_kthread_fn(void *data)
+{
+    int err = 0;
+    struct smdata_struct *smi_data = (struct smdata_struct *)data;
+
+    while (!kthread_should_stop()) {
+        // upstream this is stop_machine now
+        err = stop_machine_run(smi_get_sample, smi_data, 0);
+
+        wake_up(&smi_data->wq);
+
+        if (msleep_interruptible(smi_interval_ms))
+            goto out;
+    }
+
+out:
+    return 0;
+}
+
+static int smi_debug_sample_fopen(struct inode *inode, struct file *filp)
+{
+
+    filp->private_data = (void *)&smdata;
+
+    return 0;     
+}
+static ssize_t smi_debug_sample_fread(struct file *filp, char __user *ubuf,
+                    size_t cnt, loff_t *ppos)
+{
+    int len;
+    char buf[64];
+    struct smdata_struct *smi_data = filp->private_data;
+
+    wait_event_interruptible(smi_data->wq, atomic_read(&smi_data->pending));
+    atomic_set(&smi_data->pending,0);
+    len = sprintf(buf, "%08llx\n", smi_data->last_sample);
+    copy_to_user(ubuf, buf, len);
+
+    return len;
+}
+
+static struct file_operations smi_sample_fops = {
+    .open        = smi_debug_sample_fopen,
+    .read        = smi_debug_sample_fread,
+    .owner        = THIS_MODULE,
+};
+
+int smi_detector_init(void)
+{
+
+    printk(KERN_INFO SMI_BANNER "version %s\n", smi_version);
+    if (!enabled) {
+        printk(KERN_INFO SMI_BANNER "please reload with enabled=1\n");
+        return -1;
+    }
+
+    smdata.last_sample = 0;
+    smdata.max_sample = 0;
+    smdata.smi_count = 0;
+    smdata.frequency = 0;
+
+    if (!threshold)
+        smdata.threshold = DEFAULT_SMI_THRESHOLD;
+    else
+        smdata.threshold = threshold;
+
+    init_waitqueue_head(&smdata.wq);
+    atomic_set(&smdata.pending, 0);
+
+    smi_debug_dir = debugfs_create_dir("smi_detector", NULL);
+
+    smi_debug_sample_ms = debugfs_create_u32("ms_per_sample",
+                0644, smi_debug_dir, &smi_sample_ms);
+    smi_debug_interval_ms = debugfs_create_u32("ms_between_samples",
+                0644, smi_debug_dir, &smi_interval_ms);
+    smi_debug_max = debugfs_create_u64("max_sample_us",
+                0644, smi_debug_dir, &smdata.max_sample);
+    smi_debug_smi = debugfs_create_u64("smi_count",
+                0644, smi_debug_dir, &smdata.smi_count);
+    smi_debug_sample = debugfs_create_file("sample_us",
+                0444, smi_debug_dir, &smdata, &smi_sample_fops);
+    smi_debug_frequency_us = debugfs_create_u64("avg_smi_interval_us",
+                0444, smi_debug_dir, &smdata.frequency);
+    smi_debug_threshold = debugfs_create_u64("latency_threshold_us",
+                0444, smi_debug_dir, &smdata.threshold);
+
+
+    smi_kthread = kthread_run(smi_kthread_fn, &smdata, "smi_detector");
+
+    return 0;
+
+}
+
+void smi_detector_exit(void)
+{
+    kthread_stop(smi_kthread);
+
+    debugfs_remove(smi_debug_sample_ms);
+    debugfs_remove(smi_debug_interval_ms);
+    debugfs_remove(smi_debug_max);
+    debugfs_remove(smi_debug_smi);
+    debugfs_remove(smi_debug_frequency_us);
+    debugfs_remove(smi_debug_threshold);
+    debugfs_remove(smi_debug_sample);
+    debugfs_remove(smi_debug_dir);
+}
+
+module_init(smi_detector_init);
+module_exit(smi_detector_exit);
diff -uNp a/Documentation/smi_detector.txt a/Documentation/smi_detector.txt
--- a/Documentation/smi_detector.txt    1969-12-31 21:00:00.000000000 -0300
+++ b/Documentation/smi_detector.txt    2009-01-16 16:22:21.000000000 -0200
@@ -0,0 +1,94 @@
+Introduction:
+-------------
+
+The module smi_detector is a special purpose kernel module that is
+used to detect if System Management Interrupts (SMIs) are causing event
+latencies in the Linux RT kernel.
+
+SMIs are usually not serviced by the Linux kernel. They are set up by
+BIOS code and are serviced by BIOS code, usually for critical events
+such as management of thermal sensors and fans. Sometimes though, SMIs
+are used for other tasks and those tasks can spend an inordinate
+amount of time in the handler (sometimes measured in milliseconds).
+Obviously if you are trying to keep event service latencies down in the
+microsecond range, this is a problem.
+
+The SMI detector works by hogging the cpu for configurable amounts of
+time (by calling stop_machine()), polling the Time Stamp Counter (TSC)
+register for some period, then looking for gaps in the TSC data. Any
+gap indicates a time when the polling was interrupted and since the
+machine is stopped and interrupts turned off the only thing that could
+do that would be an SMI.
+
+Note that the SMI detector should *NEVER* be used in a production
+environment. It is intended to be run manually to determine if the
+hardware platform has a problem with long SMI service routines.
+
+Usage:
+------
+
+Loading the module smi_detector passing the parameter "enabled=1" is the only
+step required to start the smi_detector. It is possible to define a threshold
+in microseconds (us) above which latency spikes will be taken in account
+(parameter "threshold=").
+
+Example:
+
+    # insmod ./smi_detector.ko enabled=1 threshold=100
+
+After the module is loaded, it creates a directory named "smi_detector" under
+the debugfs mountpoint, "/debugfs/smi_detector" for this text. It is necessary
+to have debugfs mounted.
+
+avg_smi_interval_us    - average interval (usecs) between SMI latency spikes
+latency_threshold_us    - minimum latency value to be considered (usecs)
+max_sample_us        - maximum SMI latency spike observed (usecs)
+ms_between_samples    - interval between samples (ms)
+ms_per_sample        - sampling time (ms)
+sample_us        - last sample (usecs). Continously updated, may be used
+              to plot graphs or to create histograms
+smi_count        - how many latency spikes have been observed
+
+
+    # cd /debugfs/smi_detector
+
+    # cat *
+    0        (avg_smi_interval_us)
+    100        (latency_threshold_us)
+    0        (latency_threshold_us)
+    5000        (ms_between_samples)
+    1        (ms_per_sample)
+    3        (sample_us)
+    0        (smi_count)
+
+
+The default values for ms_between_samples and ms_per_sample define that every
+5000ms (5s) samples will be collected during 1ms. It is possible to change the
+sampling time or the sampling interval writing to the related files. To collect
+samples during 5ms every 50ms:
+
+    # echo 5 > ms_per_sample
+    # echo 50 > ms_between_samples
+
+
+    # cat ms_between_samples
+    50
+    # cat ms_per_sample
+    5
+
+After a while, data may be verified to atest the existence of SMI induced
+latencies:
+
+    # cat smi_count 
+    2
+    # cat max_sample_us 
+    468
+    # cat avg_smi_interval_us 
+    1356306050
+
+Depending on the latencies observed it is important to better adjust the
+sampling intervals to obtain more accurate measurements. Care must be taken
+to not create a continous sampling situation, that might be perceived by the
+kernel as a deadlock.
+
+

Reply via email to