panic - init died

2015-03-22 Thread Mages Simon
Hi there,

we got some Problems with init.

init died (signal 11 exit 0)

after using reboot(8) without any flags.

After some investigation I saw that in reboot(8),
at line 241, the following code starts:

for (i = 1;; ++i) {
if (kill(-1, SIGKILL) == -1) {
if (errno == ESRCH)
break;
goto restart;
}
if (i  5) {
warnx(WARNING: some process(es) wouldn't die);
break;
}
(void)sleep(2 * i);
}

reboot(howto);
/* FALLTHROUGH */

Every process gets a SIGKILL and becomes a zombie until
init calls wait(2) on every one of those. But a zombie
doesn't respond to signals, so after the first iteration
the loop will just wait. After the loop, reboot(2) is
called whether or not processes are still there.

In case every zombie is finally dead, everything is fine
because init is sleeping and the call to vfs_shutdown,
which frees all pages, doesn't affect init.

But if there are still zombies around which init tries
to clean up, and then reboot(2) is called and frees all
pages, then init gets a SIGSEGV. Init tries to handle it,
but the signal handler for SIGSEGV has already been freed as well.
If this happens, init dies and the kernel panics because
of kern/kern_sig.c:
revision 1.167
date: 2014/06/21 20:58:30;  author: guenther;  state: Exp;  lines: +12 -1;
commitid: IvlsVYNsU5F7UWHE;
If the kernel generates a deadly trap signal (SEGV, BUS, etc) for
an untraced process but finds it blocking or ignoring it, just kill
the process instead of looping.  It's undefined behavor in POSIX but
quite annoying when encountered in practice.

A somewhat naive approach to solve this would be:
Index: kern//kern_sig.c
===
RCS file: /home/cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.178
diff -u -p -r1.178 kern_sig.c
--- kern//kern_sig.c9 Feb 2015 13:41:24 -   1.178
+++ kern//kern_sig.c18 Mar 2015 11:38:07 -
@@ -769,10 +769,13 @@ trapsignal(struct proc *p, int signum, u
 * generated by the kernel, be ignorable or blockable.
 * If it is and we're not being traced, then just kill
 * the process.
+*
+* XXX: except we are init, init shouldn't die
 */
if ((pr-ps_flags  PS_TRACED) == 0 
(sigprop[signum]  SA_KILL) 
-   ((p-p_sigmask  mask) || (ps-ps_sigignore  mask)))
+   ((p-p_sigmask  mask) || (ps-ps_sigignore  mask)) 
+   p-p_pid != 1)
sigexit(p, signum);
ptsignal(p, signum, STHREAD);
}

But from my point of view init should handle this completely
by itself. Because of init.c:
revision 1.45
date: 2010/10/15 07:11:02;  author: dlg;  state: Exp;  lines: +4 -4;
freebsd uses SIGINT to request a reboot, we may as well be consistent.

I thought about fixing reboot(8) in a way that it just sends
SIGINT to init(8), but this feels a little bit weird because
there are still other cases which are only handled by reboot(8)
and which init(8) is not aware of.

It would feel more natural if init(8) would handle all the cases.
Also the code redundancy in init(8) and reboot(8) seems to be not
a good idea.

But maybe there is a reason for it.

BR

Simon Mages



[PATCH] dependency tree bug in ramdisk Makefile

2015-03-15 Thread Mages Simon
Hi there,

I think I found a bug in the build process: I'm not able to build miniroot
with multiple processes through - for example - 'make -j4'

$ pwd
/usr/src/distrib/amd64/ramdisk_cd
$ sudo make -j 4
awk -f /usr/src/distrib/amd64/ramdisk_cd/../../miniroot/makeconf.awk 
CBIN=instbin /usr/src/distrib/amd64/ramdisk_cd/../common/list 
/usr/src/distrib/amd64/ramdisk_cd/list.local  instbin.conf
mtree -def /usr/src/distrib/amd64/ramdisk_cd/../../miniroot/mtree.conf -p /mnt/ 
-u
cd /usr/src/distrib/amd64/ramdisk_cd/../../../sys/arch/amd64/conf  config 
RAMDISK_CD
Filesystem  512-blocks  Used Avail Capacity iused   ifree  %iused  
Mounted on
/dev/sd0a  2057756448656   150621623%6151  149751 4%   /
umount /mnt
umount: /mnt: not currently mounted
*** Error 1 in target 'rd_teardown' (ignored)
vnconfig -u vnd0
vnconfig: VNDIOCCLR: Device not configured
*** Error 1 in target 'rd_teardown' (ignored)
cp /var/tmp/image.31801 mr.fs
cp: /var/tmp/image.31801: No such file or directory
*** Error 1 in target 'rd_teardown'
TOPDIR=/usr/src/distrib/amd64/ramdisk_cd/.. 
CURDIR=/usr/src/distrib/amd64/ramdisk_cd 
OBJDIR=/usr/src/distrib/amd64/ramdisk_cd  REV=56 TARGDIR=/mnt 
UTILS=/usr/src/distrib/amd64/ramdisk_cd/../../miniroot  RELEASEDIR= sh 
/usr/src/distrib/amd64/ramdisk_cd/../../miniroot/runlist.sh 
/usr/src/distrib/amd64/ramdisk_cd/../common/list 
/usr/src/distrib/amd64/ramdisk_cd/list.local
COPY${OBJDIR}/instbin   
  instbin
cp: /usr/src/distrib/amd64/ramdisk_cd/instbin: No such file or directory
*** Error 1 in target 'do_files'
cd /usr/src/distrib/amd64/ramdisk_cd/../../../sys/arch/amd64/compile/RAMDISK_CD 
  make clean  COPTS=-Os exec make
rm -f eddep *bsd *bsd.gdb tags *.[dio] [a-z]*.s  [Ee]rrs linterrs assym.h
cc  -Werror -Wall -Wstrict-prototypes -Wmissing-prototypes  -Wno-main 
-Wno-uninitialized  -Wstack-larger-than-2047 -mcmodel=kernel -mno-red-zone 
-mno-sse2 -mno-sse -mno-3dnow  -mno-mmx -msoft-float -fno-omit-frame-pointer 
-fno-builtin-printf -fno-builtin-snprintf  -fno-builtin-vsnprintf 
-fno-builtin-log  -fno-builtin-log2 -fno-builtin-malloc -fno-pie 
-fno-stack-protector -Wa,-n -Os -pipe -nostdinc -I../../../.. -I. 
-I../../../../arch -DSCSITERSE -DSMALL_KERNEL -DNO_PROPOLICE -DTIMEZONE=0 
-DDST=0 -DFFS -DFFS2 -DEXT2FS -DNFSCLIENT -DCD9660 -DUDF -DMSDOSFS -DINET 
-DINET6 -DBOOT_CONFIG -DCRYPTO -DRAMDISK_HOOKS -DMINIROOTSIZE=0x1180 
-DPCIVERBOSE -DMAXUSERS=4 -D_KERNEL -MD -MP  -c swapbsd.c


I think this is a bug in the dependency tree of
src/distrib/amd64/ramdisk_cd/Makefile.inc

This Patch would fix it:

Index: distrib/amd64/common/Makefile.inc
===
RCS file: /home/cvs/src/distrib/amd64/common/Makefile.inc,v
retrieving revision 1.26
diff -u -p -r1.26 Makefile.inc
--- distrib/amd64/common/Makefile.inc   29 Mar 2014 17:31:40 -  1.26
+++ distrib/amd64/common/Makefile.inc   13 Mar 2015 14:28:53 -
@@ -71,7 +71,7 @@ bsd:
${MAKE} clean  COPTS=-Os exec ${MAKE}
cp ${.CURDIR}/../../../sys/arch/amd64/compile/${RAMDISK}/bsd bsd
 
-${IMAGE}: ${CBIN} rd_setup do_files rd_teardown
+${IMAGE}: rd_teardown
 
 rd_setup: ${CBIN}
dd if=/dev/zero of=${REALIMAGE} bs=512 count=${NBLKS}
@@ -81,7 +81,7 @@ rd_setup: ${CBIN}
fsck ${VND_RDEV}
mount ${VND_DEV} ${MOUNT_POINT}
 
-rd_teardown:
+rd_teardown: do_files
@df -i ${MOUNT_POINT}
-umount ${MOUNT_POINT}
-vnconfig -u ${VND}
@@ -120,7 +120,7 @@ ${CBIN}: ${CBIN}.mk ${CBIN}.cache ${CBIN
 ${CRUNCHCONF}: ${LISTS}
awk -f ${UTILS}/makeconf.awk CBIN=${CBIN} ${LISTS}  ${CRUNCHCONF}
 
-do_files:
+do_files: rd_setup
mtree -def ${MTREE} -p ${MOUNT_POINT}/ -u
TOPDIR=${TOP} CURDIR=${.CURDIR} OBJDIR=${.OBJDIR} \
REV=${REV} TARGDIR=${MOUNT_POINT} UTILS=${UTILS} \



[PATCH] bpf is now blocking again with and without timeout

2015-01-04 Thread Mages Simon
I restored the functionality according to the manpage.

That means that read() on bpf is blocking again. If a
timeout is set read() will block until the timeout is
over.

Maybe asynchronous is also broken, i will look into that later.

Index: sys/net/bpf.c
===
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.103
diff -u -p -r1.103 bpf.c
--- sys/net/bpf.c   12 Jul 2014 18:44:22 -  1.103
+++ sys/net/bpf.c   4 Jan 2015 14:22:48 -
@@ -433,12 +433,17 @@ bpfread(dev_t dev, struct uio *uio, int 
if (d-bd_rtout == -1) {
/* User requested non-blocking I/O */
error = EWOULDBLOCK;
+   } else if (d-bd_rtout == 0) {
+   /* No timeout? let's wait half a second for an
+* interrupt */
+   error = tsleep((caddr_t)d, PRINET|PCATCH, bpf,
+   hz / 2);
+   if (error == EWOULDBLOCK)
+   error = 0;
} else {
-   if ((d-bd_rdStart + d-bd_rtout)  ticks) {
-   error = tsleep((caddr_t)d, PRINET|PCATCH, bpf,
+   /* User requested timeout */
+   error = tsleep((caddr_t)d, PRINET|PCATCH, bpf,
d-bd_rtout);
-   } else
-   error = EWOULDBLOCK;
}
if (error == EINTR || error == ERESTART) {
D_PUT(d);


To test my patch I wrote a small tool. This tool filters 'finger'
packets.

/* BEGINNING */
#include stdlib.h
#include stdio.h
#include fcntl.h
#include unistd.h
#include signal.h
#include string.h
#include errno.h
#include err.h

#include sys/types.h
#include sys/time.h
#include sys/ioctl.h
#include sys/socket.h
#include net/bpf.h
#include net/ethertypes.h
#include netinet/in.h
#include net/if.h

void usage();
void handler(int);

u_int BUFFFER_SIZE = 32786;
struct bpf_program bpf_machine  = {
13,
(struct bpf_insn []){
BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 10),
BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 23),
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, 8),
BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 20),
BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x1fff, 6, 0),
BPF_STMT(BPF_LDX+BPF_B+BPF_MSH, 14),
BPF_STMT(BPF_LD+BPF_H+BPF_IND, 14),
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 79, 2, 0),
BPF_STMT(BPF_LD+BPF_H+BPF_IND, 16),
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 79, 0, 1),
BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
BPF_STMT(BPF_RET+BPF_K, 0),
},
};
struct timeval timeout = {
(time_t)0,
(suseconds_t)0,
};
struct ifreq interface;
struct sigaction sigact;
int fd, out, pid;
const char *error;
int ch, nflag, tflag, bflag, iflag, dflag, qflag, pflag, aflag;
/* nflag = interface name */
/* tflag = timeout in seconds */
/* bflag = non-blocking */
/* iflag = immediate mode */
/* dflag = bpf device, absolute path */
/* qflag = buffer size */
/* pflag = promiscuous mode */
/* aflag = async mode */

int
main(int argc, char **argv)
{
while ((ch = getopt(argc, argv, n:t:bid:q:pa)) != -1) {
switch (ch) {
case 'n':
nflag = 1;
strlcpy(interface.ifr_name, optarg,
sizeof(interface.ifr_name));
break;
case 't':
tflag = 1;
timeout.tv_sec = strtonum(optarg, 1, 600, error);
if (error)
errx(1, max timeout %s: %s, error, optarg);
break;
case 'b':
bflag = 1;
break;
case 'i':
iflag = 1;
break;
case 'd':
dflag = 1;
if ((fd = open(optarg, O_RDONLY))  0)
errx(1, open: %s, strerror(errno));
break;
case 'q':
qflag = 1;
if (((BUFFFER_SIZE = strtonum(optarg, 1024, 4194304,
error)) % 1024) != 0)
errx(1, buffer has to be '% 1024 == 0');
break;
case 'p':
pflag = 1;
break;
case 'a':
aflag = 1;
sigact.sa_handler = handler;
pid = getpid();
if (sigaction(SIGIO, sigact, 

Re: patch: Intel CPU sensor readout correction

2015-01-04 Thread Mages Simon
The undocumented ones are already in the kernel but not marked
as such. That's why I renamed them: they had the
names of the documented features. To cut a long story short,
the undocumented MSRs now have the name *_UNDOCUMENTED, so that
the real documented MSRs can get their real names according to the
documentation.

But anyway, this is the current Patch:

Index: sys/arch/amd64//amd64/identcpu.c
===
RCS file: /home/cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.54
diff -u -p -r1.54 identcpu.c
--- sys/arch/amd64//amd64/identcpu.c13 Jul 2014 12:11:01 -  1.54
+++ sys/arch/amd64//amd64/identcpu.c14 Dec 2014 22:20:07 -
@@ -180,12 +180,16 @@ void  intelcore_update_sensor(void *args)
 /*
  * Temperature read on the CPU is relative to the maximum
  * temperature supported by the CPU, Tj(Max).
- * Poorly documented, refer to:
- * http://softwarecommunity.intel.com/isn/Community/
- * en-US/forums/thread/30228638.aspx
- * Basically, depending on a bit in one msr, the max is either 85 or 100.
- * Then we subtract the temperature portion of thermal status from
- * max to get current temperature.
+ * Refer to:
+ * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
+ * Section 35 and
+ * http://www.intel.com/content/dam/www/public/us/en/documents/
+ * white-papers/cpu-monitoring-dts-peci-paper.pdf
+ *
+ * The Temperature on Intel CPUs can be between 70 and 105 degC, since
+ * WESTMERE we can read the TJmax from the DIE. For older CPUs we have
+ * to gues or use undocumented MSRs. Then we subtract the temperature
+ * portion of thermal status from max to get current temperature.
  */
 void
 intelcore_update_sensor(void *args)
@@ -195,9 +199,21 @@ intelcore_update_sensor(void *args)
int max = 100;
 
/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
-   if (ci-ci_model == 0xe 
-   (rdmsr(MSR_TEMPERATURE_TARGET)  MSR_TEMPERATURE_TARGET_LOW_BIT))
+   if (ci-ci_model == 0x0e 
+   (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) 
+MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
max = 85;
+
+   /*
+* Newer CPU's can tell you what there max temperature is.
+* See: '64-ia-32-architectures-software-developer-
+* vol-3c-part-3-manual.pdf'
+*/
+   if (ci-ci_model  0x17  ci-ci_model != 0x1c 
+   ci-ci_model != 0x26  ci-ci_model != 0x27 
+   ci-ci_model != 0x35  ci-ci_model != 0x36)
+   max = MSR_TEMPERATURE_TARGET_TJMAX(
+   rdmsr(MSR_TEMPERATURE_TARGET));
 
msr = rdmsr(MSR_THERM_STATUS);
if (msr  MSR_THERM_STATUS_VALID_BIT) {
Index: sys/arch/amd64//include/specialreg.h
===
RCS file: /home/cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.28
diff -u -p -r1.28 specialreg.h
--- sys/arch/amd64//include/specialreg.h3 Jul 2014 21:15:28 -   
1.28
+++ sys/arch/amd64//include/specialreg.h14 Dec 2014 22:23:04 -
@@ -278,9 +278,19 @@
 #define MSR_PERFCTR0   0x0c1
 #define MSR_PERFCTR1   0x0c2
 #define MSR_FSB_FREQ   0x0cd   /* Core Duo/Solo only */   
-/* not documented anywhere, see intelcore_update_sensor() */
-#define MSR_TEMPERATURE_TARGET 0x0ee
-#define MSR_TEMPERATURE_TARGET_LOW_BIT 0x4000
+/*
+ * for Core i Series and newer Xeon's, see
+ * http://www.intel.com/content/dam/www/public/us/en/
+ * documents/white-papers/cpu-monitoring-dts-peci-paper.pdf
+ */
+#define MSR_TEMPERATURE_TARGET 0x1a2   /* Core i Series, Newer Xeon's */
+#define MSR_TEMPERATURE_TARGET_TJMAX(msr) (((msr)  16)  0xff)
+/*
+ * not documented anywhere, see intelcore_update_sensor()
+ * only available Core Duo and Core Solo Processors
+ */
+#define MSR_TEMPERATURE_TARGET_UNDOCUMENTED0x0ee
+#define MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED0x4000
 #define MSR_MTRRcap0x0fe
 #defineMSR_BBL_CR_ADDR 0x116   /* PII+ only */
 #defineMSR_BBL_CR_DECC 0x118   /* PII+ only */


At Sun, 4 Jan 2015 12:34:19 -0800,
Mike Larkin wrote:
 
 On Thu, Nov 27, 2014 at 12:44:54PM +0100, Mages, Simon wrote:
  Hi there,
  
  the temperatures 'sysctl hw.sensors' displays for each CPU
  are wrong for the most modern Intel CPUs.
  
  OpenBSD uses only 100 or 85 degC as TJmax for Intel CPUs, but
  in reality the TJmax value is somewhere around those specified
  values. Intel defines a TJmax for every production batch
  individually and burns this on the DIE, since WESTMERE we
  can officially read this value in supervisor mode.
  
  I have a patch which would fix this for CPUs since WESTMERE.
 
 For the undocumented ones, where did you find the information used
 to create this diff?
 
 -ml
 
  
  Index: sys/arch/amd64//amd64/identcpu.c
  ===
  RCS

Re: patch: Intel CPU sensor readout correction

2015-01-04 Thread Mages Simon
Thank you, fixed.

Index: sys/arch/amd64//amd64/identcpu.c
===
RCS file: /home/cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.54
diff -u -p -r1.54 identcpu.c
--- sys/arch/amd64//amd64/identcpu.c13 Jul 2014 12:11:01 -  1.54
+++ sys/arch/amd64//amd64/identcpu.c14 Dec 2014 22:20:07 -
@@ -180,12 +180,16 @@ void  intelcore_update_sensor(void *args)
 /*
  * Temperature read on the CPU is relative to the maximum
  * temperature supported by the CPU, Tj(Max).
- * Poorly documented, refer to:
- * http://softwarecommunity.intel.com/isn/Community/
- * en-US/forums/thread/30228638.aspx
- * Basically, depending on a bit in one msr, the max is either 85 or 100.
- * Then we subtract the temperature portion of thermal status from
- * max to get current temperature.
+ * Refer to:
+ * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
+ * Section 35 and
+ * http://www.intel.com/content/dam/www/public/us/en/documents/
+ * white-papers/cpu-monitoring-dts-peci-paper.pdf
+ *
+ * The Temperature on Intel CPUs can be between 70 and 105 degC, since
+ * WESTMERE we can read the TJmax from the DIE. For older CPUs we have
+ * to guess or use undocumented MSRs. Then we subtract the temperature
+ * portion of thermal status from max to get current temperature.
  */
 void
 intelcore_update_sensor(void *args)
@@ -195,9 +199,21 @@ intelcore_update_sensor(void *args)
int max = 100;
 
/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
-   if (ci-ci_model == 0xe 
-   (rdmsr(MSR_TEMPERATURE_TARGET)  MSR_TEMPERATURE_TARGET_LOW_BIT))
+   if (ci-ci_model == 0x0e 
+   (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) 
+MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
max = 85;
+
+   /*
+* Newer CPUs can tell you what their max temperature is.
+* See: '64-ia-32-architectures-software-developer-
+* vol-3c-part-3-manual.pdf'
+*/
+   if (ci-ci_model  0x17  ci-ci_model != 0x1c 
+   ci-ci_model != 0x26  ci-ci_model != 0x27 
+   ci-ci_model != 0x35  ci-ci_model != 0x36)
+   max = MSR_TEMPERATURE_TARGET_TJMAX(
+   rdmsr(MSR_TEMPERATURE_TARGET));
 
msr = rdmsr(MSR_THERM_STATUS);
if (msr  MSR_THERM_STATUS_VALID_BIT) {
Index: sys/arch/amd64//include/specialreg.h
===
RCS file: /home/cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.28
diff -u -p -r1.28 specialreg.h
--- sys/arch/amd64//include/specialreg.h3 Jul 2014 21:15:28 -   
1.28
+++ sys/arch/amd64//include/specialreg.h14 Dec 2014 22:23:04 -
@@ -278,9 +278,19 @@
 #define MSR_PERFCTR0   0x0c1
 #define MSR_PERFCTR1   0x0c2
 #define MSR_FSB_FREQ   0x0cd   /* Core Duo/Solo only */   
-/* not documented anywhere, see intelcore_update_sensor() */
-#define MSR_TEMPERATURE_TARGET 0x0ee
-#define MSR_TEMPERATURE_TARGET_LOW_BIT 0x4000
+/*
+ * for Core i Series and newer Xeons, see
+ * http://www.intel.com/content/dam/www/public/us/en/
+ * documents/white-papers/cpu-monitoring-dts-peci-paper.pdf
+ */
+#define MSR_TEMPERATURE_TARGET 0x1a2   /* Core i Series, Newer Xeons */
+#define MSR_TEMPERATURE_TARGET_TJMAX(msr) (((msr)  16)  0xff)
+/*
+ * not documented anywhere, see intelcore_update_sensor()
+ * only available Core Duo and Core Solo Processors
+ */
+#define MSR_TEMPERATURE_TARGET_UNDOCUMENTED0x0ee
+#define MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED0x4000
 #define MSR_MTRRcap0x0fe
 #defineMSR_BBL_CR_ADDR 0x116   /* PII+ only */
 #defineMSR_BBL_CR_DECC 0x118   /* PII+ only */


At Sun, 4 Jan 2015 16:45:26 -0500,
TJ wrote:
 
 [1  text/plain; us-ascii (7bit)]
 Couple spelling fixes for the diff:
 
  + * WESTMERE we can read the TJmax from the DIE. For older CPUs we have
  + * to gues or use undocumented MSRs. Then we subtract the temperature
 
 guess, not gues
 
  +* Newer CPU's can tell you what there max temperature is.
 
 CPUs, not CPU's
 their, not there
 
  + * for Core i Series and newer Xeon's, see
  +#define MSR_TEMPERATURE_TARGET 0x1a2   /* Core i Series, Newer Xeon's 
  */
 
 Xeons, not Xeon's
 [2 Digital signature application/pgp-signature (7bit)]
 No public key for 5DEC0E71A8FE706F created at 2015-01-04T22:45:26+0100 using 
 RSA



[PATCH] tcpdump ''immediate mode''

2014-12-17 Thread Mages Simon
Hi,

tcpdump feels a bit laggy or slow sometimes when I use it for live debugging.

The following patch adds a new flag, '-b', to tcpdump. With this flag, tcpdump
sets BIOCIMMEDIATE for the current bpf(4) interface. With BIOCIMMEDIATE set,
the output is fluent.

Index: usr.sbin/tcpdump/privsep.c
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v
retrieving revision 1.30
diff -u -p -r1.30 privsep.c
--- usr.sbin/tcpdump/privsep.c  22 Sep 2011 09:12:30 -  1.30
+++ usr.sbin/tcpdump/privsep.c  14 Dec 2014 22:40:14 -
@@ -318,7 +318,7 @@ priv_init(int argc, char **argv)
 static void
 impl_open_bpf(int fd, int *bpfd)
 {
-   int snaplen, promisc, err;
+   int snaplen, promisc, immediate, err;
u_int dlt, dirfilt;
char device[IFNAMSIZ];
size_t iflen;
@@ -327,12 +327,13 @@ impl_open_bpf(int fd, int *bpfd)
 
must_read(fd, snaplen, sizeof(int));
must_read(fd, promisc, sizeof(int));
+   must_read(fd, immediate, sizeof(int));
must_read(fd, dlt, sizeof(u_int));
must_read(fd, dirfilt, sizeof(u_int));
iflen = read_string(fd, device, sizeof(device), __func__);
if (iflen == 0)
errx(1, Invalid interface size specified);
-   *bpfd = pcap_live(device, snaplen, promisc, dlt, dirfilt);
+   *bpfd = pcap_live(device, snaplen, promisc, immediate, dlt, dirfilt);
err = errno;
if (*bpfd  0)
logmsg(LOG_DEBUG,
Index: usr.sbin/tcpdump/privsep.h
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep.h,v
retrieving revision 1.7
diff -u -p -r1.7 privsep.h
--- usr.sbin/tcpdump/privsep.h  25 Aug 2009 06:59:17 -  1.7
+++ usr.sbin/tcpdump/privsep.h  14 Dec 2014 22:40:14 -
@@ -47,10 +47,10 @@ int priv_init(int, char **);
 voidpriv_init_done(void);
 
 intsetfilter(int, int, char *);
-intpcap_live(const char *, int, int, u_int, u_int);
+intpcap_live(const char *, int, int, int, u_int, u_int);
 
 struct bpf_program *priv_pcap_setfilter(pcap_t *, int, u_int32_t);
-pcap_t *priv_pcap_live(const char *, int, int, int, char *, u_int,
+pcap_t *priv_pcap_live(const char *, int, int, int, int, char *, u_int,
u_int);
 pcap_t *priv_pcap_offline(const char *, char *);
 
Index: usr.sbin/tcpdump/privsep_pcap.c
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep_pcap.c,v
retrieving revision 1.17
diff -u -p -r1.17 privsep_pcap.c
--- usr.sbin/tcpdump/privsep_pcap.c 14 Nov 2012 03:33:04 -  1.17
+++ usr.sbin/tcpdump/privsep_pcap.c 14 Dec 2014 22:40:14 -
@@ -172,8 +172,8 @@ priv_pcap_setfilter(pcap_t *hpcap, int o
 
 /* privileged part of priv_pcap_live */
 int
-pcap_live(const char *device, int snaplen, int promisc, u_int dlt,
-u_int dirfilt)
+pcap_live(const char *device, int snaplen, int promisc, int immediate,
+u_int dlt, u_int dirfilt)
 {
charbpf[sizeof /dev/bpf00];
int fd, n = 0;
@@ -204,6 +204,10 @@ pcap_live(const char *device, int snaple
if (promisc)
/* this is allowed to fail */
ioctl(fd, BIOCPROMISC, NULL);
+
+   if (immediate  ioctl(fd, BIOCIMMEDIATE, immediate)  0)
+   goto error;
+
if (ioctl(fd, BIOCSDIRFILT, dirfilt)  0)
goto error;
 
@@ -223,7 +227,7 @@ pcap_live(const char *device, int snaple
  * unprivileged part.
  */
 pcap_t *
-priv_pcap_live(const char *dev, int slen, int prom, int to_ms,
+priv_pcap_live(const char *dev, int slen, int prom, int imme, int to_ms,
 char *ebuf, u_int dlt, u_int dirfilt)
 {
int fd, err;
@@ -251,6 +255,7 @@ priv_pcap_live(const char *dev, int slen
write_command(priv_fd, PRIV_OPEN_BPF);
must_write(priv_fd, slen, sizeof(int));
must_write(priv_fd, prom, sizeof(int));
+   must_write(priv_fd, imme, sizeof(int));
must_write(priv_fd, dlt, sizeof(u_int));
must_write(priv_fd, dirfilt, sizeof(u_int));
write_string(priv_fd, dev);
Index: usr.sbin/tcpdump/tcpdump.8
===
RCS file: /cvs/src/usr.sbin/tcpdump/tcpdump.8,v
retrieving revision 1.83
diff -u -p -r1.83 tcpdump.8
--- usr.sbin/tcpdump/tcpdump.8  3 Jun 2014 02:57:29 -   1.83
+++ usr.sbin/tcpdump/tcpdump.8  14 Dec 2014 22:40:15 -
@@ -28,7 +28,7 @@
 .Sh SYNOPSIS
 .Nm tcpdump
 .Bk -words
-.Op Fl AadefILlNnOopqStvXx
+.Op Fl AabdefILlNnOopqStvXx
 .Op Fl c Ar count
 .Op Fl D Ar direction
 .Oo Fl E Oo Ar espalg : Oc Ns
@@ -61,6 +61,9 @@ The smaller of the entire packet or
 bytes will be printed.
 .It Fl a
 Attempt to convert network and broadcast addresses to names.
+.It Fl b
+Disables read blocking on the bpf(4) buffer. With this so called
+``immediate mode'' reads return immediately upon packet reception.
 .It Fl c Ar count
 

BUG in bpf ?

2014-12-17 Thread Mages Simon
Hi,

if BIOCGRTIMEOUT is set, we should wait for the timeout and not just
set EWOULDBLOCK. From my point of view the whole 'if' is unnecessary.

If the User doesn't set a timeout we have to wait anyway with tsleep()
and loop again. Well, and if the timeout is set, we should tsleep()
for the set timeout.


Index: sys/net//bpf.c
===
RCS file: /home/cvs/src/sys/net/bpf.c,v
retrieving revision 1.103
diff -u -p -u -r1.103 bpf.c
--- sys/net//bpf.c  12 Jul 2014 18:44:22 -  1.103
+++ sys/net//bpf.c  17 Dec 2014 08:10:18 -
@@ -434,11 +434,9 @@ bpfread(dev_t dev, struct uio *uio, int 
/* User requested non-blocking I/O */
error = EWOULDBLOCK;
} else {
-   if ((d-bd_rdStart + d-bd_rtout)  ticks) {
-   error = tsleep((caddr_t)d, PRINET|PCATCH, bpf,
-   d-bd_rtout);
-   } else
-   error = EWOULDBLOCK;
+   /* User requested timeout */
+   error = tsleep((caddr_t)d, PRINET|PCATCH, bpf,
+   d-bd_rtout);
}
if (error == EINTR || error == ERESTART) {
D_PUT(d);



patch: Intel CPU sensor readout correction

2014-11-27 Thread Mages, Simon
Hi there,

the temperatures 'sysctl hw.sensors' displays for each CPU
are wrong for most modern Intel CPUs.

OpenBSD uses only 100 or 85 degC as TJmax for Intel CPUs, but
in reality the TJmax value is somewhere around those specified
values. Intel defines a TJmax for every production batch
individually and burns this on the DIE, since WESTMERE we
can officially read this value in supervisor mode.

I have a patch which would fix this for CPUs since WESTMERE.

Index: sys/arch/amd64//amd64/identcpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.56
diff -u -p -u -p -r1.56 identcpu.c
--- sys/arch/amd64//amd64/identcpu.c17 Oct 2014 18:15:48 -  1.56
+++ sys/arch/amd64//amd64/identcpu.c27 Nov 2014 10:14:05 -
@@ -179,24 +179,75 @@ void  intelcore_update_sensor(void *args)
 /*
  * Temperature read on the CPU is relative to the maximum
  * temperature supported by the CPU, Tj(Max).
- * Poorly documented, refer to:
- * http://softwarecommunity.intel.com/isn/Community/
- * en-US/forums/thread/30228638.aspx
- * Basically, depending on a bit in one msr, the max is either 85 or 100.
- * Then we subtract the temperature portion of thermal status from
- * max to get current temperature.
+ * Refer to:
+ * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
+ * Section 35 and
+ * http://www.intel.com/content/dam/www/public/us/en/documents/
+ * white-papers/cpu-monitoring-dts-peci-paper.pdf
+ *
+ * The Temperature on Intel CPUs can be between 70 and 105 degC, since
+ * WESTMERE we can read the TJmax from the DIE. For older CPUs we have
+ * to gues or use undocumented MSRs. Then we subtract the temperature
+ * portion of thermal status from max to get current temperature.
  */
 void
 intelcore_update_sensor(void *args)
 {
struct cpu_info *ci = (struct cpu_info *) args;
u_int64_t msr;
-   int max = 100;
+   int max;
 
-   /* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
-   if (ci-ci_model == 0xe 
-   (rdmsr(MSR_TEMPERATURE_TARGET)  MSR_TEMPERATURE_TARGET_LOW_BIT))
-   max = 85;
+   switch (ci-ci_model) {
+   case INTEL_FUTURE_MODEL_4E:
+   case INTEL_FUTURE_MODEL_56:
+   case INTEL_BROADWELL_MODEL_3D:
+   case INTEL_HASWELL_MODEL_3C:
+   case INTEL_HASWELL_MODEL_3F:
+   case INTEL_HASWELL_MODEL_45:
+   case INTEL_HASWELL_MODEL_46:
+   case INTEL_IVYBRIDGE_MODEL_3A:
+   case INTEL_IVYBRIDGE_MODEL_3E:
+   case INTEL_NEHALEM_MODEL_1A:
+   case INTEL_NEHALEM_MODEL_1E:
+   case INTEL_NEHALEM_MODEL_1F:
+   case INTEL_NEHALEM_MODEL_2E:
+   case INTEL_SANDYBRIDGE_MODEL_2A:
+   case INTEL_SANDYBRIDGE_MODEL_2D:
+   case INTEL_SILVERMONT_MODEL_37:
+   case INTEL_SILVERMONT_MODEL_4A:
+   case INTEL_SILVERMONT_MODEL_4D:
+   case INTEL_SILVERMONT_MODEL_5A:
+   case INTEL_SILVERMONT_MODEL_5D:
+   case INTEL_WESTMERE_MODEL_25:
+   case INTEL_WESTMERE_MODEL_2C:
+   case INTEL_WESTMERE_MODEL_2F:
+   /*
+* Newer CPU's can tell you what there max temperature is.
+* See: '64-ia-32-architectures-software-developer-
+* vol-3c-part-3-manual.pdf'
+*/
+   max = MSR_TEMPERATURE_TARGET_TJMAX(
+   rdmsr(MSR_TEMPERATURE_TARGET));
+   break;
+   case INTEL_YONAH_MODEL_0E:
+   /*
+* Only Core Duo and Core Solo family chips have
+* this undocumented MSR_TEMPERATURE_TARGET.
+*/
+   if (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) 
+   MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED) {
+   max = 85;
+   break;
+   }
+   /* FALLTHROUGH */
+   default:
+   /*
+* XXX: 100 degC is not the max for every not here
+* covered CPU. But newer CPU's, since Nehalem,
+* have MSR_TEMPERATURE_TARGET anyway.
+*/
+   max = 100;
+   }
 
msr = rdmsr(MSR_THERM_STATUS);
if (msr  MSR_THERM_STATUS_VALID_BIT) {
Index: sys/arch/amd64//include/specialreg.h
===
RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.28
diff -u -p -u -p -r1.28 specialreg.h
--- sys/arch/amd64//include/specialreg.h3 Jul 2014 21:15:28 -   
1.28
+++ sys/arch/amd64//include/specialreg.h27 Nov 2014 10:14:05 -
@@ -278,9 +278,19 @@
 #define MSR_PERFCTR0   0x0c1
 #define MSR_PERFCTR1   0x0c2
 #define MSR_FSB_FREQ   0x0cd   /* Core Duo/Solo only */   
-/* not documented anywhere, see intelcore_update_sensor() */
-#define MSR_TEMPERATURE_TARGET 0x0ee
-#define MSR_TEMPERATURE_TARGET_LOW_BIT 0x4000
+/*
+ *