BBlack has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/249745

Change subject: More efficient capture->processing for cipher_sim on many 
machines...
......................................................................

More efficient capture->processing for cipher_sim on many machines...

Nobody look at the perl :P

Change-Id: If90cc2c241575163928f7bbc9f25c4333e6524a1
---
A modules/tlsproxy/files/utils/cipher_cap.sh
M modules/tlsproxy/files/utils/cipher_sim.py
2 files changed, 80 insertions(+), 101 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/45/249745/1

diff --git a/modules/tlsproxy/files/utils/cipher_cap.sh 
b/modules/tlsproxy/files/utils/cipher_cap.sh
new file mode 100755
index 0000000..de5decc
--- /dev/null
+++ b/modules/tlsproxy/files/utils/cipher_cap.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# This captures aggregated clienthello data for the commandline-specified
+# number of seconds on our cache machines in a format suitable for later input
+# to "cipher_sim.py".  It should buffer and process the output fairly
+# efficiently, not consuming huge memory or CPU time and outputting
+# ~100K-ish of aggregate stdout data you'll want to redirect to an output file.
+#
+# Assumes installed jessie versions of "tcpdump", "tshark", and "perl".
+
+set -e
+set -o pipefail
+
+if [ $# != 1 ]; then
+    echo "You must supply a number of seconds to capture as the only argument" 
1>&2
+    exit 99
+fi
+
+SECS=$1
+case $SECS in
+    ''|*[!0-9]*)
+        echo "Seconds argument $SECS is not an integer" 1>&2
+        exit 98
+        ;;
+esac
+
+PUSER=nobody
+BPF='dst port 443 and (tcp[((tcp[12:1] & 0xf0) >> 2)+5:1] = 0x01) and 
(tcp[((tcp[12:1] & 0xf0) >> 2):1] = 0x16)'
+
+# "Pay no attention to that man behind the curtain" ...
+/usr/sbin/tcpdump -Z $PUSER -npi eth0 --direction=in -s 0 -W 1 -G $SECS -w - 
"$BPF" 2>/dev/null \
+  | su $PUSER -s /bin/sh -c "/usr/bin/tshark -n -Tfields -e 
ssl.handshake.ciphersuite -r -" 2>/dev/null \
+  | /usr/bin/perl -Minteger -lne 
'$x{join(",",sort(split(",",$_)))}++;END{while(($k,$v)=each %x){print"$v;$k"}}'
diff --git a/modules/tlsproxy/files/utils/cipher_sim.py 
b/modules/tlsproxy/files/utils/cipher_sim.py
index 8d6957a..ca82828 100755
--- a/modules/tlsproxy/files/utils/cipher_sim.py
+++ b/modules/tlsproxy/files/utils/cipher_sim.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# cipher_sim - Simulate ciphersuite negotation results based on a set of pcap
-# files of ClientHello packets from real clients, and an arbitrary
-# ssl_ciphersuite string like the ones we configure nginx with (but which may
-# be different from our current, live ciphersuite list).
+# cipher_sim - Simulate ciphersuite negotation results based on one or more
+# aggregated ClientHello data files and an arbitrary server cipher preference
+# list.
 #
 # Copyright 2015 Brandon Black
 # Copyright 2015 Wikimedia Foundation, Inc.
@@ -22,88 +21,49 @@
 # limitations under the License.
 
 # ----
-# The first part of the process is gathering the live packet data, which can
-# then be re-used and re-simulated against different server preference lists:
-# The pcap files should be generated with a BPF filter that matches only the
-# inbound ClientHello packets, such as:
+# The commandline arguments take the form:
 #
-#   dst port 443 \
-#     and (tcp[((tcp[12:1] & 0xf0) >> 2)+5:1] = 0x01) \
-#     and (tcp[((tcp[12:1] & 0xf0) >> 2):1] = 0x16)
+#   cat aggegrate_files* | cipher_sim.py -s server_pref_file
 #
-# (but note the above would also capture the outbound clienthello of local
-# processes connecting outwards from the capturing machine! - could use some
-# filtering on the dst ip addresses as well if you want to close that
-# loophole).
+# Where "server_pref_file" is a file containing a standard OpenSSL ciphersuite
+# preference list in the same form used by e.g. nginx's ssl_ciphers parameter
+# on a single line, and the standard input contains aggregated clienthello
+# stats (possibly catted from several files) formatted with lines as ...
 #
-# Several different sniffer utilities can capture with such a BPF filter.
-# With "tshark" in non-promiscious mode on eth0 and stopping after 1000
-# packets, the command looks like:
-#   tshark -n -p -i eth0 -w /tmp/output.cap -c 1000 -f <bpf filter above>
+# NNN;C1,C2,C3,...
 #
-# Once you have your capture file(s), move them all to the host you're doing
-# the analysis on (as a non-root user!), which needs installed and up-to-date
-# working binaries for both "tshark" and "openssl".  Then you feed this script
-# the name of all the capture files and the OpenSSL server-side ciphersuite
-# setting to simulate, in the same form used in HTTPS server configs, like:
-#  '-ALL:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:....'
+# ... where "NNN" is the count of clienthellos of this type seen, and
+# "C1,C2,C3,..." is the list of decimal integer ciphers those clienthellos
+# indicated.  Example of 54 clienthellos which all specified exactly this list
+# of 3 ciphers: "ECDHE-RSA-AES128-SHA, AES128-SHA, DES-CBC3-SHA"
 #
-# The output of this script is an ordered list of the negotiated ciphersuites
-# by the count and percentage (both displayed) of clients which would have
-# negotiated them.  If some clients would have failed negotiation completely
-# (no ciphersuites in common), they are listed as the special ciphersuite
-# ":HANDSHAKE-FAILURE:".  Be aware that this is only simulating
+# 54;49171,47,10
+#
+# There are no further requirements on the input data: it can contain repeats
+# (which be aggregated up as they're processed), and it can contain duplicates
+# in different order (which it would be more efficient to sort and aggregate
+# when generating the input, but isn't strictly necessary).  If you're
+# concatenating multiple aggregate files from several machines into this
+# analysis script, there likely will be all sorts of repeats, and that's ok.
+#
+# There is a companion shellscript "cipher_cap.sh" designed to generate this
+# aggregated captured data efficiently on our cp* cache machine setup today.
+#
+# The output of this simulator is an ordered list of the negotiated
+# ciphersuites by the count and percentage (both displayed) of clients which
+# would have negotiated them.  If some clients would have failed negotiation
+# completely (no ciphersuites in common), they are listed as the special
+# ciphersuite ":HANDSHAKE-FAILURE:".  Be aware that this is only simulating
 # cipher-matching, and does not account for other forms of potential handshake
 # failure such as DHE>1024 incompatibility.
 #
-# Full example of real usage (pcap files generated as indicated above...):
-# -CUT------------------------
-#   bblack@cp1065:~/cipher_work$ fold -w 72 < server_pref # pep8--
-#   -ALL:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-A
-#   ES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA3
-#   84:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-
-#   SHA256:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES256-SH
-#   A384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES256-SHA
-#   :DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-RSA-AES256-SHA256:DHE-RSA-
-#   AES256-SHA:DHE-RSA-CAMELLIA128-SHA:DHE-RSA-CAMELLIA256-SHA:AES128-GCM-SH
-#   A256:AES256-GCM-SHA384:AES128-SHA256:AES128-SHA:AES256-SHA256:AES256-SHA
-#   :DES-CBC3-SHA
-#   bblack@cp1065:~/cipher_work$ ls -l capfiles/
-#   total 3268
-#   -rw-r--r-- 1 bblack wikidev 3031756 Oct 28 12:36 second.cap
-#   -rw-r--r-- 1 bblack wikidev  310804 Oct 27 21:13 test.cap
-#   bblack@cp1065:~/cipher_work$ ./cipher_sim.py -s server_pref capfiles/*.cap
-#   Total ClientHellos             | 10993
-#   -----------------------------------------------
-#   ECDHE-ECDSA-AES128-GCM-SHA256  | 30.901% (3397)
-#   ECDHE-RSA-AES256-GCM-SHA384    | 14.236% (1565)
-#   ECDHE-RSA-AES256-SHA384        | 11.789% (1296)
-#   ECDHE-ECDSA-AES256-SHA         | 07.814% (859)
-#   AES128-SHA                     | 05.695% (626)
-#   ECDHE-RSA-AES256-SHA           | 05.494% (604)
-#   ECDHE-ECDSA-AES256-SHA384      | 04.776% (525)
-#   DHE-RSA-AES256-SHA             | 04.030% (443)
-#   ECDHE-RSA-AES128-GCM-SHA256    | 03.812% (419)
-#   ECDHE-ECDSA-AES256-GCM-SHA384  | 03.593% (395)
-#   DES-CBC3-SHA                   | 02.283% (251)
-#   AES128-SHA256                  | 01.674% (184)
-#   AES256-SHA                     | 01.355% (149)
-#   DHE-RSA-AES128-SHA             | 01.164% (128)
-#   ECDHE-ECDSA-AES128-SHA256      | 00.382% (42)
-#   ECDHE-ECDSA-AES128-SHA         | 00.373% (41)
-#   DHE-RSA-AES256-GCM-SHA384      | 00.337% (37)
-#   DHE-RSA-CAMELLIA256-SHA        | 00.109% (12)
-#   DHE-RSA-AES256-SHA256          | 00.073% (8)
-#   AES256-SHA256                  | 00.036% (4)
-#   DHE-RSA-AES128-GCM-SHA256      | 00.036% (4)
-#   ECDHE-RSA-AES128-SHA           | 00.027% (3)
-#   ECDHE-RSA-AES128-SHA256        | 00.009% (1)
-#
-# -CUT------------------------
+# There is also currently a special outout ":TSHARK_BLANK:" to indicate counts
+# of no cipher list at all (e.g. "54;"), which currently happens in small
+# numbers with the current capture script for unknown reasons...
 
 import os
 import re
-import glob
+import sys
 import argparse
 import subprocess
 import collections
@@ -117,20 +77,16 @@
     return ":HANDSHAKE-FAILURE:"
 
 
-def process_pcapfile(pcapf, server_pref):
+def process_stdin(server_pref):
     cipher_stats = collections.Counter()
-    shark_args = [
-        'tshark', '-n', '-l', '-Tfields', '-r', pcapf,
-        '-e', 'ssl.handshake.ciphersuite',
-    ]
-    shark = subprocess.Popen(shark_args, stdout=subprocess.PIPE)
-    for line in iter(shark.stdout.readline, b''):
-        # for some reason there are rare blanks... 0.06% in testing
-        # are these just bad hellos, inexactness of BPF filter, ?
-        if line != '\n':
-            client_cipher_nums = line.rstrip().split(',')
-            choice = get_choice(client_cipher_nums, server_pref)
-            cipher_stats[choice] += 1
+    for line in sys.stdin.readlines():
+         (ct, clist) = line.rstrip().split(';')
+         if len(clist):
+             cnums = clist.split(',')
+             choice = get_choice(cnums, server_pref)
+         else:
+             choice = ":TSHARK_BLANK:"
+         cipher_stats[choice] += int(ct)
     return cipher_stats
 
 
@@ -166,28 +122,18 @@
     p.add_argument('--serverpref', '-s', dest='serverpref', required=True,
                    metavar="FILE", nargs=1, type=file_exists,
                    help="File containing server cipher pref string")
-    p.add_argument('pcapfiles', nargs=argparse.REMAINDER,
-                   help="List of one or more pcap files")
 
     args = p.parse_args()
-    if len(args.pcapfiles) < 1:
-        raise Exception('One or more pcap files must be specified!')
-
     with open(args.serverpref[0], mode='r') as spref_file:
         spref_str = spref_file.read().rstrip()
 
-    return [spref_str, args.pcapfiles]
+    return spref_str
 
 
 def main():
-    (spref_str, pcapfiles) = parse_options()
-
+    spref_str = parse_options()
     server_pref = load_server_pref(spref_str)
-    cipher_stats = collections.Counter()
-    # XXX this could be trivially parallelized mapreduce-style per capfile ...
-    for pcapf in pcapfiles:
-        cipher_stats += process_pcapfile(pcapf, server_pref)
-
+    cipher_stats = process_stdin(server_pref)
     total = sum(cipher_stats.values())
     print "Total ClientHellos             | %d" % (total)
     print "-----------------------------------------------"

-- 
To view, visit https://gerrit.wikimedia.org/r/249745
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If90cc2c241575163928f7bbc9f25c4333e6524a1
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: BBlack <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to