I tested this in SBCL. See the end for instructions for running it. On a randomly generated file of 3000 points, it finds 2712 segments on my machine in a little under 2 CPU minutes, which is about 23 lines or points per second. Not that fast, but hey, it's O(N²).
;;; Solution to fun pattern-recognition exercise ;http://www.cs.princeton.edu/courses/archive/spring08/cos226/assignments/lines.html ;; Briefly, the idea is that you can find sets of (exactly) collinear ;; points in O(N² lg N) time instead of O(N³) or worse, by sorting ;; the points by their angle from some starting point. ;; I think you can do O(N²) in most cases if you use a base-256 radix ;; sort. ;;; Notable features ;; I'm just using cons cells ("pairs") to hold points --- X in the ;; car, Y in the cdr. ;; The code is written in a purely functional style, with no side ;; effects except for input, output, and a couple of calls to "sort". ;;; Bugs ;; This version has an additional step at the end for removing ;; duplicate line segments. This step is actually O(S²) where S is ;; the number of segments, multiplied by an additional O(M²) step of ;; looking for common points between the line segments if they are ;; parallel. In the normal case, each point participates in zero, ;; one, or a few line segments, and the line segments are short and ;; not parallel, so this part is quite fast. ;; However, in a bad case, all of the points might be collinear. This ;; will provoke O(N³) behavior. I'm also not sure what the maximum ;; number of distinct 4-or-more-point line segments for N points is; ;; I'm sure it's not less than O(N) or more than O(N²), but if it's ;; more than O(N sqrt N), that could actually be even worse than the ;; all-collinear case. That case would be easier to fix than the ;; all-collinear case. ;; It would be straightforward to maintain a hash of already-recorded ;; (slope, x, y) tuples, and query that before adding a new line ;; segment to the list, inside of "fast". This would avoid the need ;; for the seven lines of code that currently eliminate duplicates in ;; O(S²) time, multiplying the asymptotic performance at O(N²) only by ;; a constant factor. But I haven't done this. ;; A second bug is that multiple copies of the same point will be ;; considered to be collinear with some third point under some ;; circumstances (if the third point comes earlier in the file, or if ;; the line with the third point is vertical) but not under other ;; circumstances. ;; And it's possible that "brute" is actually worse than the O(N^4) ;; performance you'd naïvely expect, due to appending a bunch of ;; possibly long lists together. ;;; Input. ;; file format: one int (on first line) giving number of points N, ;; followed by N lines each containing two ints that are the ;; coordinates of a point. We don't bother to pay attention to the ;; line boundaries. ;; Here's a sample data file (commented out): ;; 13 ;; 16384 19200 ;; 16384 18666 ;; 16384 32000 ;; 16384 21761 ;; 10000 10000 ;; 20000 10000 ;; 30000 10000 ;; 40000 10000 ;; 1 1 ;; 2 2 ;; 3 3 ;; 4 4 ;; 5 5 ;; It contains a horizontal line at y=10000, a vertical line at ;; x=16384, and a diagonal line at y=x. (defun get-point (stream) (cons (read stream) (read stream))) (defun get-points (stream) (let ((npoints (read stream))) (loop for i from 1 to npoints collect (get-point stream)))) (defun get-points-from-fname (fname) (with-open-file (input fname :direction :input) (get-points input))) ;;; Brute-force version. ;; Return the slope of the line between points a and b, or i (the ;; imaginary number) for vertical lines. Positive infinity would be ;; better, but I don't think Common Lisp has a way to express it. (defun slope (a b) (if (= (cdr a) (cdr b)) #C(0 1) ; return i for vertical lines (/ (- (car b) (car a)) (- (cdr b) (cdr a))))) ;; Return true if three points are collinear. (defun collinear-p (a b c) (= (slope a b) (slope a c))) ;; Return true if all points passed as arguments are collinear. (defun all-collinear-p (a b &rest points) (loop for c in points always (collinear-p a b c))) ;; Brute-force main entry point: test all sets of four points. (defun brute (points) (loop for p1s on points append (loop with p1 = (car p1s) for p2s on (cdr p1s) append (loop with p2 = (car p2s) for p3s on (cdr p2s) ;; Try to be slightly efficient by ;; not looping over fourth points ;; when the first three points are ;; not collinear. when (collinear-p p1 p2 (car p3s)) append (loop with p3 = (car p3s) for p4 in (cdr p3s) when (all-collinear-p p1 p2 p3 p4) collect (list p1 p2 p3 p4)))))) ;;; "Fast" O(N² lg N) version: sort points by slope from a starting point ;; Return true if the slope of segment AB is less than the slope of segment AC. (defun slope-<-p (a b c) (let ((slope1 (slope a b)) (slope2 (slope a c))) (and (not (complexp slope1)) ; return false if both or just first vertical (or (complexp slope2) ; true if just second vertical (< slope1 slope2))))) ;; Partition "others", a list of points, into a list of ;; (variable-length) lines starting from "start" (appending "start" to ;; each one). (defun fast-helper (start others) (if (null others) nil ;; "iterate" loops and adds points onto "currentline" until it ;; finds a point with a different slope, at which point it ;; recurses back to the top level of "fast-helper" to handle the ;; rest of the points. (labels ((iterate (a currentline others) (cond ((null others) (list currentline)) ((= (slope a (car currentline)) (slope a (car others))) (iterate a (cons (car others) currentline) (cdr others))) (t (cons currentline (fast-helper a others)))))) (iterate start (list (car others) start) (cdr others))))) ;; Sort a list of points by their slope from a certain starting point. (defun sort-by-slope (start points) ;; copy-list because "sort" is destructive. That was a frustrating ;; bug. (sort (copy-list points) (lambda (b c) (slope-<-p start b c)))) ;; "Fast" main entry point. ;; For each point, finds all the line segments starting from it, and ;; discards the ones whose length is less than 4. (defun fast (points) (loop for p1s on points append (let* ((p1 (car p1s)) (p2s (sort-by-slope p1 (cdr p1s)))) (remove-if (lambda (line) (< (length line) 4)) (fast-helper p1 p2s))))) ;;; Eliminating duplicate segments. ;; We assume that segments will have their last point in common. ;; This is valid with "fast" because all of the duplicate segments ;; will include the last point in the list. (defun segments-equal (a b) (and (= (slope (car a) (cadr a)) (slope (car b) (cadr b))) (intersection a b))) ;; Return the first member of each equivalence class of segments. ;; This does the right thing with "fast" because "fast" always ;; generates the longest segment first. It doesn't do the correct ;; thing with "brute". (defun distinct-segments (lines) (remove-duplicates lines :test #'segments-equal :from-end t)) (defun distinct-fast (points) (distinct-segments (fast points))) ;;; Output. ;; Comparator to provide a sorting order for points. ;; The only correctness constraint here is that the endpoints of a ;; line segment must sort outside of points inside that segment. (defun point-<-p (a b) (or (< (car a) (car b)) (and (= (car a) (car b)) (< (cdr a) (cdr b))))) ;; Destructively sort the points within lines to bring their endpoints ;; to the ends. (defun normalize-lines (lines) (mapcar (lambda (points) (sort points #'point-<-p)) lines)) ;; List the line segments in a nice format, according to their endpoints. (defun print-lines (lines) (loop for line in (normalize-lines lines) do (let ((a (car line)) (b (car (last line)))) (format t "(~S, ~S) -> (~S, ~S)~%" (car a) (cdr a) (car b) (cdr b))))) ;;; Main program (defun print-lines-from-fname (linefinder fname) (print-lines (funcall linefinder (get-points-from-fname fname)))) ;;; Main program (SBCL version) (defun sbcl-main () (print-lines-from-fname #'distinct-fast (second *posix-argv*)) (quit)) ;; Run as: ;; sbcl --noinform --load collinear-points.lisp --eval '(sbcl-main)' pointsfile3 ;; Alternatively, compile as: ;; (load "collinear-points.lisp") ;; (sb-ext:save-lisp-and-die "collinear-points" :toplevel #'sbcl-main ;; :executable t)