;; should be different on your system.
(pushnew "/home/vst/vstDocs/usr/lib/common-lisp/system/" asdf:*central-registry* :test #'equal)
(asdf:oos 'asdf:load-op :trivial-http)
(asdf:oos 'asdf:load-op :cl-ppcre)

(defun character-stream-to-string (input-stream)
  "Convert stream to string. For using with trivial-http responses."
  (let ((strlist nil))
	(do ((line (read-line input-stream nil nil)
			   (read-line input-stream nil nil)))
		((null line) (apply #'concatenate 'string (reverse strlist)))
	  (push line strlist))))

(defun get-gzipped-mails-list (index-content url-prefix)
  "Extract mothly mbox archive links from html page"
  (let ((result nil))
	(cl-ppcre:do-register-groups (first)
		("<A href=\"([^\"]*\.txt)" index-content)
	  (push first result))
	(let ((res2 nil))
	  (loop for i in result do
			(if (cl-ppcre:scan "^http:" i)
				(push i res2)
				(push (concatenate 'string url-prefix i) res2))) res2)))

(defun get-the-list-of-mboxs (url)
  "Returns a list of available mboxes on the pipermail archive index."
  (reverse (get-gzipped-mails-list
			(character-stream-to-string (caddr (thttp:http-get url)))
			url)))

(defun prepare-full-raw-archive (url-list file-path)
  "Gets all the mboxes and concatenates them into one mbox on local file path"
  (with-open-file (output-stream file-path :direction :OUTPUT)
	(loop for rfile in url-list do
		  (let ((content-stream (caddr (thttp:http-get rfile))))
			(do ((line (read-line content-stream nil nil)
					   (read-line content-stream nil nil)))
				((null line) t)
			  (format output-stream "~A~%" line))))))

(defun from-line? (line)
  "Returns true, if the line looks like a FROM line. Note that we use the regular expression *if* really needed."
  (if (and (> (length line) 11) (equal (subseq line 0 5) "From "))
	  (ppcre:scan "^From [^ ]+ at [^ ]+" line)
	  nil))

(defun date-line? (line)
  "Check for DATE line"
  (and (> (length line) 6) (equal (subseq line 0 6) "Date: ")))

(defun subject-line? (line)
  "Check for SUBJECT line"
  (and (>= (length line) 9) (equal (subseq line 0 9) "Subject: ")))

(defun empty-line? (line)
  "Check for an EMPTY line"
  (equal (length line) 0))

(defun msgid-line? (line)
  "Check for a MESSAGE-ID line"
  (and (> (length line) 13) (equal (subseq line 0 12) "Message-ID: ")))

(defun reply-to-line? (line)
  "Check for REPLY-TO line"
  (and (> (length line) 14) (equal (subseq line 0 13) "In-Reply-To: ")))

;; TODO: the function below looks horrible. do something...
(defun parse-mails-from-mbox (mbox-pathname)
  "Parse the mbox and return a list of associated lists, where each assoc. list corresponds to an email"
  (with-open-file (content-stream mbox-pathname)
	(let ((maillist nil)
		  (mail nil)
		  (seek-for-from t))
	  (do ((line (read-line content-stream nil nil)
				 (read-line content-stream nil nil)))
		  ((null line) (setf maillist (reverse maillist)))
		;; from - header - body - from - header - body - ...
		(cond
		  ((eq seek-for-from t)
		   (cond
			 ((from-line? line)
			  (progn
				(setf maillist (cons (reverse mail) maillist))
				(setf mail nil)
				(setf mail (cons (cons ':FROM line) mail))
				(setf seek-for-from nil)))
			 (t (cond 
				  ((null (assoc ':CONTENT mail)) (setf mail (cons (cons ':CONTENT line) mail)))
				  (t
				   (setf (cdr (assoc ':CONTENT mail))
						 (concatenate 'string
									  (cdr (assoc ':CONTENT mail))
									  (string #\Newline)
									  line)))))))
		  (t (cond
			   ((date-line? line) (setf mail (cons (cons ':DATE line) mail)))
			   ((subject-line? line) (setf mail (cons (cons ':SUBJECT line) mail)))
			   ((msgid-line? line) (setf mail (cons (cons ':MESSAGE-ID line) mail)))
			   ((reply-to-line? line) (setf mail (cons (cons ':REPLY-TO-ID line) mail)))
			   ((empty-line? line) (setf seek-for-from t))))))maillist)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Examples
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(setf *url-list* (get-the-list-of-mboxs "http://cs.bilgi.edu.tr/pipermail/cs-lisp/"))
(prepare-full-raw-archive *url-list* "/tmp/archive.mbox")
(setf *mails* (parse-mails-from-mbox "/tmp/archive.mbox"))
