/**
 * 
 */
package transform;

import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import au.id.jericho.lib.html.Element;
import au.id.jericho.lib.html.Source;
import au.id.jericho.lib.html.Tag;

/**
 * Queries the DBLP bibliography server and collects the BibTeX records of the
 * publications found.
 * <p>
 * A search is sent to the DBLP query CGI script, the result table of the HTML
 * answer is scanned for <code>[DBLP:key]</code> markers, and for every key the
 * matching BibTeX page is downloaded and its <code>@type{...}</code> records
 * are extracted.
 *
 * @author alexp
 */
public class DBLPQuery {

	/** CGI script that answers DBLP searches. */
	private static final String QUERY_URL =
			"http://www.informatik.uni-trier.de/ley/dbbin/dblpquery.cgi";

	/** Prefix of the page that shows the BibTeX record(s) of one DBLP key. */
	private static final String BIBTEX_BASE_URL = "http://dblp.uni-trier.de/rec/bibtex/";

	/** Charset used both to encode the form fields and to write the request body. */
	private static final String REQUEST_CHARSET = "UTF-8";

	/**
	 * Matches <code>[DBLP:key]</code>; group 1 is the key. The key may not
	 * contain <code>]</code>, so the match stops at the first closing bracket.
	 */
	private static final Pattern DBLP_KEY = Pattern.compile("\\[DBLP:([^\\]]+)\\]");

	/** Matches one BibTeX record: <code>@type{everything but a @}</code>. */
	private static final Pattern BIBTEX_ENTRY = Pattern.compile("(?s)@\\w+\\{[^@]*\\}");

	/**
	 * Matches one HTML anchor element; group 1 is the text between the tags.
	 * Case-insensitive because HTML tag names are; reluctant so that several
	 * links in one record are matched one by one.
	 */
	private static final Pattern LINK = Pattern.compile("(?is)<a\\b[^>]*>(.*?)</a>");

	/**
	 * Runs a sample title and author query and prints the collected BibTeX
	 * records to standard output. On failure the error is reported on standard
	 * error and the JVM exits with status 1.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			//Defines the query
			String query = "computability lambda";
			//Get the result
			List<String> result = new LinkedList<String>();
			DBLPQuery dblp = new DBLPQuery();
			dblp.addEntriesForTitle(query, result);
			dblp.addEntriesForAuthor(query, result);
			System.out.println(result);

		} catch (Exception e) {
			//Never fail silently: without this a network or parsing problem
			//would just produce no output at all.
			System.err.println("DBLP query failed: " + e);
			System.exit(1);
		}

	}


	/**
	 * Searches DBLP for publications whose title matches <code>query</code> and
	 * appends the BibTeX text of every hit to <code>entries</code>.
	 *
	 * @param query   the title search string
	 * @param entries list that receives one string per hit; each string holds
	 *                the BibTeX record(s) found on that hit's BibTeX page
	 * @return <code>true</code> if at least one string was appended to
	 *         <code>entries</code>, <code>false</code> otherwise
	 * @throws Exception if the query or one of the BibTeX pages cannot be
	 *                   fetched
	 */
	public boolean addEntriesForTitle(String query, List<String> entries) throws Exception{
		List<Element> rows = getResponseRows("title", query);

		boolean added = false;
		for (Element row : rows) {
			//Find the DBLP key of this row
			Matcher m = DBLP_KEY.matcher(row);
			if (!m.find()) {
				continue;//Not a publication row (e.g. a heading): nothing to fetch
			}

			String bibtex = extractBibtexEntries(BIBTEX_BASE_URL + m.group(1));
			if (bibtex.length() > 0) {
				entries.add(bibtex);
				added = true;
			}
		}

		return added;
	}

	/**
	 * Searching by author is not implemented yet; nothing is added.
	 *
	 * @param query   the author search string (currently unused)
	 * @param entries list that would receive the results (currently untouched)
	 * @return always <code>false</code>
	 * @throws Exception never thrown by the current implementation
	 */
	public boolean addEntriesForAuthor(String query, List<String> entries) throws Exception{
		//A bit harder
		return false;
	}

	/**
	 * Sends <code>key=value</code> to the DBLP query script and returns the rows
	 * of the result table of the answer.
	 *
	 * @param key   name of the form field, e.g. <code>"title"</code>
	 * @param value value of the form field
	 * @return the <code>TR</code> elements of the second table of the answer, or
	 *         an empty list if the answer has fewer than two tables; never
	 *         <code>null</code>
	 * @throws Exception if the request cannot be sent or the answer cannot be
	 *                   read
	 */
	private List<Element> getResponseRows(String key, String value) throws Exception{
		// Construct data
		//URLEncoder is what converts the query string in the appropriate format
		//(for example spaces are changed to +)
		String data = URLEncoder.encode(key, REQUEST_CHARSET) + "="
				+ URLEncoder.encode(value, REQUEST_CHARSET);

		// Send data
		URLConnection conn = new URL(QUERY_URL).openConnection();
		conn.setDoOutput(true);//Writing a request body makes this a POST
		//Write with the same charset the fields were encoded with instead of
		//the platform default.
		OutputStreamWriter wr = new OutputStreamWriter(conn.getOutputStream(),
				REQUEST_CHARSET);
		try {
			wr.write(data);
			wr.flush();
		} finally {
			wr.close();//Released even if writing fails
		}

		// Get the response
		Source response;
		InputStream in = conn.getInputStream();
		try {
			//NOTE(review): relies on Source loading the whole document in its
			//constructor, so the stream can be closed right afterwards.
			response = new Source(in);
		} finally {
			in.close();
		}

		//The second table contains the results, if any.
		List<Element> tables = response.findAllElements(Tag.TABLE);
		if(tables.size() < 2) {
			return new LinkedList<Element>();//No entries found
		}
		Element table = tables.get(1);

		//The rows of that table
		return table.findAllElements(Tag.TR);
	}

	/**
	 * Downloads the page at <code>url</code> and returns all BibTeX records
	 * found in it.
	 *
	 * @param url address of a DBLP BibTeX page
	 * @return the cleaned records, each followed by a newline; the empty string
	 *         if the page contains no record
	 * @throws Exception if the page cannot be fetched
	 */
	private String extractBibtexEntries(String url) throws Exception{

		StringBuilder entries = new StringBuilder();

		Source page = new Source(new URL(url));
		Matcher m = BIBTEX_ENTRY.matcher(page);

		while(m.find()){
			entries.append(cleanBibtexEntry(m.group()));
			entries.append("\n");
		}

		return entries.toString();
	}

	/**
	 * Removes the HTML links from a BibTeX record (DBLP wraps the "DBLP" part
	 * of the cite key in a link), keeping the text of each link.
	 *
	 * @param entry one BibTeX record as it appears in the page source
	 * @return the record with every <code>&lt;a ...&gt;text&lt;/a&gt;</code>
	 *         replaced by <code>text</code>; the record unchanged if it has no
	 *         link
	 */
	private String cleanBibtexEntry(String entry){
		//"$1" makes every link be replaced by its OWN content, and leaves the
		//entry untouched when there is no link at all.
		return LINK.matcher(entry).replaceAll("$1");
	}

}