Modified: websites/production/commons/content/proper/commons-csv/apidocs/src-html/org/apache/commons/csv/CSVParser.html ============================================================================== --- websites/production/commons/content/proper/commons-csv/apidocs/src-html/org/apache/commons/csv/CSVParser.html (original) +++ websites/production/commons/content/proper/commons-csv/apidocs/src-html/org/apache/commons/csv/CSVParser.html Tue Mar 26 22:16:19 2013 @@ -39,267 +39,285 @@ <FONT color="green">036</FONT> * specification of a {@link CSVFormat}.<a name="line.36"></a> <FONT color="green">037</FONT> *<a name="line.37"></a> <FONT color="green">038</FONT> * <p><a name="line.38"></a> -<FONT color="green">039</FONT> * Parsing of a csv-string having tabs as separators, '"' as an optional value encapsulator, and comments starting with<a name="line.39"></a> -<FONT color="green">040</FONT> * '#':<a name="line.40"></a> +<FONT color="green">039</FONT> * To parse a CSV input with tabs as separators, '"' (double-quote) as an optional value encapsulator, <a name="line.39"></a> +<FONT color="green">040</FONT> * and comments starting with '#', you write:<a name="line.40"></a> <FONT color="green">041</FONT> * </p><a name="line.41"></a> <FONT color="green">042</FONT> *<a name="line.42"></a> <FONT color="green">043</FONT> * <pre><a name="line.43"></a> -<FONT color="green">044</FONT> * CSVFormat format = new CSVFormat('\t', '&quot;', '#');<a name="line.44"></a> -<FONT color="green">045</FONT> * Reader in = new StringReader(&quot;a\tb\nc\td&quot;);<a name="line.45"></a> -<FONT color="green">046</FONT> * List&lt;CSVRecord&gt; records = new CSVParser(in, format).getRecords();<a name="line.46"></a> -<FONT color="green">047</FONT> * </pre><a name="line.47"></a> -<FONT color="green">048</FONT> *<a name="line.48"></a> -<FONT color="green">049</FONT> * <p><a name="line.49"></a> -<FONT color="green">050</FONT> * Parsing of a csv-string in Excel CSV format, using a for-each loop:<a 
name="line.50"></a> -<FONT color="green">051</FONT> * </p><a name="line.51"></a> -<FONT color="green">052</FONT> *<a name="line.52"></a> -<FONT color="green">053</FONT> * <pre><a name="line.53"></a> -<FONT color="green">054</FONT> * Reader in = new StringReader("a;b\nc;d");<a name="line.54"></a> -<FONT color="green">055</FONT> * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);<a name="line.55"></a> -<FONT color="green">056</FONT> * for (CSVRecord record : parser) {<a name="line.56"></a> -<FONT color="green">057</FONT> * ...<a name="line.57"></a> -<FONT color="green">058</FONT> * }<a name="line.58"></a> -<FONT color="green">059</FONT> * </pre><a name="line.59"></a> -<FONT color="green">060</FONT> *<a name="line.60"></a> -<FONT color="green">061</FONT> * <p><a name="line.61"></a> -<FONT color="green">062</FONT> * Internal parser state is completely covered by the format and the reader-state.<a name="line.62"></a> -<FONT color="green">063</FONT> * </p><a name="line.63"></a> -<FONT color="green">064</FONT> *<a name="line.64"></a> +<FONT color="green">044</FONT> * Reader in = new StringReader(&quot;a\tb\nc\td&quot;);<a name="line.44"></a> +<FONT color="green">045</FONT> * Iterable&lt;CSVRecord&gt; parser = CSVFormat.newBuilder()<a name="line.45"></a> +<FONT color="green">046</FONT> * .withCommentStart('#')<a name="line.46"></a> +<FONT color="green">047</FONT> * .withDelimiter('\t')<a name="line.47"></a> +<FONT color="green">048</FONT> * .withQuoteChar('"').parse(in);<a name="line.48"></a> +<FONT color="green">049</FONT> * for (CSVRecord csvRecord : parser) {<a name="line.49"></a> +<FONT color="green">050</FONT> * ...<a name="line.50"></a> +<FONT color="green">051</FONT> * }<a name="line.51"></a> +<FONT color="green">052</FONT> * </pre><a name="line.52"></a> +<FONT color="green">053</FONT> *<a name="line.53"></a> +<FONT color="green">054</FONT> * <p><a name="line.54"></a> +<FONT color="green">055</FONT> * To parse CSV input in a given format like Excel, you write:<a 
name="line.55"></a> +<FONT color="green">056</FONT> * </p><a name="line.56"></a> +<FONT color="green">057</FONT> *<a name="line.57"></a> +<FONT color="green">058</FONT> * <pre><a name="line.58"></a> +<FONT color="green">059</FONT> * Reader in = new StringReader("a;b\nc;d");<a name="line.59"></a> +<FONT color="green">060</FONT> * Iterable&lt;CSVRecord&gt; parser = CSVFormat.EXCEL.parse(in);<a name="line.60"></a> +<FONT color="green">061</FONT> * for (CSVRecord record : parser) {<a name="line.61"></a> +<FONT color="green">062</FONT> * ...<a name="line.62"></a> +<FONT color="green">063</FONT> * }<a name="line.63"></a> +<FONT color="green">064</FONT> * </pre><a name="line.64"></a> <FONT color="green">065</FONT> * <p><a name="line.65"></a> -<FONT color="green">066</FONT> * see <a href="package-summary.html">package documentation</a> for more details<a name="line.66"></a> +<FONT color="green">066</FONT> * You may also get a List of records:<a name="line.66"></a> <FONT color="green">067</FONT> * </p><a name="line.67"></a> -<FONT color="green">068</FONT> *<a name="line.68"></a> -<FONT color="green">069</FONT> * @version $Id: CSVParser.java 1459447 2013-03-21 18:47:40Z ggregory $<a name="line.69"></a> -<FONT color="green">070</FONT> */<a name="line.70"></a> -<FONT color="green">071</FONT> public class CSVParser implements Iterable<CSVRecord> {<a name="line.71"></a> -<FONT color="green">072</FONT> <a name="line.72"></a> -<FONT color="green">073</FONT> private final Lexer lexer;<a name="line.73"></a> -<FONT color="green">074</FONT> private final Map<String, Integer> headerMap;<a name="line.74"></a> -<FONT color="green">075</FONT> private long recordNumber;<a name="line.75"></a> -<FONT color="green">076</FONT> <a name="line.76"></a> -<FONT color="green">077</FONT> // the following objects are shared to reduce garbage<a name="line.77"></a> -<FONT color="green">078</FONT> <a name="line.78"></a> -<FONT color="green">079</FONT> /** A record buffer for getRecord(). 
Grows as necessary and is reused. */<a name="line.79"></a> -<FONT color="green">080</FONT> private final List<String> record = new ArrayList<String>();<a name="line.80"></a> -<FONT color="green">081</FONT> private final Token reusableToken = new Token();<a name="line.81"></a> -<FONT color="green">082</FONT> <a name="line.82"></a> -<FONT color="green">083</FONT> /**<a name="line.83"></a> -<FONT color="green">084</FONT> * CSV parser using the default {@link CSVFormat}.<a name="line.84"></a> -<FONT color="green">085</FONT> *<a name="line.85"></a> -<FONT color="green">086</FONT> * @param input<a name="line.86"></a> -<FONT color="green">087</FONT> * a Reader containing "csv-formatted" input<a name="line.87"></a> -<FONT color="green">088</FONT> * @throws IllegalArgumentException<a name="line.88"></a> -<FONT color="green">089</FONT> * thrown if the parameters of the format are inconsistent<a name="line.89"></a> -<FONT color="green">090</FONT> */<a name="line.90"></a> -<FONT color="green">091</FONT> public CSVParser(final Reader input) throws IOException {<a name="line.91"></a> -<FONT color="green">092</FONT> this(input, CSVFormat.DEFAULT);<a name="line.92"></a> -<FONT color="green">093</FONT> }<a name="line.93"></a> +<FONT color="green">068</FONT> * <pre><a name="line.68"></a> +<FONT color="green">069</FONT> * Reader in = new StringReader("a;b\nc;d");<a name="line.69"></a> +<FONT color="green">070</FONT> * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);<a name="line.70"></a> +<FONT color="green">071</FONT> * List&lt;CSVRecord&gt; list = parser.getRecords();<a name="line.71"></a> +<FONT color="green">072</FONT> * </pre><a name="line.72"></a> +<FONT color="green">073</FONT> * <p><a name="line.73"></a> +<FONT color="green">074</FONT> * Internal parser state is completely covered by the format and the reader-state.<a name="line.74"></a> +<FONT color="green">075</FONT> * </p><a name="line.75"></a> +<FONT color="green">076</FONT> *<a name="line.76"></a> +<FONT 
color="green">077</FONT> * <p><a name="line.77"></a> +<FONT color="green">078</FONT> * see <a href="package-summary.html">package documentation</a> for more details<a name="line.78"></a> +<FONT color="green">079</FONT> * </p><a name="line.79"></a> +<FONT color="green">080</FONT> *<a name="line.80"></a> +<FONT color="green">081</FONT> * @version $Id: CSVParser.java 1461307 2013-03-26 20:52:28Z ggregory $<a name="line.81"></a> +<FONT color="green">082</FONT> */<a name="line.82"></a> +<FONT color="green">083</FONT> public class CSVParser implements Iterable<CSVRecord> {<a name="line.83"></a> +<FONT color="green">084</FONT> <a name="line.84"></a> +<FONT color="green">085</FONT> private final Lexer lexer;<a name="line.85"></a> +<FONT color="green">086</FONT> private final Map<String, Integer> headerMap;<a name="line.86"></a> +<FONT color="green">087</FONT> private long recordNumber;<a name="line.87"></a> +<FONT color="green">088</FONT> <a name="line.88"></a> +<FONT color="green">089</FONT> // the following objects are shared to reduce garbage<a name="line.89"></a> +<FONT color="green">090</FONT> <a name="line.90"></a> +<FONT color="green">091</FONT> /** A record buffer for getRecord(). Grows as necessary and is reused. 
*/<a name="line.91"></a> +<FONT color="green">092</FONT> private final List<String> record = new ArrayList<String>();<a name="line.92"></a> +<FONT color="green">093</FONT> private final Token reusableToken = new Token();<a name="line.93"></a> <FONT color="green">094</FONT> <a name="line.94"></a> <FONT color="green">095</FONT> /**<a name="line.95"></a> -<FONT color="green">096</FONT> * Customized CSV parser using the given {@link CSVFormat}<a name="line.96"></a> +<FONT color="green">096</FONT> * CSV parser using the default {@link CSVFormat}.<a name="line.96"></a> <FONT color="green">097</FONT> *<a name="line.97"></a> <FONT color="green">098</FONT> * @param input<a name="line.98"></a> -<FONT color="green">099</FONT> * a Reader containing CSV-formatted input<a name="line.99"></a> -<FONT color="green">100</FONT> * @param format<a name="line.100"></a> -<FONT color="green">101</FONT> * the CSVFormat used for CSV parsing<a name="line.101"></a> -<FONT color="green">102</FONT> * @throws IllegalArgumentException<a name="line.102"></a> -<FONT color="green">103</FONT> * thrown if the parameters of the format are inconsistent<a name="line.103"></a> +<FONT color="green">099</FONT> * a Reader containing "csv-formatted" input<a name="line.99"></a> +<FONT color="green">100</FONT> * @throws IllegalArgumentException<a name="line.100"></a> +<FONT color="green">101</FONT> * thrown if the parameters of the format are inconsistent<a name="line.101"></a> +<FONT color="green">102</FONT> * @throws IOException<a name="line.102"></a> +<FONT color="green">103</FONT> * If an I/O error occurs<a name="line.103"></a> <FONT color="green">104</FONT> */<a name="line.104"></a> -<FONT color="green">105</FONT> public CSVParser(final Reader input, final CSVFormat format) throws IOException {<a name="line.105"></a> -<FONT color="green">106</FONT> this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));<a name="line.106"></a> -<FONT color="green">107</FONT> this.headerMap = 
initializeHeader(format);<a name="line.107"></a> -<FONT color="green">108</FONT> }<a name="line.108"></a> -<FONT color="green">109</FONT> <a name="line.109"></a> -<FONT color="green">110</FONT> /**<a name="line.110"></a> -<FONT color="green">111</FONT> * Customized CSV parser using the given {@link CSVFormat}<a name="line.111"></a> -<FONT color="green">112</FONT> *<a name="line.112"></a> -<FONT color="green">113</FONT> * @param input<a name="line.113"></a> -<FONT color="green">114</FONT> * a String containing "csv-formatted" input<a name="line.114"></a> -<FONT color="green">115</FONT> * @param format<a name="line.115"></a> -<FONT color="green">116</FONT> * the CSVFormat used for CSV parsing<a name="line.116"></a> -<FONT color="green">117</FONT> * @throws IllegalArgumentException<a name="line.117"></a> -<FONT color="green">118</FONT> * thrown if the parameters of the format are inconsistent<a name="line.118"></a> -<FONT color="green">119</FONT> */<a name="line.119"></a> -<FONT color="green">120</FONT> public CSVParser(final String input, final CSVFormat format) throws IOException {<a name="line.120"></a> -<FONT color="green">121</FONT> this(new StringReader(input), format);<a name="line.121"></a> -<FONT color="green">122</FONT> }<a name="line.122"></a> -<FONT color="green">123</FONT> <a name="line.123"></a> -<FONT color="green">124</FONT> /**<a name="line.124"></a> -<FONT color="green">125</FONT> * Returns a copy of the header map that iterates in column order.<a name="line.125"></a> -<FONT color="green">126</FONT> * <p><a name="line.126"></a> -<FONT color="green">127</FONT> * The map keys are column names.<a name="line.127"></a> -<FONT color="green">128</FONT> * The map values are 0-based indices.<a name="line.128"></a> -<FONT color="green">129</FONT> *<a name="line.129"></a> -<FONT color="green">130</FONT> * @return a copy of the header map that iterates in column order.<a name="line.130"></a> -<FONT color="green">131</FONT> */<a name="line.131"></a> -<FONT 
color="green">132</FONT> public Map<String, Integer> getHeaderMap() {<a name="line.132"></a> -<FONT color="green">133</FONT> return new LinkedHashMap<String, Integer>(headerMap);<a name="line.133"></a> -<FONT color="green">134</FONT> }<a name="line.134"></a> -<FONT color="green">135</FONT> <a name="line.135"></a> -<FONT color="green">136</FONT> /**<a name="line.136"></a> -<FONT color="green">137</FONT> * Returns the current line number in the input stream.<a name="line.137"></a> -<FONT color="green">138</FONT> * <p/><a name="line.138"></a> -<FONT color="green">139</FONT> * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the record number.<a name="line.139"></a> -<FONT color="green">140</FONT> *<a name="line.140"></a> -<FONT color="green">141</FONT> * @return current line number<a name="line.141"></a> -<FONT color="green">142</FONT> */<a name="line.142"></a> -<FONT color="green">143</FONT> public long getLineNumber() {<a name="line.143"></a> -<FONT color="green">144</FONT> return lexer.getLineNumber();<a name="line.144"></a> -<FONT color="green">145</FONT> }<a name="line.145"></a> -<FONT color="green">146</FONT> <a name="line.146"></a> -<FONT color="green">147</FONT> /**<a name="line.147"></a> -<FONT color="green">148</FONT> * Returns the current record number in the input stream.<a name="line.148"></a> -<FONT color="green">149</FONT> * <p/><a name="line.149"></a> -<FONT color="green">150</FONT> * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the line number.<a name="line.150"></a> -<FONT color="green">151</FONT> *<a name="line.151"></a> -<FONT color="green">152</FONT> * @return current line number<a name="line.152"></a> -<FONT color="green">153</FONT> */<a name="line.153"></a> -<FONT color="green">154</FONT> public long getRecordNumber() {<a name="line.154"></a> -<FONT color="green">155</FONT> return recordNumber;<a name="line.155"></a> -<FONT color="green">156</FONT> 
}<a name="line.156"></a> -<FONT color="green">157</FONT> <a name="line.157"></a> -<FONT color="green">158</FONT> /**<a name="line.158"></a> -<FONT color="green">159</FONT> * Parses the next record from the current point in the stream.<a name="line.159"></a> -<FONT color="green">160</FONT> *<a name="line.160"></a> -<FONT color="green">161</FONT> * @return the record as an array of values, or <tt>null</tt> if the end of the stream has been reached<a name="line.161"></a> -<FONT color="green">162</FONT> * @throws IOException<a name="line.162"></a> -<FONT color="green">163</FONT> * on parse error or input read-failure<a name="line.163"></a> -<FONT color="green">164</FONT> */<a name="line.164"></a> -<FONT color="green">165</FONT> CSVRecord nextRecord() throws IOException {<a name="line.165"></a> -<FONT color="green">166</FONT> CSVRecord result = null;<a name="line.166"></a> -<FONT color="green">167</FONT> record.clear();<a name="line.167"></a> -<FONT color="green">168</FONT> StringBuilder sb = null;<a name="line.168"></a> -<FONT color="green">169</FONT> do {<a name="line.169"></a> -<FONT color="green">170</FONT> reusableToken.reset();<a name="line.170"></a> -<FONT color="green">171</FONT> lexer.nextToken(reusableToken);<a name="line.171"></a> -<FONT color="green">172</FONT> switch (reusableToken.type) {<a name="line.172"></a> -<FONT color="green">173</FONT> case TOKEN:<a name="line.173"></a> -<FONT color="green">174</FONT> record.add(reusableToken.content.toString());<a name="line.174"></a> -<FONT color="green">175</FONT> break;<a name="line.175"></a> -<FONT color="green">176</FONT> case EORECORD:<a name="line.176"></a> -<FONT color="green">177</FONT> record.add(reusableToken.content.toString());<a name="line.177"></a> -<FONT color="green">178</FONT> break;<a name="line.178"></a> -<FONT color="green">179</FONT> case EOF:<a name="line.179"></a> -<FONT color="green">180</FONT> if (reusableToken.isReady) {<a name="line.180"></a> -<FONT color="green">181</FONT> 
record.add(reusableToken.content.toString());<a name="line.181"></a> -<FONT color="green">182</FONT> }<a name="line.182"></a> -<FONT color="green">183</FONT> break;<a name="line.183"></a> -<FONT color="green">184</FONT> case INVALID:<a name="line.184"></a> -<FONT color="green">185</FONT> throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");<a name="line.185"></a> -<FONT color="green">186</FONT> case COMMENT: // Ignored currently<a name="line.186"></a> -<FONT color="green">187</FONT> if (sb == null) { // first comment for this record<a name="line.187"></a> -<FONT color="green">188</FONT> sb = new StringBuilder();<a name="line.188"></a> -<FONT color="green">189</FONT> } else {<a name="line.189"></a> -<FONT color="green">190</FONT> sb.append("\n");<a name="line.190"></a> -<FONT color="green">191</FONT> }<a name="line.191"></a> -<FONT color="green">192</FONT> sb.append(reusableToken.content);<a name="line.192"></a> -<FONT color="green">193</FONT> reusableToken.type = TOKEN; // Read another token<a name="line.193"></a> -<FONT color="green">194</FONT> break;<a name="line.194"></a> -<FONT color="green">195</FONT> }<a name="line.195"></a> -<FONT color="green">196</FONT> } while (reusableToken.type == TOKEN);<a name="line.196"></a> -<FONT color="green">197</FONT> <a name="line.197"></a> -<FONT color="green">198</FONT> if (!record.isEmpty()) {<a name="line.198"></a> -<FONT color="green">199</FONT> recordNumber++;<a name="line.199"></a> -<FONT color="green">200</FONT> final String comment = sb == null ? 
null : sb.toString();<a name="line.200"></a> -<FONT color="green">201</FONT> result = new CSVRecord(record.toArray(new String[record.size()]), headerMap, comment, this.recordNumber);<a name="line.201"></a> -<FONT color="green">202</FONT> }<a name="line.202"></a> -<FONT color="green">203</FONT> return result;<a name="line.203"></a> -<FONT color="green">204</FONT> }<a name="line.204"></a> -<FONT color="green">205</FONT> <a name="line.205"></a> -<FONT color="green">206</FONT> /**<a name="line.206"></a> -<FONT color="green">207</FONT> * Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord}<a name="line.207"></a> -<FONT color="green">208</FONT> * entries.<a name="line.208"></a> -<FONT color="green">209</FONT> * <p/><a name="line.209"></a> -<FONT color="green">210</FONT> * The returned content starts at the current parse-position in the stream.<a name="line.210"></a> -<FONT color="green">211</FONT> *<a name="line.211"></a> -<FONT color="green">212</FONT> * @return list of {@link CSVRecord} entries, may be empty<a name="line.212"></a> -<FONT color="green">213</FONT> * @throws IOException<a name="line.213"></a> -<FONT color="green">214</FONT> * on parse error or input read-failure<a name="line.214"></a> -<FONT color="green">215</FONT> */<a name="line.215"></a> -<FONT color="green">216</FONT> public List<CSVRecord> getRecords() throws IOException {<a name="line.216"></a> -<FONT color="green">217</FONT> final List<CSVRecord> records = new ArrayList<CSVRecord>();<a name="line.217"></a> -<FONT color="green">218</FONT> CSVRecord rec;<a name="line.218"></a> -<FONT color="green">219</FONT> while ((rec = nextRecord()) != null) {<a name="line.219"></a> -<FONT color="green">220</FONT> records.add(rec);<a name="line.220"></a> -<FONT color="green">221</FONT> }<a name="line.221"></a> -<FONT color="green">222</FONT> return records;<a name="line.222"></a> -<FONT color="green">223</FONT> }<a name="line.223"></a> -<FONT 
color="green">224</FONT> <a name="line.224"></a> -<FONT color="green">225</FONT> /**<a name="line.225"></a> -<FONT color="green">226</FONT> * Initializes the name to index mapping if the format defines a header.<a name="line.226"></a> -<FONT color="green">227</FONT> */<a name="line.227"></a> -<FONT color="green">228</FONT> private Map<String, Integer> initializeHeader(final CSVFormat format) throws IOException {<a name="line.228"></a> -<FONT color="green">229</FONT> Map<String, Integer> hdrMap = null;<a name="line.229"></a> -<FONT color="green">230</FONT> if (format.getHeader() != null) {<a name="line.230"></a> -<FONT color="green">231</FONT> hdrMap = new LinkedHashMap<String, Integer>();<a name="line.231"></a> -<FONT color="green">232</FONT> <a name="line.232"></a> -<FONT color="green">233</FONT> String[] header = null;<a name="line.233"></a> -<FONT color="green">234</FONT> if (format.getHeader().length == 0) {<a name="line.234"></a> -<FONT color="green">235</FONT> // read the header from the first line of the file<a name="line.235"></a> -<FONT color="green">236</FONT> final CSVRecord rec = nextRecord();<a name="line.236"></a> -<FONT color="green">237</FONT> if (rec != null) {<a name="line.237"></a> -<FONT color="green">238</FONT> header = rec.values();<a name="line.238"></a> -<FONT color="green">239</FONT> }<a name="line.239"></a> -<FONT color="green">240</FONT> } else {<a name="line.240"></a> -<FONT color="green">241</FONT> header = format.getHeader();<a name="line.241"></a> -<FONT color="green">242</FONT> }<a name="line.242"></a> -<FONT color="green">243</FONT> <a name="line.243"></a> -<FONT color="green">244</FONT> // build the name to index mappings<a name="line.244"></a> -<FONT color="green">245</FONT> if (header != null) {<a name="line.245"></a> -<FONT color="green">246</FONT> for (int i = 0; i < header.length; i++) {<a name="line.246"></a> -<FONT color="green">247</FONT> hdrMap.put(header[i], Integer.valueOf(i));<a name="line.247"></a> -<FONT 
color="green">248</FONT> }<a name="line.248"></a> -<FONT color="green">249</FONT> }<a name="line.249"></a> -<FONT color="green">250</FONT> }<a name="line.250"></a> -<FONT color="green">251</FONT> return hdrMap;<a name="line.251"></a> -<FONT color="green">252</FONT> }<a name="line.252"></a> -<FONT color="green">253</FONT> <a name="line.253"></a> -<FONT color="green">254</FONT> /**<a name="line.254"></a> -<FONT color="green">255</FONT> * Returns an iterator on the records. IOExceptions occurring during the iteration are wrapped in a<a name="line.255"></a> -<FONT color="green">256</FONT> * RuntimeException.<a name="line.256"></a> -<FONT color="green">257</FONT> */<a name="line.257"></a> -<FONT color="green">258</FONT> public Iterator<CSVRecord> iterator() {<a name="line.258"></a> -<FONT color="green">259</FONT> return new Iterator<CSVRecord>() {<a name="line.259"></a> -<FONT color="green">260</FONT> private CSVRecord current;<a name="line.260"></a> +<FONT color="green">105</FONT> public CSVParser(final Reader input) throws IOException {<a name="line.105"></a> +<FONT color="green">106</FONT> this(input, CSVFormat.DEFAULT);<a name="line.106"></a> +<FONT color="green">107</FONT> }<a name="line.107"></a> +<FONT color="green">108</FONT> <a name="line.108"></a> +<FONT color="green">109</FONT> /**<a name="line.109"></a> +<FONT color="green">110</FONT> * Customized CSV parser using the given {@link CSVFormat}<a name="line.110"></a> +<FONT color="green">111</FONT> *<a name="line.111"></a> +<FONT color="green">112</FONT> * @param input<a name="line.112"></a> +<FONT color="green">113</FONT> * a Reader containing CSV-formatted input<a name="line.113"></a> +<FONT color="green">114</FONT> * @param format<a name="line.114"></a> +<FONT color="green">115</FONT> * the CSVFormat used for CSV parsing<a name="line.115"></a> +<FONT color="green">116</FONT> * @throws IllegalArgumentException<a name="line.116"></a> +<FONT color="green">117</FONT> * thrown if the parameters of the format are 
inconsistent<a name="line.117"></a> +<FONT color="green">118</FONT> * @throws IOException<a name="line.118"></a> +<FONT color="green">119</FONT> * If an I/O error occurs<a name="line.119"></a> +<FONT color="green">120</FONT> */<a name="line.120"></a> +<FONT color="green">121</FONT> public CSVParser(final Reader input, final CSVFormat format) throws IOException {<a name="line.121"></a> +<FONT color="green">122</FONT> this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));<a name="line.122"></a> +<FONT color="green">123</FONT> this.headerMap = initializeHeader(format);<a name="line.123"></a> +<FONT color="green">124</FONT> }<a name="line.124"></a> +<FONT color="green">125</FONT> <a name="line.125"></a> +<FONT color="green">126</FONT> /**<a name="line.126"></a> +<FONT color="green">127</FONT> * Customized CSV parser using the given {@link CSVFormat}<a name="line.127"></a> +<FONT color="green">128</FONT> *<a name="line.128"></a> +<FONT color="green">129</FONT> * @param input<a name="line.129"></a> +<FONT color="green">130</FONT> * a String containing "csv-formatted" input<a name="line.130"></a> +<FONT color="green">131</FONT> * @param format<a name="line.131"></a> +<FONT color="green">132</FONT> * the CSVFormat used for CSV parsing<a name="line.132"></a> +<FONT color="green">133</FONT> * @throws IllegalArgumentException<a name="line.133"></a> +<FONT color="green">134</FONT> * thrown if the parameters of the format are inconsistent<a name="line.134"></a> +<FONT color="green">135</FONT> * @throws IOException<a name="line.135"></a> +<FONT color="green">136</FONT> * If an I/O error occurs<a name="line.136"></a> +<FONT color="green">137</FONT> */<a name="line.137"></a> +<FONT color="green">138</FONT> public CSVParser(final String input, final CSVFormat format) throws IOException {<a name="line.138"></a> +<FONT color="green">139</FONT> this(new StringReader(input), format);<a name="line.139"></a> +<FONT color="green">140</FONT> }<a name="line.140"></a> +<FONT 
color="green">141</FONT> <a name="line.141"></a> +<FONT color="green">142</FONT> /**<a name="line.142"></a> +<FONT color="green">143</FONT> * Returns a copy of the header map that iterates in column order.<a name="line.143"></a> +<FONT color="green">144</FONT> * <p><a name="line.144"></a> +<FONT color="green">145</FONT> * The map keys are column names.<a name="line.145"></a> +<FONT color="green">146</FONT> * The map values are 0-based indices.<a name="line.146"></a> +<FONT color="green">147</FONT> *<a name="line.147"></a> +<FONT color="green">148</FONT> * @return a copy of the header map that iterates in column order.<a name="line.148"></a> +<FONT color="green">149</FONT> */<a name="line.149"></a> +<FONT color="green">150</FONT> public Map<String, Integer> getHeaderMap() {<a name="line.150"></a> +<FONT color="green">151</FONT> return new LinkedHashMap<String, Integer>(headerMap);<a name="line.151"></a> +<FONT color="green">152</FONT> }<a name="line.152"></a> +<FONT color="green">153</FONT> <a name="line.153"></a> +<FONT color="green">154</FONT> /**<a name="line.154"></a> +<FONT color="green">155</FONT> * Returns the current line number in the input stream.<a name="line.155"></a> +<FONT color="green">156</FONT> * <p/><a name="line.156"></a> +<FONT color="green">157</FONT> * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the record number.<a name="line.157"></a> +<FONT color="green">158</FONT> *<a name="line.158"></a> +<FONT color="green">159</FONT> * @return current line number<a name="line.159"></a> +<FONT color="green">160</FONT> */<a name="line.160"></a> +<FONT color="green">161</FONT> public long getLineNumber() {<a name="line.161"></a> +<FONT color="green">162</FONT> return lexer.getLineNumber();<a name="line.162"></a> +<FONT color="green">163</FONT> }<a name="line.163"></a> +<FONT color="green">164</FONT> <a name="line.164"></a> +<FONT color="green">165</FONT> /**<a name="line.165"></a> +<FONT 
color="green">166</FONT> * Returns the current record number in the input stream.<a name="line.166"></a> +<FONT color="green">167</FONT> * <p/><a name="line.167"></a> +<FONT color="green">168</FONT> * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the line number.<a name="line.168"></a> +<FONT color="green">169</FONT> *<a name="line.169"></a> +<FONT color="green">170</FONT> * @return current record number<a name="line.170"></a> +<FONT color="green">171</FONT> */<a name="line.171"></a> +<FONT color="green">172</FONT> public long getRecordNumber() {<a name="line.172"></a> +<FONT color="green">173</FONT> return recordNumber;<a name="line.173"></a> +<FONT color="green">174</FONT> }<a name="line.174"></a> +<FONT color="green">175</FONT> <a name="line.175"></a> +<FONT color="green">176</FONT> /**<a name="line.176"></a> +<FONT color="green">177</FONT> * Parses the next record from the current point in the stream.<a name="line.177"></a> +<FONT color="green">178</FONT> *<a name="line.178"></a> +<FONT color="green">179</FONT> * @return the record as an array of values, or <tt>null</tt> if the end of the stream has been reached<a name="line.179"></a> +<FONT color="green">180</FONT> * @throws IOException<a name="line.180"></a> +<FONT color="green">181</FONT> * on parse error or input read-failure<a name="line.181"></a> +<FONT color="green">182</FONT> */<a name="line.182"></a> +<FONT color="green">183</FONT> CSVRecord nextRecord() throws IOException {<a name="line.183"></a> +<FONT color="green">184</FONT> CSVRecord result = null;<a name="line.184"></a> +<FONT color="green">185</FONT> record.clear();<a name="line.185"></a> +<FONT color="green">186</FONT> StringBuilder sb = null;<a name="line.186"></a> +<FONT color="green">187</FONT> do {<a name="line.187"></a> +<FONT color="green">188</FONT> reusableToken.reset();<a name="line.188"></a> +<FONT color="green">189</FONT> lexer.nextToken(reusableToken);<a name="line.189"></a> +<FONT 
color="green">190</FONT> switch (reusableToken.type) {<a name="line.190"></a> +<FONT color="green">191</FONT> case TOKEN:<a name="line.191"></a> +<FONT color="green">192</FONT> record.add(reusableToken.content.toString());<a name="line.192"></a> +<FONT color="green">193</FONT> break;<a name="line.193"></a> +<FONT color="green">194</FONT> case EORECORD:<a name="line.194"></a> +<FONT color="green">195</FONT> record.add(reusableToken.content.toString());<a name="line.195"></a> +<FONT color="green">196</FONT> break;<a name="line.196"></a> +<FONT color="green">197</FONT> case EOF:<a name="line.197"></a> +<FONT color="green">198</FONT> if (reusableToken.isReady) {<a name="line.198"></a> +<FONT color="green">199</FONT> record.add(reusableToken.content.toString());<a name="line.199"></a> +<FONT color="green">200</FONT> }<a name="line.200"></a> +<FONT color="green">201</FONT> break;<a name="line.201"></a> +<FONT color="green">202</FONT> case INVALID:<a name="line.202"></a> +<FONT color="green">203</FONT> throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");<a name="line.203"></a> +<FONT color="green">204</FONT> case COMMENT: // Ignored currently<a name="line.204"></a> +<FONT color="green">205</FONT> if (sb == null) { // first comment for this record<a name="line.205"></a> +<FONT color="green">206</FONT> sb = new StringBuilder();<a name="line.206"></a> +<FONT color="green">207</FONT> } else {<a name="line.207"></a> +<FONT color="green">208</FONT> sb.append("\n");<a name="line.208"></a> +<FONT color="green">209</FONT> }<a name="line.209"></a> +<FONT color="green">210</FONT> sb.append(reusableToken.content);<a name="line.210"></a> +<FONT color="green">211</FONT> reusableToken.type = TOKEN; // Read another token<a name="line.211"></a> +<FONT color="green">212</FONT> break;<a name="line.212"></a> +<FONT color="green">213</FONT> }<a name="line.213"></a> +<FONT color="green">214</FONT> } while (reusableToken.type == TOKEN);<a name="line.214"></a> +<FONT 
color="green">215</FONT> <a name="line.215"></a> +<FONT color="green">216</FONT> if (!record.isEmpty()) {<a name="line.216"></a> +<FONT color="green">217</FONT> recordNumber++;<a name="line.217"></a> +<FONT color="green">218</FONT> final String comment = sb == null ? null : sb.toString();<a name="line.218"></a> +<FONT color="green">219</FONT> result = new CSVRecord(record.toArray(new String[record.size()]), headerMap, comment, this.recordNumber);<a name="line.219"></a> +<FONT color="green">220</FONT> }<a name="line.220"></a> +<FONT color="green">221</FONT> return result;<a name="line.221"></a> +<FONT color="green">222</FONT> }<a name="line.222"></a> +<FONT color="green">223</FONT> <a name="line.223"></a> +<FONT color="green">224</FONT> /**<a name="line.224"></a> +<FONT color="green">225</FONT> * Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord}<a name="line.225"></a> +<FONT color="green">226</FONT> * entries.<a name="line.226"></a> +<FONT color="green">227</FONT> * <p/><a name="line.227"></a> +<FONT color="green">228</FONT> * The returned content starts at the current parse-position in the stream.<a name="line.228"></a> +<FONT color="green">229</FONT> *<a name="line.229"></a> +<FONT color="green">230</FONT> * @return list of {@link CSVRecord} entries, may be empty<a name="line.230"></a> +<FONT color="green">231</FONT> * @throws IOException<a name="line.231"></a> +<FONT color="green">232</FONT> * on parse error or input read-failure<a name="line.232"></a> +<FONT color="green">233</FONT> */<a name="line.233"></a> +<FONT color="green">234</FONT> public List<CSVRecord> getRecords() throws IOException {<a name="line.234"></a> +<FONT color="green">235</FONT> final List<CSVRecord> records = new ArrayList<CSVRecord>();<a name="line.235"></a> +<FONT color="green">236</FONT> CSVRecord rec;<a name="line.236"></a> +<FONT color="green">237</FONT> while ((rec = nextRecord()) != null) {<a name="line.237"></a> +<FONT 
color="green">238</FONT> records.add(rec);<a name="line.238"></a> +<FONT color="green">239</FONT> }<a name="line.239"></a> +<FONT color="green">240</FONT> return records;<a name="line.240"></a> +<FONT color="green">241</FONT> }<a name="line.241"></a> +<FONT color="green">242</FONT> <a name="line.242"></a> +<FONT color="green">243</FONT> /**<a name="line.243"></a> +<FONT color="green">244</FONT> * Initializes the name to index mapping if the format defines a header.<a name="line.244"></a> +<FONT color="green">245</FONT> */<a name="line.245"></a> +<FONT color="green">246</FONT> private Map<String, Integer> initializeHeader(final CSVFormat format) throws IOException {<a name="line.246"></a> +<FONT color="green">247</FONT> Map<String, Integer> hdrMap = null;<a name="line.247"></a> +<FONT color="green">248</FONT> if (format.getHeader() != null) {<a name="line.248"></a> +<FONT color="green">249</FONT> hdrMap = new LinkedHashMap<String, Integer>();<a name="line.249"></a> +<FONT color="green">250</FONT> <a name="line.250"></a> +<FONT color="green">251</FONT> String[] header = null;<a name="line.251"></a> +<FONT color="green">252</FONT> if (format.getHeader().length == 0) {<a name="line.252"></a> +<FONT color="green">253</FONT> // read the header from the first line of the file<a name="line.253"></a> +<FONT color="green">254</FONT> final CSVRecord record = nextRecord();<a name="line.254"></a> +<FONT color="green">255</FONT> if (record != null) {<a name="line.255"></a> +<FONT color="green">256</FONT> header = record.values();<a name="line.256"></a> +<FONT color="green">257</FONT> }<a name="line.257"></a> +<FONT color="green">258</FONT> } else {<a name="line.258"></a> +<FONT color="green">259</FONT> header = format.getHeader();<a name="line.259"></a> +<FONT color="green">260</FONT> }<a name="line.260"></a> <FONT color="green">261</FONT> <a name="line.261"></a> -<FONT color="green">262</FONT> private CSVRecord getNextRecord() {<a name="line.262"></a> -<FONT 
color="green">263</FONT> try {<a name="line.263"></a> -<FONT color="green">264</FONT> return nextRecord();<a name="line.264"></a> -<FONT color="green">265</FONT> } catch (final IOException e) {<a name="line.265"></a> -<FONT color="green">266</FONT> // TODO: This is not great, throw an ISE instead?<a name="line.266"></a> -<FONT color="green">267</FONT> throw new RuntimeException(e);<a name="line.267"></a> -<FONT color="green">268</FONT> }<a name="line.268"></a> -<FONT color="green">269</FONT> }<a name="line.269"></a> -<FONT color="green">270</FONT> <a name="line.270"></a> -<FONT color="green">271</FONT> public boolean hasNext() {<a name="line.271"></a> -<FONT color="green">272</FONT> if (current == null) {<a name="line.272"></a> -<FONT color="green">273</FONT> current = getNextRecord();<a name="line.273"></a> -<FONT color="green">274</FONT> }<a name="line.274"></a> -<FONT color="green">275</FONT> <a name="line.275"></a> -<FONT color="green">276</FONT> return current != null;<a name="line.276"></a> -<FONT color="green">277</FONT> }<a name="line.277"></a> -<FONT color="green">278</FONT> <a name="line.278"></a> -<FONT color="green">279</FONT> public CSVRecord next() {<a name="line.279"></a> -<FONT color="green">280</FONT> CSVRecord next = current;<a name="line.280"></a> -<FONT color="green">281</FONT> current = null;<a name="line.281"></a> -<FONT color="green">282</FONT> <a name="line.282"></a> -<FONT color="green">283</FONT> if (next == null) {<a name="line.283"></a> -<FONT color="green">284</FONT> // hasNext() wasn't called before<a name="line.284"></a> -<FONT color="green">285</FONT> next = getNextRecord();<a name="line.285"></a> -<FONT color="green">286</FONT> if (next == null) {<a name="line.286"></a> -<FONT color="green">287</FONT> throw new NoSuchElementException("No more CSV records available");<a name="line.287"></a> -<FONT color="green">288</FONT> }<a name="line.288"></a> -<FONT color="green">289</FONT> }<a name="line.289"></a> -<FONT color="green">290</FONT> 
<a name="line.290"></a> -<FONT color="green">291</FONT> return next;<a name="line.291"></a> -<FONT color="green">292</FONT> }<a name="line.292"></a> +<FONT color="green">262</FONT> // build the name to index mappings<a name="line.262"></a> +<FONT color="green">263</FONT> if (header != null) {<a name="line.263"></a> +<FONT color="green">264</FONT> for (int i = 0; i < header.length; i++) {<a name="line.264"></a> +<FONT color="green">265</FONT> hdrMap.put(header[i], Integer.valueOf(i));<a name="line.265"></a> +<FONT color="green">266</FONT> }<a name="line.266"></a> +<FONT color="green">267</FONT> }<a name="line.267"></a> +<FONT color="green">268</FONT> }<a name="line.268"></a> +<FONT color="green">269</FONT> return hdrMap;<a name="line.269"></a> +<FONT color="green">270</FONT> }<a name="line.270"></a> +<FONT color="green">271</FONT> <a name="line.271"></a> +<FONT color="green">272</FONT> /**<a name="line.272"></a> +<FONT color="green">273</FONT> * Returns an iterator on the records. IOExceptions occurring during the iteration are wrapped in a<a name="line.273"></a> +<FONT color="green">274</FONT> * RuntimeException.<a name="line.274"></a> +<FONT color="green">275</FONT> */<a name="line.275"></a> +<FONT color="green">276</FONT> public Iterator<CSVRecord> iterator() {<a name="line.276"></a> +<FONT color="green">277</FONT> return new Iterator<CSVRecord>() {<a name="line.277"></a> +<FONT color="green">278</FONT> private CSVRecord current;<a name="line.278"></a> +<FONT color="green">279</FONT> <a name="line.279"></a> +<FONT color="green">280</FONT> private CSVRecord getNextRecord() {<a name="line.280"></a> +<FONT color="green">281</FONT> try {<a name="line.281"></a> +<FONT color="green">282</FONT> return nextRecord();<a name="line.282"></a> +<FONT color="green">283</FONT> } catch (final IOException e) {<a name="line.283"></a> +<FONT color="green">284</FONT> // TODO: This is not great, throw an ISE instead?<a name="line.284"></a> +<FONT color="green">285</FONT> throw new 
RuntimeException(e);<a name="line.285"></a> +<FONT color="green">286</FONT> }<a name="line.286"></a> +<FONT color="green">287</FONT> }<a name="line.287"></a> +<FONT color="green">288</FONT> <a name="line.288"></a> +<FONT color="green">289</FONT> public boolean hasNext() {<a name="line.289"></a> +<FONT color="green">290</FONT> if (current == null) {<a name="line.290"></a> +<FONT color="green">291</FONT> current = getNextRecord();<a name="line.291"></a> +<FONT color="green">292</FONT> }<a name="line.292"></a> <FONT color="green">293</FONT> <a name="line.293"></a> -<FONT color="green">294</FONT> public void remove() {<a name="line.294"></a> -<FONT color="green">295</FONT> throw new UnsupportedOperationException();<a name="line.295"></a> -<FONT color="green">296</FONT> }<a name="line.296"></a> -<FONT color="green">297</FONT> };<a name="line.297"></a> -<FONT color="green">298</FONT> }<a name="line.298"></a> -<FONT color="green">299</FONT> }<a name="line.299"></a> +<FONT color="green">294</FONT> return current != null;<a name="line.294"></a> +<FONT color="green">295</FONT> }<a name="line.295"></a> +<FONT color="green">296</FONT> <a name="line.296"></a> +<FONT color="green">297</FONT> public CSVRecord next() {<a name="line.297"></a> +<FONT color="green">298</FONT> CSVRecord next = current;<a name="line.298"></a> +<FONT color="green">299</FONT> current = null;<a name="line.299"></a> +<FONT color="green">300</FONT> <a name="line.300"></a> +<FONT color="green">301</FONT> if (next == null) {<a name="line.301"></a> +<FONT color="green">302</FONT> // hasNext() wasn't called before<a name="line.302"></a> +<FONT color="green">303</FONT> next = getNextRecord();<a name="line.303"></a> +<FONT color="green">304</FONT> if (next == null) {<a name="line.304"></a> +<FONT color="green">305</FONT> throw new NoSuchElementException("No more CSV records available");<a name="line.305"></a> +<FONT color="green">306</FONT> }<a name="line.306"></a> +<FONT color="green">307</FONT> }<a 
name="line.307"></a> +<FONT color="green">308</FONT> <a name="line.308"></a> +<FONT color="green">309</FONT> return next;<a name="line.309"></a> +<FONT color="green">310</FONT> }<a name="line.310"></a> +<FONT color="green">311</FONT> <a name="line.311"></a> +<FONT color="green">312</FONT> public void remove() {<a name="line.312"></a> +<FONT color="green">313</FONT> throw new UnsupportedOperationException();<a name="line.313"></a> +<FONT color="green">314</FONT> }<a name="line.314"></a> +<FONT color="green">315</FONT> };<a name="line.315"></a> +<FONT color="green">316</FONT> }<a name="line.316"></a> +<FONT color="green">317</FONT> }<a name="line.317"></a>