001 package org.LiveGraph.dataFile.read; 002 003 import java.io.BufferedReader; 004 import java.io.Closeable; 005 import java.io.IOException; 006 import java.io.InputStream; 007 import java.io.InputStreamReader; 008 import java.util.ArrayList; 009 import java.util.Collections; 010 import java.util.HashMap; 011 import java.util.List; 012 import java.util.Map; 013 014 import org.LiveGraph.dataFile.common.DataFormatException; 015 016 017 import static org.LiveGraph.dataFile.common.DataFormatTools.*; 018 019 020 /** 021 * A reader for a data stream (usually, a CSV file). This reader 022 * will parse the data stream and extract the file information, the data 023 * series headings and the actual data.<br /> 024 * <br /> 025 * The information extracted from the data stream is passed to the application 026 * using an observer pattern: after a line was parsed, the appropriate 027 * {@code notifyXXXX(...)}-method of this class is called with the extracted 028 * information. The {@code notifyXXXX(...)}-methods dispatch appropriate 029 * notifications to all {@link DataStreamObserver}-objects registered with this 030 * {@code DataStreamReader}-instance.<br /> 031 * If required, an application may also overwrite the {@code notifyXXXX(...)}-methods 032 * to handle data read events.<br /> 033 * <br /> 034 * See {@link org.LiveGraph.dataFile.write.DataStreamWriter} for the details of the 035 * data file format.<br /> 036 * <br /> 037 * Note, that this class has a different role than it did in version 1.01 of the 038 * LiveGraph API. The {@code DataStreamReader} class from version 1.01 is replaced by 039 * {@link org.LiveGraph.dataCache.DataStreamToCacheReader}. 040 * 041 * <p><strong>LiveGraph</strong> (http://www.live-graph.org).</p> 042 * <p>Copyright (c) 2007 by G. Paperin.</p> 043 * <p>File: DataStreamReader.java</p> 044 * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or 045 * without modification, are permitted provided that the following terms and conditions are met: 046 * </p> 047 * <p style="font-size:smaller;">1. Redistributions of source code must retain the above 048 * acknowledgement of the LiveGraph project and its web-site, the above copyright notice, 049 * this list of conditions and the following disclaimer.<br /> 050 * 2. Redistributions in binary form must reproduce the above acknowledgement of the 051 * LiveGraph project and its web-site, the above copyright notice, this list of conditions 052 * and the following disclaimer in the documentation and/or other materials provided with 053 * the distribution.<br /> 054 * 3. All advertising materials mentioning features or use of this software or any derived 055 * software must display the following acknowledgement:<br /> 056 * <em>This product includes software developed by the LiveGraph project and its 057 * contributors.<br />(http://www.live-graph.org)</em><br /> 058 * 4. All advertising materials distributed in form of HTML pages or any other technology 059 * permitting active hyper-links that mention features or use of this software or any 060 * derived software must display the acknowledgment specified in condition 3 of this 061 * agreement, and in addition, include a visible and working hyper-link to the LiveGraph 062 * homepage (http://www.live-graph.org). 063 * </p> 064 * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY 065 * OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 066 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 067 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 068 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 069 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 070 * </p> 071 * 072 * @author Greg Paperin (http://www.paperin.org) 073 * @version {@value org.LiveGraph.LiveGraph#version} 074 * @see DataStreamObserver 075 * @see DataStreamObserverAdapter 076 * @see org.LiveGraph.dataCache.DataStreamToCacheReader 077 */ 078 public class DataStreamReader implements Closeable { 079 080 /** 081 * Data stream reader. 082 */ 083 private BufferedReader in = null; 084 085 /** 086 * Data values separator. 087 */ 088 private String separator = DefaultSeparator; 089 090 /** 091 * Whether the data values separator was already finalised. 092 */ 093 private boolean separatorSet = false; 094 095 /** 096 * Whether the data series headings are already set-up. 097 */ 098 private boolean labelsSet = false; 099 100 /** 101 * The data stream index of the next data record. 102 */ 103 private int nextDatasetFileIndex = -1; 104 105 /** 106 * Observers who want to know what's on the data stream. 107 */ 108 private List<DataStreamObserver> observers = null; 109 110 111 /** 112 * Creates a data reader on the specified stream. 113 * 114 * @param is The stream from which to read. 115 */ 116 public DataStreamReader(InputStream is) { 117 118 if (null == is) 119 throw new NullPointerException("Cannot read from a null stream."); 120 121 this.in = new BufferedReader(new InputStreamReader(is)); 122 this.separator = DefaultSeparator; 123 this.separatorSet = false; 124 this.labelsSet = false; 125 this.nextDatasetFileIndex = -1; 126 this.observers = new ArrayList<DataStreamObserver>(); 127 } 128 129 /** 130 * Creates a data reader on the specified stream and add one initial observer. 131 * 132 * @param is The stream from which to read. 133 * @param observer An observer for the data stream contents. 134 */ 135 public DataStreamReader(InputStream is, DataStreamObserver observer) { 136 this(is); 137 addObserver(observer); 138 } 139 140 141 /** 142 * Tells whether this reader's underlying data stream is ready to be read. 143 * 144 * @return {@code true} if the next {@code readFromStream()} is guaranteed not to block for input, 145 * {@code false} otherwise. Note that returning {@code false} does not guarantee that the next read 146 * will block. 147 * @throws IOException If an I/O error occurs. 148 */ 149 public boolean ready() throws IOException { 150 return in.ready(); 151 } 152 153 /** 154 * Closes the underlying data stream. Further reading is not possible after calling this method. 155 * @throws IOException If an I/O error occurs. 156 */ 157 public void close() throws IOException { 158 in.close(); 159 } 160 161 /** 162 * Reads as many data lines from the underlying stream as there are available and parses them. 163 * 164 * @return The number on non-empty data lines read. 165 * @throws IOException If an I/O error occurs. 166 * @throws DataFormatException If the data stream contents do not conform with the expected data 167 * stream format. 168 * @see org.LiveGraph.dataFile.write.DataStreamWriter 169 * @see #readFromStream(int) 170 */ 171 public int readFromStream() throws IOException, DataFormatException { 172 return readFromStream(-1); 173 } 174 175 /** 176 * Reads up to a specified number of data lines from the underlying stream, and parses the lines. 177 * Reading is stopped when the specified number of lines in reached or if no more lines are available. 178 * 179 * @param maxLines The maximum number of data lines to read (empty lines are ignored and not counted, 180 * but all other lines including comment lines are counted). If negative, all available lines will 181 * be read. 182 * @return The number on non-empty data lines read. 183 * @throws IOException If an I/O error occurs. 184 * @throws DataFormatException If the data stream contents do not conform with the expected data 185 * stream format. 186 * @see org.LiveGraph.dataFile.write.DataStreamWriter 187 */ 188 public int readFromStream(int maxLines) throws IOException, DataFormatException { 189 190 int linesRead = 0; 191 String line = null; 192 while (ready() && (0 > maxLines || linesRead < maxLines) ) { 193 line = in.readLine(); 194 line = line.trim(); 195 if (line.length() > 0) { 196 processLine(line); 197 linesRead++; 198 } 199 } 200 return linesRead; 201 } 202 203 /** 204 * Notifies observers regestered with this parser of a "data values separator set"-event. 205 * 206 * @param separator New data separator to be passed to the observers. 207 */ 208 protected void notifySeparatorSet(String separator) { 209 for (DataStreamObserver observer : observers) 210 observer.eventSeparatorSet(separator, this); 211 } 212 213 /** 214 * Notifies observers regestered with this parser of a "comment line parsed"-event. 215 * 216 * @param comment The parsed comment line to be passed to the observers. 217 */ 218 protected void notifyCommentLine(String comment) { 219 for (DataStreamObserver observer : observers) 220 observer.eventCommentLine(comment, this); 221 } 222 223 /** 224 * Notifies observers regestered with this parser of a "file info line parsed"-event. 225 * 226 * @param info The parsed file info to be passed to the observers. 227 */ 228 protected void notifyFileInfoLine(String info) { 229 for (DataStreamObserver observer : observers) 230 observer.eventFileInfoLine(info, this); 231 } 232 233 /** 234 * Notifies observers regestered with this parser of a "data series labels parsed"-event. 235 * 236 * @param labels The parsed data series labels to be passed to the observers. 237 */ 238 protected void notifyLabelsSet(List<String> labels) { 239 for (DataStreamObserver observer : observers) 240 observer.eventLabelsSet(labels, this); 241 } 242 243 /** 244 * Notifies observers regestered with this parser of a "dataset parsed"-event. 245 * 246 * @param dataTokens The parsed data tokens to be passed to the observers. 247 * @param datasetIndex The file index of the parsed dataset to be passed to the observers. 248 */ 249 protected void notifyDataLineRead(List<String> dataTokens, int datasetIndex) { 250 for (DataStreamObserver observer : observers) 251 observer.eventDataLineRead(dataTokens, datasetIndex, this); 252 } 253 254 /** 255 * Adds an observer to this parser. 256 * 257 * @param observer The observer to add. 258 * @return {@code if the specified observer cound not be added because it was already registered}, 259 * {@code true otherwise}. 260 */ 261 public boolean addObserver(DataStreamObserver observer) { 262 if (null == observer || hasObserver(observer)) 263 return false; 264 return observers.add(observer); 265 } 266 267 /** 268 * Checks whether the specified observer is registered with this parser. 269 * 270 * @param observer An observer. 271 * @return {@code true} if the specified {@code observer} is not {@code null} and is regestered 272 * with this parser, {@code false} otherwise. 273 */ 274 public boolean hasObserver(DataStreamObserver observer) { 275 if (null == observer) 276 return false; 277 return observers.contains(observer); 278 } 279 280 /** 281 * De-registeres the specified observer from this parser. 282 * 283 * @param observer An observer. 284 * @return {@code true} if the specified observer is not {@code null} and was on the 285 * list of registered observers and is now removed from this list, {@code false} otherwise. 286 */ 287 public boolean removeObserver(DataStreamObserver observer) { 288 if (null == observer) 289 return false; 290 return observers.remove(observer); 291 } 292 293 /** 294 * Counts this parser's observers. 295 * 296 * @return The number of observers registered with this parser. 297 */ 298 public int countObservers() { 299 return observers.size(); 300 } 301 302 /** 303 * This static utility method converts a list of {@code String} tokens (presumably just parsed 304 * from a data line) to a list of {@code Double} objects containing the tokens' values; tokens 305 * that cannot be parsed to a {@code Double} are represented by {@code null}-objects in the 306 * resulting list. 307 * 308 * @param tokens A list of data tokens. 309 * @return A list of the double values of the specified tokens. 310 */ 311 public static List<Double> convertTokensToDoubles(List<String> tokens) { 312 313 if (null == tokens) 314 return Collections.emptyList(); 315 316 List<Double> doubles = new ArrayList<Double>(tokens.size()); 317 for (String tok : tokens) { 318 319 if (null == tok) 320 continue; 321 322 tok = tok.trim(); 323 324 Double val = null; 325 if (null != tok && 0 < tok.length()) { 326 try { val = Double.valueOf(tok); } 327 catch (NumberFormatException e) { val = null; } 328 } 329 330 doubles.add(val); 331 } 332 return doubles; 333 } 334 335 336 /** 337 * This static utility method converts a list of strings (presumably representing a list of 338 * labels just parsed from the data file) to a list of strings where each string is unique 339 * in respect to its {@code equals} method (case sensitive); this happens by attaching 340 * counters to repreated strings: for instance, {@code ["boo", "foo", "boo"]} it converted to 341 * {@code ["boo (1)", "foo", "boo (2)"]}. 342 * 343 * @param rawLabels The list of labels to convert. 344 * @param allowEmptyLabels If this is {@code false}, all empty strings ({@code ""}) are converted 345 * to underscores ({@code "_"}) before possibly applying the counters. 346 * @return A list of unique data series labels based on the specified list. 347 */ 348 public static List<String> createUniqueLabels(List<String> rawLabels, boolean allowEmptyLabels) { 349 350 List<String> uniqueLabels = new ArrayList<String>(); 351 Map<String, Integer> labelCounts = new HashMap<String, Integer>(); 352 353 // Mark labels which occure more than once: 354 for (String rawLabel : rawLabels) { 355 356 rawLabel = rawLabel.trim(); 357 if (!allowEmptyLabels && rawLabel.length() == 0) 358 rawLabel = "_"; 359 360 if (!labelCounts.containsKey(rawLabel)) { 361 362 labelCounts.put(rawLabel, 1); 363 364 } else { 365 366 int c = labelCounts.get(rawLabel); 367 labelCounts.put(rawLabel, ++c); 368 rawLabel = rawLabel + " (" + c + ")"; 369 } 370 371 uniqueLabels.add(rawLabel); 372 } 373 374 // Change first occurence of "label" into "label (1)" for the labels which appear more than once: 375 for (String label : labelCounts.keySet()) { 376 int c = labelCounts.get(label); 377 if (1 < c) { 378 int p = uniqueLabels.indexOf(label); 379 uniqueLabels.set(p, label + " (1)"); 380 } 381 } 382 383 // Done: 384 return uniqueLabels; 385 } 386 387 388 /** 389 * Examines a data line and dispatches to a specialised parsing routine. 390 * 391 * @param line A data line. 392 * @throws DataFormatException If the data stream contents do not conform with the expected data 393 * stream format. 394 */ 395 private void processLine(String line) throws DataFormatException { 396 397 if (!separatorSet && line.startsWith(TAGSepDefinition) && line.endsWith(TAGSepDefinition)) { 398 processSeparatorDefinitionLine(line); 399 return; 400 } 401 402 if (line.startsWith(TAGComment)) { 403 processCommentLine(line); 404 return; 405 } 406 407 if (line.startsWith(TAGFileInfo)) { 408 processFileInfoLine(line); 409 return; 410 } 411 412 if (!labelsSet) { 413 processSeriesLabelsLine(line); 414 return; 415 } 416 417 if (true) { 418 processDataLine(line); 419 return; 420 } 421 422 throw new Error("The program should never get to this line!"); 423 } 424 425 /** 426 * Parses a data values separator definition line. 427 * 428 * @param line Data line to parse. 429 * @throws DataFormatException If the data line contents are not in the expected format. 430 */ 431 private void processSeparatorDefinitionLine(String line) throws DataFormatException { 432 433 if (line.length() < TAGSepDefinition.length() * 2) 434 throw new DataFormatException("Illegal separator definition: \"" + line + "\""); 435 436 if (line.length() == TAGSepDefinition.length() * 2) 437 throw new DataFormatException("Illegal separator definition: separator may not be an empty string"); 438 439 String sep = line.substring(TAGSepDefinition.length(), line.length() - TAGSepDefinition.length()); 440 441 String problem = isValidSeparator(sep); 442 if (null != problem) 443 throw new DataFormatException("Illegal separator definition: " + problem); 444 445 separator = sep; 446 separatorSet = true; 447 notifySeparatorSet(separator); 448 } 449 450 /** 451 * Parses a comments line. 452 * 453 * @param line Data line to parse. 454 * @throws DataFormatException If the data line contents are not in the expected format. 455 */ 456 private void processCommentLine(String line) throws DataFormatException { 457 String comment = ""; 458 if (line.length() > TAGComment.length()) 459 comment = line.substring(TAGComment.length()).trim(); 460 461 separatorSet = true; 462 notifyCommentLine(comment); 463 } 464 465 /** 466 * Parses a file information line. 467 * 468 * @param line Data line to parse. 469 * @throws DataFormatException If the data line contents are not in the expected format. 470 */ 471 private void processFileInfoLine(String line) throws DataFormatException { 472 String info = ""; 473 if (line.length() > TAGFileInfo.length()) 474 info = line.substring(TAGFileInfo.length()).trim(); 475 476 separatorSet = true; 477 notifyFileInfoLine(info); 478 } 479 480 /** 481 * Parses a data series headings line. 482 * 483 * @param line Data line to parse. 484 * @throws DataFormatException If the data line contents are not in the expected format. 485 */ 486 private void processSeriesLabelsLine(String line) throws DataFormatException { 487 488 DataLineTokenizer tok = new DataLineTokenizer(line, separator); 489 nextDatasetFileIndex = 0; 490 labelsSet = true; 491 separatorSet = true; 492 notifyLabelsSet(Collections.unmodifiableList(tok.getTokens())); 493 } 494 495 /** 496 * Parses a data line. 497 * 498 * @param line Data line to parse. 499 * @throws DataFormatException If the data line contents are not in the expected format. 500 */ 501 private void processDataLine(String line) throws DataFormatException { 502 503 DataLineTokenizer tok = new DataLineTokenizer(line, separator); 504 separatorSet = true; 505 notifyDataLineRead(tok.getTokens(), nextDatasetFileIndex++); 506 } 507 508 }