diff --git a/modules/parser/src/main/scala/ava/parser/CharacterReader.scala b/modules/parser/src/main/scala/ava/parser/CharacterReader.scala
index 3928286..1c20aa8 100644
--- a/modules/parser/src/main/scala/ava/parser/CharacterReader.scala
+++ b/modules/parser/src/main/scala/ava/parser/CharacterReader.scala
@@ -1,6 +1,11 @@
 package ava.parser
 
 import java.io.BufferedReader
+import java.io.FileReader
+import java.io.InputStream
+import java.io.InputStreamReader
+import java.nio.charset.StandardCharsets
+import java.nio.file.Path
 
 /** Used to consume characters from the input stream.
   *
@@ -41,38 +46,43 @@ class CharacterReader(
     * accordingly.
     */
   private def fillBuffer(): Unit =
-    // TODO: Short circuit on EOF!
-    var numberOfCharacters = 0
-    if peekedAhead then
-      buffer(0) = lookAhead
-      numberOfCharacters = input.read(buffer, 1, Capacity - 1)
-    else numberOfCharacters = input.read(buffer, 0, Capacity)
+    if eof then ()
+    else
+      var numberOfCharacters = 0
+      if peekedAhead then
+        // Account for the case where we were forced to look ahead of the
+        // current stream and consume a character. This means we have to set
+        // that consumed character as the first element of the buffer.
+        buffer(0) = lookAhead
+        numberOfCharacters = input.read(buffer, 1, Capacity - 1)
+      else numberOfCharacters = input.read(buffer, 0, Capacity)
 
-    // Record the number of characters ACTUALLY consumed by the stream.
-    // This is our working buffer size.
-    length = numberOfCharacters
+      // Record the number of characters ACTUALLY consumed by the stream.
+      // This is our working buffer size.
+      length = numberOfCharacters
 
-    // If no characters could be read, we're done.
-    // EDGE CASE: If we peeked ahead to the last character, we still technically
-    // have a valid buffer of length 1.
-    val _ =
-      if numberOfCharacters <= 0 && peekedAhead then
-        // Edge case: buffer of size 1.
-        length = 1
-      else if length <= 0 then
-        // EOF case: no characters remain.
-        length = 0
-        setEof()
-        input.close()
-      else
-        // Normal case: we read some number of characters.
-        length = numberOfCharacters + (if peekedAhead then 1 else 0)
+      // If no characters could be read, we're done.
+      // EDGE CASE: If we peeked ahead to the last character, we still
+      // technically have a valid buffer of length 1 (it holds only the
+      // peeked character).
+      val _ =
+        if numberOfCharacters <= 0 && peekedAhead then
+          // Edge case: buffer of size 1.
+          length = 1
+        else if length <= 0 then
+          // EOF case: no characters remain.
+          length = 0
+          setEof()
+          input.close()
+        else
+          // Normal case: we read some number of characters.
+          length = numberOfCharacters + (if peekedAhead then 1 else 0)
 
-    // Reset the peeked state -- we already consumed it.
-    peekedAhead = false
+      // Reset the peeked state -- we already consumed it.
+      peekedAhead = false
 
-    // Reset the index position to start iterating through the buffer again.
-    index = 0
+      // Reset the index position to start iterating through the buffer again.
+      index = 0
 
   /** Observe, but do not consume, the next character in the stream. Note that
     * if the buffer is fully consumed, this function must read the underlying
@@ -85,7 +95,9 @@ class CharacterReader(
   def peek(): Option[Char] =
     if eof then None
     else if index == length then
-      // Special case -- try to read one.
+      // Special case -- we have exhausted the buffer and still want to peek.
+      // In this case, we consume exactly one character (if possible) and set
+      // the peek state.
       peekedAhead = true
       lookAhead = input.read().toChar
       if lookAhead < 0 then
@@ -97,18 +109,29 @@ class CharacterReader(
   def consume(): Option[Char] =
     if eof then None
     else if peekedAhead then
+      // If we are in the "peeked ahead" state, we know that we were forced to
+      // consume the stream to peek. Fill the buffer accordingly.
       fillBuffer()
-      lastChars.push(lookAhead)
-      Some(lookAhead)
+
+      // buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
+      // increment the index (to 1), and return the correct value.
+      val selectedChar = buffer(0)
+      lastChars.push(selectedChar)
+      index = index + 1
+      Some(selectedChar)
     else if index == length then
+      // The buffer has been exhausted. Refill it.
       fillBuffer()
       if eof then None
       else
+        // At this point, 'index' should be 0.
         val pos = index
         index = index + 1
         lastChars.push(buffer(pos))
         Some(buffer(pos))
     else
+      // Regular case -- the buffer is not exhausted and contains data. Get the
+      // data at the current position and move our pointer.
       val pos = index
       index = index + 1
       lastChars.push(buffer(pos))
@@ -126,3 +149,41 @@ class CharacterReader(
     */
   def isEof(): Boolean =
     eof
+
+  def getLastChar(): Char = lastChars.newest()
+
+  def getLookback(): Array[Char] = lastChars.newestToOldest()
+
+object CharacterReader:
+
+  /** Initialize a [[CharacterReader]] for the given file, using the UTF-8
+    * character set.
+    *
+    * @param path
+    *   The path to the file.
+    * @return
+    *   The new [[CharacterReader]].
+    */
+  def forPath(path: Path): CharacterReader =
+    new CharacterReader(
+      new BufferedReader(
+        new FileReader(path.toFile(), StandardCharsets.UTF_8)
+      )
+    )
+
+  /** Initialize a [[CharacterReader]] for the given input stream, using the
+    * UTF-8 character set.
+    *
+    * @param inputStream
+    *   The input stream.
+    * @return
+    *   The new [[CharacterReader]].
+    */
+  def forInputStream(inputStream: InputStream): CharacterReader =
+    new CharacterReader(
+      new BufferedReader(
+        new InputStreamReader(inputStream, StandardCharsets.UTF_8)
+      )
+    )
+
+end CharacterReader