WIP: Fleshing out the CharacterReader before tests.
This commit is contained in:
parent
a10f5fa1b1
commit
7ae19980d9
1 changed files with 92 additions and 31 deletions
|
@ -1,6 +1,11 @@
|
|||
package ava.parser
|
||||
|
||||
import java.io.BufferedReader
|
||||
import java.io.FileReader
|
||||
import java.io.InputStream
|
||||
import java.io.InputStreamReader
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.nio.file.Path
|
||||
|
||||
/** Used to consume characters from the input stream.
|
||||
*
|
||||
|
@ -41,38 +46,43 @@ class CharacterReader(
|
|||
* accordingly.
|
||||
*/
|
||||
private def fillBuffer(): Unit =
|
||||
// TODO: Short circuit on EOF!
|
||||
var numberOfCharacters = 0
|
||||
if peekedAhead then
|
||||
buffer(0) = lookAhead
|
||||
numberOfCharacters = input.read(buffer, 1, Capacity - 1)
|
||||
else numberOfCharacters = input.read(buffer, 0, Capacity)
|
||||
if eof then ()
|
||||
else
|
||||
var numberOfCharacters = 0
|
||||
if peekedAhead then
|
||||
// Account for the case where we were forced to look ahead of the
|
||||
// current stream and consume a character. This means we have to set
|
||||
// that consumed character as the first element of the buffer.
|
||||
buffer(0) = lookAhead
|
||||
numberOfCharacters = input.read(buffer, 1, Capacity - 1)
|
||||
else numberOfCharacters = input.read(buffer, 0, Capacity)
|
||||
|
||||
// Record the number of characters ACTUALLY consumed by the stream.
|
||||
// This is our working buffer size.
|
||||
length = numberOfCharacters
|
||||
// Record the number of characters ACTUALLY consumed by the stream.
|
||||
// This is our working buffer size.
|
||||
length = numberOfCharacters
|
||||
|
||||
// If no characters could be read, we're done.
|
||||
// EDGE CASE: If we peeked ahead to the last character, we still technically
|
||||
// have a valid buffer of length 1.
|
||||
val _ =
|
||||
if numberOfCharacters <= 0 && peekedAhead then
|
||||
// Edge case: buffer of size 1.
|
||||
length = 1
|
||||
else if length <= 0 then
|
||||
// EOF case: no characters remain.
|
||||
length = 0
|
||||
setEof()
|
||||
input.close()
|
||||
else
|
||||
// Normal case: we read some number of characters.
|
||||
length = numberOfCharacters + (if peekedAhead then 1 else 0)
|
||||
// If no characters could be read, we're done.
|
||||
// EDGE CASE: If we peeked ahead to the last character, we still
|
||||
// technically have a valid buffer of length 1.
|
||||
val _ =
|
||||
if numberOfCharacters <= 0 && peekedAhead then
|
||||
// Edge case: buffer of size 1.
|
||||
length = 1
|
||||
else if length <= 0 then
|
||||
// EOF case: no characters remain.
|
||||
length = 0
|
||||
setEof()
|
||||
input.close()
|
||||
else
|
||||
// Normal case: we read some number of characters.
|
||||
length = numberOfCharacters + (if peekedAhead then 1 else 0)
|
||||
|
||||
// Reset the peeked state -- we already consumed it.
|
||||
peekedAhead = false
|
||||
// Reset the peeked state -- we already consumed it.
|
||||
peekedAhead = false
|
||||
|
||||
// Reset the index position to start iterating through the buffer again.
|
||||
index = 0
|
||||
// Reset the index position to start iterating through the buffer again.
|
||||
index = 0
|
||||
|
||||
/** Observe, but do not consume, the next character in the stream. Note that
|
||||
* if the buffer is fully consumed, this function must read the underlying
|
||||
|
@ -85,7 +95,9 @@ class CharacterReader(
|
|||
def peek(): Option[Char] =
|
||||
if eof then None
|
||||
else if index == length then
|
||||
// Special case -- try to read one.
|
||||
// Special case -- we have exhausted the buffer and still want to peek.
|
||||
// In this case, we consume exactly one character (if possible) and set
|
||||
// the peek state.
|
||||
peekedAhead = true
|
||||
lookAhead = input.read().toChar
|
||||
if lookAhead < 0 then
|
||||
|
@ -97,18 +109,29 @@ class CharacterReader(
|
|||
def consume(): Option[Char] =
|
||||
if eof then None
|
||||
else if peekedAhead then
|
||||
// If we are in the "peeked ahead" state, we know that we were forced to
|
||||
// consume the stream to peek. Fill the buffer accordingly.
|
||||
fillBuffer()
|
||||
lastChars.push(lookAhead)
|
||||
Some(lookAhead)
|
||||
|
||||
// buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
|
||||
// increment the index (to 1), and return the correct value.
|
||||
val selectedChar = buffer(0)
|
||||
lastChars.push(selectedChar)
|
||||
index = index + 1
|
||||
Some(selectedChar)
|
||||
else if index == length then
|
||||
// The buffer has been exhausted. Refill it.
|
||||
fillBuffer()
|
||||
if eof then None
|
||||
else
|
||||
// At this point, 'index' should be 0.
|
||||
val pos = index
|
||||
index = index + 1
|
||||
lastChars.push(buffer(pos))
|
||||
Some(buffer(pos))
|
||||
else
|
||||
// Regular case -- the buffer is not exhausted and contains data. Get the
|
||||
// data at the current position and move our pointer.
|
||||
val pos = index
|
||||
index = index + 1
|
||||
lastChars.push(buffer(pos))
|
||||
|
@ -126,3 +149,41 @@ class CharacterReader(
|
|||
*/
|
||||
def isEof(): Boolean = eof // hands back the reader's `eof` flag unchanged
|
||||
|
||||
/** Look up the newest entry held in `lastChars`.
  *
  * NOTE(review): `consume()` pushes each character it hands out onto
  * `lastChars`, so this is presumably the most recently consumed character --
  * confirm against the `lastChars` implementation, including what it yields
  * before anything has been consumed.
  *
  * @return
  *   The value of `lastChars.newest()`.
  */
def getLastChar(): Char =
  lastChars.newest()
|
||||
|
||||
/** Fetch the lookback history recorded in `lastChars`.
  *
  * NOTE(review): judging by the callee's name the array is presumably ordered
  * from the newest recorded character to the oldest -- confirm against the
  * `lastChars` implementation.
  *
  * @return
  *   The value of `lastChars.newestToOldest()`.
  */
def getLookback(): Array[Char] =
  lastChars.newestToOldest()
|
||||
|
||||
object CharacterReader:

  /** Create a [[CharacterReader]] that reads the given file, decoding its
    * contents with the UTF-8 character set.
    *
    * The file is opened through a `FileReader`, which is then wrapped in a
    * `BufferedReader` before being handed to the [[CharacterReader]].
    *
    * @param path
    *   The path to the file.
    * @return
    *   The new [[CharacterReader]].
    */
  def forPath(path: Path): CharacterReader =
    val fileReader = new FileReader(path.toFile(), StandardCharsets.UTF_8)
    new CharacterReader(new BufferedReader(fileReader))

  /** Create a [[CharacterReader]] that reads the given input stream, decoding
    * its bytes with the UTF-8 character set.
    *
    * The stream is adapted through an `InputStreamReader`, which is then
    * wrapped in a `BufferedReader` before being handed to the
    * [[CharacterReader]].
    *
    * @param inputStream
    *   The input stream.
    * @return
    *   The new [[CharacterReader]].
    */
  def forInputStream(inputStream: InputStream): CharacterReader =
    val streamReader =
      new InputStreamReader(inputStream, StandardCharsets.UTF_8)
    new CharacterReader(new BufferedReader(streamReader))

end CharacterReader
|
||||
|
|
Loading…
Add table
Reference in a new issue