WIP: Fleshing out the CharacterReader before tests.

This commit is contained in:
Pat Garrity 2024-02-10 22:43:37 -06:00
parent a10f5fa1b1
commit 7ae19980d9
Signed by: pfm
GPG key ID: 5CA5D21BAB7F3A76

View file

@ -1,6 +1,11 @@
package ava.parser package ava.parser
import java.io.BufferedReader import java.io.BufferedReader
import java.io.FileReader
import java.io.InputStream
import java.io.InputStreamReader
import java.nio.charset.StandardCharsets
import java.nio.file.Path
/** Used to consume characters from the input stream. /** Used to consume characters from the input stream.
* *
@ -41,9 +46,13 @@ class CharacterReader(
* accordingly. * accordingly.
*/ */
private def fillBuffer(): Unit = private def fillBuffer(): Unit =
// TODO: Short circuit on EOF! if eof then ()
else
var numberOfCharacters = 0 var numberOfCharacters = 0
if peekedAhead then if peekedAhead then
// Account for the case where we were forced to look ahead of the
// current stream and consume a character. This means we have to set
// that consumed character as the first element of the buffer.
buffer(0) = lookAhead buffer(0) = lookAhead
numberOfCharacters = input.read(buffer, 1, Capacity - 1) numberOfCharacters = input.read(buffer, 1, Capacity - 1)
else numberOfCharacters = input.read(buffer, 0, Capacity) else numberOfCharacters = input.read(buffer, 0, Capacity)
@ -53,7 +62,8 @@ class CharacterReader(
length = numberOfCharacters length = numberOfCharacters
// If no characters could be read, we're done. // If no characters could be read, we're done.
// EDGE CASE: If we peeked ahead to the last character, we still technically /* EDGE CASE: If we peeked ahead to the last character, we still
* technically */
// have a valid buffer of length 1. // have a valid buffer of length 1.
val _ = val _ =
if numberOfCharacters <= 0 && peekedAhead then if numberOfCharacters <= 0 && peekedAhead then
@ -85,7 +95,9 @@ class CharacterReader(
def peek(): Option[Char] = def peek(): Option[Char] =
if eof then None if eof then None
else if index == length then else if index == length then
// Special case -- try to read one. // Special case -- we have exhausted the buffer and still want to peek.
// In this case, we consume exactly one character (if possible) and set
// the peek state.
peekedAhead = true peekedAhead = true
lookAhead = input.read().toChar lookAhead = input.read().toChar
if lookAhead < 0 then if lookAhead < 0 then
@ -97,18 +109,29 @@ class CharacterReader(
def consume(): Option[Char] = def consume(): Option[Char] =
if eof then None if eof then None
else if peekedAhead then else if peekedAhead then
// If we are in the "peeked ahead" state, we know that we were forced to
// consume the stream to peek. Fill the buffer accordingly.
fillBuffer() fillBuffer()
lastChars.push(lookAhead)
Some(lookAhead) // buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
// increment the index (to 1), and return the correct value.
val selectedChar = buffer(0)
lastChars.push(selectedChar)
index = index + 1
Some(selectedChar)
else if index == length then else if index == length then
// The buffer has been exhausted. Refill it.
fillBuffer() fillBuffer()
if eof then None if eof then None
else else
// At this point, 'index' should be 0.
val pos = index val pos = index
index = index + 1 index = index + 1
lastChars.push(buffer(pos)) lastChars.push(buffer(pos))
Some(buffer(pos)) Some(buffer(pos))
else else
// Regular case -- the buffer is not exhausted and contains data. Get the
// data at the current position and move our pointer.
val pos = index val pos = index
index = index + 1 index = index + 1
lastChars.push(buffer(pos)) lastChars.push(buffer(pos))
@ -126,3 +149,41 @@ class CharacterReader(
*/ */
def isEof(): Boolean = def isEof(): Boolean =
eof eof
/** Fetch the newest entry held in `lastChars`, the lookback store that
 * `consume()` pushes each returned character into.
 *
 * NOTE(review): the behavior when nothing has been consumed yet depends on
 * `lastChars.newest()`, which is not visible here -- confirm.
 *
 * @return
 *   Whatever `lastChars.newest()` reports.
 */
def getLastChar(): Char =
  val newestChar: Char = lastChars.newest()
  newestChar
/** Fetch the contents of `lastChars`, the lookback store that `consume()`
 * pushes each returned character into.
 *
 * NOTE(review): ordering and capacity are determined by
 * `lastChars.newestToOldest()`, which is not visible here; the name suggests
 * most-recent-first -- confirm.
 *
 * @return
 *   The array produced by `lastChars.newestToOldest()`.
 */
def getLookback(): Array[Char] =
  val lookback: Array[Char] = lastChars.newestToOldest()
  lookback
object CharacterReader:

  /** Build a [[CharacterReader]] over the file at the given path, decoding
   * its bytes as UTF-8.
   *
   * The file is opened immediately; any exception raised by the underlying
   * `FileReader` constructor (or by `path.toFile()`) propagates to the
   * caller.
   *
   * @param path
   *   Location of the file to read.
   * @return
   *   A [[CharacterReader]] backed by a buffered reader over that file.
   */
  def forPath(path: Path): CharacterReader =
    val fileReader = new FileReader(path.toFile(), StandardCharsets.UTF_8)
    val buffered   = new BufferedReader(fileReader)
    new CharacterReader(buffered)

  /** Build a [[CharacterReader]] over an arbitrary input stream, decoding
   * its bytes as UTF-8.
   *
   * @param inputStream
   *   The byte stream to read characters from.
   * @return
   *   A [[CharacterReader]] backed by a buffered reader over that stream.
   */
  def forInputStream(inputStream: InputStream): CharacterReader =
    val streamReader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)
    val buffered     = new BufferedReader(streamReader)
    new CharacterReader(buffered)

end CharacterReader