Working on building out tokenization and codifying things like keywords.

This commit is contained in:
Pat Garrity 2024-02-18 15:05:52 -06:00
parent fdb150b12d
commit bad991337d
Signed by: pfm
GPG key ID: 5CA5D21BAB7F3A76
8 changed files with 166 additions and 14 deletions

View file

@ -102,11 +102,14 @@ k_true ::= 't', 'r', 'u', 'e';
(* false is a Boolean literal *)
k_false ::= 'f', 'a', 'l', 's', 'e';
(* defn is fn but for definitions without implementations *)
k_defn ::= 'd', 'e', 'f', 'n';
keyword ::= k_type | k_class | k_alias | k_const | k_enum | k_record
| k_object | k_let | k_mut | k_export | k_import
| k_namespace | k_infix | k_fn | k_end | k_match | k_case
| k_lambda | k_if | k_then | k_else | k_do | k_return
| k_given | k_private | k_true | k_false;
| k_given | k_private | k_true | k_false | k_defn;
(* ============== *)
(* Literal Values *)

View file

@ -28,7 +28,7 @@ lazy val parser = project
.settings(name := s"${gsProjectName.value}-parser-v${semVerMajor.value}")
.settings(
libraryDependencies ++= Seq(
"co.fs2" %% "fs2-core" % "3.9.4",
"co.fs2" %% "fs2-io" % "3.9.4"
"co.fs2" %% "fs2-core" % "3.9.4" % Test,
"co.fs2" %% "fs2-io" % "3.9.4" % Test
)
)

View file

@ -21,6 +21,7 @@ class CharacterReader(
private val LookBackCapacity: Int = 16
// Internal buffers.
private var currentChar: Char = 0
private val lastChars: Ring[Char] = Ring[Char](LookBackCapacity, 0)
private val buffer: Array[Char] = Array.fill(Capacity)(0)
@ -98,12 +99,14 @@ class CharacterReader(
// Special case -- we have exhausted the buffer and still want to peek.
// In this case, we consume exactly one character (if possible) and set
// the peek state.
peekedAhead = true
lookAhead = input.read().toChar
if lookAhead < 0 then
val ch = input.read()
if ch < 0 then
eof = true
None
else Some(lookAhead)
else
peekedAhead = true
lookAhead = ch.toChar
Some(lookAhead)
else Some(buffer(index))
def consume(): Option[Char] =
@ -116,10 +119,10 @@ class CharacterReader(
// buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
// increment the index (to 1), and return the correct value.
val selectedChar = buffer(0)
lastChars.push(selectedChar)
updateCurrentChar(selectedChar)
index = index + 1
Some(selectedChar)
else if index == length then
else if index >= length then
// The buffer has been exhausted. Refill it.
fillBuffer()
if eof then None
@ -127,15 +130,17 @@ class CharacterReader(
// At this point, 'index' should be 0.
val pos = index
index = index + 1
lastChars.push(buffer(pos))
Some(buffer(pos))
val ch = buffer(pos)
updateCurrentChar(ch)
Some(ch)
else
// Regular case -- the buffer is not exhausted and contains data. Get the
// data at the current position and move our pointer.
val pos = index
index = index + 1
lastChars.push(buffer(pos))
Some(buffer(pos))
val ch = buffer(pos)
updateCurrentChar(ch)
Some(ch)
/** @return
* True if all data in the _buffer_ has been consumed, false otherwise.
@ -150,10 +155,35 @@ class CharacterReader(
def isEof(): Boolean =
// Reports the `eof` flag as-is. The flag is not set eagerly: it only becomes
// true once a read attempt has observed the end of the input.
eof
/** Get the newest entry of the lookback ring.
 *
 * NOTE(review): the body is identical to `getPreviousChar()` -- confirm
 * whether both accessors are meant to exist.
 *
 * @return
 *   The result of `lastChars.newest()`.
 */
def getLastChar(): Char = lastChars.newest()
/** Get the _previous_ character, i.e. the character that was consumed
 * immediately before the current one.
 *
 * The value is read from the lookback ring. A character only enters the ring
 * when a later character replaces it as the current character, so if 0 or 1
 * characters have been consumed in total the ring is still empty and this
 * call throws.
 *
 * @return
 *   The previous character.
 * @throws IllegalStateException
 *   If the lookback ring has not received any data yet.
 */
def getPreviousChar(): Char = lastChars.newest()
/** Get the _current_ character: the one most recently handed out by a
 * successful `consume()`. Initialized to 0.
 *
 * Because 0 doubles as the "nothing consumed yet" marker, a consumed NUL
 * character cannot be told apart from the initial state through this
 * accessor.
 *
 * @return
 *   The current character, or 0 if nothing has been consumed.
 */
def getCurrentChar(): Char = currentChar
/** Dump the lookback array from newest character to oldest character.
 *
 * The current character is not part of the lookback; only characters that
 * preceded it are included. For example, after consuming 'a' and then 'b'
 * the result contains just 'a'.
 *
 * @return
 *   The lookback array, ordered from newest to oldest.
 */
def getLookback(): Array[Char] = lastChars.newestToOldest()
/** Make `ch` the current character, moving the character it replaces into
 * the lookback ring first.
 *
 * A current value of 0 is treated as "nothing consumed yet" and is not
 * pushed to the ring.
 *
 * NOTE(review): a real NUL character in the input is also 0, so it would be
 * skipped by the lookback as well -- confirm whether that is acceptable.
 *
 * @param ch
 *   The character that was just consumed.
 */
private def updateCurrentChar(ch: Char): Unit =
  val outgoing = currentChar
  // Only the side effect of push matters here; its result is discarded.
  val _ = if outgoing == 0 then () else lastChars.push(outgoing)
  currentChar = ch
object CharacterReader:
/** Initialize a [[CharacterReader]] for the given file, using the UTF-8

View file

@ -0,0 +1,36 @@
package ava.parser
/** A keyword of the language, paired with its exact spelling.
 *
 * The hierarchy is sealed: every keyword is one of the case objects declared
 * in the companion object.
 *
 * @param value
 *   The keyword text.
 */
sealed abstract class Keyword(val value: String)

/** All keywords, declared in alphabetical order of their object names. */
object Keyword:
  case object Alias     extends Keyword("alias")
  case object Case      extends Keyword("case")
  case object Class     extends Keyword("class")
  case object Const     extends Keyword("const")
  case object Defn      extends Keyword("defn")
  case object Do        extends Keyword("do")
  case object Else      extends Keyword("else")
  case object End       extends Keyword("end")
  case object Enum      extends Keyword("enum")
  case object Export    extends Keyword("export")
  case object False     extends Keyword("false")
  case object Fn        extends Keyword("fn")
  case object Given     extends Keyword("given")
  case object If        extends Keyword("if")
  case object Import    extends Keyword("import")
  case object Infix     extends Keyword("infix")
  case object Lambda    extends Keyword("λ")
  case object Let       extends Keyword("let")
  case object Match     extends Keyword("match")
  case object Mut       extends Keyword("mut")
  case object Namespace extends Keyword("namespace")
  case object Object    extends Keyword("object")
  case object Private   extends Keyword("private")
  case object Record    extends Keyword("record")
  case object Return    extends Keyword("return")
  case object Then      extends Keyword("then")
  case object True      extends Keyword("true")
  case object Type      extends Keyword("type")
end Keyword

View file

@ -0,0 +1,6 @@
package ava.parser
/** A value tagged with the kind of token it represents.
 *
 * Declared `final` so the generated equality, hashing and `copy` cannot be
 * broken by subclassing.
 *
 * @param value
 *   The token's text (presumably the raw text read from the source -- confirm
 *   once the tokenizer produces tokens).
 * @param tokenType
 *   The classification of this token.
 */
final case class Token(
  value: String,
  tokenType: TokenType
)

View file

@ -0,0 +1,18 @@
package ava.parser
/** The classification attached to a token.
 *
 * The trait is sealed: the case objects in the companion object are the only
 * possible values.
 */
sealed trait TokenType

/** The complete set of token classifications. */
object TokenType:
  case object Keyword          extends TokenType
  case object ReservedOperator extends TokenType
  case object Name             extends TokenType
  case object OpenParen        extends TokenType
  case object CloseParen       extends TokenType
  case object Dot              extends TokenType
  case object Comma            extends TokenType
  case object DoubleQuote      extends TokenType
  case object Hole             extends TokenType
  case object Literal          extends TokenType
end TokenType

View file

@ -0,0 +1,6 @@
package ava.parser
/** Tokenizer that sits on top of a [[CharacterReader]].
 *
 * Tokenization is not implemented yet: `next()` always reports that no token
 * is available.
 *
 * @param reader
 *   The character source. It is closed when `close()` is called.
 */
class Tokenizer(private val reader: CharacterReader):

  /** Get the next token.
   *
   * @return
   *   Always `None` in the current implementation.
   */
  def next(): Option[String] = Option.empty[String]

  /** Close the underlying reader. */
  def close(): Unit = reader.close()

end Tokenizer

View file

@ -2,6 +2,7 @@ package ava.parser
import cats.effect.IO
import cats.effect.unsafe.IORuntime
import java.io.ByteArrayInputStream
import java.io.InputStream
import scala.io.Source
@ -25,6 +26,58 @@ class CharacterReaderTests extends munit.FunSuite:
.unsafeRunSync()
assertEquals(output, expected)
assertEquals(reader.isEof(), true)
assertEquals(reader.isBufferExhausted(), true)
}
// An empty input must be reported as exhausted from the start, while EOF is
// only recognized once a read has actually been attempted.
test("should properly handle an empty stream") {
  val stream = new ByteArrayInputStream(Array[Byte]())
  val reader = CharacterReader.forInputStream(stream)

  // Verify initial state. MUnit expects (obtained, expected), so the value
  // under test goes first.
  assertEquals(stream.available(), 0)
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), false)

  // Trigger recognition of EOF.
  // Case 1 Peek: index == length (initial state)
  assertEquals(reader.peek(), None)
  // Case 2 Peek: eof == true (triggered state)
  assertEquals(reader.peek(), None)
  // Case: eof == true (triggered state)
  assertEquals(reader.consume(), None)
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), true)

  // Closing the reader must not change the reported state.
  reader.close()
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), true)
}
// Tracks how the current character, previous character and lookback change
// while a two-character input is consumed.
test("should receive the current and previous characters") {
  val stream = new ByteArrayInputStream(Array[Byte]('a', 'b'))
  val reader = CharacterReader.forInputStream(stream)

  // Verify initial state. MUnit expects (obtained, expected), so the value
  // under test goes first.
  assertEquals(reader.getCurrentChar().toInt, 0)
  interceptMessage[IllegalStateException](
    "This ring has not been initialized with any data."
  ) {
    reader.getPreviousChar()
  }

  // Perform this test without peeking, just consume characters.
  assertEquals(reader.consume(), Some('a'))
  assertEquals(reader.isBufferExhausted(), false)
  assertEquals(reader.isEof(), false)
  assertEquals(reader.getCurrentChar(), 'a')

  // Consuming 'b' moves 'a' into the lookback.
  assertEquals(reader.consume(), Some('b'))
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.getCurrentChar(), 'b')
  assertEquals(reader.getPreviousChar(), 'a')
  assertEquals(reader.getLookback().toList, List('a'))

  // A further consume hits the end of the input.
  assertEquals(reader.consume(), None)
  assertEquals(reader.isBufferExhausted(), true)
  assertEquals(reader.isEof(), true)
}
private def loadFileToString(name: String): String =