Working on building out tokenization and codifying things like keywords.

This commit is contained in:
Pat Garrity 2024-02-18 15:05:52 -06:00
parent fdb150b12d
commit bad991337d
Signed by: pfm
GPG key ID: 5CA5D21BAB7F3A76
8 changed files with 166 additions and 14 deletions

View file

@ -102,11 +102,14 @@ k_true ::= 't', 'r', 'u', 'e';
(* false is a Boolean literal *) (* false is a Boolean literal *)
k_false ::= 'f', 'a', 'l', 's', 'e'; k_false ::= 'f', 'a', 'l', 's', 'e';
(* defn is fn but for definitions without implementations *)
k_defn ::= 'd', 'e', 'f', 'n';
keyword ::= k_type | k_class | k_alias | k_const | k_enum | k_record keyword ::= k_type | k_class | k_alias | k_const | k_enum | k_record
| k_object | k_let | k_mut | k_export | k_import | k_object | k_let | k_mut | k_export | k_import
| k_namespace | k_infix | k_fn | k_end | k_match | k_case | k_namespace | k_infix | k_fn | k_end | k_match | k_case
| k_lambda | k_if | k_then | k_else | k_do | k_return | k_lambda | k_if | k_then | k_else | k_do | k_return
| k_given | k_private | k_true | k_false; | k_given | k_private | k_true | k_false | k_defn;
(* ============== *) (* ============== *)
(* Literal Values *) (* Literal Values *)

View file

@ -28,7 +28,7 @@ lazy val parser = project
.settings(name := s"${gsProjectName.value}-parser-v${semVerMajor.value}") .settings(name := s"${gsProjectName.value}-parser-v${semVerMajor.value}")
.settings( .settings(
libraryDependencies ++= Seq( libraryDependencies ++= Seq(
"co.fs2" %% "fs2-core" % "3.9.4", "co.fs2" %% "fs2-core" % "3.9.4" % Test,
"co.fs2" %% "fs2-io" % "3.9.4" "co.fs2" %% "fs2-io" % "3.9.4" % Test
) )
) )

View file

@ -21,6 +21,7 @@ class CharacterReader(
private val LookBackCapacity: Int = 16 private val LookBackCapacity: Int = 16
// Internal buffers. // Internal buffers.
private var currentChar: Char = 0
private val lastChars: Ring[Char] = Ring[Char](LookBackCapacity, 0) private val lastChars: Ring[Char] = Ring[Char](LookBackCapacity, 0)
private val buffer: Array[Char] = Array.fill(Capacity)(0) private val buffer: Array[Char] = Array.fill(Capacity)(0)
@ -98,12 +99,14 @@ class CharacterReader(
// Special case -- we have exhausted the buffer and still want to peek. // Special case -- we have exhausted the buffer and still want to peek.
// In this case, we consume exactly one character (if possible) and set // In this case, we consume exactly one character (if possible) and set
// the peek state. // the peek state.
peekedAhead = true val ch = input.read()
lookAhead = input.read().toChar if ch < 0 then
if lookAhead < 0 then
eof = true eof = true
None None
else Some(lookAhead) else
peekedAhead = true
lookAhead = ch.toChar
Some(lookAhead)
else Some(buffer(index)) else Some(buffer(index))
def consume(): Option[Char] = def consume(): Option[Char] =
@ -116,10 +119,10 @@ class CharacterReader(
// buffer(0) contains what we need, but it's ALSO in lookAhead. Take it, // buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
// increment the index (to 1), and return the correct value. // increment the index (to 1), and return the correct value.
val selectedChar = buffer(0) val selectedChar = buffer(0)
lastChars.push(selectedChar) updateCurrentChar(selectedChar)
index = index + 1 index = index + 1
Some(selectedChar) Some(selectedChar)
else if index == length then else if index >= length then
// The buffer has been exhausted. Refill it. // The buffer has been exhausted. Refill it.
fillBuffer() fillBuffer()
if eof then None if eof then None
@ -127,15 +130,17 @@ class CharacterReader(
// At this point, 'index' should be 0. // At this point, 'index' should be 0.
val pos = index val pos = index
index = index + 1 index = index + 1
lastChars.push(buffer(pos)) val ch = buffer(pos)
Some(buffer(pos)) updateCurrentChar(ch)
Some(ch)
else else
// Regular case -- the buffer is not exhausted and contains data. Get the // Regular case -- the buffer is not exhausted and contains data. Get the
// data at the current position and move our pointer. // data at the current position and move our pointer.
val pos = index val pos = index
index = index + 1 index = index + 1
lastChars.push(buffer(pos)) val ch = buffer(pos)
Some(buffer(pos)) updateCurrentChar(ch)
Some(ch)
/** @return /** @return
* True if all data in the _buffer_ has been consumed, false otherwise. * True if all data in the _buffer_ has been consumed, false otherwise.
@ -150,10 +155,35 @@ class CharacterReader(
def isEof(): Boolean = def isEof(): Boolean =
eof eof
def getLastChar(): Char = lastChars.newest() /** Get the _previous_ character. If 0 or 1 characters have been read in
* total, this will throw an exception due to being uninitialized.
*
* @return
* The previous character.
*/
def getPreviousChar(): Char = lastChars.newest()
/** Get the _current_ character. Initialized to 0.
*
* @return
* The current character, or 0 if nothing has been consumed.
*/
def getCurrentChar(): Char = currentChar
/** Dump the lookback array from newest character to oldest character.
*
* @return
* The lookback array, ordered from newest to oldest.
*/
def getLookback(): Array[Char] = lastChars.newestToOldest() def getLookback(): Array[Char] = lastChars.newestToOldest()
// True once at least one character has been consumed. Tracked separately from
// currentChar so that a consumed NUL character (0) is not mistaken for the
// "nothing consumed yet" state.
private var hasCurrentChar: Boolean = false

/** Record `ch` as the current character. If a character had already been
  * consumed, that former current character is first pushed onto the lookback
  * ring, making it the newest "previous" character.
  *
  * @param ch
  *   The character that was just consumed.
  */
private def updateCurrentChar(ch: Char): Unit =
  // The value of the conditional is deliberately discarded.
  val _ =
    if hasCurrentChar then lastChars.push(currentChar)
    else ()
  hasCurrentChar = true
  currentChar = ch
object CharacterReader: object CharacterReader:
/** Initialize a [[CharacterReader]] for the given file, using the UTF-8 /** Initialize a [[CharacterReader]] for the given file, using the UTF-8

View file

@ -0,0 +1,36 @@
package ava.parser
/** A keyword, identified by the literal text that spells it.
  *
  * The hierarchy is sealed: the only instances are the case objects declared
  * in the companion object.
  *
  * @param value
  *   The exact text of the keyword.
  */
sealed abstract class Keyword(val value: String)

/** Holds one case object per keyword, in alphabetical order of object name. */
object Keyword:
  case object Alias     extends Keyword("alias")
  case object Case      extends Keyword("case")
  case object Class     extends Keyword("class")
  case object Const     extends Keyword("const")
  case object Defn      extends Keyword("defn")
  case object Do        extends Keyword("do")
  case object Else      extends Keyword("else")
  case object End       extends Keyword("end")
  case object Enum      extends Keyword("enum")
  case object Export    extends Keyword("export")
  case object False     extends Keyword("false")
  case object Fn        extends Keyword("fn")
  case object Given     extends Keyword("given")
  case object If        extends Keyword("if")
  case object Import    extends Keyword("import")
  case object Infix     extends Keyword("infix")
  // The only keyword spelled with a non-ASCII character.
  case object Lambda    extends Keyword("λ")
  case object Let       extends Keyword("let")
  case object Match     extends Keyword("match")
  case object Mut       extends Keyword("mut")
  case object Namespace extends Keyword("namespace")
  case object Object    extends Keyword("object")
  case object Private   extends Keyword("private")
  case object Record    extends Keyword("record")
  case object Return    extends Keyword("return")
  case object Then      extends Keyword("then")
  case object True      extends Keyword("true")
  case object Type      extends Keyword("type")
end Keyword

View file

@ -0,0 +1,6 @@
package ava.parser
/** A token: a piece of text paired with the category it belongs to.
  *
  * Declared `final` so the generated `equals`/`hashCode`/`copy` cannot be
  * undermined by a subclass.
  *
  * @param value
  *   The text carried by the token.
  * @param tokenType
  *   The [[TokenType]] category of the token.
  */
final case class Token(
  value: String,
  tokenType: TokenType
)

View file

@ -0,0 +1,18 @@
package ava.parser
/** The category of a token. Sealed: the only values are the case objects
  * declared in the companion object.
  */
sealed trait TokenType

/** The available [[TokenType]] values. */
object TokenType:
  // Note: within this object the simple name `Keyword` refers to this case
  // object, not to the `Keyword` class declared in the same package.
  case object Keyword          extends TokenType
  case object ReservedOperator extends TokenType
  case object Name             extends TokenType
  case object OpenParen        extends TokenType
  case object CloseParen       extends TokenType
  case object Dot              extends TokenType
  case object Comma            extends TokenType
  case object DoubleQuote      extends TokenType
  case object Hole             extends TokenType
  case object Literal          extends TokenType
end TokenType

View file

@ -0,0 +1,6 @@
package ava.parser
/** Tokenizer over a [[CharacterReader]]. Tokenization is not implemented yet:
  * [[next]] always returns `None`.
  *
  * Implements `AutoCloseable` so an instance can be managed by
  * `scala.util.Using` (or Java try-with-resources) instead of relying on a
  * manual call to [[close]].
  *
  * @param reader
  *   The character source. It is closed when [[close]] is called.
  */
class Tokenizer(private val reader: CharacterReader) extends AutoCloseable:

  /** Placeholder for token production.
    *
    * @return
    *   Always `None` in the current implementation.
    */
  def next(): Option[String] = None

  /** Close the underlying reader. */
  override def close(): Unit = reader.close()

View file

@ -2,6 +2,7 @@ package ava.parser
import cats.effect.IO import cats.effect.IO
import cats.effect.unsafe.IORuntime import cats.effect.unsafe.IORuntime
import java.io.ByteArrayInputStream
import java.io.InputStream import java.io.InputStream
import scala.io.Source import scala.io.Source
@ -25,6 +26,58 @@ class CharacterReaderTests extends munit.FunSuite:
.unsafeRunSync() .unsafeRunSync()
assertEquals(output, expected) assertEquals(output, expected)
assertEquals(reader.isEof(), true)
assertEquals(reader.isBufferExhausted(), true)
}
// An empty input must not report EOF until a read has been attempted, and
// must keep reporting EOF (and an exhausted buffer) from then on.
test("should properly handle an empty stream") {
val stream = new ByteArrayInputStream(Array[Byte]())
val reader = CharacterReader.forInputStream(stream)
// Verify initial state: nothing is available, the buffer holds no data, but
// EOF has not been observed yet because nothing has been read.
// NOTE(review): this first assertion passes its arguments in the opposite
// order from every other assertion in this test.
assertEquals(0, stream.available())
assertEquals(reader.isBufferExhausted(), true)
assertEquals(reader.isEof(), false)
// Trigger recognition of EOF.
// Case 1 Peek: index == length (initial state). The first peek has to touch
// the underlying stream.
assertEquals(reader.peek(), None)
// Case 2 Peek: eof == true (triggered state). Peeking again still yields None.
assertEquals(reader.peek(), None)
// Case Consume: eof == true (triggered state). Consuming also yields None.
assertEquals(reader.consume(), None)
assertEquals(reader.isBufferExhausted(), true)
assertEquals(reader.isEof(), true)
// Closing the reader must leave both flags unchanged.
reader.close()
assertEquals(reader.isBufferExhausted(), true)
assertEquals(reader.isEof(), true)
}
test("should receive the current and previous characters") {
val stream = new ByteArrayInputStream(Array[Byte]('a', 'b'))
val reader = CharacterReader.forInputStream(stream)
// Verify initial state.
assertEquals(0, reader.getCurrentChar().toInt)
interceptMessage[IllegalStateException](
"This ring has not been initialized with any data."
) {
reader.getPreviousChar()
}
// Perform this test without peeking, just consume characters.
assertEquals(reader.consume(), Some('a'))
assertEquals(reader.isBufferExhausted(), false)
assertEquals(reader.isEof(), false)
assertEquals(reader.getCurrentChar(), 'a')
assertEquals(reader.consume(), Some('b'))
assertEquals(reader.isBufferExhausted(), true)
assertEquals(reader.getCurrentChar(), 'b')
assertEquals(reader.getPreviousChar(), 'a')
assertEquals(reader.getLookback().toList, List('a'))
assertEquals(reader.consume(), None)
assertEquals(reader.isBufferExhausted(), true)
assertEquals(reader.isEof(), true)
} }
private def loadFileToString(name: String): String = private def loadFileToString(name: String): String =