Working on building out tokenization and codifying things like keywords.
This commit is contained in:
parent
fdb150b12d
commit
bad991337d
8 changed files with 166 additions and 14 deletions
5
ava.ebnf
5
ava.ebnf
|
@ -102,11 +102,14 @@ k_true ::= 't', 'r', 'u', 'e';
|
||||||
(* false is a Boolean literal *)
|
(* false is a Boolean literal *)
|
||||||
k_false ::= 'f', 'a', 'l', 's', 'e';
|
k_false ::= 'f', 'a', 'l', 's', 'e';
|
||||||
|
|
||||||
|
(* defn is fn but for definitions without implementations *)
|
||||||
|
k_defn ::= 'd', 'e', 'f', 'n';
|
||||||
|
|
||||||
keyword ::= k_type | k_class | k_alias | k_const | k_enum | k_record
|
keyword ::= k_type | k_class | k_alias | k_const | k_enum | k_record
|
||||||
| k_object | k_let | k_mut | k_export | k_import
|
| k_object | k_let | k_mut | k_export | k_import
|
||||||
| k_namespace | k_infix | k_fn | k_end | k_match | k_case
|
| k_namespace | k_infix | k_fn | k_end | k_match | k_case
|
||||||
| k_lambda | k_if | k_then | k_else | k_do | k_return
|
| k_lambda | k_if | k_then | k_else | k_do | k_return
|
||||||
| k_given | k_private | k_true | k_false;
|
| k_given | k_private | k_true | k_false | k_defn;
|
||||||
|
|
||||||
(* ============== *)
|
(* ============== *)
|
||||||
(* Literal Values *)
|
(* Literal Values *)
|
||||||
|
|
|
@ -28,7 +28,7 @@ lazy val parser = project
|
||||||
.settings(name := s"${gsProjectName.value}-parser-v${semVerMajor.value}")
|
.settings(name := s"${gsProjectName.value}-parser-v${semVerMajor.value}")
|
||||||
.settings(
|
.settings(
|
||||||
libraryDependencies ++= Seq(
|
libraryDependencies ++= Seq(
|
||||||
"co.fs2" %% "fs2-core" % "3.9.4",
|
"co.fs2" %% "fs2-core" % "3.9.4" % Test,
|
||||||
"co.fs2" %% "fs2-io" % "3.9.4"
|
"co.fs2" %% "fs2-io" % "3.9.4" % Test
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -21,6 +21,7 @@ class CharacterReader(
|
||||||
private val LookBackCapacity: Int = 16
|
private val LookBackCapacity: Int = 16
|
||||||
|
|
||||||
// Internal buffers.
|
// Internal buffers.
|
||||||
|
private var currentChar: Char = 0
|
||||||
private val lastChars: Ring[Char] = Ring[Char](LookBackCapacity, 0)
|
private val lastChars: Ring[Char] = Ring[Char](LookBackCapacity, 0)
|
||||||
private val buffer: Array[Char] = Array.fill(Capacity)(0)
|
private val buffer: Array[Char] = Array.fill(Capacity)(0)
|
||||||
|
|
||||||
|
@ -98,12 +99,14 @@ class CharacterReader(
|
||||||
// Special case -- we have exhausted the buffer and still want to peek.
|
// Special case -- we have exhausted the buffer and still want to peek.
|
||||||
// In this case, we consume exactly one character (if possible) and set
|
// In this case, we consume exactly one character (if possible) and set
|
||||||
// the peek state.
|
// the peek state.
|
||||||
peekedAhead = true
|
val ch = input.read()
|
||||||
lookAhead = input.read().toChar
|
if ch < 0 then
|
||||||
if lookAhead < 0 then
|
|
||||||
eof = true
|
eof = true
|
||||||
None
|
None
|
||||||
else Some(lookAhead)
|
else
|
||||||
|
peekedAhead = true
|
||||||
|
lookAhead = ch.toChar
|
||||||
|
Some(lookAhead)
|
||||||
else Some(buffer(index))
|
else Some(buffer(index))
|
||||||
|
|
||||||
def consume(): Option[Char] =
|
def consume(): Option[Char] =
|
||||||
|
@ -116,10 +119,10 @@ class CharacterReader(
|
||||||
// buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
|
// buffer(0) contains what we need, but it's ALSO in lookAhead. Take it,
|
||||||
// increment the index (to 1), and return the correct value.
|
// increment the index (to 1), and return the correct value.
|
||||||
val selectedChar = buffer(0)
|
val selectedChar = buffer(0)
|
||||||
lastChars.push(selectedChar)
|
updateCurrentChar(selectedChar)
|
||||||
index = index + 1
|
index = index + 1
|
||||||
Some(selectedChar)
|
Some(selectedChar)
|
||||||
else if index == length then
|
else if index >= length then
|
||||||
// The buffer has been exhausted. Refill it.
|
// The buffer has been exhausted. Refill it.
|
||||||
fillBuffer()
|
fillBuffer()
|
||||||
if eof then None
|
if eof then None
|
||||||
|
@ -127,15 +130,17 @@ class CharacterReader(
|
||||||
// At this point, 'index' should be 0.
|
// At this point, 'index' should be 0.
|
||||||
val pos = index
|
val pos = index
|
||||||
index = index + 1
|
index = index + 1
|
||||||
lastChars.push(buffer(pos))
|
val ch = buffer(pos)
|
||||||
Some(buffer(pos))
|
updateCurrentChar(ch)
|
||||||
|
Some(ch)
|
||||||
else
|
else
|
||||||
// Regular case -- the buffer is not exhausted and contains data. Get the
|
// Regular case -- the buffer is not exhausted and contains data. Get the
|
||||||
// data at the current position and move our pointer.
|
// data at the current position and move our pointer.
|
||||||
val pos = index
|
val pos = index
|
||||||
index = index + 1
|
index = index + 1
|
||||||
lastChars.push(buffer(pos))
|
val ch = buffer(pos)
|
||||||
Some(buffer(pos))
|
updateCurrentChar(ch)
|
||||||
|
Some(ch)
|
||||||
|
|
||||||
/** @return
|
/** @return
|
||||||
* True if all data in the _buffer_ has been consumed, false otherwise.
|
* True if all data in the _buffer_ has been consumed, false otherwise.
|
||||||
|
@ -150,10 +155,35 @@ class CharacterReader(
|
||||||
def isEof(): Boolean =
|
def isEof(): Boolean =
|
||||||
eof
|
eof
|
||||||
|
|
||||||
def getLastChar(): Char = lastChars.newest()
|
/** Get the _previous_ character. If 0 or 1 characters have been read in
|
||||||
|
* total, this will throw an exception due to being uninitialized.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* The previous character.
|
||||||
|
*/
|
||||||
|
def getPreviousChar(): Char = lastChars.newest()
|
||||||
|
|
||||||
|
/** Get the _current_ character. Initialized to 0.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* The current character, or 0 if nothing has been consumed.
|
||||||
|
*/
|
||||||
|
def getCurrentChar(): Char = currentChar
|
||||||
|
|
||||||
|
/** Dump the lookback array from newest character to oldest character.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* The lookback array, ordered from newest to oldest.
|
||||||
|
*/
|
||||||
def getLookback(): Array[Char] = lastChars.newestToOldest()
|
def getLookback(): Array[Char] = lastChars.newestToOldest()
|
||||||
|
|
||||||
|
private def updateCurrentChar(ch: Char): Unit =
|
||||||
|
val _ =
|
||||||
|
if currentChar != 0 then lastChars.push(currentChar)
|
||||||
|
else ()
|
||||||
|
|
||||||
|
currentChar = ch
|
||||||
|
|
||||||
object CharacterReader:
|
object CharacterReader:
|
||||||
|
|
||||||
/** Initialize a [[CharacterReader]] for the given file, using the UTF-8
|
/** Initialize a [[CharacterReader]] for the given file, using the UTF-8
|
||||||
|
|
36
modules/parser/src/main/scala/ava/parser/Keyword.scala
Normal file
36
modules/parser/src/main/scala/ava/parser/Keyword.scala
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
package ava.parser
|
||||||
|
|
||||||
|
sealed abstract class Keyword(val value: String)
|
||||||
|
|
||||||
|
object Keyword:
|
||||||
|
|
||||||
|
case object Alias extends Keyword("alias")
|
||||||
|
case object Case extends Keyword("case")
|
||||||
|
case object Class extends Keyword("class")
|
||||||
|
case object Const extends Keyword("const")
|
||||||
|
case object Defn extends Keyword("defn")
|
||||||
|
case object Do extends Keyword("do")
|
||||||
|
case object Else extends Keyword("else")
|
||||||
|
case object End extends Keyword("end")
|
||||||
|
case object Enum extends Keyword("enum")
|
||||||
|
case object Export extends Keyword("export")
|
||||||
|
case object False extends Keyword("false")
|
||||||
|
case object Fn extends Keyword("fn")
|
||||||
|
case object Given extends Keyword("given")
|
||||||
|
case object If extends Keyword("if")
|
||||||
|
case object Import extends Keyword("import")
|
||||||
|
case object Infix extends Keyword("infix")
|
||||||
|
case object Lambda extends Keyword("λ")
|
||||||
|
case object Let extends Keyword("let")
|
||||||
|
case object Match extends Keyword("match")
|
||||||
|
case object Mut extends Keyword("mut")
|
||||||
|
case object Namespace extends Keyword("namespace")
|
||||||
|
case object Object extends Keyword("object")
|
||||||
|
case object Private extends Keyword("private")
|
||||||
|
case object Record extends Keyword("record")
|
||||||
|
case object Return extends Keyword("return")
|
||||||
|
case object Then extends Keyword("then")
|
||||||
|
case object True extends Keyword("true")
|
||||||
|
case object Type extends Keyword("type")
|
||||||
|
|
||||||
|
end Keyword
|
6
modules/parser/src/main/scala/ava/parser/Token.scala
Normal file
6
modules/parser/src/main/scala/ava/parser/Token.scala
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
package ava.parser
|
||||||
|
|
||||||
|
case class Token(
|
||||||
|
value: String,
|
||||||
|
tokenType: TokenType
|
||||||
|
)
|
18
modules/parser/src/main/scala/ava/parser/TokenType.scala
Normal file
18
modules/parser/src/main/scala/ava/parser/TokenType.scala
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
package ava.parser
|
||||||
|
|
||||||
|
sealed trait TokenType
|
||||||
|
|
||||||
|
object TokenType:
|
||||||
|
|
||||||
|
case object Keyword extends TokenType
|
||||||
|
case object ReservedOperator extends TokenType
|
||||||
|
case object Name extends TokenType
|
||||||
|
case object OpenParen extends TokenType
|
||||||
|
case object CloseParen extends TokenType
|
||||||
|
case object Dot extends TokenType
|
||||||
|
case object Comma extends TokenType
|
||||||
|
case object DoubleQuote extends TokenType
|
||||||
|
case object Hole extends TokenType
|
||||||
|
case object Literal extends TokenType
|
||||||
|
|
||||||
|
end TokenType
|
6
modules/parser/src/main/scala/ava/parser/Tokenizer.scala
Normal file
6
modules/parser/src/main/scala/ava/parser/Tokenizer.scala
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
package ava.parser
|
||||||
|
|
||||||
|
class Tokenizer(private val reader: CharacterReader):
|
||||||
|
def next(): Option[String] = None
|
||||||
|
|
||||||
|
def close(): Unit = reader.close()
|
|
@ -2,6 +2,7 @@ package ava.parser
|
||||||
|
|
||||||
import cats.effect.IO
|
import cats.effect.IO
|
||||||
import cats.effect.unsafe.IORuntime
|
import cats.effect.unsafe.IORuntime
|
||||||
|
import java.io.ByteArrayInputStream
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
|
@ -25,6 +26,58 @@ class CharacterReaderTests extends munit.FunSuite:
|
||||||
.unsafeRunSync()
|
.unsafeRunSync()
|
||||||
|
|
||||||
assertEquals(output, expected)
|
assertEquals(output, expected)
|
||||||
|
assertEquals(reader.isEof(), true)
|
||||||
|
assertEquals(reader.isBufferExhausted(), true)
|
||||||
|
}
|
||||||
|
|
||||||
|
test("should properly handle an empty stream") {
|
||||||
|
val stream = new ByteArrayInputStream(Array[Byte]())
|
||||||
|
val reader = CharacterReader.forInputStream(stream)
|
||||||
|
|
||||||
|
// Verify initial state.
|
||||||
|
assertEquals(0, stream.available())
|
||||||
|
assertEquals(reader.isBufferExhausted(), true)
|
||||||
|
assertEquals(reader.isEof(), false)
|
||||||
|
|
||||||
|
// Trigger recognition of EOF.
|
||||||
|
// Case 1 Peek: index == length (initial state)
|
||||||
|
assertEquals(reader.peek(), None)
|
||||||
|
// Case 2 Peek: eof == true (triggered state)
|
||||||
|
assertEquals(reader.peek(), None)
|
||||||
|
// Case: eof == true (triggered state)
|
||||||
|
assertEquals(reader.consume(), None)
|
||||||
|
assertEquals(reader.isBufferExhausted(), true)
|
||||||
|
assertEquals(reader.isEof(), true)
|
||||||
|
reader.close()
|
||||||
|
assertEquals(reader.isBufferExhausted(), true)
|
||||||
|
assertEquals(reader.isEof(), true)
|
||||||
|
}
|
||||||
|
|
||||||
|
test("should receive the current and previous characters") {
|
||||||
|
val stream = new ByteArrayInputStream(Array[Byte]('a', 'b'))
|
||||||
|
val reader = CharacterReader.forInputStream(stream)
|
||||||
|
|
||||||
|
// Verify initial state.
|
||||||
|
assertEquals(0, reader.getCurrentChar().toInt)
|
||||||
|
interceptMessage[IllegalStateException](
|
||||||
|
"This ring has not been initialized with any data."
|
||||||
|
) {
|
||||||
|
reader.getPreviousChar()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform this test without peeking, just consume characters.
|
||||||
|
assertEquals(reader.consume(), Some('a'))
|
||||||
|
assertEquals(reader.isBufferExhausted(), false)
|
||||||
|
assertEquals(reader.isEof(), false)
|
||||||
|
assertEquals(reader.getCurrentChar(), 'a')
|
||||||
|
assertEquals(reader.consume(), Some('b'))
|
||||||
|
assertEquals(reader.isBufferExhausted(), true)
|
||||||
|
assertEquals(reader.getCurrentChar(), 'b')
|
||||||
|
assertEquals(reader.getPreviousChar(), 'a')
|
||||||
|
assertEquals(reader.getLookback().toList, List('a'))
|
||||||
|
assertEquals(reader.consume(), None)
|
||||||
|
assertEquals(reader.isBufferExhausted(), true)
|
||||||
|
assertEquals(reader.isEof(), true)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def loadFileToString(name: String): String =
|
private def loadFileToString(name: String): String =
|
||||||
|
|
Loading…
Add table
Reference in a new issue