From c5ea827944cf32a69e8226d70549cef0797ef89d Mon Sep 17 00:00:00 2001
From: Pat Garrity
Date: Sun, 18 Feb 2024 21:41:43 -0600
Subject: [PATCH] Expanding on definitions, starting the tokenizer implementation.

---
 .../src/main/scala/ava/parser/Keyword.scala   | 41 ++++++++++++
 .../src/main/scala/ava/parser/Name.scala      | 15 ++++++
 .../src/main/scala/ava/parser/Operator.scala  | 33 ++++++++++++
 .../src/main/scala/ava/parser/Token.scala     | 54 +++++++++++++++++--
 .../src/main/scala/ava/parser/TokenType.scala | 18 -------
 .../src/main/scala/ava/parser/Tokenizer.scala | 22 +++++++-
 6 files changed, 160 insertions(+), 23 deletions(-)
 create mode 100644 modules/parser/src/main/scala/ava/parser/Name.scala
 create mode 100644 modules/parser/src/main/scala/ava/parser/Operator.scala
 delete mode 100644 modules/parser/src/main/scala/ava/parser/TokenType.scala

diff --git a/modules/parser/src/main/scala/ava/parser/Keyword.scala b/modules/parser/src/main/scala/ava/parser/Keyword.scala
index 6e1edce..987f7cb 100644
--- a/modules/parser/src/main/scala/ava/parser/Keyword.scala
+++ b/modules/parser/src/main/scala/ava/parser/Keyword.scala
@@ -33,4 +33,45 @@ object Keyword:
   case object True      extends Keyword("true")
   case object Type      extends Keyword("type")
 
+  val All: List[Keyword] = List(
+    Alias,
+    Case,
+    Class,
+    Const,
+    Defn,
+    Do,
+    Else,
+    End,
+    Enum,
+    Export,
+    False,
+    Fn,
+    Given,
+    If,
+    Import,
+    Infix,
+    Lambda,
+    Let,
+    Match,
+    Mut,
+    Namespace,
+    Object,
+    Private,
+    Record,
+    Return,
+    Then,
+    True,
+    Type
+  )
+
+  /** Checks whether a string is exactly the text of a reserved keyword.
+    *
+    * @param candidate
+    *   The string to test.
+    * @return
+    *   True if some keyword in `All` has this text, false otherwise.
+    */
+  def isKeyword(candidate: String): Boolean =
+    All.exists(_.value == candidate)
+
 end Keyword
diff --git a/modules/parser/src/main/scala/ava/parser/Name.scala b/modules/parser/src/main/scala/ava/parser/Name.scala
new file mode 100644
index 0000000..5bc9ea8
--- /dev/null
+++ b/modules/parser/src/main/scala/ava/parser/Name.scala
@@ -0,0 +1,15 @@
+package ava.parser
+
+sealed trait Name:
+  def value: String
+
+object Name:
+  case class UserDef(value: String) extends Name
+
+  case object AnonValue extends Name:
+    val value: String = "_"
+
+  case object AnonType extends Name:
+    val value: String = "*"
+
+end Name
diff --git a/modules/parser/src/main/scala/ava/parser/Operator.scala b/modules/parser/src/main/scala/ava/parser/Operator.scala
new file mode 100644
index 0000000..402dfc5
--- /dev/null
+++ b/modules/parser/src/main/scala/ava/parser/Operator.scala
@@ -0,0 +1,33 @@
+package ava.parser
+
+sealed abstract class Operator(val value: String)
+
+object Operator:
+
+  case object Hole        extends Operator("???")
+  case object ImportSplat extends Operator("*")
+  case object Member      extends Operator(".")
+  case object Union       extends Operator("|")
+  case object ListPrepend extends Operator(":-")
+  case object BindType    extends Operator(":")
+  case object BindValue   extends Operator(":=")
+  case object FnReturn    extends Operator("->")
+  case object BindDo      extends Operator("<-")
+  case object ClassMember extends Operator("::")
+  case object Case        extends Operator("=>")
+
+  val All: List[Operator] = List(
+    Hole,
+    ImportSplat,
+    Member,
+    Union,
+    ListPrepend,
+    BindType,
+    BindValue,
+    FnReturn,
+    BindDo,
+    ClassMember,
+    Case
+  )
+
+end Operator
diff --git a/modules/parser/src/main/scala/ava/parser/Token.scala b/modules/parser/src/main/scala/ava/parser/Token.scala
index 17b6a8b..b2d40f9 100644
--- a/modules/parser/src/main/scala/ava/parser/Token.scala
+++ b/modules/parser/src/main/scala/ava/parser/Token.scala
@@ -1,6 +1,52 @@
 package ava.parser
 
-case class Token(
-  value: String,
-  tokenType: TokenType
-)
+sealed trait Token
+
+object Token:
+  /** Most tokens are generic tokens. They represent some arbitrary grouping of
+    * characters that will be refined later.
+    *
+    * @param value
+    *   The token value.
+    */
+  case class Generic(value: String) extends Token
+
+  /** Comments are detected at time of tokenization, and are arbitrary strings.
+    *
+    * @param value
+    *   The comment value.
+    */
+  case class Comment(value: String) extends Token
+
+  /** The '(' character.
+    */
+  case object OpenParen extends Token
+
+  /** The ')' character.
+    */
+  case object CloseParen extends Token
+
+  /** The '.' character.
+    */
+  case object Dot extends Token
+
+  /** The ',' character.
+    */
+  case object Comma extends Token
+
+  /** The '"' character.
+    */
+  case object DoubleQuote extends Token
+
+  /** The ''' character.
+    */
+  case object SingleQuote extends Token
+
+  /** The ':' character.
+    */
+  case object Colon extends Token
+
+  /** The '#' character.
+    */
+  case object Tuple extends Token
+end Token
diff --git a/modules/parser/src/main/scala/ava/parser/TokenType.scala b/modules/parser/src/main/scala/ava/parser/TokenType.scala
deleted file mode 100644
index c67dfd1..0000000
--- a/modules/parser/src/main/scala/ava/parser/TokenType.scala
+++ /dev/null
@@ -1,18 +0,0 @@
-package ava.parser
-
-sealed trait TokenType
-
-object TokenType:
-
-  case object Keyword          extends TokenType
-  case object ReservedOperator extends TokenType
-  case object Name             extends TokenType
-  case object OpenParen        extends TokenType
-  case object CloseParen       extends TokenType
-  case object Dot              extends TokenType
-  case object Comma            extends TokenType
-  case object DoubleQuote      extends TokenType
-  case object Hole             extends TokenType
-  case object Literal          extends TokenType
-
-end TokenType
diff --git a/modules/parser/src/main/scala/ava/parser/Tokenizer.scala b/modules/parser/src/main/scala/ava/parser/Tokenizer.scala
index 2449d85..740fb53 100644
--- a/modules/parser/src/main/scala/ava/parser/Tokenizer.scala
+++ b/modules/parser/src/main/scala/ava/parser/Tokenizer.scala
@@ -1,6 +1,26 @@
 package ava.parser
 
 class Tokenizer(private val reader: CharacterReader):
-  def next(): Option[String] = None
+  import Tokenizer.*
+
+  private var state: State = State.Initial
+
+  def next(): Option[Token] = None
 
   def close(): Unit = reader.close()
+
+object Tokenizer:
+
+  sealed trait State
+
+  object State:
+
+    case object Initial          extends State
+    case object PotentialComment extends State
+    case object InComment        extends State
+    case object InQuote          extends State
+    case object InGeneric        extends State
+
+  end State
+
+end Tokenizer