updated lexical analyser to fix merge problems

tobywalsh1107 · tobywalsh1107 · commit d70cbb45211c · 2021-10-17T22:20:52.000+11:00
diff --git a/Assignment2/src/com/company/LexicalAnalyser.java b/Assignment2/src/com/company/LexicalAnalyser.java
@@ -4,79 +4,116 @@
 import java.util.Optional;
 import java.util.Collections;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.NoSuchElementException;
 
 public class LexicalAnalyser {
 
-	public static List<Token> analyse(String sourceCode) throws LexicalException {
-		// Turn the input String into a list of Tokens!
-		String[] splitList = sourceCode.split("\\s+");
-		List<String> furtherSplitList = new ArrayList<String>();
-		List<Token> tokenList = new ArrayList<Token>();
+    public static List<Token> analyse(String sourceCode) throws LexicalException {
+        // Turn the input String into a list of Tokens!
+        // split source code by spaces into a list and initialize all other necessary lists
+        String[] splitList = sourceCode.split(" ");
+        List<String> tokenCase = Arrays.asList(new String[]{ // list to check characters against
+                "\"", "\'", "(", ")", "{", "}",
+                ";", "+", "-", "*", "/", "%"
+        });
+        List<String> furtherSplitList = new ArrayList<String>();
+        List<Token> tokenList = new ArrayList<Token>();
 
-		for (String word : splitList) {
-			String s = "";
-			for (int i = 0; i < word.length(); i++) {
-				// System.out.println(word.charAt(i));
-				String c = Character.toString(word.charAt(i));
-				switch (c) {
-					case "(":
-					case ")":
-					case "{":
-					case "}":
-					case ";":
-						// System.out.println(s);
-						// System.out.println(c);
-						// if (s != null || s.length() > 0 || s != "") {
-						// 	System.out.println(s);
-						// 	furtherSplitList.add(s);
-						// }
-						furtherSplitList.add(s);
-						furtherSplitList.add(c);
-						s = "";
-						break;
-					case " ":
-						i = word.length();
-						break;
-					default:
-						s += c;
-				}
-				// if token exists, add s to split list, then add token
-				// clear string var
-			}
-			// if (s != null || s.length() > 0 || s != "") {
-			// 	System.out.println(s);
-			// 	furtherSplitList.add(s);
-			// }
-			furtherSplitList.add(s);
-		}
+        for (String word : splitList) {
+            String s = "";
+            for (int i = 0; i < word.length(); i++) { // iterate through the word
+                // System.out.println(word.charAt(i));
+                String c = Character.toString(word.charAt(i));
+                if (tokenCase.contains(c)) { // check against list of tokens
+                    furtherSplitList.add(s); // add whatever we have in the list
+                    furtherSplitList.add(c); // add the last character as a separate token
+                    s = "";
+                } else {
+                    s += c; // if character wasn't a separator keep adding to s
+                }
+            }
+            furtherSplitList.add(s);
+        }
+        // start adding tokens to the token list based on what we have in further split list
+        for (int i = 0; i < furtherSplitList.size(); i++) {
+            String s = furtherSplitList.get(i);
+            if (s.length() > 0) {
+                try {
+                    if (i > 0 && furtherSplitList.get(i - 1).matches("\"")) {
+                        tokenList.add(tokenTypeStringLit(s).get());
+                    } else if (i > 0 && furtherSplitList.get(i - 1).matches("\'")) {
+                        tokenList.add(tokenTypeCharLit(s).get());
+                    } else if (i < furtherSplitList.size() - 1 && furtherSplitList.get(i).matches(".[=]$")) {
+                        // System.out.println(furtherSplitList.get(i));
+                        if (furtherSplitList.get(i).matches("^[>].*")) {
+                            tokenList.add(tokenTypeGE(s).get());
+                        } else if (furtherSplitList.get(i).matches("^[<].*")) {
+                            tokenList.add(tokenTypeLE(s).get());
+                        } else if (furtherSplitList.get(i).matches("^[!].*")) {
+                            tokenList.add(tokenTypeNE(s).get());
+                        } else if (furtherSplitList.get(i).matches("^[=].*")) {
+                            tokenList.add(tokenTypeEqual(s).get());
+                        } else {
+                            tokenList.add(tokenTypeEqual(s).get());
+                        }
+                    } else {
+                        tokenList.add(tokenFromString(s).get());
+                    }
+                } catch (NoSuchElementException e) {
+                    System.out.print("Token not found: " + e + "\n");
+                } catch (Exception e) {
+                    System.out.print(e);
+                }
+            }
+        }
+        return tokenList;
+    }
 
-		System.out.println(furtherSplitList);
+    private static Optional<Token> tokenFromString(String t) {
+        Optional<Token.TokenType> type = tokenTypeOf(t);
+        if (type.isPresent())
+            return Optional.of(new Token(type.get(), t));
+        return Optional.empty();
+    }
 
+    private static Optional<Token> tokenTypeStringLit(String t) {
+        Optional<Token.TokenType> type = Optional.of(Token.TokenType.STRINGLIT);
+        if (type.isPresent())
+            return Optional.of(new Token(type.get(), t));
+        return Optional.empty();
+    }
 
-		for(int j = 0; j < furtherSplitList.size(); j++){
+    private static Optional<Token> tokenTypeCharLit(String t) {
+        Optional<Token.TokenType> type = Optional.of(Token.TokenType.CHARLIT);
+        if (type.isPresent())
+            return Optional.of(new Token(type.get(), t));
+        return Optional.empty();
+    }
 
-		}
+    private static Optional<Token> tokenTypeGE(String t) {
+        Optional<Token.TokenType> type = Optional.of(Token.TokenType.GE);
+        if (type.isPresent())
+            return Optional.of(new Token(type.get(), t));
+        return Optional.empty();
+    }
 
-		for (String s : furtherSplitList) {
+    private static Optional<Token> tokenTypeLE(String t) {
+        Optional<Token.TokenType> type = Optional.of(Token.TokenType.LE);
+        if (type.isPresent())
+            return Optional.of(new Token(type.get(), t));
+        return Optional.empty();
+    }
 
-			if(s.length() > 0){
-				try {
-					tokenList.add(tokenFromString(s).get());
-				}
-				catch (NoSuchElementException e) {
-					// tokenList.add(Optional.empty());
-				}
-			}
-			//try {
-			//	tokenList.add(LexicalAnalyser.tokenFromString().get())
-			//}
-		}
-		return tokenList;
-		// return Collections.emptyList();
-	}
+    private static Optional<Token> tokenTypeEqual(String t) {
+        Optional<Token.TokenType> type = Optional.of(Token.TokenType.EQUAL);
+        if (type.isPresent())
+            return Optional.of(new Token(type.get(), t));
+        return Optional.empty();
+    }
 
-    private static Optional<Token> tokenFromString(String t) {
-        Optional<Token.TokenType> type = tokenTypeOf(t);
+    private static Optional<Token> tokenTypeNE(String t) {
+        Optional<Token.TokenType> type = Optional.of(Token.TokenType.NEQUAL);
         if (type.isPresent())
             return Optional.of(new Token(type.get(), t));
         return Optional.empty();
@@ -144,14 +181,27 @@ private static Optional<Token.TokenType> tokenTypeOf(String t) {
                 return Optional.of(Token.TokenType.TRUE);
             case "false":
                 return Optional.of(Token.TokenType.FALSE);
+            case "<":
+                return Optional.of(Token.TokenType.LT);
+            case ">":
+                return Optional.of(Token.TokenType.GT);
+            // case "<=":
+            // return Optional.of(Token.TokenType.LE);
+            // case ">=":
+            // return Optional.of(Token.TokenType.GE);
         }
 
+        if (t.matches("\"{1}"))
+            return Optional.of(Token.TokenType.DQUOTE);
+        if (t.matches("\'{1}"))
+            return Optional.of(Token.TokenType.SQUOTE);
         if (t.matches("\\d+"))
             return Optional.of(Token.TokenType.NUM);
         if (Character.isAlphabetic(t.charAt(0)) && t.matches("[\\d|\\w]+")) {
             return Optional.of(Token.TokenType.ID);
         }
+
         return Optional.empty();
     }
 
-}
+}