From 4edadacc4c9aa0b12aca637e981fe5ea7cb68176 Mon Sep 17 00:00:00 2001 From: Abdelrahman Abounegm Date: Wed, 25 Oct 2023 16:44:31 +0300 Subject: [PATCH 1/3] Add length information to the position token Co-authored-by: Nikolai Kudasov --- source/src/BNFC/Backend/Haskell/CFtoAlex3.hs | 28 +++++++++---------- source/src/BNFC/Backend/Haskell/CFtoHappy.hs | 16 +++++------ source/src/BNFC/Backend/Haskell/CFtoLayout.hs | 6 ++-- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs index 65d0dbf5..4c9b23e1 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs @@ -136,8 +136,8 @@ restOfAlex tokenText cf = concat ] , [ "{" , "-- | Create a token with position." - , "tok :: (" ++ stringType ++ " -> Tok) -> (Posn -> " ++ stringType ++ " -> Token)" - , "tok f p = PT p . f" + , "tok :: (" ++ stringType ++ " -> Tok) -> (Posn -> Int -> " ++ stringType ++ " -> Token)" + , "tok f p l = PT p l . f" , "" , "-- | Token without position." , "data Tok" @@ -173,7 +173,7 @@ restOfAlex tokenText cf = concat , "" , "-- | Token with position." , "data Token" - , " = PT Posn Tok" + , " = PT Posn Int Tok" , " | Err Posn" , " deriving (Eq, Show, Ord)" , "" @@ -188,10 +188,10 @@ restOfAlex tokenText cf = concat , "" , "-- | Get the position of a token." , "tokenPosn :: Token -> Posn" - , "tokenPosn (PT p _) = p" - , "tokenPosn (Err p) = p" + , "tokenPosn (PT posn _len _tok) = posn" + , "tokenPosn (Err posn) = posn" , "" - , "-- | Get line and column of a token." + , "-- | Get start line and column of a token." , "tokenLineCol :: Token -> (Int, Int)" , "tokenLineCol = posLineCol . tokenPosn" , "" @@ -206,15 +206,15 @@ restOfAlex tokenText cf = concat , "-- | Convert a token to its text." , "tokenText :: Token -> " ++ stringType , "tokenText t = case t of" - , " PT _ (TS s _) -> s" - , " PT _ (TL s) -> " ++ applyP stringPack "show s" - , " PT _ (TI s) -> s" - , " PT _ (TV s) -> s" - , " PT _ (TD s) -> s" - , " PT _ (TC s) -> s" + , " PT _ _ (TS s _) -> s" + , " PT _ _ (TL s) -> " ++ applyP stringPack "show s" + , " PT _ _ (TI s) -> s" + , " PT _ _ (TV s) -> s" + , " PT _ _ (TD s) -> s" + , " PT _ _ (TC s) -> s" , " Err _ -> " ++ apply stringPack "\"#error\"" ] - , [ " PT _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf ] + , [ " PT _ _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf ] , [ "" , "-- | Convert a token to a string." , "prToken :: Token -> String" @@ -295,7 +295,7 @@ restOfAlex tokenText cf = concat , " AlexEOF -> []" , " AlexError (pos, _, _, _) -> [Err pos]" , " AlexSkip inp' len -> go inp'" - , " AlexToken inp' len act -> act pos (" ++ stringTake ++ " len str) : (go inp')" + , " AlexToken inp' len act -> act pos len (" ++ stringTake ++ " len str) : (go inp')" , "" , "alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)" , "alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))" diff --git a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs index f21f9aba..ba0243ff 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs @@ -124,7 +124,7 @@ tokens cf functor | otherwise = "%token" $$ (nest 2 $ vcat $ map text $ table " " ts) where ts = map prToken (cfTokens cf) ++ specialToks cf functor - prToken (t,k) = [ render (convert t), "{ PT _ (TS _ " ++ show k ++ ")", "}" ] + prToken (t,k) = [ render (convert t), "{ PT _ _ (TS _ " ++ show k ++ ")", "}" ] -- Happy doesn't allow characters such as åäö to occur in the happy file. This -- is however not a restriction, just a naming paradigm in the happy source file. @@ -297,12 +297,12 @@ footer absName tokenText functor eps _cf = unlines $ concat -- | GF literals. specialToks :: CF -> Bool -> [[String]] -- ^ A table with three columns (last is "}"). specialToks cf functor = (`map` literals cf) $ \t -> case t of - "Ident" -> [ "L_Ident" , "{ PT _ (TV " ++ posn t ++ ")", "}" ] - "String" -> [ "L_quoted", "{ PT _ (TL " ++ posn t ++ ")", "}" ] - "Integer" -> [ "L_integ ", "{ PT _ (TI " ++ posn t ++ ")", "}" ] - "Double" -> [ "L_doubl ", "{ PT _ (TD " ++ posn t ++ ")", "}" ] - "Char" -> [ "L_charac", "{ PT _ (TC " ++ posn t ++ ")", "}" ] - own -> [ "L_" ++ own,"{ PT _ (T_" ++ own ++ " " ++ posn own ++ ")", "}" ] + "Ident" -> [ "L_Ident" , "{ PT _ _ (TV " ++ posn t ++ ")", "}" ] + "String" -> [ "L_quoted", "{ PT _ _ (TL " ++ posn t ++ ")", "}" ] + "Integer" -> [ "L_integ ", "{ PT _ _ (TI " ++ posn t ++ ")", "}" ] + "Double" -> [ "L_doubl ", "{ PT _ _ (TD " ++ posn t ++ ")", "}" ] + "Char" -> [ "L_charac", "{ PT _ _ (TC " ++ posn t ++ ")", "}" ] + own -> [ "L_" ++ own,"{ PT _ _ (T_" ++ own ++ " " ++ posn own ++ ")", "}" ] where posn tokenCat = if isPositionCat cf tokenCat || functor then "_" else "$$" @@ -327,7 +327,7 @@ specialRules absName functor tokenText cf = unlines . intersperse "" . (`map` li | otherwise = mkValPart tokenCat mkValPart tokenCat = case tokenCat of - "String" -> if functor then stringUnpack "((\\(PT _ (TL s)) -> s) $1)" + "String" -> if functor then stringUnpack "((\\(PT _ _ (TL s)) -> s) $1)" else stringUnpack "$1" -- String never has pos "Integer" -> if functor then "(read " ++ stringUnpack "(tokenText $1)" ++ ") :: Integer" else "(read " ++ stringUnpack "$1" ++ ") :: Integer" -- Integer never has pos diff --git a/source/src/BNFC/Backend/Haskell/CFtoLayout.hs b/source/src/BNFC/Backend/Haskell/CFtoLayout.hs index 38322e03..b575f4bc 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoLayout.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoLayout.hs @@ -317,7 +317,7 @@ cf2Layout layName lexName cf = unlines $ concat , "" , "-- | Create a position symbol token." , "sToken :: Position -> TokSymbol -> Token" - , "sToken p t = PT p $ TK t" + , "sToken p t@(TokSymbol s _) = PT p (length s) $ TK t" , "" , "-- | Get the line number of a token." , "line :: Token -> Line" @@ -334,13 +334,13 @@ cf2Layout layName lexName cf = unlines $ concat , "-- | Check if a word is a layout start token." , "isLayout :: Token -> Maybe LayoutDelimiters" , "isLayout = \\case" - , " PT _ (TK t) -> lookup t layoutWords" + , " PT _ _ (TK t) -> lookup t layoutWords" , " _ -> Nothing" , "" , "-- | Check if a token is one of the given symbols." , "isTokenIn :: [TokSymbol] -> Token -> Bool" , "isTokenIn ts = \\case" - , " PT _ (TK t) -> t `elem` ts" + , " PT _ _ (TK t) -> t `elem` ts" , " _ -> False" , "" , "-- | Check if a token is a layout stop token." From c180b2cc97284577244f39e1c973f6ffeeff8188 Mon Sep 17 00:00:00 2001 From: Abdelrahman Abounegm Date: Wed, 25 Oct 2023 16:46:25 +0300 Subject: [PATCH 2/3] Add end position information to BNFC'Position Co-authored-by: Nikolai Kudasov --- .../src/BNFC/Backend/Haskell/CFtoAbstract.hs | 28 ++++++++++++++++--- source/src/BNFC/Backend/Haskell/CFtoAlex3.hs | 20 +++++++++++-- source/src/BNFC/Backend/Haskell/CFtoHappy.hs | 28 +++++++++++++++---- 3 files changed, 64 insertions(+), 12 deletions(-) diff --git a/source/src/BNFC/Backend/Haskell/CFtoAbstract.hs b/source/src/BNFC/Backend/Haskell/CFtoAbstract.hs index 8c551694..eb18bced 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAbstract.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAbstract.hs @@ -92,15 +92,34 @@ cf2Abstract Options{ lang, tokenText, generic, functor } name cf = vsep . concat -- regardless whether it is used in the abstract syntax. -- It may be used in the parser. , [ vcat - [ "-- | Start position (line, column) of something." + [ "-- | Position range ((startLine, startColumn), (endLine, endColumn)) of something." , "" - , "type" <+> posType <+> "=" <+> "C.Maybe (C.Int, C.Int)" + , "type" <+> posType <+> "=" <+> "C.Maybe ((C.Int, C.Int), (C.Int, C.Int))" , "" , "pattern" <+> noPosConstr <+> "::" <+> posType , "pattern" <+> noPosConstr <+> "=" <+> "C.Nothing" , "" - , "pattern" <+> posConstr <+> ":: C.Int -> C.Int ->" <+> posType - , "pattern" <+> posConstr <+> "line col =" <+> "C.Just (line, col)" + , "pattern" <+> posConstr <+> ":: (C.Int, C.Int) -> (C.Int, C.Int) ->" <+> posType + , "pattern" <+> posConstr <+> "start end =" <+> "C.Just (start, end)" + , "" + , "{-# COMPLETE" <+> posConstr <> "," <+> noPosConstr <+> "#-}" + , "" + , "startLineCol" <> posConstr <+> "::" <+> posType <+> "-> C.Maybe (C.Int, C.Int)" + , "startLineCol" <> posConstr <+> "= C.fmap C.fst" + , "" + , "endLineCol" <> posConstr <+> "::" <+> posType <+> "-> C.Maybe (C.Int, C.Int)" + , "endLineCol" <> posConstr <+> "= C.fmap C.snd" + , "" + , "span" <> posConstr <+> "::" <+> posType <+> "->" <+> posType <+> "->" <+> posType + , "span" <> posConstr + <+> "(" <+> posConstr <+> "start _end" <+> ")" + <+> "(" <+> posConstr <+> "_start end" <+> ") =" <+> posConstr <+> "start end" + , "span" <> posConstr + <+> "(" <+> posConstr <+> "start end" <+> ") _ =" <+> posConstr <+> "start end" + , "span" <> posConstr + <+> "_ (" <+> posConstr <+> "start end" <+> ") =" <+> posConstr <+> "start end" + , "span" <> posConstr + <+> noPosConstr <+> noPosConstr <+> "=" <+> noPosConstr ] | defPosition ] @@ -159,6 +178,7 @@ cf2Abstract Options{ lang, tokenText, generic, functor } name cf = vsep . concat [ [ text $ List.intercalate ", " stdClasses | hasTextualToks || hasData ] , [ text $ List.intercalate ", " funClasses | fun ] , [ text $ "Int, Maybe(..)" | defPosition ] + , [ text $ "fmap, fst, snd"] ] -- | diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs index 4c9b23e1..4e71895b 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs @@ -191,17 +191,33 @@ restOfAlex tokenText cf = concat , "tokenPosn (PT posn _len _tok) = posn" , "tokenPosn (Err posn) = posn" , "" + , "-- | Get the length of a token." + , "tokenLen :: Token -> Int" + , "tokenLen (PT _posn len _tok) = len" + , "tokenLen (Err _) = 0" + , "" , "-- | Get start line and column of a token." , "tokenLineCol :: Token -> (Int, Int)" , "tokenLineCol = posLineCol . tokenPosn" , "" + , "-- | Get end line and column of a token." + , "tokenLineColEnd :: Token -> (Int, Int)" + , "tokenLineColEnd t = (l, c + n)" + , " where" + , " (l, c) = tokenLineCol t" + , " n = tokenLen t" + , "" + , "-- | Get line and column for both start and end of a token." + , "tokenSpan :: Token -> ((Int, Int), (Int, Int))" + , "tokenSpan t = (tokenLineCol t, tokenLineColEnd t)" + , "" , "-- | Get line and column of a position." , "posLineCol :: Posn -> (Int, Int)" , "posLineCol (Pn _ l c) = (l,c)" , "" , "-- | Convert a token into \"position token\" form." - , "mkPosToken :: Token -> ((Int, Int), " ++ stringType ++ ")" - , "mkPosToken t = (tokenLineCol t, tokenText t)" + , "mkPosToken :: Token -> (((Int, Int), Int), " ++ stringType ++ ")" + , "mkPosToken t = ((tokenLineCol t, tokenLen t), tokenText t)" , "" , "-- | Convert a token to its text." , "tokenText :: Token -> " ++ stringType diff --git a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs index ba0243ff..57560cc0 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs @@ -157,7 +157,7 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) -> -- Coercion are much simpler: -- -- >>> constructRule "Foo" True (npRule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"] Parsable) --- ("'(' Exp ')'","(uncurry Foo.BNFC'Position (tokenLineCol $1), (snd $2))") +-- ("'(' Exp ')'","(uncurry Foo.BNFC'Position (tokenSpan $1), (snd $2))") -- constructRule :: IsFun f => String -> Bool -> Rul f -> (Pattern, Action) constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action) @@ -167,10 +167,26 @@ constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action) action | functor = "(" ++ actionPos id ++ ", " ++ actionValue ++ ")" | otherwise = actionValue - actionPos paren = case rhs of - [] -> qualify noPosConstr - (Left _:_) -> paren "fst $1" - (Right _:_) -> paren $ unwords [ "uncurry", qualify posConstr , "(tokenLineCol $1)" ] + actionPos paren = case headAndLast rhs of + Nothing -> qualify noPosConstr + Just (startTok, endTok) -> paren $ unwords + [ qualify ("span" ++ posConstr) + , startOf startTok + , endOf endTok + ] + where + startOf :: Either a b -> String + startOf Left{} = "(fst $1)" + startOf Right{} = unwords [ "(uncurry", qualify posConstr , "(tokenSpan $1))" ] + endOf :: Either a b -> String + endOf Left{} = "(fst $" ++ show (length rhs) ++ ")" + endOf Right{} = unwords [ "(uncurry", qualify posConstr , "(tokenSpan $" ++ show (length rhs) ++"))" ] + + headAndLast :: [a] -> Maybe (a, a) + headAndLast xs = + case (xs, reverse xs) of + (x:_, z:_) -> Just (x, z) + _ -> Nothing actionValue | isCoercion fun = unwords metavars | isNilCons fun = unwords (qualify fun : metavars) @@ -323,7 +339,7 @@ specialRules absName functor tokenText cf = unlines . intersperse "" . (`map` li where mkTypePart tokenCat = if functor then concat [ "(", qualify posType, ", ", tokenCat, ")" ] else tokenCat mkBodyPart tokenCat - | functor = "(" ++ unwords ["uncurry", qualify posConstr, "(tokenLineCol $1)"] ++ ", " ++ mkValPart tokenCat ++ ")" + | functor = "(" ++ unwords ["uncurry", qualify posConstr, "(tokenSpan $1)"] ++ ", " ++ mkValPart tokenCat ++ ")" | otherwise = mkValPart tokenCat mkValPart tokenCat = case tokenCat of From fe5c49d8eda50df760ec954318fac0ea21f773ff Mon Sep 17 00:00:00 2001 From: Abdelrahman Abounegm Date: Tue, 7 Nov 2023 17:33:49 +0300 Subject: [PATCH 3/3] Fix the position functor doctests for CFtoHappy --- source/src/BNFC/Backend/Haskell/CFtoHappy.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs index 57560cc0..e9691f7e 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs @@ -141,10 +141,10 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) -> -- >>> constructRule "Foo" False (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable) -- ("Exp '+' Exp","Foo.EPlus $1 $3") -- --- If we're using functors, it adds position value: +-- If we're using functors, it adds position range value: -- -- >>> constructRule "Foo" True (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable) --- ("Exp '+' Exp","(fst $1, Foo.EPlus (fst $1) (snd $1) (snd $3))") +-- ("Exp '+' Exp","(Foo.spanBNFC'Position (fst $1) (fst $3), Foo.EPlus (Foo.spanBNFC'Position (fst $1) (fst $3)) (snd $1) (snd $3))") -- -- List constructors should not be prefixed by the abstract module name: -- @@ -157,7 +157,7 @@ rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) -> -- Coercion are much simpler: -- -- >>> constructRule "Foo" True (npRule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"] Parsable) --- ("'(' Exp ')'","(uncurry Foo.BNFC'Position (tokenSpan $1), (snd $2))") +-- ("'(' Exp ')'","(Foo.spanBNFC'Position (uncurry Foo.BNFC'Position (tokenSpan $1)) (uncurry Foo.BNFC'Position (tokenSpan $3)), (snd $2))") -- constructRule :: IsFun f => String -> Bool -> Rul f -> (Pattern, Action) constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action)