diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index 4a5d78b36..53770ad6f 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -36,6 +36,15 @@ jobs: ${{ runner.os }}-build- ${{ runner.os }}- + - name: Install Ubuntu Prerequisites (libpcre) + if: startsWith(matrix.os, 'ubuntu') + run: | + sudo apt-get install libpcre3-dev + - name: Install MacOS Prerequisites (libpcre) + if: startsWith(matrix.os, 'macOS') + run: | + brew install pcre + - name: Build explainable run: stack build working-directory: lib/haskell/explainable diff --git a/lib/haskell/natural4/package.yaml b/lib/haskell/natural4/package.yaml index 4d4bf1adb..752e8bb5b 100644 --- a/lib/haskell/natural4/package.yaml +++ b/lib/haskell/natural4/package.yaml @@ -71,6 +71,15 @@ dependencies: - string-interpolate - prettyprinter-interp - json + - optics + - generic-optics + - nonempty-containers + - lens-regex-pcre + - pcre-heavy + - string-conversions + - raw-strings-qq + + language: GHC2021 diff --git a/lib/haskell/natural4/src/LS/Rule.hs b/lib/haskell/natural4/src/LS/Rule.hs index 462332669..4b92e7453 100644 --- a/lib/haskell/natural4/src/LS/Rule.hs +++ b/lib/haskell/natural4/src/LS/Rule.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveAnyClass, DeriveGeneric, FlexibleContexts, TypeFamilies, TypeApplications, DataKinds #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} @@ -20,6 +20,7 @@ import Data.Text qualified as Text import Data.Void (Void) import Flow ((|>)) import GHC.Generics (Generic) +import Data.Generics.Sum.Constructors import LS.Types ( BoolStructP, BoolStructR, @@ -71,9 +72,25 @@ import Text.Megaparsec ) import Data.Graph.Inductive (Gr, empty) import LS.XPile.Logging (XPileLogW) +import Optics hiding ((|>), has) -- the Rule record has a `has` field +import Optics qualified as O --- | [TODO] refactoring: these should be broken out into their own (new)types and have Rule include them all. --- We should take advantage of NoFieldSelectors to reduce the hazards here +{- | +[TODO] refactoring: these should be broken out into their own (new)types and have Rule include them all. +We should take advantage of NoFieldSelectors to reduce the hazards here + + +The deriving Generics stuff allows us to do things like +>>> O.has (_Ctor @"Regulative") defaultHorn +False +>>> O.has (_Ctor @"Regulative") defaultReg +True + +as well as extracting givens from a rule in an easy way (see the Logical English code). + +See https://hackage.haskell.org/package/generic-optics-2.2.1.0/docs/Data-Generics-Sum-Constructors.html +for an explanation of how the Generics and optics stuff works +-} data Rule = Regulative { subj :: BoolStructP -- man AND woman AND child , rkeyword :: RegKeywords -- Every | Party | TokAll @@ -181,6 +198,7 @@ instance Hashable Rule type Parser = WriterT (DList Rule) PlainParser + -- | the more responsible version of head . words . show ruleConstructor :: Rule -> String ruleConstructor Regulative{} = "Regulative" diff --git a/lib/haskell/natural4/src/LS/Utils.hs b/lib/haskell/natural4/src/LS/Utils.hs index ca614a618..931614a56 100644 --- a/lib/haskell/natural4/src/LS/Utils.hs +++ b/lib/haskell/natural4/src/LS/Utils.hs @@ -8,10 +8,12 @@ module LS.Utils mapThenSwallowErrs, runMonoidValidate, swallowErrs, - MonoidValidate + MonoidValidate, + (<||>) ) where +import Control.Applicative (liftA2) import Control.Monad.Validate ( MonadValidate (refute), Validate, @@ -72,4 +74,9 @@ swallowErrs :: [MonoidValidate e a] -> [a] swallowErrs = mapThenSwallowErrs id runMonoidValidate :: MonoidValidate e a -> Either e a -runMonoidValidate x = x |> coerce |> runValidate \ No newline at end of file +runMonoidValidate x = x |> coerce |> runValidate + +-- | A simple lifted ('||'), copied from Control.Bool +(<||>) :: Applicative f => f Bool -> f Bool -> f Bool +(<||>) = liftA2 (||) +{-# INLINE (<||>) #-} \ No newline at end of file diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenLEHCs.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenLEHCs.hs index 2b7204b90..5c25f6da2 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenLEHCs.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenLEHCs.hs @@ -10,15 +10,13 @@ module LS.XPile.LogicalEnglish.GenLEHCs (leHCFromVarsHC) where - import Data.Text qualified as T import Data.HashSet qualified as HS -import Data.Foldable (toList) +-- import Data.Foldable (toList) -- import Debug.Trace (trace) import Data.Coerce (coerce) -- import Data.String.Interpolate ( i ) import Data.Traversable -import Control.Monad.Identity (Identity) import LS.XPile.LogicalEnglish.Types @@ -150,7 +148,7 @@ simplifyVAtomicP = fmap simplifyVCells simplifyVCells :: VCell -> LEhcCell simplifyVCells = \case - Pred txt -> NotVar txt + Pred txt -> NotVar txt TempVar tv -> tvar2lecell tv tvar2lecell :: TemplateVar -> LEhcCell diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenNLAs.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenNLAs.hs index ff4a9ad3e..093d65865 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenNLAs.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/GenNLAs.hs @@ -1,103 +1,352 @@ {-# OPTIONS_GHC -W #-} +{-# OPTIONS_GHC -Wno-unused-top-binds #-} {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE DuplicateRecordFields, RecordWildCards #-} --- {-# LANGUAGE OverloadedRecordDot #-} --- {-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE OverloadedRecordDot, DuplicateRecordFields, RecordWildCards, NoFieldSelectors #-} {-# LANGUAGE OverloadedStrings #-} --- {-# LANGUAGE QuasiQuotes #-} --- {-# LANGUAGE DerivingStrategies #-} -{-# LANGUAGE ViewPatterns #-} -{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} -{-# HLINT ignore "Replace case with maybe" #-} +{-# LANGUAGE DerivingVia, DeriveAnyClass #-} +{-# LANGUAGE PatternSynonyms, ViewPatterns #-} +{-# LANGUAGE QuasiQuotes, TemplateHaskell #-} + module LS.XPile.LogicalEnglish.GenNLAs ( - nlasFromVarsHC + nlasFromVarsHC + , NLATxt(..) + , _MkNLATxt + + , NLA -- opaque + , mkNLA -- smart constructor + , getNLAtxt + + , RegexTrav + , FilterResult(..) + , removeInternallySubsumed + , removeRegexMatches + , removeDisprefdInEquivUpToVarNames + , regextravifyNLASection + , regextravifyLENLA ) where --- TODO: Make export list - --- import Data.Text qualified as T +import LS.Utils ((<||>)) +import Data.Text qualified as T +import Data.Ord (Down(..)) +import GHC.Exts (sortWith) import Data.HashSet qualified as HS -import Data.Foldable (toList) +import Data.Containers (difference) +import Data.Hashable (Hashable, hashWithSalt, hashUsing) +import Data.Foldable (fold, foldl', toList) import Data.Maybe (catMaybes) -import qualified Data.List as L hiding (head, tail) -- import Debug.Trace (trace) import Data.Coerce (coerce) --- import Data.String.Interpolate ( i ) import LS.XPile.LogicalEnglish.Types +import LS.XPile.LogicalEnglish.Utils (setInsert) +import Data.String (IsString) +import Data.String.Interpolate ( i ) + +import Data.List.Split (splitOn) +import Data.String.Conversions (cs) +-- import Data.String.Conversions.Monomorphic +import Text.RawString.QQ +import qualified Text.Regex.PCRE.Heavy as PCRE +-- import Text.Regex.PCRE.Heavy() +import Control.Lens.Regex.Text + +import Optics +-- import Data.Text.Optics (unpacked) +import Data.HashSet.Optics (setOf) +import Data.Sequence.Optics (seqOf) +import Data.Containers.NonEmpty (NE, HasNonEmpty, nonEmpty, fromNonEmpty) +-- onNonEmpty, fromNonEmpty, +import Data.Sequence (Seq) +-- import qualified Data.Sequence as Seq +import Data.Sequences (SemiSequence, intersperse) --groupAllOn +import Data.List qualified as L +import Data.MonoTraversable (Element) +import Prettyprinter(Pretty) + + +newtype NLATxt = MkNLATxt T.Text + deriving stock (Show) + deriving newtype (Eq, Ord, IsString, Semigroup, Monoid, Hashable, Pretty) +makePrisms ''NLATxt + +type RegexTrav = Traversal T.Text T.Text Match Match +type RawRegexStr = String +data NLA = + MkNLA { getBase :: NE (Seq VCell) + , numVars :: !Int + , getNLATxt' :: NLATxt + , regex :: RegexTrav } + +instance Eq NLA where + a == b = a.getNLATxt' == b.getNLATxt' +instance Ord NLA where + a `compare` b = a.getNLATxt' `compare` b.getNLATxt' +instance Show NLA where + show :: NLA -> String + show nla = show nla.getBase <> "\n" <> show nla.numVars <> "\n" <> show nla.getNLATxt' + +instance Hashable NLA where + hashWithSalt = hashUsing getNLAtxt + -- prob the easiest way to filter out overlapping NLAs is to use a separate function, rather than trying to shoehorn it into Eq and Hashable and Eq somehow + +{- | Think of this as the specialized-to-the-Txt-to-NLATxt-direction form of `coerce` +Using this allows you to get the nice no-run-time-overhead of `coerce`, +/without/ having to add the type annotations you'd need if you were using `coerce` +-} +mkNLATxt :: T.Text -> NLATxt +mkNLATxt = view (re _MkNLATxt) + +nlaAsTxt :: NLA -> T.Text +nlaAsTxt = view _MkNLATxt . getNLAtxt + +{- | public getter to view the NLAtxt +Don't need to export a lens for this field cos not going to change / set it -} +getNLAtxt :: NLA -> NLATxt +getNLAtxt nla = nla.getNLATxt' + +-- | Smart constructor for making NLA +mkNLA :: forall f. (Foldable f, HasNonEmpty (f VCell)) => f VCell -> Maybe NLA +mkNLA (seqOf folded -> vcells) = do + nmtVcells <- nonEmpty vcells + regex <- regexifyVCells nmtVcells ^? _Right + return $ MkNLA { getBase = nmtVcells + , numVars = lengthOf (folded % filteredBy _TempVar) vcells + , getNLATxt' = annotxtify vcells + , regex = traversify regex} + +--- helpers for making NLAs + +-- | Private function for making NLATxt for NLA +annotxtify :: Seq VCell -> NLATxt +annotxtify = textify spaceDelimtr vcell2NLAtxt + where + spaceDelimtr :: NLATxt = coerce (" " :: T.Text) + +{- | Replace each variable indicator with a regex pattern + that matches either a word or another variable indicator. +-} +regexifyVCells :: NE (Seq VCell) -> Either String Regex +regexifyVCells = makeRegex . textify strdelimitr regexf . fromNonEmpty + where + strdelimitr :: String = " " + regexf = \case + TempVar tvar -> tvar2WordOrVIregex tvar + Pred nonvartxt -> (PCRE.escape . T.unpack $ nonvartxt) + --TODO: Add tests to check if have to escape metachars in Pred + -- T.unpack nonvartxt + -- PCRE.escape . T.unpack $ nonvartxt + +textify :: (Foldable t, Monoid c, SemiSequence (t c), Functor t) => Element (t c) -> (a -> c) -> t a -> c +textify spaceDelimtr mappingfn = fold . intersperse spaceDelimtr . fmap mappingfn + +--- helpers for working with regex + +{- | a regex that matches either a word or another variable indicator -} +wordOrVI :: RawRegexStr +wordOrVI = [r|(\w+|\*[\w\s]+\*)|] + +tvar2WordOrVIregex :: TemplateVar -> RawRegexStr +tvar2WordOrVIregex = \case + MatchGVar _ -> wordOrVI + EndsInApos _ -> wordOrVI <> [r|'s|] + IsNum _ -> [r|is |] <> wordOrVI + +makeRegex :: RawRegexStr -> Either String Regex +makeRegex rawregex = PCRE.compileM (cs rawregex) [] + +traversify :: Regex -> RegexTrav +traversify regex = traversalVL (regexing regex) + +regextravify :: RawRegexStr -> Maybe RegexTrav +regextravify rawregex = + rawregex ^? (to makeRegex % _Right % to traversify) + +matchesTxtOf :: RegexTrav -> NLATxt -> Bool +regexTrav `matchesTxtOf` nlatxt = has regexTrav (view _MkNLATxt nlatxt) +------------------- Filtering out subsumed NLAs -nlasFromVarsHC :: VarsHC -> HS.HashSet LENatLangAnnot +{- | Returns a set of non-subsumed NLAs +Currently implemented in a somewhat naive way -} +removeInternallySubsumed :: HS.HashSet NLA -> HS.HashSet NLA +removeInternallySubsumed nlaset = foldl' addIfNotSubsumed HS.empty nlasByNumVs + where + nlasByNumVs = sortWith (Down . (.numVars)) (toList nlaset) + -- See https://ro-che.info/articles/2016-04-02-descending-sort-haskell for why not sortBy (compare `on` + addIfNotSubsumed :: HS.HashSet NLA -> NLA -> HS.HashSet NLA + addIfNotSubsumed acc nla = + if any (nla `isSubsumedBy`) acc + then acc + else nla `setInsert` acc + +-- filtering out subsumed by lib templates + +{- | filter out NLAs that are matched by any of the regex travs +Use this for filtering out NLATxts that are subsumed by lib template NLAs +-} +removeRegexMatches :: (Foldable f, Foldable g) => f RegexTrav -> g NLA -> HS.HashSet NLA +removeRegexMatches regtravs = foldr addIfNotMatched HS.empty + where + addIfNotMatched :: NLA -> HS.HashSet NLA -> HS.HashSet NLA + addIfNotMatched nla acc = + if any (`matchesTxtOf` nla.getNLATxt') regtravs + --- i.e., if any of the regextravs matches the nlatext of the nla under consideration + then acc + else nla `setInsert` acc + +{- | For parsing lib templates, as well as templates from, e.g., unit tests -} +regextravifyNLASection :: T.Text -> [RegexTrav] +regextravifyNLASection nlasectn = + nlasectn + & view (to T.lines) + & toListOf (traversed % to T.unsnoc + % folded % _1 + % to regextravifyLENLA % _Right) + +-- filtering out dispreferred among the equivalent up to var names +-- TODO: first pass; haven't fully thought thru the API yet +data FilterResult a = MkFResult { subsumed :: a, kept :: a } + deriving (Eq, Show, Functor) + +instance Semigroup a => Semigroup (FilterResult a) where + MkFResult s1 k1 <> MkFResult s2 k2 = MkFResult (s1 <> s2) (k1 <> k2) +instance Monoid a => Monoid (FilterResult a) where + mempty = MkFResult mempty mempty + +{- | TODO: Add doctests/examples +-} +removeDisprefdInEquivUpToVarNames :: Foldable t => t NLA -> FilterResult [NLA] +removeDisprefdInEquivUpToVarNames nlas = + let eqclasses :: [[NLA]] = L.groupBy isEquivUpToVarNames . L.sort . toList $ nlas + -- Re sorting NLAs: recall that the Ord for an NLA delegates to the Ord for its NLATxt + makeFResult :: [NLA] -> FilterResult [NLA] + makeFResult eqclass = if length eqclass == 1 + then MkFResult {subsumed = mempty, kept = eqclass} + else (fmap toList . removeDisprefdInEqClass) eqclass + in mconcat . fmap makeFResult $ eqclasses + +--- helpers + +{- | x `subsumes` y <=> x overlaps with y and x's arg places >= y's + +Examples of NLAs that overlap: + NLA Orig: @*a person*'s blahed *a person* blah2@ + + `NLA Orig` overlaps with each of: + @ + *a number*'s blahed *a star* blah2 + sdsd7's blahed sfsi23mkm blah2 + @ + but does NOT overlap with + @ + Alice's blahed *a person* *hohoho* blah2 + Alice's2 blahed *a person* blah2 + @ + TODO: add / make the above doctests unit tests -} +subsumes :: NLA -> NLA -> Bool +x `subsumes` y = + x.numVars > y.numVars && x.regex `matchesTxtOf` y.getNLATxt' + -- TODO: Look into the is-num vs "is payout" duplication + +isSubsumedBy :: NLA -> NLA -> Bool +isSubsumedBy = flip subsumes + +isEquivUpToVarNames :: NLA -> NLA -> Bool +x `isEquivUpToVarNames` y = x.numVars == y.numVars && x.regex `matchesTxtOf` y.getNLATxt' + {- ^ the above two conditions imply also that y.regex `matchesTxtOf` x.getNLATxt' + This could be made into a property test + -} + +{- Given an equiv class of (two or more) NLAs, remove the dispreferred in that class + Assumes (without checking!) that the class has > 1 NLA +-} +removeDisprefdInEqClass :: Foldable f => f NLA -> FilterResult (HS.HashSet NLA) +removeDisprefdInEqClass nlas = + let + maybeMaxNumChars :: Maybe Int = nlas & maximumOf (folded % to nlaAsTxt % to T.length) + fewerThanMaxNumChars :: T.Text -> Bool + fewerThanMaxNumChars txt = case maybeMaxNumChars of + Just maxNumChars -> T.length txt < maxNumChars + Nothing -> False + isLessInformative :: T.Text -> Bool = T.isInfixOf "a number" <||> fewerThanMaxNumChars + + subsumed :: HS.HashSet NLA = nlas & setOf (folded + % filteredBy (to nlaAsTxt % filtered isLessInformative)) + kept :: HS.HashSet NLA = difference (setOf folded nlas) subsumed + in MkFResult {subsumed=subsumed, kept=kept} + + +-- | Takes as input a T.Text NLA that has already had the final char (either comma or period) removed +regextravifyLENLA :: T.Text -> Either String RegexTrav +regextravifyLENLA = fmap traversify . makeRegex . rawregexifyLENLA + +rawregexifyLENLA :: T.Text -> RawRegexStr +rawregexifyLENLA (T.unpack -> nlastr) = + let + splitted = splitOn "*" nlastr + isVarIdx = if splitted ^? ix 0 == Just "" then odd else even + {- first elt of `splitted` will be a "" if the template begins with a variable indicator + >>> splitOn "*" "*a blah*'s nested *a blah list* is" + ["","a blah","'s nested ","a blah list"," is"] + >>> splitOn "*" "a class's *a list*" + ["a class's ","a list",""] + -} + in splitted + & itraversed %& indices isVarIdx .~ wordOrVI + & itraversed %& indices (not . isVarIdx) %~ PCRE.escape + -- escape metachars in text that's not part of any var indicator + & toListOf (folded % folded) + +------------------- Building NLAs from VarsHCs + +nlasFromVarsHC :: VarsHC -> HS.HashSet NLA nlasFromVarsHC = \case VhcF vfact -> - case nlaFromVFact vfact of - Nothing -> HS.empty - Just nla -> HS.singleton nla + (maybe HS.empty HS.singleton (nlaFromVFact vfact)) VhcR vrule -> nlasFromVarsRule vrule --- TODO: When have more time, write smtg tt checks if it is indeed in fixed lib, and add it if not. -nlaFromVFact :: VarsFact -> Maybe LENatLangAnnot +--- helpers + +nlaFromVFact :: VarsFact -> Maybe NLA nlaFromVFact VFact{..} = nlaLoneFromVAtomicP varsfhead -nlasFromVarsRule :: VarsRule -> HS.HashSet LENatLangAnnot +nlasFromVarsRule :: VarsRule -> HS.HashSet NLA nlasFromVarsRule MkBaseRule{..} = let bodyNLAs = nlasFromBody rbody in case nlaLoneFromVAtomicP rhead of Nothing -> bodyNLAs Just headNLA -> HS.insert headNLA bodyNLAs -nlasFromBody :: BoolPropn AtomicPWithVars -> HS.HashSet LENatLangAnnot +nlasFromBody :: BoolPropn AtomicPWithVars -> HS.HashSet NLA nlasFromBody varsABP = let lstNLAs = fmap nlaLoneFromVAtomicP varsABP in HS.fromList . catMaybes . toList $ lstNLAs --- TODO: Check if this really does conform to the spec --- went a bit fast here -nlaLoneFromVAtomicP :: AtomicPWithVars -> Maybe LENatLangAnnot +nlaLoneFromVAtomicP :: AtomicPWithVars -> Maybe NLA nlaLoneFromVAtomicP = \case - ABPatomic vcells -> annotFromVCells vcells - ABPIsOpSuchTt _ _ vcells -> annotFromVCells vcells + ABPatomic vcells -> mkNLA vcells + ABPIsOpSuchTt _ _ vcells -> mkNLA vcells + -- the other two cases are accounted for by lib NLAs/templates ABPIsDiffFr{} -> Nothing ABPIsOpOf{} -> Nothing - where - annotFromVCells :: [VCell] -> Maybe LENatLangAnnot - annotFromVCells = annotFromNLAcells . nlacellsFromLacs - - nlacellsFromLacs :: [VCell] -> [NLACell] - nlacellsFromLacs = fmap vcell2NLAcell - -annotFromNLAcells :: [NLACell] -> Maybe LENatLangAnnot -annotFromNLAcells = \case - (mconcat . intersperseWithSpace -> MkNonParam concatted) -> - Just $ coerce concatted - _ -> Nothing - where - spaceDelimtr = MkNonParam " " - intersperseWithSpace = L.intersperse spaceDelimtr - -vcell2NLAcell :: VCell -> NLACell -vcell2NLAcell = \case - TempVar tvar -> tvar2NLAcell tvar - Pred nonparamtxt -> MkNonParam nonparamtxt - -{- | -Invariant: all NLAParams take one of the following two forms: - *a var* - *a var*'s --} -tvar2NLAcell :: TemplateVar -> NLACell -tvar2NLAcell = \case - EndsInApos _ -> MkParam "*a var*'s" - IsNum _numtxt -> MkParam "is *a var*" - -- handling this case explicitly to remind ourselves tt we've handled it, and cos we might want to use "*a number*" instead - MatchGVar _ -> MkParam "*a var*" - {- ^ - From the LE handbook: - An instance of a template is obtained from the template by replacing every parameter of the template by a list of words separated by spaces. - **There need not be any relationship between the words in a parameter and the words in the instance of the parameter. Different parameters in the same template can be replaced by different or identical instances.** (emphasis mine) - - Right now I'm making all of them "a var" or "a var's", as opposed to "a " / "a 's", so tt it'll be easy to remove duplicates - -} \ No newline at end of file + +vcell2NLAtxt :: VCell -> NLATxt +vcell2NLAtxt = \case + TempVar tvar -> tvar2NLAtxt tvar + Pred nonvartxt -> coerce nonvartxt + +tvar2NLAtxt :: TemplateVar -> NLATxt +tvar2NLAtxt = \case + EndsInApos prefix -> mkNLATxt [i|*a #{prefix}*'s|] + IsNum _numtxt -> mkNLATxt "is *a number*" + MatchGVar gvar -> mkNLATxt [i|*a #{gvar}*|] + +{- ^ +From the LE handbook: + An instance of a template is obtained from the template by replacing every parameter of the template by a list of words separated by spaces. + **There need not be any relationship between the words in a parameter and the words in the instance of the parameter. Different parameters in the same template can be replaced by different or identical instances.** (emphasis mine) +-} \ No newline at end of file diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/IdVars.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/IdVars.hs index 518dced3d..a1d19452b 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/IdVars.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/IdVars.hs @@ -1,10 +1,10 @@ {-# OPTIONS_GHC -W #-} {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedRecordDot, DuplicateRecordFields, RecordWildCards #-} -{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE DuplicateRecordFields, RecordWildCards #-} +-- {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE QuasiQuotes #-} +-- {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE ViewPatterns #-} @@ -34,27 +34,46 @@ idVarsInHC = \case idVarsInAP :: GVarSet -> L4AtomicP -> AtomicPWithVars idVarsInAP gvars = \case ABPatomic cells -> - ABPatomic $ fmap mklabscell cells + ABPatomic $ fmap mkVcell cells ABPIsDiffFr t1 t2 -> ABPIsDiffFr (cell2vcell gvars t1) (cell2vcell gvars t2) ABPIsOpOf t opOf termargs -> - ABPIsOpOf (cell2vcell gvars t) opOf (fmap mklabscell termargs) + ABPIsOpOf (cell2vcell gvars t) opOf (fmap mkVcell termargs) ABPIsOpSuchTt t opST cells -> ABPIsOpSuchTt (cell2vcell gvars t) opST - (fmap mklabscell cells) + (fmap mkVcell cells) where - mklabscell = cell2vcell gvars + mkVcell = cell2vcell gvars + +-- | Replace "." with "dot" and "," with "comma", in the Pred txts of ABPatomics +postprocAP :: AtomicPWithVars -> AtomicPWithVars +postprocAP = \case + ABPatomic cells -> ABPatomic $ fmap replaceTxtVCell cells + others -> others idVarsInBody :: GVarSet -> BoolPropn L4AtomicP -> BoolPropn AtomicPWithVars -idVarsInBody gvars l4boolprop = - let absAtomic = idVarsInAP gvars - in fmap absAtomic l4boolprop +idVarsInBody gvars = fmap (postprocAP . idVarsInAP gvars) ---- helpers -{- | +-- | Replace text in VCells +replaceTxtVCell :: VCell -> VCell +replaceTxtVCell = \case + tv@(TempVar _) -> tv + Pred txt -> Pred $ replaceTxt txt + +replaceTxt :: T.Text -> T.Text +replaceTxt txt = if txt == T.empty then txt + -- T.replace will error if input empty + else replacePercent . replaceCommaDot $ txt + where replaceCommaDot = T.replace "," "comma" . + T.replace "." "dot" + replacePercent = T.replace "%" " percent" + + +{- | Convert a SimplifiedL4 Cell to a VCell The code for simplifying L4 AST has established these invariants: * every IS NUM has had the IS removed, with the number converted to T.Text and wrapped in a MkCellIsNum * every IS tt was NOT an IS NUM has been replaced with a `MkCellT "is"`. @@ -74,8 +93,21 @@ cell2vcell gvars = \case else Pred celltxt MkCellIsNum numtxt -> TempVar (IsNum numtxt) +txtIsAGivenVar :: GVarSet -> T.Text -> Bool +txtIsAGivenVar gvars txt = HS.member (coerce txt) gvars + +type PrefixAposVar = T.Text +isAposVar :: GVarSet -> T.Text -> (PrefixAposVar, Bool) +isAposVar gvs (T.stripSuffix "'s" -> Just prefix) = + if txtIsAGivenVar gvs prefix + then (prefix, True) + else ("", False) +isAposVar _ _ = ("", False) +-- TODO: this matching on "'s" is a bit brittle cos unicode + + --- {- | Deprecating this and the next fn b/c the encoding suggests terms other than the args for op of might not just be either MatchGVar or EndsInApos --- they can also be atoms / non-variables +-- {- Deprecating this and the next fn b/c the encoding suggests terms other than the args for op of might not just be either MatchGVar or EndsInApos --- they can also be atoms / non-variables -- -} -- term2tvar :: GVarSet -> Term -> TemplateVar -- term2tvar gvars = \case @@ -91,17 +123,4 @@ cell2vcell gvars = \case -- optOfArg :: Cell -> TemplateVar -- optOfArg = \case -- MkCellT t -> OpOfVarArg t --- MkCellIsNum t -> OpOfVarArg t - - -txtIsAGivenVar :: GVarSet -> T.Text -> Bool -txtIsAGivenVar gvars txt = HS.member (coerce txt) gvars - -type PrefixAposVar = T.Text -isAposVar :: GVarSet -> T.Text -> (PrefixAposVar, Bool) -isAposVar gvs (T.stripSuffix "'s" -> Just prefix) = - if txtIsAGivenVar gvs prefix - then (prefix, True) - else ("", False) -isAposVar _ _ = ("", False) --- ^ TODO: this matching on "'s" is a bit brittle cos unicode \ No newline at end of file +-- MkCellIsNum t -> OpOfVarArg t \ No newline at end of file diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/LogicalEnglish.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/LogicalEnglish.hs index 5508039e5..4c48e8600 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/LogicalEnglish.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/LogicalEnglish.hs @@ -1,16 +1,13 @@ {-# OPTIONS_GHC -W #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE OverloadedRecordDot, DuplicateRecordFields #-} -{-# LANGUAGE OverloadedLists #-} +-- {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE DuplicateRecordFields, OverloadedRecordDot #-} +-- {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE QuasiQuotes #-} -{-# LANGUAGE DeriveAnyClass #-} +-- {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DataKinds, KindSignatures, AllowAmbiguousTypes #-} -{-# LANGUAGE PatternSynonyms, ViewPatterns #-} {-| We're trying to work with the rules / AST instead, @@ -21,55 +18,53 @@ After all, the design intentions for this short-term LE transpiler aren't the sa module LS.XPile.LogicalEnglish.LogicalEnglish (toLE) where -import Text.Pretty.Simple ( pShowNoColor ) +-- import Text.Pretty.Simple ( pShowNoColor ) import Data.Text qualified as T -import Data.Bifunctor ( first ) -import Data.HashMap.Strict qualified as HM import Data.HashSet qualified as HS -import Data.Hashable (Hashable) -import GHC.Generics (Generic) -import Data.Maybe (fromMaybe, listToMaybe) -import Data.HashMap.Strict qualified as Map -import Control.Monad.Identity ( Identity ) +-- import Data.List (sort) +-- import Data.Maybe (fromMaybe, listToMaybe) import Control.Monad.Validate (runValidate) -import Data.String (IsString) import Data.Coerce (coerce) -import Data.List ( sort ) +-- import Optics import Prettyprinter ( Doc, Pretty (pretty)) import LS.PrettyPrinter - ( myrender, vvsep, (), () ) -import Prettyprinter.Interpolate (__di) + ( myrender) +import LS.XPile.LogicalEnglish.Pretty(LEProg(..), libTemplatesTxt) - -import qualified AnyAll as AA -import LS.Types qualified as L4 -import LS.Types (RelationalPredicate(..), RPRel(..), MTExpr, BoolStructR(..), BoolStructT) +-- import LS.Types qualified as L4 +-- import LS.Types (RelationalPredicate(..), RPRel(..), MTExpr, BoolStructR, BoolStructT) import LS.Rule qualified as L4 (Rule(..)) import LS.XPile.LogicalEnglish.Types import LS.XPile.LogicalEnglish.ValidateL4Input - (L4Rules, ValidHornls, Unvalidated, - check, refine, loadRawL4AsUnvalid, isHornlike) -import LS.XPile.LogicalEnglish.SimplifyL4 (SimpL4(..), SimL4Error(..), simplifyL4hc) -- TODO: Add import list + ( + isHornlike + -- L4Rules, ValidHornls, Unvalidated, + -- check, refine, loadRawL4AsUnvalid, + ) +import LS.XPile.LogicalEnglish.SimplifyL4 (SimpL4(..), SimL4Error(..), simplifyL4rule) import LS.XPile.LogicalEnglish.IdVars (idVarsInHC) -import LS.XPile.LogicalEnglish.GenNLAs (nlasFromVarsHC) +import LS.XPile.LogicalEnglish.GenNLAs + ( nlasFromVarsHC + , NLATxt(..) + , NLA + , getNLAtxt + , RegexTrav + , FilterResult(..) + , removeInternallySubsumed + , regextravifyNLASection + , removeRegexMatches + , removeDisprefdInEquivUpToVarNames + ) + import LS.XPile.LogicalEnglish.GenLEHCs (leHCFromVarsHC) -import LS.XPile.LogicalEnglish.Pretty() -import LS.XPile.LogicalEnglish.UtilsLEReplDev -- for prototyping +-- import LS.XPile.LogicalEnglish.UtilsLEReplDev -- for prototyping {- -TODO: After we get a v simple end-to-end prototype out, -we'll add functionality for checking the L4 input rules __upfront__ for things like whether it's using unsupported keywords, whether the input is well-formed by the lights of the translation rules, and so forth. -(This should be done with Monad.Validate or Data.Validation -- XPileLog isn't as good a fit for this.) -The thought is that if the upfront checks fail, we'll be able to exit gracefully and provide more helpful diagnostics / error messages. - -But for now, we will help ourselves, undeservedly, to the assumption that the L4 input is wellformed. - - TODO: Add property based tests EG: * If you add a new var anywhere in a randomly generated LamAbs HC, the new LE HC should have an 'a' in front of that var @@ -77,50 +72,61 @@ TODO: Add property based tests * Take a randomly generated LamABs HC with vars that potentially have multiple occurrences and generate the LE HC from it. For every var in the HC, the 'a' prefix should only appear once. * There should be as many NLAs as leaves in the HC (modulo lib NLAs) --} +TODO: Think abt doing more on the pre-validation front, e.g. checking the L4 input rules __upfront__ for things like whether it's using unsupported keywords, whether the input is well-formed by the lights of the translation rules, and so forth. +The thought is that if the upfront checks fail, we'll be able to exit gracefully and provide more helpful diagnostics / error messages. +But it might be enough to just do what we are currently doing with the validation in the simplifyL4 step -{------------------------------------------------------------------------------- - L4 rules -> SimpleL4HCs -> LamAbsRules --------------------------------------------------------------------------------} +But for now, we will help ourselves, undeservedly, to the assumption that the L4 input is wellformed. + + +TODO: Add some prevalidation stuff in the future, if time permits: + * e.g., checking for typos (e.g., if there's something in a cell tt's very similar to but not exactly the same as a given var) --- | TODO: Work on implementing this and adding the Monad Validate or Data.Validation stuff instead of Maybe (i.e., rly doing checks upfront and carrying along the error messages and potential warnings) after getting enoguh of the main transpiler out -checkAndRefine :: L4Rules Unvalidated -> Maybe (L4Rules ValidHornls) -checkAndRefine rawrules = do - validatedL4rules <- check rawrules - pure $ refine validatedL4rules +-} {------------------------------------------------------------------------------- Orchestrating and pretty printing -------------------------------------------------------------------------------} --- | Generate LE Nat Lang Annotations from VarsHCs -allNLAs :: [VarsHC] -> HS.HashSet LENatLangAnnot -allNLAs = foldMap nlasFromVarsHC - - -simplifyL4hcs :: [L4.Rule] -> SimpL4 [SimpleL4HC] -simplifyL4hcs = traverse simplifyL4hc . filter isHornlike -{- ^ IMPT TODO: move `filter isHornlike` to prevalidation step when implementing that. - This is a temp hack to avoid crashes due to NatL4 app's poor architecture --} - -xpileSimplifiedL4HCs :: [SimpleL4HC] -> String -xpileSimplifiedL4HCs simpL4HCs = - let hcsVarsMarked = map idVarsInHC simpL4HCs - nlas = sort . HS.toList . allNLAs $ hcsVarsMarked - lehcs = map leHCFromVarsHC hcsVarsMarked - leProgam = MkLEProg { nlas = nlas, leHCs = lehcs } - in doc2str . pretty $ leProgam - toLE :: [L4.Rule] -> String toLE l4rules = - case runValidate . runSimpL4 . simplifyL4hcs $ l4rules of + case runAndValidate . simplifyL4rules . filter isHornlike $ l4rules of Left errors -> errs2str errors - Right hcs -> xpileSimplifiedL4HCs hcs + Right hcs -> xpileSimplifiedL4hcs hcs where errs2str = pure "ERRORS FOUND:\n" <> T.unpack . T.intercalate "\n" . coerce . HS.toList + runAndValidate = runValidate . runSimpL4 +{- ^ TODO: think abt whether to do more on the pre-simplifyL4rules front +-} + +-- | Generate LE Nat Lang Annotations from VarsHCs +getNLATxtResults :: Foldable g => g VarsHC -> FilterResult [NLATxt] +getNLATxtResults = (fmap . fmap $ getNLAtxt) . removeSubsumedOrDisprefed . foldMap nlasFromVarsHC + where + removeSubsumedOrDisprefed :: HS.HashSet NLA -> FilterResult [NLA] + removeSubsumedOrDisprefed = removeDisprefdInEquivUpToVarNames . removeSubsumedByLibTemplates . removeInternallySubsumed + + libTemplatesRegTravs :: [RegexTrav] + libTemplatesRegTravs = regextravifyNLASection libTemplatesTxt + + removeSubsumedByLibTemplates :: Foldable f => f NLA -> HS.HashSet NLA + removeSubsumedByLibTemplates = removeRegexMatches libTemplatesRegTravs + +simplifyL4rules :: [L4.Rule] -> SimpL4 [SimpleL4HC] +simplifyL4rules = sequenceA . concatMap simplifyL4rule + +xpileSimplifiedL4hcs :: [SimpleL4HC] -> String +xpileSimplifiedL4hcs simpL4HCs = + let hcsVarsMarked :: [VarsHC] = map idVarsInHC simpL4HCs + nlatresults = getNLATxtResults hcsVarsMarked + lehcs = map leHCFromVarsHC hcsVarsMarked + leProgam = MkLEProg { keptnlats = nlatresults.kept, subsumednlats = nlatresults.subsumed + , leHCs = lehcs + , commentSym = "%"} + in doc2str . pretty $ leProgam + doc2str :: Doc ann -> String -doc2str = T.unpack . myrender +doc2str = T.unpack . myrender \ No newline at end of file diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Pretty.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Pretty.hs index 7390ba1b7..5d69d7dda 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Pretty.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Pretty.hs @@ -13,19 +13,12 @@ {-# LANGUAGE DataKinds, KindSignatures, AllowAmbiguousTypes #-} {-# LANGUAGE PatternSynonyms, ViewPatterns #-} -module LS.XPile.LogicalEnglish.Pretty where +module LS.XPile.LogicalEnglish.Pretty (LEProg(..), libTemplatesTxt) where -import Text.Pretty.Simple ( pShowNoColor ) +-- import Text.Pretty.Simple ( pShowNoColor ) import Data.Text qualified as T -import Data.HashMap.Strict qualified as HM -import Data.HashSet qualified as HS -import Data.Hashable (Hashable) -import GHC.Generics (Generic) -import Data.Maybe (fromMaybe, listToMaybe) -import Data.HashMap.Strict qualified as Map -import Control.Monad.Identity ( Identity ) -import Data.String (IsString) -import qualified Data.List as L hiding (head, tail) +-- import Data.HashSet qualified as HS +import Data.String() import Prettyprinter ( Doc, @@ -33,32 +26,45 @@ import Prettyprinter comma, hsep, line, - parens, + -- parens, punctuate, list, indent, nest, vsep, (<+>), - viaShow, - encloseSep, + -- viaShow, + -- encloseSep, concatWith, dot) import LS.PrettyPrinter - ( myrender, vvsep, (), () ) + ( vvsep, (), myrender ) import Prettyprinter.Interpolate (__di) +-- import Optics +-- import Data.Set.Optics (setOf) +import Data.List ( sort ) + import LS.XPile.LogicalEnglish.Types -import LS.XPile.LogicalEnglish.ValidateL4Input - (L4Rules, ValidHornls, Unvalidated, - check, refine, loadRawL4AsUnvalid) +import LS.XPile.LogicalEnglish.GenNLAs (NLATxt) +-- import LS.XPile.LogicalEnglish.ValidateL4Input +-- (L4Rules, ValidHornls, Unvalidated, +-- check, refine, loadRawL4AsUnvalid) -import LS.XPile.LogicalEnglish.UtilsLEReplDev -- for prototyping +-- import LS.XPile.LogicalEnglish.UtilsLEReplDev -- for prototyping {------------------------------------------------------------------------------- L4 rules -> SimpleL4HCs -> VRules -------------------------------------------------------------------------------} +data LEProg = MkLEProg { keptnlats :: [NLATxt] + , subsumednlats :: [NLATxt] + -- ^ this wouldn't be *all* of the filtered-out NLATxts -- just those that are equiv up to var names (and have the same number of vars) + , leHCs :: [LEhcPrint] + , commentSym :: T.Text + } + + -- | config record for pretty printing data PrintCfg = MkPrintCfg { numIndentSpcs :: !Int} printcfg :: PrintCfg @@ -131,26 +137,41 @@ instance Pretty TxtAtomicBP where prettyprop = hsep . map pretty endWithDot txt = [__di|#{ txt }.|] - - + instance Pretty LEProg where - pretty :: LEProg -> Doc ann + + {- + Preconditions: + * The Pretty-ing code will not do any 'substantive' filtering: + it expects that any required filtering of any of the constituent parts of LEProg (either the NLATxts or the LEhcs) will already have been done, prior to being passed into `pretty` + -} + pretty :: forall ann. LEProg -> Doc ann pretty MkLEProg{..} = - - let indentedNLAs = endWithDot . nestVsepSeq . punctuate comma . map pretty $ nlas - -- assume list of NLAs is pre-sorted - prettyLEhcs = vvsep $ map ((<> dot) . pretty) leHCs + let + indentedNLAs :: Doc ann = endWithDot . nestVsepSeq . punctuate comma . map pretty . sort $ keptnlats + prettyLEhcs :: Doc ann = vvsep $ map ((<> dot) . pretty) leHCs + {- ^ Assume commas and dots already replaced in NLAs and LEHcs + (can't replace here b/c we sometimes do want the dot, e.g. for numbers) -} + + prependWithCommentOp :: Doc ann -> Doc ann = (pretty commentSym <+>) + removedNLAs :: Doc ann = vsep . map (prependWithCommentOp . pretty) $ subsumednlats + removedNLAsection :: Doc ann = if length subsumednlats > 0 + then + line <> [__di|%% Some of the removed templates (just the equiv-up-to-var-names-with-same-num-vars ones): + #{indentLE removedNLAs}|] + else "" in [__di| the target language is: prolog. the templates are: #{indentedNLAs} - #{nestLE joeLibTemplates} + #{nestLE libTemplates} + #{removedNLAsection} % Predefined stdlib for translating natural4 -> LE. the knowledge base prelude includes: - #{nestLE joeLibHCs} + #{nestLE libHCs} the knowledge base encoding includes: #{nestLE prettyLEhcs} @@ -159,25 +180,37 @@ instance Pretty LEProg where 0 < 1. |] -joeLibTemplates :: Doc ann -joeLibTemplates = +libTemplates :: Doc ann +libTemplates = [__di| + *a var* is after *a var*, + *a var* is before *a var*, + *a var* is strictly after *a var*, + *a var* is strictly before *a var*. *a class*'s *a field* is *a value*, *a class*'s nested *a list of fields* is *a value*, *a class*'s *a field0*'s *a field1* is *a value*, *a class*'s *a field0*'s *a field1*'s *a field2* is *a value*, *a class*'s *a field0*'s *a field1*'s *a field2*'s *a field3* is *a value*, - *a class*'s *a field0*'s *a field1*'s *a field2*'s *a field3*'s *a field4* is *a value*. - + *a class*'s *a field0*'s *a field1*'s *a field2*'s *a field3*'s *a field4* is *a value*, *a number* is a lower bound of *a list*, *a number* is an upper bound of *a list*, *a number* is the minimum of *a number* and the maximum of *a number* and *a number*, the sum of *a list* does not exceed the minimum of *a list*, - *a number* does not exceed the minimum of *a list*. - |] + *a number* does not exceed the minimum of *a list*.|] -joeLibHCs :: Doc ann -joeLibHCs = +libTemplatesTxt :: T.Text +libTemplatesTxt = T.strip . myrender $ libTemplates +{- ^ +>>> libTemplatesTxt +"*a var* is after *a var*,\n*a var* is before *a var*,\n*a var* is strictly after *a var*,\n*a var* is strictly before *a var*.\n*a class*'s *a field* is *a value*,\n*a class*'s nested *a list of fields* is *a value*,\n*a class*'s *a field0*'s *a field1* is *a value*,\n*a class*'s *a field0*'s *a field1*'s *a field2* is *a value*,\n*a class*'s *a field0*'s *a field1*'s *a field2*'s *a field3* is *a value*,\n*a class*'s *a field0*'s *a field1*'s *a field2*'s *a field3*'s *a field4* is *a value*,\n*a number* is a lower bound of *a list*,\n*a number* is an upper bound of *a list*,\n*a number* is the minimum of *a number* and the maximum of *a number* and *a number*,\nthe sum of *a list* does not exceed the minimum of *a list*,\n*a number* does not exceed the minimum of *a list*." + +The T.strip isn't currently necessary, +but it seems like a good thing to include to pre-empt any future issues from accidentally adding whitespace. +-} + +libHCs :: Doc ann +libHCs = [__di| % Note: LE's parsing of [H | T] is broken atm because it transforms that % into [H, T] rather than the Prolog term [H | T]. @@ -188,6 +221,20 @@ joeLibHCs = % if the class's the field is an other class % and the other class's nested the fields is the value. + a d0 is before a d1 + if d0 is a n days before d1 + and n >= 0. + + a d0 is strictly before a d1 + if d0 is a n days before d1 + and n > 0. + + a d0 is after a d1 + if d1 is before d0. + + a d0 is strictly after a d1 + if d1 is strictly before d0. + % Nested accessor predicates. a class's a field0's a field1 is a value if class's field0 is a class0 diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/README.md b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/README.md new file mode 100644 index 000000000..9234a41b9 --- /dev/null +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/README.md @@ -0,0 +1,37 @@ +# README + +If you haven't already read Joe's specification "Denotational semantics of Relational Predicates", you should look at that first. + +This README is meant to be a summary of some of the more nitty-gritty LE details that are important to keep in mind when implementing or understanding the L4 -> LE transpiler. + +## Notes on Logical English + +### NLAs / templates + +From https://github.com/LogicalContracts/LogicalEnglish/blob/main/le_syntax.md + + * A template is a string of words and variable indicators separated by spaces. A word is string of letters or numbers. A variable indicator is a string of words prefixed by `*` and postfixed by `*`. + * An instance of a template is an __expression__ obtained from the template by consistently replacing all the variable indicators by __terms__. + * A __term__ is either + * a variable, + * an expression (which include __constants__) + * or a compound (such as a list). + +From "Logical English for Law and Education": + * A __variable__ is a noun phrase ending with a common noun, such as “person” or “thing” and starting with a determiner such as “a”, “an” or “the”. + * The indefinite determiner, “a” or “an”, introduces the first occurrence of a variable in a sentence. The same noun phrase with the indefinite determiner replaced the definite determiner, “the”, represents all later occurrences of the same variable in the same sentence. + * Any other string of words in the position of an argument place is a __constant__. + + +EGs: + +An argument place can be filled by either a constant or a variable: +``` +LE: Alice likes a person if the person likes logic. +Prolog: likes(alice, A) :- likes(A, logic). +``` + + +### Further reading + +"Logical English for Law and Education" is quite readable / skimmable \ No newline at end of file diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/SimplifyL4.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/SimplifyL4.hs index 7570919e8..bc69ae73d 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/SimplifyL4.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/SimplifyL4.hs @@ -5,14 +5,14 @@ {-# LANGUAGE OverloadedRecordDot, DuplicateRecordFields #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE QuasiQuotes #-} -{-# LANGUAGE DeriveAnyClass #-} +-- {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DataKinds, KindSignatures, AllowAmbiguousTypes, ApplicativeDo #-} -{-# LANGUAGE PatternSynonyms, ViewPatterns #-} +{-# LANGUAGE TypeApplications, GADTs #-} +{-# LANGUAGE PatternSynonyms #-} -module LS.XPile.LogicalEnglish.SimplifyL4 (simplifyL4hc, SimpL4(..), SimL4Error(..)) where +module LS.XPile.LogicalEnglish.SimplifyL4 (simplifyL4rule, SimpL4(..), SimL4Error(..)) where import Data.Text qualified as T import qualified Data.Text.Lazy as T (toStrict) @@ -24,30 +24,21 @@ import Control.Monad.Validate ( MonadValidate (..) , Validate , refute - , dispute - , runValidate ) -import Control.Monad.Identity - -import Data.Bifunctor ( first ) -import Data.HashMap.Strict qualified as HM +import Optics +import Data.Generics.Product.Types (types) import Data.HashSet qualified as HS import Data.Hashable (Hashable) -import GHC.Generics (Generic) -import Data.Maybe (fromMaybe, listToMaybe) -import Data.HashMap.Strict qualified as Map import Data.String (IsString) -import Data.List.NonEmpty qualified as NE -import Debug.Trace (trace) import qualified AnyAll as AA import LS.Types qualified as L4 -import LS.Types (RelationalPredicate(..), RPRel(..), MTExpr(..), BoolStructR(..), BoolStructT) +import LS.Types (RelationalPredicate(..), RPRel(..), MTExpr(..)) import LS.Rule qualified as L4 (Rule(..)) import LS.XPile.LogicalEnglish.Types -import LS.XPile.LogicalEnglish.ValidateL4Input - (L4Rules, ValidHornls, Unvalidated, - loadRawL4AsUnvalid) +-- import LS.XPile.LogicalEnglish.ValidateL4Input +-- (L4Rules, ValidHornls, Unvalidated, +-- loadRawL4AsUnvalid) {- @@ -64,30 +55,38 @@ newtype SimL4Error = MkErr { unpackErr :: T.Text } newtype SimpL4 a = SimpL4 { runSimpL4 :: Validate (HS.HashSet SimL4Error) a } deriving newtype (Functor, Applicative, Monad, MonadValidate (HS.HashSet SimL4Error)) {- ^ TODOs: -1. Use the newtype... -2. Per monad-validate's docs, move away from native linked list when time permits --- use Dual [a] or Seq or even Hashset -3. When time permits, we probably want to switch to validateT add Reader in there for metadata like the location of the erroring rule + * When time permits, we probably want to switch to validateT add Reader in there for metadata like the location of the erroring rule -} --- TODO: Switch over to this, e.g. with coerce or with `over` from new-type generic when have time: simplifyL4rule :: L4Rules ValidHornls -> SimpleL4HC +-- TODO: Switch over to this, e.g. with coerce or with `over` from new-type generic when have time: simplifyL4rule :: L4Rules ValidHornls -> [SimpL4 SimpleL4HC] {- | - Precondition: assume that the input L4 rules only have 1 HC in their Horn clauses. - TODO: This invariant will have to be established in the next iteration of work on this transpiler (mainly by desugaring the 'ditto'/decision table stuff accordingly first) + It's fine if input L4 rules have more than 1 HC in their Horn clauses, as in the ditto syntax --- we get the givens and simplify each of the clauses with those givens + + When writing L4, it’s important that + * there not be empty cells in the head or body between contentful cells, + * there not be `""` below the `DECIDE` --- if there are `""` below the `DECIDE`, then the stuff below will get parsed as distinct Hornlikes but without the givens, and the only way to then figure out what the original givens were will be very hacky / fragile +-} +simplifyL4rule :: L4.Rule -> [SimpL4 SimpleL4HC] +simplifyL4rule l4rule = + let + gvars = gvarsFromL4Rule l4rule + hcs = L4.clauses l4rule + in map (simplifyL4hc gvars) hcs + -- TODO: would probably be good to check upfront for whether there are L4 rules with no clauses and log a warning if such L4rules are found + +{- | an L4 hc, in this context, is taken to be a L4.Rule with ___exactly one__ elt in its `clauses` field -} -simplifyL4hc :: L4.Rule -> SimpL4 SimpleL4HC -simplifyL4hc l4hc = do - let gvars = gvarsFromL4Rule l4hc - clause = Prelude.head $ L4.clauses l4hc - -- ^ this use of head will be safe in the future iteration when we do validation and make sure that there will be exactly one HC in every L4 rule that this fn gets called on - simpHead <- simplifyHead clause.hHead - - case clause.hBody of +simplifyL4hc :: GVarSet -> L4.HornClause2 -> SimpL4 SimpleL4HC +simplifyL4hc gvars l4hc = do + simpHead <- simplifyHead l4hc.hHead + case l4hc.hBody of Nothing -> pure $ MkL4FactHc {fgiven = gvars, fhead = simpHead} + -- ^ There are Facts / HCs with Nothing in the body in the encoding Just rbod -> do simpBod <- simplifyHcBodyBsr rbod pure $ MkL4RuleHc {rgiven = gvars, rhead = simpHead, rbody = simpBod} - -- ^ There are Facts / HCs with Nothing in the body in the encoding + {------------------------------------------------------------------------------- Simplifying L4 HCs @@ -114,23 +113,14 @@ simplifyHead = \case RPnary {} -> refute [MkErr "RPnary in the head of HC not supported."] -{- ^ -An example of an is-num pattern in a RPConstraint -[ HC - { hHead = RPConstraint - [ MTT "total savings" ] RPis - [ MTI 100 ] - , hBody = Just - ( All Nothing - [ Leaf - ( RPConstraint - [ MTT "initial savings" ] RPis - [ MTF 22.5 ] - ) + +{- | Simplifies the RPConstraint in the head of a L4 HC (from an encoding that conforms to the L4->LE spec). +Right now, the only RPConstraint tt can appear in head of L4 HC, according to spec, is RPis -} -{- | -Simplifies the RPConstraint in the head of a L4 HC (from an encoding that conforms to the L4->LE spec). +simpheadRPC :: [MTExpr] -> [MTExpr] -> L4AtomicP +simpheadRPC = simpRPCis +{- | Given left and right exprs that flank an RPIs, return a L4AtomicP where s have been marked accordingly in the numcell, @@ -141,18 +131,33 @@ Two cases of IS-ing to consider: in which case we should convert the NUM to text and warp it in a MkCellIsNum 2. It does not in which case we should replace the IS with 'is' text + + + An example of an is-num pattern in a RPConstraint: + [ HC + { hHead = RPConstraint + [ MTT "total savings" ] RPis + [ MTI 100 ] + , hBody = Just + ( All Nothing + [ Leaf + ( RPConstraint + [ MTT "initial savings" ] RPis + [ MTF 22.5 ] + ) -} -simpheadRPC :: [MTExpr] -> [MTExpr] -> L4AtomicP -simpheadRPC exprsl exprsr = - let lefts = mtes2cells exprsl +simpRPCis :: [MTExpr] -> [MTExpr] -> L4AtomicP +simpRPCis exprsl exprsr = + let lefts = mtes2cells exprsl + txtRPis = "is" :: T.Text in case exprsr of - (MTI int : xs) -> + (MTI int : xs) -> ABPatomic $ lefts <> [MkCellIsNum (int2Text int)] <> mtes2cells xs - (MTF float : xs) -> + (MTF float : xs) -> ABPatomic $ lefts <> [MkCellIsNum (float2Text float)] <> mtes2cells xs _ -> - ABPatomic (lefts <> [MkCellT "is"] <> mtes2cells exprsr) - + ABPatomic (lefts <> [MkCellT txtRPis] <> mtes2cells exprsr) + {------------------------------------------------------------------------------- simplifying body of L4 HC @@ -248,23 +253,12 @@ t IS MIN t1 t2 .. tn: ]]) -} -simplifybodyRP :: forall m. MonadValidate (HS.HashSet SimL4Error) m => RelationalPredicate -> m (BoolPropn L4AtomicP) +simplifybodyRP :: forall m. MonadValidate (HS.HashSet SimL4Error) m => + RelationalPredicate -> m (BoolPropn L4AtomicP) simplifybodyRP = \case RPMT exprs -> pure $ MkTrueAtomicBP (mtes2cells exprs) - -- ^ this is the same for both the body and head - RPConstraint exprsl rel exprsr -> case rel of - RPis -> pure $ simpbodRPC @RPis exprsl exprsr - RPor -> pure $ simpbodRPC @RPor exprsl exprsr - RPand -> pure $ simpbodRPC @RPand exprsl exprsr - _ -> refute [MkErr "shouldn't be seeing other rel ops in rpconstraint in body"] - {- ^ Special case to handle for RPConstraint in the body but not the head: non-propositional connectives / anaphora! - EG: ( Leaf - ( RPConstraint - [ MTT "data breach" , MTT "came about from"] - RPor - [ MTT "luck, fate", MTT "acts of god or any similar event"] - ) - ) -} + -- ^ this is the same for both the body and head + RPConstraint exprsl rel exprsr -> simpbodRPC exprsl exprsr rel -- max / min / sum x where φ(x) TermIsMaxXWhere term φx -> pure $ MkIsOpSuchTtBP (mte2cell term) MaxXSuchThat (mtes2cells φx) @@ -284,12 +278,15 @@ simplifybodyRP = \case RPParamText _ -> refute [MkErr "should not be seeing RPParamText in body"] -termIsNaryOpOf :: (Foldable seq, Traversable seq, MonadValidate (HS.HashSet SimL4Error) m) => OpOf -> MTExpr -> seq RelationalPredicate -> m (BoolPropn L4AtomicP) -termIsNaryOpOf op mteTerm rpargs = pure MkIsOpOf <*> pure term <*> pure op <*> argterms +termIsNaryOpOf :: + (Foldable seq, Traversable seq, MonadValidate (HS.HashSet SimL4Error) m) => + OpOf -> MTExpr -> seq RelationalPredicate -> m (BoolPropn L4AtomicP) +termIsNaryOpOf op mteTerm rpargs = MkIsOpOf term op <$> argterms where term = mte2cell mteTerm argterms = concat <$> traverse atomRPoperand2cell rpargs -atomRPoperand2cell :: forall m. MonadValidate (HS.HashSet SimL4Error) m => RelationalPredicate -> m [Cell] +atomRPoperand2cell :: forall m. MonadValidate (HS.HashSet SimL4Error) m => + RelationalPredicate -> m [Cell] atomRPoperand2cell = \case RPMT mtexprs -> pure $ mtes2cells mtexprs RPParamText _pt -> refute ["not sure if we rly need this case (RPParamText in fn atomRPoperand2cell); erroring as a diagnostic tool"] @@ -299,25 +296,50 @@ atomRPoperand2cell = \case --------- simplifying RPConstraint in body of L4 HC ------------------------------------ --- https://www.tweag.io/blog/2022-11-15-unrolling-with-typeclasses/ -class SimpBodyRPConstrntRPrel (rp :: RPRel) where - simpbodRPC :: [MTExpr] -> [MTExpr] -> BoolPropn L4AtomicP - -instance SimpBodyRPConstrntRPrel RPis where - simpbodRPC exprsl exprsr = AtomicBP (simpheadRPC exprsl exprsr) +simpbodRPC :: forall m. MonadValidate (HS.HashSet SimL4Error) m => + [MTExpr] -> [MTExpr] -> RPRel -> m (BoolPropn L4AtomicP) +simpbodRPC exprsl exprsr = \case + RPis -> pure $ AtomicBP (simpheadRPC exprsl exprsr) --- TODO: Chk with Joe and Meng about RPor and RPand -instance SimpBodyRPConstrntRPrel RPor where - simpbodRPC exprsl exprsr = Or (map f exprsr) - where f exprr = - AtomicBP (ABPatomic (mtes2cells exprsl <> [mte2cell exprr])) + RPor -> pure $ simBodRPCboolop InlRPor exprsl exprsr + RPand -> pure $ simBodRPCboolop InlRPand exprsl exprsr -instance SimpBodyRPConstrntRPrel RPand where - simpbodRPC exprsl exprsr = And (map f exprsr) - where f exprr = - AtomicBP (ABPatomic (mtes2cells exprsl <> [mte2cell exprr])) - --------------------------------------------------------------------------------- + RPlt -> pure $ simBodRPCarithcomp InlRPlt exprsl exprsr + RPlte -> pure $ simBodRPCarithcomp InlRPlte exprsl exprsr + RPgt -> pure $ simBodRPCarithcomp InlRPgt exprsl exprsr + RPgte -> pure $ simBodRPCarithcomp InlRPgte exprsl exprsr + + _ -> refute [MkErr "shouldn't be seeing other rel ops in rpconstraint in body"] + +{- | + -- TODO: Check if this is still required in light of recent discussion + Special case to handle for RPConstraint in the body but not the head: non-propositional connectives / anaphora! + EG: ( Leaf + ( RPConstraint + [ MTT "data breach" , MTT "came about from"] + RPor + [ MTT "luck, fate", MTT "acts of god or any similar event"] + ) + ) -} +simBodRPCboolop :: InlineRPrel RPnonPropAnaph -> [MTExpr] -> [MTExpr] -> BoolPropn L4AtomicP +simBodRPCboolop anaOp exprsl exprsr = + let + withLefts :: MTExpr -> BoolPropn L4AtomicP + withLefts exprr = MkTrueAtomicBP (mtes2cells exprsl <> [mte2cell exprr]) + in case anaOp of + InlRPor -> Or (map withLefts exprsr) + InlRPand -> And (map withLefts exprsr) + +simBodRPCarithcomp :: InlineRPrel RParithComp -> [MTExpr] -> [MTExpr] -> BoolPropn L4AtomicP +simBodRPCarithcomp comp exprsl exprsr = + MkTrueAtomicBP (mtes2cells exprsl <> [MkCellT (comp2txt comp)] <> mtes2cells exprsr) + where + comp2txt :: InlineRPrel RParithComp -> T.Text + comp2txt = \case + InlRPlt -> "<" + InlRPlte -> "<=" + InlRPgt -> ">" + InlRPgte -> ">=" {------------------------------------------------------------------------------- @@ -326,40 +348,61 @@ instance SimpBodyRPConstrntRPrel RPand where ------------ Extracting vars from given ----------------------------------- -extractGiven :: L4.Rule -> [MTExpr] - -- [(NE.NonEmpty MTExpr, Maybe TypeSig)] -extractGiven L4.Hornlike {given=Nothing} = [] --- won't need to worry abt this when we add checking upfront -extractGiven L4.Hornlike {given=Just paramtext} = concatMap (NE.toList . fst) (NE.toList paramtext) -extractGiven _ = trace "not a Hornlike rule, not extracting given" mempty --- also won't need to worry abt this when we add checking + filtering upfront - +{- | Preconditions / invariants: + * The input L4 rule is a Hornlike (TODO: And actually this fn is an eg of where it *would* be helpful to use the phantom type technique to tag that this is a Hornlike in the type) + * Each gvar in the GIVEN declaration should occupy only one cell in the spreadsheet, + so that the head of each NonEmpty MTExpr in the TypedMulti tuple would correspond to the gvar for that spreadsheet row in the declaration + +An example of GIVENs in the AST, as of Sep 8 2023: + given = Just ( + ( MTT "sightg" :| [] + , Just + ( SimpleType TOne "Sighting" ) + ) :| + [ + ( MTT "fun activity" :| [] + , Just + ( SimpleType TOne "Fun Activity" ) + ) + , + ( MTT "perzon" :| [] + , Just + ( SimpleType TOne "Person" ) + ) + ]) +-} +getGivens :: L4.Rule -> [MTExpr] +getGivens l4rule = l4rule.given ^.. types @MTExpr gvarsFromL4Rule :: L4.Rule -> GVarSet -gvarsFromL4Rule rule = let givenMTExprs = extractGiven rule - in HS.fromList $ map gmtexpr2gvar givenMTExprs - where - -- | Transforms a MTExpr tt appears in the GIVEN of a HC to a Gvar. This is importantly different from `mtexpr2text` in that it only converts the cases we use for LE and that we would encounter in the Givens on our LE conventions - gmtexpr2gvar :: MTExpr -> GVar - gmtexpr2gvar = \case - MTT var -> MkGVar var - _ -> error "non-text mtexpr variable names in the GIVEN are not allowed on our LE spec :)" +gvarsFromL4Rule rule = + let givenMTExprs = getGivens rule + in HS.fromList $ map gmtexpr2gvar givenMTExprs + where + -- | Transforms a MTExpr tt appears in the GIVEN of a HC to a Gvar. + gmtexpr2gvar :: MTExpr -> GVar + gmtexpr2gvar = textifyMTE MkGVar + -- TODO: Check upfront for wehther there are non-text mtexpr variable names in the GIVENs; raise a `dispute` if so and print warning as comment in resulting .le ------------ MTExprs to [Cell] ------------------------------------------ +textifyMTE :: (T.Text -> t) -> MTExpr -> t +textifyMTE constrtr = \case + MTT t -> constrtr t + MTI i -> constrtr (int2Text i) + MTF f -> constrtr (float2Text f) + MTB b -> constrtr (T.pack (show b)) + -- TODO: Prob shld check upfront for whether there are any MTB MTExprs in cells; raise a `dispute` if so and print warning as comment in resulting .le + mte2cell :: L4.MTExpr -> Cell -mte2cell = \case - MTT t -> MkCellT t - MTI i -> MkCellT (int2Text i) - MTF f -> MkCellT (float2Text f) - _ -> error "Booleans in cells currently not supported" +mte2cell = textifyMTE MkCellT -- | convenience function for when `map mte2cell` too wordy mtes2cells :: [L4.MTExpr] -> [Cell] -mtes2cells = map mte2cell +mtes2cells = fmap mte2cell ------ Other misc utils -{-| From https://github.com/haskell/text/issues/218 lol +{-| From https://github.com/haskell/text/issues/218 Thanks to Jo Hsi for finding these! -} float2Text :: RealFloat a => a -> T.Text @@ -371,7 +414,7 @@ int2Text = T.toStrict . B.toLazyText . B.decimal --- misc notes -- wrapper :: L4Rules ValidHornls -> [(NE.NonEmpty MTExpr, Maybe TypeSig)] --- wrapper = concat . map extractGiven . coerce +-- wrapper = concat . map getGivens . coerce {- a more ambitious version, for the future: data SimL4Error = Error { errInfo :: SimL4ErrorInfo -- in the future: errLoc :: ... diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Types.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Types.hs index 267c4d173..f48b500c2 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Types.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Types.hs @@ -1,19 +1,23 @@ {-# OPTIONS_GHC -W #-} {-# OPTIONS_GHC -Wno-unused-top-binds #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedRecordDot, DuplicateRecordFields#-} +{-# LANGUAGE OverloadedRecordDot, DuplicateRecordFields, NoFieldSelectors #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DerivingVia #-} -{-# LANGUAGE PatternSynonyms, DataKinds #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternSynonyms, ViewPatterns, DataKinds, GADTs #-} +{-# LANGUAGE TemplateHaskell, QuasiQuotes #-} module LS.XPile.LogicalEnglish.Types ( -- Common types OrigVarName , BoolPropn(..) -- L4-related types + , InlineRPrel(..) + , RPnonPropAnaph + , RParithComp + , RPothers , GVar(..) , GVarSet , Cell(..) @@ -32,6 +36,7 @@ module LS.XPile.LogicalEnglish.Types ( -- Intermediate representation types , TemplateVar(..) + , _MatchGVar, _EndsInApos, _IsNum , OrigVarPrefix , OrigVarSeq , VarsHC(MkVarsFact, @@ -44,11 +49,11 @@ module LS.XPile.LogicalEnglish.Types ( , VarsRule , AtomicPWithVars , VCell(..) + , _TempVar, _Pred -- LE-related types , LEhcCell(..) , LEVar(..) - , NLACell(..) , NormdVars , NormalizedVar(..) @@ -56,7 +61,6 @@ module LS.XPile.LogicalEnglish.Types ( , TxtAtomicBP , LERule - , LENatLangAnnot(..) , LETemplateTxt(..) , UnivStatus(..) @@ -65,9 +69,6 @@ module LS.XPile.LogicalEnglish.Types ( , LEFactForPrint , LERuleForPrint , LEhcPrint(..) - - -- Configuration and LE-specific consts - , LEProg(..) ) where @@ -76,10 +77,15 @@ import Data.HashSet qualified as HS import Data.Hashable (Hashable) import GHC.Generics (Generic) +-- import Data.Coerce (coerce) + +-- import Data.Sequence.NonEmpty qualified as NESeq +-- import Data.Sequence qualified as Seq (fromList) import Data.String (IsString) -- import LS.Rule as L4 (Rule(..)) import Prettyprinter(Pretty) - + +import Optics.TH {- | Misc notes @@ -137,6 +143,28 @@ data OpSuchTt = MaxXSuchThat {------------------------------------------------------------------------------- The L4-related data types -------------------------------------------------------------------------------} +data RPnonPropAnaph +data RParithComp +data RPothers + +{- | + Some RPs are supported by converting them to cases in other data structures + Some RPs are, by contrast, 'inlined'; the following are the 'inline' RPRels tt are supported by L4 -> LE transpiler + + Having a GADT like this is useful for various reasons. + For example, it allows us to mark explicitly in the types which of the various RPRel types a function uses (because often, e.g., we only use a specific proper subset), + and to avoid incomplete-pattern-matching errors from the compiler (i.e., to actually get the sort of compile-time guarantees we'd like) +-} +data InlineRPrel a where + InlRPlt :: InlineRPrel RParithComp + InlRPlte :: InlineRPrel RParithComp + InlRPgt :: InlineRPrel RParithComp + InlRPgte :: InlineRPrel RParithComp + + InlRPor :: InlineRPrel RPnonPropAnaph + InlRPand :: InlineRPrel RPnonPropAnaph + + InlRPelem :: InlineRPrel RPothers -- | vars in the GIVEN of an L4 HC newtype GVar = MkGVar T.Text @@ -189,13 +217,14 @@ pattern MkL4FactHc{fgiven, fhead} = , head = fhead}) {-# COMPLETE MkL4FactHc, MkL4RuleHc #-} + {------------------------------------------------------------------------------- Types for L4 -> LE / intermediate representation -------------------------------------------------------------------------------} -- | we only need text / strs to capture what the original var 'names' were, because what we will eventually be printing out strings! type OrigVarName = T.Text - type OrigVarPrefix = T.Text + {-| TemplateVars mark the places where we'd instantiate / substitute in the VCell / condition template to get either a natural language annotation or a LE rule. They store the original text / var name in the cell so that that text can be transformed as needed when instantiating the VCell. -} data TemplateVar = MatchGVar !OrigVarName @@ -207,6 +236,8 @@ data TemplateVar = MatchGVar !OrigVarName -- This case should be treated differently depending on whether trying to generate a NLA or LE rule deriving stock (Eq, Ord, Show) deriving (Generic, Hashable) +makePrisms '' TemplateVar + type TVarSet = HS.HashSet TemplateVar {- Got this error @@ -224,15 +255,12 @@ from https://hackage.haskell.org/package/hashable-generics-1.1.7/docs/Data-Hasha type OrigVarSeq = [TemplateVar] -- TODO: Look into replacing [] with a more general Sequence type? ---TODO: Edit this / think thru it again when we get to this on Mon {-| Intermediate representation from which we can generate either LE natl lang annotations or LE rules. Things to note / think about: * One difference between NLAs and making LE rules: Not all L4AtomicBPs will need to be converted to NLAs --- e.g., t1 is different from t2 already has a NLA in the fixed lib. By contrast, we do need to be able to convert every L4AtomicP to a LE condition. -* - -} data VarsHC = VhcF VarsFact | VhcR VarsRule deriving stock (Eq, Ord, Show) @@ -262,52 +290,33 @@ pattern MkVarsRule{vrhead, vrbody} But I wanted to retain information about what the original variant of AtomicBPropn was for p printing afterwards. Also, it's helpful to have tt info for generating NLAs, since the only time we need to generate an NLA is when we have a `baseprop` / `VCell` --- we don't need to do tt for ABPIsDiffFr and ABPIsOpOf. - To put it another way: NLAs are generated *from*, and only from, LamAbsBases. + + TODO: add more comments / references to the relevant code -} type AtomicPWithVars = AtomicBPropn VCell -{-| This is best understood in the context of the other VarsX data types -} +{-| This is best understood in the context of the other VarsX data types -} data VCell = TempVar TemplateVar | Pred !T.Text deriving stock (Eq, Ord, Show) + deriving (Generic, Hashable) +makePrisms ''VCell {------------------------------------------------------------------------------- LE data types -------------------------------------------------------------------------------} -data NLACell = MkParam !T.Text - | MkNonParam !T.Text - deriving stock (Eq, Ord, Show) - -instance Semigroup NLACell where - MkParam l <> MkParam r = MkNonParam $ l <> r - MkParam l <> MkNonParam r = MkNonParam $ l <> r - MkNonParam l <> MkParam r = MkNonParam $ l <> r - MkNonParam l <> MkNonParam r = MkNonParam $ l <> r -instance Monoid NLACell where - mempty = MkNonParam "" - -{- Another option, courtesy of `Mango IV.` from the Functional Programming discord: - deriving stock Generic - deriving (Semigroup, Monoid) via Generically NLACell -This requires a base that's shipped with ghc 94 or newer and and import Generically. -But sticking to handwritten instance b/c it's easy enough, and to make the behavior explicit -} - -newtype LENatLangAnnot = MkNLA T.Text - deriving stock (Show) - deriving newtype (Eq, Ord, IsString, Hashable, Pretty) - ---------------- For generating template instances / non-NLAs data LEVar = VarApos !OrigVarPrefix | VarNonApos !OrigVarName deriving stock (Eq, Ord, Show) -{-| The first prep step for generating TemplateTxts from LamAbs stuff involves simplifying LamAbsCells +{-| The first prep step for generating LETemplateTxt from the intermediate stuff involves simplifying VCells -} data LEhcCell = VarCell LEVar | NotVar !T.Text - -- ^ i.e., not smtg tt we will ever need to check if we need to prefix with an 'a' + -- | i.e., not smtg tt we will ever need to check if we need to prefix with an 'a' deriving stock (Eq, Ord, Show) deriving (Generic) @@ -345,18 +354,8 @@ type LERule = BaseRule (AtomicBPropn LEhcCell) type RuleWithUnivsMarked = BaseRule (AtomicBPropn UnivStatus) type LERuleForPrint = BaseRule TxtAtomicBP ------ for pretty printing ------------------------------------------------------- - -data LEProg = MkLEProg { nlas :: [LENatLangAnnot] - , leHCs :: [LEhcPrint] - } --- docHeader :: !T.Text --- , nlasHeader :: !T.Text --- , libHCsHeader :: !T.Text --- , libHCs :: forall ann. Doc ann --- , hcsHeader :: !T.Text --- to remove once we are sure we won't want to go back to this way of doing this: -- LE Rule -- data LERule a b = diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Utils.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Utils.hs new file mode 100644 index 000000000..d476ba4db --- /dev/null +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/Utils.hs @@ -0,0 +1,33 @@ +{-# OPTIONS_GHC -W #-} +{-# OPTIONS_GHC -Wno-unused-top-binds #-} + +-- {-# LANGUAGE LambdaCase #-} +-- {-# LANGUAGE OverloadedRecordDot, DuplicateRecordFields, RecordWildCards, NoFieldSelectors #-} +-- {-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE TypeFamilies #-} +-- {-# LANGUAGE TemplateHaskell, QuasiQuotes #-} + +module LS.XPile.LogicalEnglish.Utils (setInsert) where + +-- import Data.Text qualified as T +-- import Data.HashSet qualified as HS +-- import Data.Hashable (Hashable, hashWithSalt, hashUsing) +import Optics + +{- Optics provide a nice interface over concrete data structures, +but they can sometimes be hard to read / understand. +The following functions provide more readable variants of those functions +-} + +-- $setup +-- >>> import Data.HashSet qualified as HS + +{- | Insert into __any__ kind of set + +Examples: + +>>> setInsert 5 (HS.fromList [1..3]) +fromList [1,2,3,5] +-} +setInsert :: forall {a}. (IxValue a ~ (), At a) => Index a -> a -> a +elt `setInsert` set = set & at' elt ?~() diff --git a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/ValidateL4Input.hs b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/ValidateL4Input.hs index 719dacfb8..da9ef1d34 100644 --- a/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/ValidateL4Input.hs +++ b/lib/haskell/natural4/src/LS/XPile/LogicalEnglish/ValidateL4Input.hs @@ -1,4 +1,4 @@ -{-# OPTIONS_GHC -W #-} +-- {-# OPTIONS_GHC -W #-} {-# LANGUAGE OverloadedStrings, RecordWildCards, LambdaCase #-} module LS.XPile.LogicalEnglish.ValidateL4Input