Skip to content

Commit

Permalink
Test and benchmark for date field check
Browse files Browse the repository at this point in the history
  • Loading branch information
olorin committed Jul 19, 2016
1 parent e5deeaa commit 9fa8b76
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 6 deletions.
8 changes: 6 additions & 2 deletions ambiata-warden.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,9 @@ executable warden-gen
, resourcet == 1.1.*
, semigroups
, temporary == 1.2.*
, transformers >= 0.3 && < 5
, text
, time == 1.5.*
, transformers >= 0.3 && < 5
, unix >= 2.7.1 && < 2.7.3
, vector == 0.10.*

Expand Down Expand Up @@ -223,10 +224,11 @@ test-suite test
, filepath == 1.3.*
, ieee754 == 0.7.*
, lens == 4.9.*
, semigroups
, quickcheck-instances == 0.3.*
, semigroups
, temporary == 1.2.*
, text
, time == 1.5.*
, vector == 0.10.*

test-suite test-io
Expand Down Expand Up @@ -273,6 +275,7 @@ test-suite test-io
, semigroups
, temporary
, text == 1.2.*
, time == 1.5.*
, transformers >= 0.3 && < 5
, unix >= 2.7.1 && < 2.7.3
, vector == 0.10.*
Expand Down Expand Up @@ -319,6 +322,7 @@ benchmark bench
, semigroups
, temporary
, text == 1.2.*
, time == 1.5.*
, transformers >= 0.3 && < 5
, unix >= 2.7.1 && < 2.7.3
, vector == 0.10.*
Expand Down
17 changes: 16 additions & 1 deletion bench/bench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ prepareBools = fmap (fmap T.encodeUtf8) . generate' (Deterministic 555) (GenSize
prepareNonBools :: IO [ByteString]
prepareNonBools = fmap (fmap T.encodeUtf8) . generate' (Deterministic 666) (GenSize 100) $ vectorOf 100 renderedNonBool

prepareDates :: IO [ByteString]
prepareDates = generate' (Deterministic 555) (GenSize 100) $ vectorOf 100 renderedDate

prepareNonDates :: IO [ByteString]
prepareNonDates = generate' (Deterministic 666) (GenSize 100) $ vectorOf 100 renderedNonDate

benchABDecode :: FileFormat -> NonEmpty ViewFile -> IO ()
benchABDecode ff vfs =
let sep = Separator . fromIntegral $ ord '|'
Expand Down Expand Up @@ -150,6 +156,9 @@ benchToRow = toRow . Right
benchCheckFieldBool :: [ByteString] -> [Bool]
benchCheckFieldBool = fmap checkFieldBool

benchCheckFieldDate :: [ByteString] -> [Bool]
benchCheckFieldDate = fmap checkFieldDate

main :: IO ()
main = do
withTempDirectory "." "warden-bench-" $ \root ->
Expand All @@ -160,11 +169,17 @@ main = do
, bench "decode/delimited-text/1000" $ nfIO (benchABDecode DelimitedText vfs)
, bench "decode/toRow/100" $ nf benchToRow bss
]
, env ((,,) <$> prepareRow <*> prepareBools <*> prepareNonBools) $ \ ~(rs, bools, nonbools) ->
, env ((,,,,) <$> prepareRow
<*> prepareBools
<*> prepareNonBools
<*> prepareDates
<*> prepareNonDates) $ \ ~(rs, bools, nonbools, dates, nondates) ->
bgroup "field-parsing" $ [
bench "parseField/200" $ nf benchFieldParse rs
, bench "checkFieldBool/boolean/100" $ nf benchCheckFieldBool bools
, bench "checkFieldBool/non-boolean/100" $ nf benchCheckFieldBool nonbools
, bench "checkFieldDate/date/100" $ nf benchCheckFieldDate dates
, bench "checkFieldDate/non-date/100" $ nf benchCheckFieldDate nondates
]
, env prepareFolds $ \ ~(rs, ts, piis, nonPiis, bs100, bs10) ->
bgroup "folds" $ [
Expand Down
6 changes: 4 additions & 2 deletions cbits/field.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ static inline bool is_separator(char c) {
precondition: n >= 8 */
static inline bool match_ymd(char *buf, size_t n) {
/* 0xc0 = 0x80 & 0x40 - if these bits are set, the byte is too
/* 0xc0 = 0x80 | 0x40 - if these bits are set, the byte is too
high to be a digit or a separator. */
static const int64_t ymd_mask = 0xc0c0c0c0c0c0ffff;

Expand All @@ -138,7 +138,7 @@ static inline bool match_ymd(char *buf, size_t n) {

/* First, we drop everything which doesn't start with '20' and
have eight bytes compatible with a YYYYxMMxDD format. */
if (!((*p & ymd_mask) == ymd_bits) && is_digit(buf[2]) && is_digit(buf[3])) {
if (!(((*p & ymd_mask) == ymd_bits) && is_digit(buf[2]) && is_digit(buf[3]))) {
return FALSE;
}

Expand Down Expand Up @@ -166,6 +166,8 @@ static inline bool match_ymd(char *buf, size_t n) {
Currently checks:
- Fields beginning with big-endian dates.
FIXME: more supported date formats
*/
bool warden_field_datetime(char *buf, size_t n) {
/* The shortest thing we're willing to call a "date" at this
Expand Down
60 changes: 59 additions & 1 deletion test/Test/Warden/Arbitrary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ import qualified Data.ByteString.Lazy as BL
import Data.Char
import Data.Csv
import qualified Data.Set as S
import Data.List (nub)
import Data.List ((\\), nub)
import qualified Data.List as L
import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NE
import Data.String (String)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8, decodeUtf8', encodeUtf8)
import Data.Time.Calendar (Day(..))
import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Data.Time.Format (defaultTimeLocale, formatTime)
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Data.Word
Expand All @@ -36,6 +40,7 @@ import Test.Delorean.Arbitrary ()
import Test.QuickCheck (Small(..), NonNegative(..))
import Test.QuickCheck (Arbitrary, Gen, elements, choose, listOf, listOf1)
import Test.QuickCheck (vectorOf, arbitrary, suchThat, oneof, sized)
import Test.QuickCheck (frequency)
import Test.QuickCheck.Instances ()

import Text.Printf (printf)
Expand Down Expand Up @@ -792,3 +797,56 @@ instance Arbitrary PIICheckType where
pure NoPIIChecks
, PIIChecks <$> arbitrary
]

genSensibleDateTime :: Gen UTCTime
genSensibleDateTime = do
-- sometime this century
days <- ModifiedJulianDay <$> choose (142 * 365, 192 * 365)
secs <- secondsToDiffTime <$> choose (0, 86401)
pure $ UTCTime days secs

renderedDate :: Gen BS.ByteString
renderedDate = do
dt <- genSensibleDateTime
render <- frequency [(1, renderDateNoSeparator), (9, renderDateSeparator)]
pure . BSC.pack $ render dt

renderDateNoSeparator :: Gen (UTCTime -> String)
renderDateNoSeparator = do
tp <- timePart
pure (formatTime defaultTimeLocale ("%Y%m%d" <> tp))

renderDateSeparator :: Gen (UTCTime -> String)
renderDateSeparator = do
tp <- timePart
sep <- elements ["-", ".", "/"]
pure (formatTime defaultTimeLocale ((concat ["%Y", sep, "%m", sep, "%d"]) <> tp))

timePart :: Gen String
timePart = elements [
""
, "%H%M"
, "%H:%M"
, "%H%M%S"
, "%H:%M:%S"
, "T%H:%M:%SZ"
]


renderedNonDate :: Gen BS.ByteString
renderedNonDate = do
x <- fmap BS.pack $ listOf1 noDigits
dt <- renderedDate
oneof [
pure $ x <> dt
, reversed
, encodeUtf8 <$> elements muppets
]
where
reversed = do
dt <- genSensibleDateTime
render <- renderDateSeparator
pure . BSC.pack . reverse $ render dt

noDigits =
elements $ [0x00..0xff] \\ [0x30..0x39]
10 changes: 10 additions & 0 deletions test/Test/Warden/Parser/Field.hs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ prop_numericFieldP_neg = forAll (elements muppets) $ \t ->
let r = parseOnly numericFieldP $ T.encodeUtf8 t in
isLeft r === True

prop_checkFieldDate_pos :: Property
prop_checkFieldDate_pos = forAll renderedDate $ \bs ->
let r = checkFieldDate bs in
r === True

prop_checkFieldDate_neg :: Property
prop_checkFieldDate_neg = forAll renderedNonDate $ \bs ->
let r = checkFieldDate bs in
r === False

return []
tests :: IO Bool
tests = $forAllProperties $ quickCheckWithResult (stdArgs { maxSuccess = 1000 })

0 comments on commit 9fa8b76

Please sign in to comment.