Skip to content

Commit

Permalink
Merge pull request #169 from ambiata/topic/date
Browse files Browse the repository at this point in the history
Add date field check
  • Loading branch information
olorin authored Jul 19, 2016
2 parents 3d0f0d0 + 5424f22 commit d8008fe
Show file tree
Hide file tree
Showing 13 changed files with 214 additions and 170 deletions.
20 changes: 16 additions & 4 deletions ambiata-warden.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,16 @@ library
-Wall

cc-options:
-O3 -Wall -Werror

-O3
-Wall
-Werror
-Wbad-function-cast
-Wnested-externs
-Wstrict-prototypes
-Wmissing-prototypes
-Wmissing-declarations
-Waggregate-return

hs-source-dirs:
src gen

Expand Down Expand Up @@ -178,8 +186,9 @@ executable warden-gen
, resourcet == 1.1.*
, semigroups
, temporary == 1.2.*
, transformers >= 0.3 && < 5
, text
, time == 1.5.*
, transformers >= 0.3 && < 5
, unix >= 2.7.1 && < 2.7.3
, vector == 0.10.*

Expand Down Expand Up @@ -215,10 +224,11 @@ test-suite test
, filepath == 1.3.*
, ieee754 == 0.7.*
, lens == 4.9.*
, semigroups
, quickcheck-instances == 0.3.*
, semigroups
, temporary == 1.2.*
, text
, time == 1.5.*
, vector == 0.10.*

test-suite test-io
Expand Down Expand Up @@ -265,6 +275,7 @@ test-suite test-io
, semigroups
, temporary
, text == 1.2.*
, time == 1.5.*
, transformers >= 0.3 && < 5
, unix >= 2.7.1 && < 2.7.3
, vector == 0.10.*
Expand Down Expand Up @@ -311,6 +322,7 @@ benchmark bench
, semigroups
, temporary
, text == 1.2.*
, time == 1.5.*
, transformers >= 0.3 && < 5
, unix >= 2.7.1 && < 2.7.3
, vector == 0.10.*
Expand Down
144 changes: 0 additions & 144 deletions ambiata-warden.lock-7.8.4

This file was deleted.

17 changes: 16 additions & 1 deletion bench/bench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ prepareBools = fmap (fmap T.encodeUtf8) . generate' (Deterministic 555) (GenSize
prepareNonBools :: IO [ByteString]
prepareNonBools = fmap (fmap T.encodeUtf8) . generate' (Deterministic 666) (GenSize 100) $ vectorOf 100 renderedNonBool

-- | Build the list of 100 values drawn from 'renderedDate' that the
-- @checkFieldDate/date/100@ benchmark runs over.
prepareDates :: IO [ByteString]
prepareDates =
  generate' (Deterministic 555) (GenSize 100) (vectorOf 100 renderedDate)

-- | Build the list of 100 values drawn from 'renderedNonDate' that the
-- @checkFieldDate/non-date/100@ benchmark runs over.
prepareNonDates :: IO [ByteString]
prepareNonDates =
  generate' (Deterministic 666) (GenSize 100) (vectorOf 100 renderedNonDate)

benchABDecode :: FileFormat -> NonEmpty ViewFile -> IO ()
benchABDecode ff vfs =
let sep = Separator . fromIntegral $ ord '|'
Expand Down Expand Up @@ -150,6 +156,9 @@ benchToRow = toRow . Right
benchCheckFieldBool :: [ByteString] -> [Bool]
benchCheckFieldBool = fmap checkFieldBool

-- | Run 'checkFieldDate' over every field, keeping one verdict per input
-- in the same order.
benchCheckFieldDate :: [ByteString] -> [Bool]
benchCheckFieldDate fields = map checkFieldDate fields

main :: IO ()
main = do
withTempDirectory "." "warden-bench-" $ \root ->
Expand All @@ -160,11 +169,17 @@ main = do
, bench "decode/delimited-text/1000" $ nfIO (benchABDecode DelimitedText vfs)
, bench "decode/toRow/100" $ nf benchToRow bss
]
, env ((,,) <$> prepareRow <*> prepareBools <*> prepareNonBools) $ \ ~(rs, bools, nonbools) ->
, env ((,,,,) <$> prepareRow
<*> prepareBools
<*> prepareNonBools
<*> prepareDates
<*> prepareNonDates) $ \ ~(rs, bools, nonbools, dates, nondates) ->
bgroup "field-parsing" $ [
bench "parseField/200" $ nf benchFieldParse rs
, bench "checkFieldBool/boolean/100" $ nf benchCheckFieldBool bools
, bench "checkFieldBool/non-boolean/100" $ nf benchCheckFieldBool nonbools
, bench "checkFieldDate/date/100" $ nf benchCheckFieldDate dates
, bench "checkFieldDate/non-date/100" $ nf benchCheckFieldDate nondates
]
, env prepareFolds $ \ ~(rs, ts, piis, nonPiis, bs100, bs10) ->
bgroup "folds" $ [
Expand Down
66 changes: 64 additions & 2 deletions cbits/field.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#include <string.h>

#include "field.h"
#include "predicates.h"

bool warden_field_bool(char *buf, size_t n) {
/* Returns TRUE if the buffer we're passed contains a bool, otherwise
* FALSE. */
bool warden_field_bool(const char *buf, size_t n) {
/* little-endian "false" */
static const int64_t false_bits = 0x00000065736c6166;
static const int64_t false_mask = 0x000000ffffffffff;
Expand Down Expand Up @@ -48,7 +50,7 @@ bool warden_field_bool(char *buf, size_t n) {
in scientific notation.
Otherwise returns non_numeric_field. */
numeric_field warden_field_numeric(char *buf, size_t n) {
numeric_field warden_field_numeric(const char *buf, size_t n) {
size_t i = 0;
int preradix_digits = 0; /* digits before the radix point */
int exponent_digits = 0; /* digits in the exponent (scientific notation) */
Expand Down Expand Up @@ -114,3 +116,63 @@ numeric_field warden_field_numeric(char *buf, size_t n) {
/* just cruft on the end after all */
return non_numeric_field;
}


/* Returns true if c is one of the characters accepted between the
   components of a date: '-', '/' or '.'. */
static inline bool is_separator(char c) {
	const bool dash = (c == '-');
	const bool slash = (c == '/');
	const bool dot = (c == '.');

	return (dash || slash || dot);
}

/* Match a date with a 20xx year in big-endian (year, month, day) order,
   either with separators (YYYY-MM-DD) or without (YYYYMMDD).

   Only the shape is checked: month and day digits are not range-checked,
   the two separators are not required to be the same character, and any
   bytes following the matched prefix are ignored.

   buf: field data; it does not need to be aligned.
   n:   number of readable bytes at buf.

   Returns TRUE if the field begins with such a date, otherwise FALSE. */
static inline bool match_ymd(const char *buf, size_t n) {
	/* 0xc0 = 0x80 | 0x40 - if these bits are set, the byte is too
	   high to be a digit or a separator. The low two bytes are
	   compared in full. */
	static const uint64_t ymd_mask = 0xc0c0c0c0c0c0ffffULL;

	/* No 0x80 or 0x40 set anywhere, and the first two bytes must be
	   "20". The constants are laid out for a little-endian load:
	   buf[0] ('2', 0x32) is the low byte and buf[1] ('0', 0x30) the
	   next one. */
	static const uint64_t ymd_bits = 0x0000000000003032ULL;

	uint64_t head;

	/* The shortest thing we're willing to call a "date" at this
	   point is YYYYMMDD. This also guarantees the eight-byte load
	   below stays inside the buffer. */
	if (n < 8) {
		return FALSE;
	}

	/* Load the first eight bytes with memcpy rather than by
	   dereferencing a casted pointer: buf has no alignment guarantee
	   and reading a char buffer through an int64_t lvalue is
	   undefined behaviour. Compilers turn this into a single load. */
	memcpy(&head, buf, sizeof(head));

	/* First, we drop everything which doesn't start with "20" and
	   have eight bytes compatible with a YYYYxMMxDD format. */
	if ((head & ymd_mask) != ymd_bits) {
		return FALSE;
	}
	if (!(is_digit(buf[2]) && is_digit(buf[3]))) {
		return FALSE;
	}

	/* YYYY-MM-DD: needs two more bytes than the unseparated form, so
	   the length is re-checked before buf[8] and buf[9] are read. */
	if (is_separator(buf[4])) {
		return (n >= 10 &&
			is_digit(buf[5]) &&
			is_digit(buf[6]) &&
			is_separator(buf[7]) &&
			is_digit(buf[8]) &&
			is_digit(buf[9]));
	}

	/* YYYYMMDD */
	return (is_digit(buf[4]) &&
		is_digit(buf[5]) &&
		is_digit(buf[6]) &&
		is_digit(buf[7]));
}

/* Decide whether the contents of a field look like a date.

   At present the only check performed is match_ymd(), i.e. fields
   beginning with a big-endian date.

   FIXME: more supported date formats

   buf: field data.
   n:   number of bytes at buf.

   Returns TRUE if the field looks like a date, otherwise FALSE. */
bool warden_field_datetime(const char *buf, size_t n) {
	const bool is_ymd = match_ymd(buf, n);

	return is_ymd;
}
6 changes: 4 additions & 2 deletions cbits/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ typedef enum _numeric_field {
real_field = 2
} numeric_field;

bool warden_field_bool(char *, size_t);
bool warden_field_bool(const char *, size_t);

numeric_field warden_field_numeric(char *, size_t);
numeric_field warden_field_numeric(const char *, size_t);

bool warden_field_datetime(const char *, size_t);

#endif
Loading

0 comments on commit d8008fe

Please sign in to comment.