Skip to content

Commit

Permalink
Fixes #414 - categorical maps are integers now by default
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Sep 12, 2024
1 parent 7802024 commit 07a31c7
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/tech/v3/dataset/categorical.clj
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,15 @@ Non integers found: " (vec bad-mappings)))))
m
(set/unique (ds-base/column dataset colname)))
colname
(or res-dtype :float64))))
(or res-dtype :int64))))



(defn transform-categorical-map
"Apply a categorical mapping transformation fit with fit-categorical-map."
[dataset fit-data]
(let [colname (:src-column fit-data)
result-datatype (or (:result-datatype fit-data) :float64)
result-datatype (or (:result-datatype fit-data) :int64)
lookup-table (:lookup-table fit-data)
column (ds-base/column dataset colname)
missing (ds-proto/missing column)
Expand Down Expand Up @@ -231,7 +231,7 @@ user> (ds-cat/dataset->categorical-maps catds)
dataset (dissoc dataset src-column)
n-elems (dtype/ecount column)
op-space (casting/simple-operation-space (dtype-proto/operational-elemwise-datatype column))]
(merge dataset
(merge dataset
(->> one-hot-table
(lznc/map
(fn [[k v]]
Expand Down
4 changes: 4 additions & 0 deletions test/data/local_date.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
{"test": 1, "time-period": "2024-06-20"},
{"test": 2, "time-period": "2024-06-21"},
{"test": 3, "time-period": "2024-06-22"}]
10 changes: 10 additions & 0 deletions test/tech/v3/dataset/categorical_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,13 @@
(dtype/emap val-map :keyword col))))
(ds/categorical->number cf/categorical)
(ds/column "Survived")))))
;; Checks that `ds/categorical->number`, called without an explicit result
;; datatype, encodes the four distinct keywords in :y as the integer codes
;; 0 through 3.
(deftest categorical-assignments-are-integers
  (let [dataset (ds/->dataset {:x1 [1 2 4 5 6 5 6 7]
                               :x2 [5 6 6 7 8 2 4 6]
                               :y [:a :b :b :a :c :a :b :d]})
        encoded (ds/categorical->number dataset [:y])
        codes (set (distinct (get encoded :y)))]
    ;; Clojure's `=` does not treat 0 and 0.0 as equal, so comparing against a
    ;; set of integer literals fails if the codes come back as doubles.
    (is (= #{0 1 2 3} codes))))

0 comments on commit 07a31c7

Please sign in to comment.