Skip to content

Commit

Permalink
debug sparse
Browse files Browse the repository at this point in the history
  • Loading branch information
behrica committed Oct 1, 2024
1 parent 49c906f commit 9a0fd7a
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 16 deletions.
19 changes: 12 additions & 7 deletions src/scicloj/ml/xgboost.clj
Original file line number Diff line number Diff line change
Expand Up @@ -174,16 +174,17 @@ subsample may be set to as low as 0.1 without loss of model accuracy. Note that

(defn- sparse->labeled-point
  "Converts one sparse row into an XGBoost `LabeledPoint`.

  - `sparse`: a `SparseArray` whose entries provide the index (`.i`) and
    value (`.x`) of each stored element.
  - `target`: the label passed through to the `LabeledPoint` constructor.
  - `n-sparse-columns`: the size passed through to the `LabeledPoint`
    constructor.

  Returns a `LabeledPoint` built from an int array of entry indices and a
  float array of entry values, in iteration order."
  [^SparseArray sparse target n-sparse-columns]
  (let [;; Realize the entries eagerly and copy index/value out of each entry
        ;; while iterating, so both arrays below are built from one pass over
        ;; the underlying iterator.
        x-i-s (mapv
               #(hash-map :i (.i ^SparseArray$Entry %) :x (.x ^SparseArray$Entry %))
               (iterator-seq
                (.iterator sparse)))]
    (LabeledPoint. target
                   n-sparse-columns
                   (into-array Integer/TYPE (map :i x-i-s))
                   (into-array Float/TYPE (map :x x-i-s)))))

(defn- sparse-feature->dmatrix [feature-ds target-ds sparse-column n-sparse-columns]
(defn sparse-feature->dmatrix [feature-ds target-ds sparse-column n-sparse-columns]
(DMatrix.
(.iterator
^Iterable (map
Expand All @@ -194,6 +195,7 @@ subsample may be set to as low as 0.1 without loss of model accuracy. Note that
nil))



(defn- dataset->labeled-point-iterator
"Create an iterator to labeled points from a possibly quite large
sequence of maps. Sets expected length to length of first entry"
Expand Down Expand Up @@ -297,11 +299,14 @@ c/xgboost4j/java/XGBoost.java#L208"))
params (->> cleaned-options
;;Adding in some defaults
(merge
{:alpha 0.0
:eta 0.3
:lambda 1.0
:max-depth 6
:subsample 0.87}
{
;; :alpha 0.0
;; :eta 0.3
;; :lambda 1.0
;; :max-depth 6
;; :subsample 0.87

}

cleaned-options
(when label-map
Expand Down
36 changes: 27 additions & 9 deletions test/scicloj/ml/xgboost_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -205,16 +205,33 @@
(dtype/emap val-map :keyword col))))
(ds/categorical->number cf/categorical)
(ds-mod/set-inference-target :Score)))




;; Feature part of `reviews`, as selected by `cf/feature`.
(def feature-ds
(cf/feature reviews))

;; Target part of `reviews`, as selected by `cf/target`.
(def target-ds
(cf/target reviews))

;; Load the xgboost namespace so the fully-qualified var below resolves.
(require '[scicloj.ml.xgboost])
;; Build the DMatrix directly from the sparse column, bypassing `ml/train`.
;; Arguments: feature dataset, target dataset, sparse column name
;; (:bow-sparse) and the number of sparse columns (1000).
;; NOTE(review): this only works while `sparse-feature->dmatrix` is public
;; in scicloj.ml.xgboost -- confirm before making it private again.
(def d
(scicloj.ml.xgboost/sparse-feature->dmatrix
feature-ds
target-ds
:bow-sparse
1000
))
(def trained-model
(ml/train reviews {:model-type :xgboost/classification
:sparse-column :bow-sparse
:n-sparse-columns 100
:silent 0
:round 1
:eval-metric "merror"
:watches {:test-ds (ds/sample reviews 10)}}))

:n-sparse-columns 1000
;:silent 0
;:round 1
;:eval-metric "merror"
;:watches {:test-ds (ds/sample reviews 10)}
}))



(def prediction
Expand All @@ -227,5 +244,6 @@
(ml/train-k-fold reviews {:model-type :xgboost/classification
:sparse-column :bow-sparse}))


(ml/explain folds))

(ml/explain folds)
)

0 comments on commit 9a0fd7a

Please sign in to comment.