Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .swp
Binary file not shown.
2 changes: 1 addition & 1 deletion project.clj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(defproject synaptic "0.3.0-SNAPSHOT"
(defproject keorn/synaptic "0.3.0-SNAPSHOT"
:description "Neural Networks in Clojure"
:url "https://github.com/japonophile/synaptic"
:license {:name "Eclipse Public License"
Expand Down
3 changes: 3 additions & 0 deletions src/synaptic/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
;; Re-export selected vars from the namespace aliased as `d` (presumably
;; synaptic.datasets - confirm against the ns form) so callers can reach them
;; through this namespace. Each `def` binds a local var to the value of the
;; original, and the following `alter-meta!` copies the :doc and :arglists
;; metadata of the original var onto the local one.
(def load-training-set-header d/load-training-set-header)
(alter-meta! #'load-training-set-header merge
(select-keys (meta #'d/load-training-set-header) [:doc :arglists]))
;; Same pattern for `dataset`.
(def dataset d/dataset)
(alter-meta! #'dataset merge
(select-keys (meta #'d/dataset) [:doc :arglists]))
;; Same pattern for `training-set`.
(def training-set d/training-set)
(alter-meta! #'training-set merge
(select-keys (meta #'d/training-set) [:doc :arglists]))
Expand Down
39 changes: 20 additions & 19 deletions src/synaptic/datasets.clj
Original file line number Diff line number Diff line change
Expand Up @@ -80,33 +80,34 @@

(defn count-labels
  "Create a map with the number of occurrences of each label.

  labeltranslator - a function (or map) taking an encoded label and returning
                    the original label it stands for
  encodedlabels   - a collection of encoded labels (may be nil or empty)

  Returns a map from translated label to the number of times its encoded
  form appears in encodedlabels; an empty map when there are no labels."
  [labeltranslator encodedlabels]
  (let [code->count (frequencies encodedlabels)]
    ;; keys and vals of the same map are returned in matching order, so the
    ;; translated keys line up with their counts.
    (zipmap (map labeltranslator (keys code->count))
            (vals code->count))))

(defn training-set
"Create a training set from samples and associated labels.
The training set consists of one or more batches and optionally a validation set.
It also has a map that will allow converting y's back to the original labels.

Options:
:name - a name for the training set
:type - the type of training data (e.g. :binary-image, :grayscale-image ...)
:fieldsize - [width height] of each sample data (for images)
:nvalid - size of the validation set (default is 0, i.e. no validation set)
:batch - size of a mini-batch (default is the number of samples, after
having set apart the validation set)
:online true - set this flag for online training (same as batch size = 1)
:rand false - unset this flag to keep original ordering (by default, samples
will be shuffled before partitioning)."
:name - a name for the training set
:type - the type of training data (e.g. :binary-image, :grayscale-image ...)
:continuous true - set this flag to use continuous labels (auto-scaled to between 0 and 1)
:fieldsize - [width height] of each sample data (for images)
:nvalid - size of the validation set (default is 0, i.e. no validation set)
:batch - size of a mini-batch (default is the number of samples, after
having set apart the validation set)
:online true - set this flag for online training (same as batch size = 1)
:rand false - unset this flag to keep original ordering (by default, samples
will be shuffled before partitioning)."
[samples labels & [options]]
{:pre [(= (count samples) (count labels))]}
(let [batchsize (if (:online options) 1 (:batch options))
trainsize (if (:nvalid options) (- (count samples) (:nvalid options)))
randomize (if (nil? (:rand options)) true (:rand options))
[binlb uniquelb] (u/tobinary labels)
[smp lb] (if randomize (shuffle-vecs samples binlb) [samples binlb])
[reglb lbtranslator] (if (:continuous options) (u/tocontinuous labels) (u/tobinary labels))
[smp lb] (if randomize (shuffle-vecs samples reglb) [samples reglb])
[trainsmp validsmp] (if trainsize (split-at trainsize smp) [smp nil])
[trainlb validlb] (if trainsize (split-at trainsize lb) [lb nil])
[batchsmp batchlb] (partition-vecs batchsize trainsmp trainlb)
Expand All @@ -118,9 +119,9 @@
:type (:type options)
:fieldsize (or (:fieldsize options)
(u/divisors (count (first samples))))
:batches (mapv (partial count-labels uniquelb) batchlb)
:valid (count-labels uniquelb validlb)
:labels uniquelb}]
:batches (mapv (partial count-labels lbtranslator) batchlb)
:valid (count-labels lbtranslator validlb)
:labeltranslator lbtranslator}]
(TrainingSet. header trainsets validset)))

(defn test-set
Expand All @@ -129,7 +130,7 @@

Options:
:name - a name for the test set
:type - the type of training data (e.g. :binary-image, :grayscale-image ...)
:type - the type of test data (e.g. :binary-image, :grayscale-image ...)
:fieldsize - [width height] of each sample data (for images)
:rand true - set this flag to shuffle samples."
[samples & [options]]
Expand Down
27 changes: 15 additions & 12 deletions src/synaptic/net.clj
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@
[zs]
(m/map (fn [z] (if (>= z 0) 1 -1)) zs))

(defn relu
  "Rectified linear unit activation function.
  Applied element-wise: entries that are <= 0 map to 0.0, all other entries
  are returned unchanged."
  [zs]
  (m/map (fn [z] (if (<= z 0.0) 0.0 z)) zs))

(defn sigmoid
"Sigmoid activation function.
Computed as 1 / (1 + e^(-z))."
Expand Down Expand Up @@ -256,16 +261,14 @@
as))))

(defn estimate
  "Estimate labels for a given data set, by computing the network output for
  each sample of the data set and translating it back to a label.

  The translation depends on the label translator stored under
  [:arch :labeltranslator] of the network:
  - a map (one-hot code -> label): the most probable class is selected, its
    index is one-hot encoded and looked up in the map;
  - any other function: it is applied to each raw output row;
  - none: the index of the largest output is returned for each sample."
  [^Net nn ^DataSet dset]
  (let [x            (:x dset)
        y            (m/dense (:a (last (net-activities nn x))))
        label-size   (count (first y))
        ;; index of the largest output in a row (rows are used as functions
        ;; of their index)
        argmax       (fn [row] (apply max-key row (range label-size)))
        ;; integer one-hot vector of length label-size with a 1 at index i
        onehot       (fn [i] (assoc (vec (repeat label-size 0)) i 1))
        lbtranslator (-> nn :arch :labeltranslator)]
    (cond
      ;; Raw outputs are doubles and never equal the integer one-hot keys of
      ;; the map, so the row must be reduced to its winning class first.
      (map? lbtranslator) (mapv #(get lbtranslator (onehot (argmax %))) y)
      lbtranslator        (mapv lbtranslator y)
      :else               (mapv argmax y))))
16 changes: 11 additions & 5 deletions src/synaptic/training.clj
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@
(case actkind
:softmax (fn [ys] (m/mult ys (m/- 1.0 ys)))
:sigmoid (fn [ys] (m/mult ys (m/- 1.0 ys)))
:hyperbolic-tangent (fn [ys] (m/- 1.0 (m/mult ys ys)))))
:hyperbolic-tangent (fn [ys] (m/- 1.0 (m/mult ys ys)))
; ReLU derivative expressed on the outputs: 0 where the output is <= 0, else 1.
:relu (fn [ys] (m/map #(if (<= % 0) 0. 1.) ys))))

(defn output-layer-error-deriv
"Returns the function to compute the error derivative of the output layer
Expand Down Expand Up @@ -623,12 +624,18 @@
as the training progresses."
(fn [net _ _] (-> @net :training :algo)))

(defn initialize-train
  "First step of every training procedure.
  Marks the network (an atom) as being in the :training state, applies
  init-stats to it, and stores the label translator found in the training
  set header under [:arch :labeltranslator] of the network.
  Returns the value of the network after the last update."
  [net ^TrainingSet trset]
  (let [translator (get-in trset [:header :labeltranslator])]
    (swap! net assoc-in [:training :state :state] :training)
    (swap! net init-stats)
    (swap! net assoc-in [:arch :labeltranslator] translator)))

(defmethod train
:lbfgs
[net ^TrainingSet trset nepochs]
(future
(swap! net assoc-in [:training :state :state] :training)
(swap! net init-stats)
(initialize-train net trset)
(let [l (-> @net :arch :layers)
b (d/merge-batches (:batches trset))
w0 (weights-to-double-array (:weights @net))
Expand All @@ -654,8 +661,7 @@
:default
[net ^TrainingSet trset nepochs]
(future
(swap! net assoc-in [:training :state :state] :training)
(swap! net init-stats)
(initialize-train net trset)
(let [maxep (+ nepochs (-> @net :training :stats :epochs))
all-batches (:batches trset)]
(loop [batches all-batches]
Expand Down
17 changes: 15 additions & 2 deletions src/synaptic/util.clj
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@
(vec (for [i (range n)] (assoc (vec (repeat n 0)) i 1))))

(defn tobinary
  "Encode labels to vectors of 0 and 1 (one binary code per unique label).
  Returns a pair [encoded-labels code->label], where encoded-labels is the
  vector of binary codes for the given labels (in the same order), and
  code->label is a map from each binary code back to its original label,
  to be used for decoding."
  [labels]
  (let [uniquelabels (unique labels)
        lbcodes      (bincodes (count uniquelabels))
        lb2code      (zipmap uniquelabels lbcodes)]
    [(mapv lb2code labels) (zipmap lbcodes uniquelabels)]))

(defn frombinary
"Decode a vector of 0 and 1 to the original label, based on a vector
Expand All @@ -162,6 +162,19 @@
code2lb (zipmap lbcodes uniquelabels)]
(mapv code2lb encodedlabels)))

; continuous scaling

(defn tocontinuous
  "Encode numeric labels to vectors with numbers in range 0 to 1.
  Returns a pair [encoded-labels decode], where encoded-labels are the labels
  shifted by the smallest value and divided by the value range, and decode is
  a function applying the inverse transformation (it returns a matrix).

  When all labels have the same value the range is zero; a scaling factor of
  1 is used in that case so that the encoded labels are 0 instead of NaN."
  [labels]
  (let [flat-labels      (flatten labels)
        smallest-element (apply min flat-labels)
        largest-element  (apply max flat-labels)
        value-range      (- largest-element smallest-element)
        ;; guard against division by zero for constant labels
        scaling-factor   (if (zero? value-range) 1 value-range)
        shifted-representation (m/- (m/matrix labels) smallest-element)]
    [(m/to-vecs (m/div shifted-representation scaling-factor))
     #(m/+ smallest-element (m/mult (m/matrix %) scaling-factor))]))

; Make clatrix matrices printable and readable in EDN format

(defmethod print-method
Expand Down
12 changes: 6 additions & 6 deletions test/synaptic/datasets_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@
(is (= TrainingSet (type ts)))
(let [bs (:batches ts)
vs (:valid ts)
ulbs (-> ts :header :labels)]
ulbs (-> ts :header :labeltranslator)]
(is (vector? bs))
(is (= 5 (count bs)))
(is (every? #(= DataSet (type %)) bs))
(is (nil? vs))
(is (= ["a" "b"] ulbs))
(is (= {[0 1] "b", [1 0] "a"} ulbs))
(let [x (:x (first bs))
y (:y (first bs))]
(is (m/matrix? x))
Expand All @@ -49,12 +49,12 @@
(is (= TrainingSet (type ts)))
(let [bs (:batches ts)
vs (:valid ts)
ulbs (-> ts :header :labels)]
ulbs (-> ts :header :labeltranslator)]
(is (vector? bs))
(is (= 1 (count bs)))
(is (= DataSet (type (first bs))))
(is (= DataSet (type vs)))
(is (= ["0" "1" "2" "3"] ulbs))
(is (= {[0 0 0 1] "3", [0 0 1 0] "2", [0 1 0 0] "1", [1 0 0 0] "0"} ulbs))
(let [x (:x (first bs))
y (:y (first bs))]
(is (m/matrix? x))
Expand All @@ -72,12 +72,12 @@
(is (= TrainingSet (type ts)))
(let [bs (:batches ts)
vs (:valid ts)
ulbs (-> ts :header :labels)]
ulbs (-> ts :header :labeltranslator)]
(is (vector? bs))
(is (= 5 (count bs)))
(is (= DataSet (type (first bs))))
(is (nil? vs))
(is (= ["+" "-"] ulbs))
(is (= {[0 1] "-", [1 0] "+"} ulbs))
(is (every? true? (map #(= [(map double %1)]
(m/dense (:x %2))) smp bs)))))))

3 changes: 3 additions & 0 deletions test/synaptic/net_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
(testing "sigmoid"
(is (m-quasi-equal? [[0.25 0.2]]
(sigmoid (m/matrix [[(Math/log 1/3) (Math/log 1/4)]])))))
(testing "relu"
(is (m-quasi-equal? [[0. 0.2]]
(relu (m/matrix [[-10. 0.2]])))))
(testing "hyperbolic-tangent"
(is (m-quasi-equal? [[0.7615942 0.9640276 0.9950548]]
(hyperbolic-tangent (m/matrix [[1 2 3]])))))
Expand Down
10 changes: 9 additions & 1 deletion test/synaptic/util_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
(testing "tobinary should return the vector of unique labels and all labels
encoded to binary vectors"
(is (= [[[0 0 0 1] [1 0 0 0] [0 1 0 0] [0 0 0 1] [0 0 1 0] [1 0 0 0] [0 0 1 0]]
["1" "2" "3" "8"]]
{[0 0 0 1] "8", [0 0 1 0] "3", [0 1 0 0] "2", [1 0 0 0] "1"}]
(tobinary ["8" "1" "2" "8" "3" "1" "3"]))))
(testing "frombinary should decode each label to its original value, based
on a vector of unique labels"
Expand All @@ -113,6 +113,14 @@
[[0 0 0 1] [1 0 0 0] [0 1 0 0] [0 0 0 1]
[0 0 1 0] [1 0 0 0] [0 0 1 0]])))))

(deftest test-continuous-scaling
  (testing "tocontinuous should return all labels scaled to vectors with values
           in range 0 to 1, and a function to scale them back"
    ;; min is -1 and max is 4, so every value v is encoded as (v + 1) / 5
    (is (= [[0.4 0.6] [0.8 1.0] [0.0 0.2]]
           (first (tocontinuous [[1 2] [3 4] [-1 0]]))))
    ;; decoding maps 0 back to the minimum (-1) and 1 back to the maximum (4)
    (is (= (m/matrix [[-1 4]])
           ((second (tocontinuous [[1 2] [3 4] [-1 0]])) [[0 1]])))))

(deftest test-data-manipulation
(testing "unique should return a sorted vector of unique values"
(is (= ["a" "b" "c" "d" "x" "y" "z"]
Expand Down