diff --git a/base/random.go b/base/random.go
index bc4323ecd..72a9a7b7c 100644
--- a/base/random.go
+++ b/base/random.go
@@ -59,6 +59,14 @@ func (rng RandomGenerator) NormalMatrix(row, col int, mean, stdDev float32) [][]float32 {
 	return ret
 }
 
+func (rng RandomGenerator) NormalVector(size int, mean, stdDev float32) []float32 {
+	ret := make([]float32, size)
+	for i := 0; i < len(ret); i++ {
+		ret[i] = float32(rng.NormFloat64())*stdDev + mean
+	}
+	return ret
+}
+
 // UniformMatrix makes a matrix filled with uniform random floats.
 func (rng RandomGenerator) UniformMatrix(row, col int, low, high float32) [][]float32 {
 	ret := make([][]float32, row)
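A quick usage sketch of the new helper (illustrative only; it assumes the package's existing seeded `NewRandomGenerator` constructor, called here from outside the package):

```go
rng := base.NewRandomGenerator(42)
// Draw 4 samples from a normal distribution with mean 0 and stdDev 0.1.
vec := rng.NormalVector(4, 0, 0.1) // len(vec) == 4
```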
diff --git a/model/click/deepfm.go b/model/click/deepfm.go
index 2a823c450..71425c426 100644
--- a/model/click/deepfm.go
+++ b/model/click/deepfm.go
@@ -31,6 +31,12 @@ import (
 	"modernc.org/mathutil"
 )
 
+const (
+	beta1 float32 = 0.9
+	beta2 float32 = 0.999
+	eps   float32 = 1e-8
+)
+
 type DeepFM struct {
 	BaseFactorizationMachine
 
@@ -42,15 +48,20 @@ type DeepFM struct {
 	vm         gorgonia.VM
 	g          *gorgonia.ExprGraph
-	indices    *gorgonia.Node
+	embeddingV *gorgonia.Node
+	embeddingW *gorgonia.Node
 	values     *gorgonia.Node
 	output     *gorgonia.Node
 	target     *gorgonia.Node
 	cost       *gorgonia.Node
-	v          *gorgonia.Node
-	w          *gorgonia.Node
 	b          *gorgonia.Node
 	learnables []*gorgonia.Node
+	v          [][]float32
+	w          []float32
+	m_v        [][]float32
+	m_w        []float32
+	v_v        [][]float32
+	v_w        []float32
 
 	// Hyper parameters
 	batchSize  int
@@ -139,7 +150,9 @@ func (fm *DeepFM) BatchPredict(x []lo.Tuple2[[]int32, []float32]) []float32 {
 	indicesTensor, valuesTensor, _ := fm.convertToTensors(x, nil)
 	predictions := make([]float32, 0, len(x))
 	for i := 0; i < len(x); i += fm.batchSize {
-		lo.Must0(gorgonia.Let(fm.indices, lo.Must1(indicesTensor.Slice(gorgonia.S(i, i+fm.batchSize)))))
+		v, w := fm.embedding(lo.Must1(indicesTensor.Slice(gorgonia.S(i, i+fm.batchSize))))
+		lo.Must0(gorgonia.Let(fm.embeddingV, v))
+		lo.Must0(gorgonia.Let(fm.embeddingW, w))
 		lo.Must0(gorgonia.Let(fm.values, lo.Must1(valuesTensor.Slice(gorgonia.S(i, i+fm.batchSize)))))
 		lo.Must0(fm.vm.RunAll())
 		predictions = append(predictions, fm.output.Value().Data().([]float32)...)
@@ -175,11 +188,14 @@ func (fm *DeepFM) Fit(trainSet *Dataset, testSet *Dataset, config *FitConfig) Score {
 		fitStart := time.Now()
 		cost := float32(0)
 		for i := 0; i < trainSet.Count(); i += fm.batchSize {
-			lo.Must0(gorgonia.Let(fm.indices, lo.Must1(indicesTensor.Slice(gorgonia.S(i, i+fm.batchSize)))))
+			v, w := fm.embedding(lo.Must1(indicesTensor.Slice(gorgonia.S(i, i+fm.batchSize))))
+			lo.Must0(gorgonia.Let(fm.embeddingV, v))
+			lo.Must0(gorgonia.Let(fm.embeddingW, w))
 			lo.Must0(gorgonia.Let(fm.values, lo.Must1(valuesTensor.Slice(gorgonia.S(i, i+fm.batchSize)))))
 			lo.Must0(gorgonia.Let(fm.target, lo.Must1(targetTensor.Slice(gorgonia.S(i, i+fm.batchSize)))))
 			lo.Must0(fm.vm.RunAll())
+			fm.backward(lo.Must1(indicesTensor.Slice(gorgonia.S(i, i+fm.batchSize))), epoch)
 			cost += fm.cost.Value().Data().(float32)
 			lo.Must0(solver.Step(gorgonia.NodesToValueGrads(fm.learnables)))
 			fm.vm.Reset()
@@ -217,21 +233,19 @@ func (fm *DeepFM) Init(trainSet *Dataset) {
 		fm.numDimension = mathutil.MaxVal(fm.numDimension, len(x))
 	}
 
-	fm.v = gorgonia.NewMatrix(fm.g, tensor.Float32,
-		gorgonia.WithShape(fm.numFeatures, fm.nFactors),
-		gorgonia.WithName("v"),
-		gorgonia.WithInit(gorgonia.Gaussian(float64(fm.initMean), float64(fm.initStdDev))))
-	fm.w = gorgonia.NewMatrix(fm.g, tensor.Float32,
-		gorgonia.WithShape(fm.numFeatures, 1),
-		gorgonia.WithName("w"),
-		gorgonia.WithInit(gorgonia.Gaussian(float64(fm.initMean), float64(fm.initStdDev))))
+	fm.v = fm.GetRandomGenerator().NormalMatrix(fm.numFeatures, fm.nFactors, fm.initMean, fm.initStdDev)
+	fm.w = fm.GetRandomGenerator().NormalVector(fm.numFeatures, fm.initMean, fm.initStdDev)
+	fm.m_v = zeros(fm.numFeatures, fm.nFactors)
+	fm.m_w = make([]float32, fm.numFeatures)
+	fm.v_v = zeros(fm.numFeatures, fm.nFactors)
+	fm.v_w = make([]float32, fm.numFeatures)
 	fm.b = gorgonia.NewMatrix(fm.g, tensor.Float32,
 		gorgonia.WithShape(1, 1),
 		gorgonia.WithName("b"),
 		gorgonia.WithInit(gorgonia.Zeroes()))
-	fm.learnables = []*gorgonia.Node{fm.v, fm.w, fm.b}
 	fm.forward(fm.batchSize)
+	fm.learnables = []*gorgonia.Node{fm.b, fm.embeddingV, fm.embeddingW}
 	lo.Must1(gorgonia.Grad(fm.cost, fm.learnables...))
 	fm.vm = gorgonia.NewTapeMachine(fm.g, gorgonia.BindDualValues(fm.learnables...))
@@ -252,9 +266,12 @@ func (fm *DeepFM) Complexity() int {
 
 func (fm *DeepFM) forward(batchSize int) {
 	// input nodes
-	fm.indices = gorgonia.NodeFromAny(fm.g,
-		tensor.New(tensor.WithShape(batchSize, fm.numDimension), tensor.WithBacking(make([]float32, batchSize*fm.numDimension))),
-		gorgonia.WithName("indices"))
+	fm.embeddingV = gorgonia.NodeFromAny(fm.g,
+		tensor.New(tensor.WithShape(batchSize, fm.numDimension, fm.nFactors), tensor.WithBacking(make([]float32, batchSize*fm.numDimension*fm.nFactors))),
+		gorgonia.WithName("embeddingV"))
+	fm.embeddingW = gorgonia.NodeFromAny(fm.g,
+		tensor.New(tensor.WithShape(batchSize, fm.numDimension, 1), tensor.WithBacking(make([]float32, batchSize*fm.numDimension))),
+		gorgonia.WithName("embeddingW"))
 	fm.values = gorgonia.NodeFromAny(fm.g,
 		tensor.New(tensor.WithShape(batchSize, fm.numDimension), tensor.WithBacking(make([]float32, batchSize*fm.numDimension))),
 		gorgonia.WithName("values"))
@@ -263,18 +280,16 @@ func (fm *DeepFM) forward(batchSize int) {
 		gorgonia.WithName("target"))
 
 	// factorization machine
-	v := gorgonia.Must(gorgonia.Embedding(fm.v, fm.indices))
-	w := gorgonia.Must(gorgonia.Embedding(fm.w, fm.indices))
 	x := gorgonia.Must(gorgonia.Reshape(fm.values, []int{batchSize, fm.numDimension, 1}))
-	vx := gorgonia.Must(gorgonia.BatchedMatMul(v, x, true))
+	vx := gorgonia.Must(gorgonia.BatchedMatMul(fm.embeddingV, x, true))
 	sumSquare := gorgonia.Must(gorgonia.Square(vx))
-	v2 := gorgonia.Must(gorgonia.Square(v))
+	v2 := gorgonia.Must(gorgonia.Square(fm.embeddingV))
 	x2 := gorgonia.Must(gorgonia.Square(x))
 	squareSum := gorgonia.Must(gorgonia.BatchedMatMul(v2, x2, true))
 	sum := gorgonia.Must(gorgonia.Sub(sumSquare, squareSum))
 	sum = gorgonia.Must(gorgonia.Sum(sum, 1))
 	sum = gorgonia.Must(gorgonia.Mul(sum, fm.nodeFromFloat64(0.5)))
-	linear := gorgonia.Must(gorgonia.BatchedMatMul(w, x, true, false))
+	linear := gorgonia.Must(gorgonia.BatchedMatMul(fm.embeddingW, x, true, false))
 	fm.output = gorgonia.Must(gorgonia.BroadcastAdd(
 		gorgonia.Must(gorgonia.Reshape(linear, []int{batchSize})),
 		fm.b,
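For reference, the graph assembled above is the standard factorization machine, where `sumSquare` and `squareSum` are the two halves of the pairwise-interaction identity computed per batch row:

$$\hat{y}(x) = b + \sum_i w_i x_i + \frac{1}{2}\sum_{f=1}^{k}\left[\Big(\sum_i v_{i,f}\,x_i\Big)^2 - \sum_i v_{i,f}^2\,x_i^2\right]$$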
@@ -286,6 +301,91 @@ func (fm *DeepFM) forward(batchSize int) {
 	fm.cost = fm.bceWithLogits(fm.target, fm.output)
 }
 
+func (fm *DeepFM) embedding(indices tensor.View) (v, w *tensor.Dense) {
+	s := indices.Shape()
+	if len(s) != 2 {
+		panic("indices must be 2-dimensional")
+	}
+	batchSize, numDimension := s[0], s[1]
+
+	dataV := make([]float32, batchSize*numDimension*fm.nFactors)
+	dataW := make([]float32, batchSize*numDimension)
+	for i := 0; i < batchSize; i++ {
+		for j := 0; j < numDimension; j++ {
+			index := lo.Must1(indices.At(i, j)).(float32)
+			for k := 0; k < fm.nFactors; k++ {
+				dataV[i*numDimension*fm.nFactors+j*fm.nFactors+k] = fm.v[int(index)][k]
+			}
+			dataW[i*numDimension+j] = fm.w[int(index)]
+		}
+	}
+
+	v = tensor.New(tensor.WithShape(batchSize, numDimension, fm.nFactors), tensor.WithBacking(dataV))
+	w = tensor.New(tensor.WithShape(batchSize, numDimension, 1), tensor.WithBacking(dataW))
+	return
+}
+
+func (fm *DeepFM) backward(indices tensor.View, t int) {
+	s := indices.Shape()
+	if len(s) != 2 {
+		panic("indices must be 2-dimensional")
+	}
+	batchSize, numDimension := s[0], s[1]
+
+	gradEmbeddingV := lo.Must1(fm.embeddingV.Grad()).Data().([]float32)
+	gradEmbeddingW := lo.Must1(fm.embeddingW.Grad()).Data().([]float32)
+	gradV := make(map[int][]float32)
+	gradW := make(map[int]float32)
+
+	for i := 0; i < batchSize; i++ {
+		for j := 0; j < numDimension; j++ {
+			index := int(lo.Must1(indices.At(i, j)).(float32))
+
+			if _, exist := gradV[index]; !exist {
+				gradV[index] = make([]float32, fm.nFactors)
+			}
+			for k := 0; k < fm.nFactors; k++ {
+				gradV[index][k] += gradEmbeddingV[i*numDimension*fm.nFactors+j*fm.nFactors+k]
+			}
+
+			if _, exist := gradW[index]; !exist {
+				gradW[index] = 0
+			}
+			gradW[index] += gradEmbeddingW[i*numDimension+j]
+		}
+	}
+
+	for index, grad := range gradV {
+		for k := 0; k < fm.nFactors; k++ {
+			grad[k] /= float32(batchSize)
+			// m_t = eta * [ beta_1 * m_{t-1} + (1 - beta_1) * g_t ]
+			fm.m_v[index][k] = fm.lr * (beta1*fm.m_v[index][k] + (1-beta1)*grad[k])
+			// v_t = beta_2 * v_{t-1} + (1 - beta_2) * g_t^2
+			fm.v_v[index][k] = beta2*fm.v_v[index][k] + (1-beta2)*grad[k]*grad[k]
+			// \hat{m}_t = m_t / (1 - beta_1^t)
+			mHat := fm.m_v[index][k] / (1 - math32.Pow(beta1, float32(t)))
+			// \hat{v}_t = v_t / (1 - beta_2^t)
+			vHat := fm.v_v[index][k] / (1 - math32.Pow(beta2, float32(t)))
+			// \theta_t = \theta_{t-1} - \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)
+			fm.v[index][k] -= mHat / (math32.Sqrt(vHat) + eps)
+		}
+	}
+
+	for index, grad := range gradW {
+		grad /= float32(batchSize)
+		// m_t = eta * [ beta_1 * m_{t-1} + (1 - beta_1) * g_t ]
+		fm.m_w[index] = fm.lr * (beta1*fm.m_w[index] + (1-beta1)*grad)
+		// v_t = beta_2 * v_{t-1} + (1 - beta_2) * g_t^2
+		fm.v_w[index] = beta2*fm.v_w[index] + (1-beta2)*grad*grad
+		// \hat{m}_t = m_t / (1 - beta_1^t)
+		mHat := fm.m_w[index] / (1 - math32.Pow(beta1, float32(t)))
+		// \hat{v}_t = v_t / (1 - beta_2^t)
+		vHat := fm.v_w[index] / (1 - math32.Pow(beta2, float32(t)))
+		// \theta_t = \theta_{t-1} - \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)
+		fm.w[index] -= mHat / (math32.Sqrt(vHat) + eps)
+	}
+}
+
 func (fm *DeepFM) convertToTensors(x []lo.Tuple2[[]int32, []float32], y []float32) (indicesTensor, valuesTensor, targetTensor *tensor.Dense) {
 	if y != nil && len(x) != len(y) {
 		panic("length of x and y must be equal")
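The update applied in `backward` is Adam, written exactly as the inline comments state, with β₁ = 0.9, β₂ = 0.999, ε = 1e-8 from the new constants:

$$m_t = \eta\left[\beta_1 m_{t-1} + (1-\beta_1)\,g_t\right], \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2$$
$$\hat{m}_t = \frac{m_t}{1-\beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1-\beta_2^t}, \qquad \theta_t = \theta_{t-1} - \frac{\hat{m}_t}{\sqrt{\hat{v}_t}+\epsilon}$$

Note that η is applied inside the first-moment recursion rather than scaling the final step as in the textbook formulation, so the stored moment already carries the learning rate.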
@@ -351,3 +451,11 @@ func (fm *DeepFM) bceWithLogits(target, prediction *gorgonia.Node) *gorgonia.Node {
 func (fm *DeepFM) nodeFromFloat64(any float32) *gorgonia.Node {
 	return gorgonia.NodeFromAny(fm.g, any, gorgonia.WithName(uuid.NewString()))
 }
+
+func zeros(a, b int) [][]float32 {
+	retVal := make([][]float32, a)
+	for i := range retVal {
+		retVal[i] = make([]float32, b)
+	}
+	return retVal
+}
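Both `embedding` and `backward` rely on the same row-major flattening of a (batchSize, numDimension, nFactors) tensor. A standalone sketch of that offset arithmetic (names here are illustrative, not part of the patch):

```go
package main

import "fmt"

// offset maps element [i][j][k] of a row-major (batch, dim, factors)
// tensor to a flat index — the same arithmetic used to fill dataV and
// to read gradEmbeddingV above.
func offset(i, j, k, numDimension, nFactors int) int {
	return i*numDimension*nFactors + j*nFactors + k
}

func main() {
	// For shape (2, 3, 4), element [1][2][3] sits at 1*3*4 + 2*4 + 3 = 23.
	fmt.Println(offset(1, 2, 3, 3, 4)) // 23
}
```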