更新1/6/2014:我已經更新了問題,以便我試圖解決一個非線性方程。正如你們許多人指出的那樣,我不需要額外的複雜性(隱藏層,sigmoid函數等)來解決非線性問題。使用神經網絡解決Y = X * X + b型公式
另外,我意識到我可以用神經網絡以外的其他方法解決這樣的非線性問題。我沒有試圖寫出最有效的代碼或最少量的代碼。這純粹是爲了更好地學習神經網絡。
我已經創建了自己的後向傳播神經網絡實現。
在訓練解決簡單異或操作時,它工作正常。
但是現在我想適應它&訓練它解決Y = X * X + B類型的公式,但我沒有得到預期的結果。訓練後,網絡不計算正確的答案。神經網絡是否非常適合解決像這樣的代數方程?我意識到我的例子是微不足道的,我只是想了解更多關於神經網絡和他們的能力。
我的隱藏層正在使用sigmoid激活函數,而且我的輸出層正在使用標識函數。
如果你能分析我的代碼並指出任何錯誤,我將不勝感激。
這裏是我完整的代碼(C#.NET):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace NeuralNetwork
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Training Network...");
Random r = new Random();
var network = new NeuralNetwork(1, 5, 1);
for (int i = 0; i < 100000; i++)
{
int x = i % 15;
int y = x * x + 10;
network.Train(x);
network.BackPropagate(y);
}
//Below should output 20, but instead outputs garbage
Console.WriteLine("0 * 0 + 10 = " + network.Compute(0)[0]);
//Below should output 110, but instead outputs garbage
Console.WriteLine("10 * 10 + 10 = " + network.Compute(10)[0]);
//Below should output 410, but instead outputs garbage
Console.WriteLine("20 * 20 + 10 = " + network.Compute(20)[0]);
}
}
public class NeuralNetwork
{
public double LearnRate { get; set; }
public double Momentum { get; set; }
public List<Neuron> InputLayer { get; set; }
public List<Neuron> HiddenLayer { get; set; }
public List<Neuron> OutputLayer { get; set; }
static Random random = new Random();
public NeuralNetwork(int inputSize, int hiddenSize, int outputSize)
{
LearnRate = .9;
Momentum = .04;
InputLayer = new List<Neuron>();
HiddenLayer = new List<Neuron>();
OutputLayer = new List<Neuron>();
for (int i = 0; i < inputSize; i++)
InputLayer.Add(new Neuron());
for (int i = 0; i < hiddenSize; i++)
HiddenLayer.Add(new Neuron(InputLayer));
for (int i = 0; i < outputSize; i++)
OutputLayer.Add(new Neuron(HiddenLayer));
}
public void Train(params double[] inputs)
{
int i = 0;
InputLayer.ForEach(a => a.Value = inputs[i++]);
HiddenLayer.ForEach(a => a.CalculateValue());
OutputLayer.ForEach(a => a.CalculateValue());
}
public double[] Compute(params double[] inputs)
{
Train(inputs);
return OutputLayer.Select(a => a.Value).ToArray();
}
public double CalculateError(params double[] targets)
{
int i = 0;
return OutputLayer.Sum(a => Math.Abs(a.CalculateError(targets[i++])));
}
public void BackPropagate(params double[] targets)
{
int i = 0;
OutputLayer.ForEach(a => a.CalculateGradient(targets[i++]));
HiddenLayer.ForEach(a => a.CalculateGradient());
HiddenLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
OutputLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
}
public static double NextRandom()
{
return 2 * random.NextDouble() - 1;
}
public static double SigmoidFunction(double x)
{
if (x < -45.0) return 0.0;
else if (x > 45.0) return 1.0;
return 1.0/(1.0 + Math.Exp(-x));
}
public static double SigmoidDerivative(double f)
{
return f * (1 - f);
}
public static double HyperTanFunction(double x)
{
if (x < -10.0) return -1.0;
else if (x > 10.0) return 1.0;
else return Math.Tanh(x);
}
public static double HyperTanDerivative(double f)
{
return (1 - f) * (1 + f);
}
public static double IdentityFunction(double x)
{
return x;
}
public static double IdentityDerivative()
{
return 1;
}
}
public class Neuron
{
public bool IsInput { get { return InputSynapses.Count == 0; } }
public bool IsHidden { get { return InputSynapses.Count != 0 && OutputSynapses.Count != 0; } }
public bool IsOutput { get { return OutputSynapses.Count == 0; } }
public List<Synapse> InputSynapses { get; set; }
public List<Synapse> OutputSynapses { get; set; }
public double Bias { get; set; }
public double BiasDelta { get; set; }
public double Gradient { get; set; }
public double Value { get; set; }
public Neuron()
{
InputSynapses = new List<Synapse>();
OutputSynapses = new List<Synapse>();
Bias = NeuralNetwork.NextRandom();
}
public Neuron(List<Neuron> inputNeurons) : this()
{
foreach (var inputNeuron in inputNeurons)
{
var synapse = new Synapse(inputNeuron, this);
inputNeuron.OutputSynapses.Add(synapse);
InputSynapses.Add(synapse);
}
}
public virtual double CalculateValue()
{
var d = InputSynapses.Sum(a => a.Weight * a.InputNeuron.Value) + Bias;
return Value = IsHidden ? NeuralNetwork.SigmoidFunction(d) : NeuralNetwork.IdentityFunction(d);
}
public virtual double CalculateDerivative()
{
var d = Value;
return IsHidden ? NeuralNetwork.SigmoidDerivative(d) : NeuralNetwork.IdentityDerivative();
}
public double CalculateError(double target)
{
return target - Value;
}
public double CalculateGradient(double target)
{
return Gradient = CalculateError(target) * CalculateDerivative();
}
public double CalculateGradient()
{
return Gradient = OutputSynapses.Sum(a => a.OutputNeuron.Gradient * a.Weight) * CalculateDerivative();
}
public void UpdateWeights(double learnRate, double momentum)
{
var prevDelta = BiasDelta;
BiasDelta = learnRate * Gradient; // * 1
Bias += BiasDelta + momentum * prevDelta;
foreach (var s in InputSynapses)
{
prevDelta = s.WeightDelta;
s.WeightDelta = learnRate * Gradient * s.InputNeuron.Value;
s.Weight += s.WeightDelta + momentum * prevDelta;
}
}
}
public class Synapse
{
public Neuron InputNeuron { get; set; }
public Neuron OutputNeuron { get; set; }
public double Weight { get; set; }
public double WeightDelta { get; set; }
public Synapse(Neuron inputNeuron, Neuron outputNeuron)
{
InputNeuron = inputNeuron;
OutputNeuron = outputNeuron;
Weight = NeuralNetwork.NextRandom();
}
}
}
謝謝。規範輸入確實有幫助。我將不得不做更多的研究,爲什麼這有助於。但是,現在我已經將問題修改爲非線性問題,標準化似乎不再正常工作。 – craigrs84
我覺得是sigmoid函數選擇的時候,當輸入值大的時候,隱藏層的輸出值會是0,隱藏層的梯度也是這樣,我把輸入歸一化到[0 - 1],這個結果幾乎可以解釋 – michaeltang