2014-01-06 56 views
5

更新1/6/2014:我已經更新了問題,以便我試圖解決一個非線性方程。正如你們許多人指出的那樣,我不需要額外的複雜性(隱藏層,sigmoid函數等)來解決非線性問題。使用神經網絡解決Y = X * X + b型公式

另外,我意識到我可以用神經網絡以外的其他方法解決這樣的非線性問題。我沒有試圖寫出最有效的代碼或最少量的代碼。這純粹是爲了更好地學習神經網絡。


我已經創建了自己的後向傳播神經網絡實現。

在訓練解決簡單異或操作時,它工作正常。

但是現在我想適應它&訓練它解決Y = X * X + B類型的公式,但我沒有得到預期的結果。訓練後,網絡不計算正確的答案。神經網絡是否非常適合解決像這樣的代數方程?我意識到我的例子是微不足道的,我只是想了解更多關於神經網絡和他們的能力。

我的隱藏層正在使用sigmoid激活函數,而且我的輸出層正在使用標識函數。

如果你能分析我的代碼並指出任何錯誤,我將不勝感激。

這裏是我完整的代碼(C#.NET):

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 

namespace NeuralNetwork 
{ 
    class Program 
    { 
     static void Main(string[] args) 
     { 
      Console.WriteLine("Training Network..."); 

      Random r = new Random(); 
      var network = new NeuralNetwork(1, 5, 1); 
      for (int i = 0; i < 100000; i++) 
      { 
       int x = i % 15; 
       int y = x * x + 10; 
       network.Train(x); 
       network.BackPropagate(y); 
      } 

      //Below should output 20, but instead outputs garbage 
      Console.WriteLine("0 * 0 + 10 = " + network.Compute(0)[0]); 

      //Below should output 110, but instead outputs garbage 
      Console.WriteLine("10 * 10 + 10 = " + network.Compute(10)[0]); 

      //Below should output 410, but instead outputs garbage 
      Console.WriteLine("20 * 20 + 10 = " + network.Compute(20)[0]); 
     } 
    } 

    public class NeuralNetwork 
    { 
     public double LearnRate { get; set; } 
     public double Momentum { get; set; } 
     public List<Neuron> InputLayer { get; set; } 
     public List<Neuron> HiddenLayer { get; set; } 
     public List<Neuron> OutputLayer { get; set; } 
     static Random random = new Random(); 

     public NeuralNetwork(int inputSize, int hiddenSize, int outputSize) 
     { 
      LearnRate = .9; 
      Momentum = .04; 
      InputLayer = new List<Neuron>(); 
      HiddenLayer = new List<Neuron>(); 
      OutputLayer = new List<Neuron>(); 

      for (int i = 0; i < inputSize; i++) 
       InputLayer.Add(new Neuron()); 

      for (int i = 0; i < hiddenSize; i++) 
       HiddenLayer.Add(new Neuron(InputLayer)); 

      for (int i = 0; i < outputSize; i++) 
       OutputLayer.Add(new Neuron(HiddenLayer)); 
     } 

     public void Train(params double[] inputs) 
     { 
      int i = 0; 
      InputLayer.ForEach(a => a.Value = inputs[i++]); 
      HiddenLayer.ForEach(a => a.CalculateValue()); 
      OutputLayer.ForEach(a => a.CalculateValue()); 
     } 

     public double[] Compute(params double[] inputs) 
     { 
      Train(inputs); 
      return OutputLayer.Select(a => a.Value).ToArray(); 
     } 

     public double CalculateError(params double[] targets) 
     { 
      int i = 0; 
      return OutputLayer.Sum(a => Math.Abs(a.CalculateError(targets[i++]))); 
     } 

     public void BackPropagate(params double[] targets) 
     { 
      int i = 0; 
      OutputLayer.ForEach(a => a.CalculateGradient(targets[i++])); 
      HiddenLayer.ForEach(a => a.CalculateGradient()); 
      HiddenLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum)); 
      OutputLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum)); 
     } 

     public static double NextRandom() 
     { 
      return 2 * random.NextDouble() - 1; 
     } 

     public static double SigmoidFunction(double x) 
     { 
      if (x < -45.0) return 0.0; 
      else if (x > 45.0) return 1.0; 
      return 1.0/(1.0 + Math.Exp(-x)); 
     } 

     public static double SigmoidDerivative(double f) 
     { 
      return f * (1 - f); 
     } 

     public static double HyperTanFunction(double x) 
     { 
      if (x < -10.0) return -1.0; 
      else if (x > 10.0) return 1.0; 
      else return Math.Tanh(x); 
     } 

     public static double HyperTanDerivative(double f) 
     { 
      return (1 - f) * (1 + f); 
     } 

     public static double IdentityFunction(double x) 
     { 
      return x; 
     } 

     public static double IdentityDerivative() 
     { 
      return 1; 
     } 
    } 

    public class Neuron 
    { 
     public bool IsInput { get { return InputSynapses.Count == 0; } } 
     public bool IsHidden { get { return InputSynapses.Count != 0 && OutputSynapses.Count != 0; } } 
     public bool IsOutput { get { return OutputSynapses.Count == 0; } } 
     public List<Synapse> InputSynapses { get; set; } 
     public List<Synapse> OutputSynapses { get; set; } 
     public double Bias { get; set; } 
     public double BiasDelta { get; set; } 
     public double Gradient { get; set; } 
     public double Value { get; set; } 

     public Neuron() 
     { 
      InputSynapses = new List<Synapse>(); 
      OutputSynapses = new List<Synapse>(); 
      Bias = NeuralNetwork.NextRandom(); 
     } 

     public Neuron(List<Neuron> inputNeurons) : this() 
     { 
      foreach (var inputNeuron in inputNeurons) 
      { 
       var synapse = new Synapse(inputNeuron, this); 
       inputNeuron.OutputSynapses.Add(synapse); 
       InputSynapses.Add(synapse); 
      } 
     } 

     public virtual double CalculateValue() 
     { 
      var d = InputSynapses.Sum(a => a.Weight * a.InputNeuron.Value) + Bias; 
      return Value = IsHidden ? NeuralNetwork.SigmoidFunction(d) : NeuralNetwork.IdentityFunction(d); 
     } 

     public virtual double CalculateDerivative() 
     { 
      var d = Value; 
      return IsHidden ? NeuralNetwork.SigmoidDerivative(d) : NeuralNetwork.IdentityDerivative(); 
     } 

     public double CalculateError(double target) 
     { 
      return target - Value; 
     } 

     public double CalculateGradient(double target) 
     { 
      return Gradient = CalculateError(target) * CalculateDerivative(); 
     } 

     public double CalculateGradient() 
     { 
      return Gradient = OutputSynapses.Sum(a => a.OutputNeuron.Gradient * a.Weight) * CalculateDerivative(); 
     } 

     public void UpdateWeights(double learnRate, double momentum) 
     { 
      var prevDelta = BiasDelta; 
      BiasDelta = learnRate * Gradient; // * 1 
      Bias += BiasDelta + momentum * prevDelta; 

      foreach (var s in InputSynapses) 
      { 
       prevDelta = s.WeightDelta; 
       s.WeightDelta = learnRate * Gradient * s.InputNeuron.Value; 
       s.Weight += s.WeightDelta + momentum * prevDelta; 
      } 
     } 
    } 

    public class Synapse 
    { 
     public Neuron InputNeuron { get; set; } 
     public Neuron OutputNeuron { get; set; } 
     public double Weight { get; set; } 
     public double WeightDelta { get; set; } 

     public Synapse(Neuron inputNeuron, Neuron outputNeuron) 
     { 
      InputNeuron = inputNeuron; 
      OutputNeuron = outputNeuron; 
      Weight = NeuralNetwork.NextRandom(); 
     } 
    } 
} 

回答

1

您使用乙狀結腸作爲輸出funnction其在範圍[0-1] 但你目標值雙範圍爲[0 - MAX_INT],我認爲這就是爲什麼你得到NAN基本resaon 我更新你的代碼,並嘗試在[0-1], 範圍內的值進行標準化,我可以得到像這樣的結果,這就是我所期望的

我認爲我越來越接近真相了,我不知道爲什麼這個答案是投票下來 enter image description here

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 

namespace NeuralNetwork 
{ 
    class Program 
    { 
     static void Main(string[] args) 
     { 
      Console.WriteLine("Training Network..."); 

      Random r = new Random(); 
      var network = new NeuralNetwork(1, 3, 1); 
      for (int k = 0; k < 60; k++) 
      { 
       for (int i = 0; i < 1000; i++) 
       { 
        double x = i/1000.0;// r.Next(); 
        double y = 3 * x; 
        network.Train(x); 
        network.BackPropagate(y); 
       } 
       double output = network.Compute(0.2)[0]; 
       Console.WriteLine(output); 
      } 
      //Below should output 10, but instead outputs either a very large number or NaN 
      /* double output = network.Compute(3)[0]; 
      Console.WriteLine(output);*/ 
     } 
    } 

    public class NeuralNetwork 
    { 
     public double LearnRate { get; set; } 
     public double Momentum { get; set; } 
     public List<Neuron> InputLayer { get; set; } 
     public List<Neuron> HiddenLayer { get; set; } 
     public List<Neuron> OutputLayer { get; set; } 
     static Random random = new Random(); 

     public NeuralNetwork(int inputSize, int hiddenSize, int outputSize) 
     { 
      LearnRate = .2; 
      Momentum = .04; 
      InputLayer = new List<Neuron>(); 
      HiddenLayer = new List<Neuron>(); 
      OutputLayer = new List<Neuron>(); 

      for (int i = 0; i < inputSize; i++) 
       InputLayer.Add(new Neuron()); 

      for (int i = 0; i < hiddenSize; i++) 
       HiddenLayer.Add(new Neuron(InputLayer)); 

      for (int i = 0; i < outputSize; i++) 
       OutputLayer.Add(new Neuron(HiddenLayer)); 
     } 

     public void Train(params double[] inputs) 
     { 
      int i = 0; 
      InputLayer.ForEach(a => a.Value = inputs[i++]); 
      HiddenLayer.ForEach(a => a.CalculateValue()); 
      OutputLayer.ForEach(a => a.CalculateValue()); 
     } 

     public double[] Compute(params double[] inputs) 
     { 
      Train(inputs); 
      return OutputLayer.Select(a => a.Value).ToArray(); 
     } 

     public double CalculateError(params double[] targets) 
     { 
      int i = 0; 
      return OutputLayer.Sum(a => Math.Abs(a.CalculateError(targets[i++]))); 
     } 

     public void BackPropagate(params double[] targets) 
     { 
      int i = 0; 
      OutputLayer.ForEach(a => a.CalculateGradient(targets[i++])); 
      HiddenLayer.ForEach(a => a.CalculateGradient()); 
      HiddenLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum)); 
      OutputLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum)); 
     } 

     public static double NextRandom() 
     { 
      return 2 * random.NextDouble() - 1; 
     } 

     public static double SigmoidFunction(double x) 
     { 
      if (x < -45.0) 
      { 
       return 0.0; 
      } 
      else if (x > 45.0) 
      { 
       return 1.0; 
      } 
      return 1.0/(1.0 + Math.Exp(-x)); 

     } 

     public static double SigmoidDerivative(double f) 
     { 
      return f * (1 - f); 
     } 

     public static double HyperTanFunction(double x) 
     { 
      if (x < -10.0) return -1.0; 
      else if (x > 10.0) return 1.0; 
      else return Math.Tanh(x); 
     } 

     public static double HyperTanDerivative(double f) 
     { 
      return (1 - f) * (1 + f); 
     } 

     public static double IdentityFunction(double x) 
     { 
      return x; 
     } 

     public static double IdentityDerivative() 
     { 
      return 1; 
     } 
    } 

    public class Neuron 
    { 
     public bool IsInput { get { return InputSynapses.Count == 0; } } 
     public bool IsHidden { get { return InputSynapses.Count != 0 && OutputSynapses.Count != 0; } } 
     public bool IsOutput { get { return OutputSynapses.Count == 0; } } 
     public List<Synapse> InputSynapses { get; set; } 
     public List<Synapse> OutputSynapses { get; set; } 
     public double Bias { get; set; } 
     public double BiasDelta { get; set; } 
     public double Gradient { get; set; } 
     public double Value { get; set; } 

     public Neuron() 
     { 
      InputSynapses = new List<Synapse>(); 
      OutputSynapses = new List<Synapse>(); 
      Bias = NeuralNetwork.NextRandom(); 
     } 

     public Neuron(List<Neuron> inputNeurons) 
      : this() 
     { 
      foreach (var inputNeuron in inputNeurons) 
      { 
       var synapse = new Synapse(inputNeuron, this); 
       inputNeuron.OutputSynapses.Add(synapse); 
       InputSynapses.Add(synapse); 
      } 
     } 

     public virtual double CalculateValue() 
     { 
      var d = InputSynapses.Sum(a => a.Weight * a.InputNeuron.Value);// + Bias; 
      return Value = IsHidden ? NeuralNetwork.SigmoidFunction(d) : NeuralNetwork.IdentityFunction(d); 
     } 

     public virtual double CalculateDerivative() 
     { 
      var d = Value; 
      return IsHidden ? NeuralNetwork.SigmoidDerivative(d) : NeuralNetwork.IdentityDerivative(); 
     } 

     public double CalculateError(double target) 
     { 
      return target - Value; 
     } 

     public double CalculateGradient(double target) 
     { 
      return Gradient = CalculateError(target) * CalculateDerivative(); 
     } 

     public double CalculateGradient() 
     { 
      return Gradient = OutputSynapses.Sum(a => a.OutputNeuron.Gradient * a.Weight) * CalculateDerivative(); 
     } 

     public void UpdateWeights(double learnRate, double momentum) 
     { 
      var prevDelta = BiasDelta; 
      BiasDelta = learnRate * Gradient; // * 1 
      Bias += BiasDelta + momentum * prevDelta; 

      foreach (var s in InputSynapses) 
      { 
       prevDelta = s.WeightDelta; 
       s.WeightDelta = learnRate * Gradient * s.InputNeuron.Value; 
       s.Weight += s.WeightDelta; //;+ momentum * prevDelta; 
      } 
     } 
    } 

    public class Synapse 
    { 
     public Neuron InputNeuron { get; set; } 
     public Neuron OutputNeuron { get; set; } 
     public double Weight { get; set; } 
     public double WeightDelta { get; set; } 

     public Synapse(Neuron inputNeuron, Neuron outputNeuron) 
     { 
      InputNeuron = inputNeuron; 
      OutputNeuron = outputNeuron; 
      Weight = NeuralNetwork.NextRandom(); 
     } 
    } 
} 
+0

謝謝。規範輸入確實有幫助。我將不得不做更多的研究,爲什麼這有助於。但是,現在我已經將問題修改爲非線性問題,標準化似乎不再正常工作。 – craigrs84

+0

我覺得是sigmoid函數選擇的時候,當輸入值大的時候,隱藏層的輸出值會是0,隱藏層的梯度也是這樣,我把輸入歸一化到[0 - 1],這個結果幾乎可以解釋 – michaeltang

0

我沒有分析你的代碼,它是太長了。但我可以給你的基本問題的答案:

是的,神經網絡非常適合這樣的問題。

事實上,在中ax+b=y形式f : R -> R你應該使用一個神經元線性激活功能。不需要三層結構,只需要一個神經元即可。如果你的代碼在這種情況下失敗了,那麼你有一個實現錯誤,因爲它是一個簡單的線性迴歸任務,使用梯度下降法解決。

+0

請離開「-1」投票exaplining什麼是錯在提供的答案 – lejlot

+0

感謝您的信息的評論。對不起,這不是我低調。這種類型的問題解決不需要多層和S形激活是很有意義的。然而,如果我將代數公式從一個線性函數切換到y = x * x + 1之類的東西,那麼我會需要sigmoid加上多層正確的? – craigrs84

+0

是的,一旦您進入非線性函數類,您需要一個非線性激活函數的隱藏層(如通用逼近理論所述)。 – lejlot