Simple neural network with backpropagation in Swift

I am trying to implement a very simple neural network with backpropagation, and to train it on the logical AND operator. But its predictions are no good. :(

import Foundation

public class ActivationFunction {

    class func sigmoid(x: Float) -> Float {
        return 1.0 / (1.0 + exp(-x))
    }

    class func dSigmoid(x: Float) -> Float {
        return x * (1 - x) // x is already sigmoid(x), so the sigmoid is not applied twice
    }
}

public class NeuralNetConstants {

    public static let learningRate: Float = 0.3
    public static let momentum: Float = 0.6
    public static let iterations: Int = 100000
}

public class Layer { 

    private var output: [Float] 
    private var input: [Float] 
    private var weights: [Float] 
    private var dWeights: [Float] 

    init(inputSize: Int, outputSize: Int) {
        self.output = [Float](repeating: 0, count: outputSize)
        self.input = [Float](repeating: 0, count: inputSize + 1)
        self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
        self.dWeights = [Float](repeating: 0, count: weights.count)
    }

    public func run(inputArray: [Float]) -> [Float] {

        input = inputArray
        input[input.count - 1] = 1 // bias
        var offset = 0

        for i in 0..<output.count {
            for j in 0..<input.count {
                output[i] += weights[offset + j] * input[j]
            }

            output[i] = ActivationFunction.sigmoid(x: output[i])
            offset += input.count
        }

        return output
    }

    public func train(error: [Float], learningRate: Float, momentum: Float) -> [Float] {

        var offset = 0
        var nextError = [Float](repeating: 0, count: input.count)

        for i in 0..<output.count {

            let delta = error[i] * ActivationFunction.dSigmoid(x: output[i])

            for j in 0..<input.count {
                let weightIndex = offset + j
                nextError[j] = nextError[j] + weights[weightIndex] * delta
                let dw = input[j] * delta * learningRate
                weights[weightIndex] += dWeights[weightIndex] * momentum + dw
                dWeights[weightIndex] = dw
            }

            offset += input.count
        }

        return nextError
    }

} 

public class BackpropNeuralNetwork { 

    private var layers: [Layer] = [] 

    public init(inputSize: Int, hiddenSize: Int, outputSize: Int) {
        self.layers.append(Layer(inputSize: inputSize, outputSize: hiddenSize))
        self.layers.append(Layer(inputSize: hiddenSize, outputSize: outputSize))
    }

    public func getLayer(index: Int) -> Layer {
        return layers[index]
    }

    public func run(input: [Float]) -> [Float] {

        var activations = input

        for i in 0..<layers.count {
            activations = layers[i].run(inputArray: activations)
        }

        return activations
    }

    public func train(input: [Float], targetOutput: [Float], learningRate: Float, momentum: Float) {

        let calculatedOutput = run(input: input)
        var error = [Float](repeating: 0, count: calculatedOutput.count)

        for i in 0..<error.count {
            error[i] = targetOutput[i] - calculatedOutput[i]
        }

        for i in (0..<layers.count).reversed() {
            error = layers[i].train(error: error, learningRate: learningRate, momentum: momentum)
        }
    }
}

extension ClosedRange where Bound: FloatingPoint {
    public func random() -> Bound {
        let range = self.upperBound - self.lowerBound
        let randomValue = (Bound(arc4random_uniform(UINT32_MAX)) / Bound(UINT32_MAX)) * range + self.lowerBound
        return randomValue
    }
}

Here is my training data; I just want my network to learn the simple logical AND operator.

My input data:

let traningData: [[Float]] = [ [0,0], [0,1], [1,0], [1,1] ] 

let traningResults: [[Float]] = [ [0], [0], [0], [1] ] 

let backProb = BackpropNeuralNetwork(inputSize: 2, hiddenSize: 3, outputSize: 1)

for _ in 0..<NeuralNetConstants.iterations {

    for i in 0..<traningResults.count {
        backProb.train(input: traningData[i], targetOutput: traningResults[i], learningRate: NeuralNetConstants.learningRate, momentum: NeuralNetConstants.momentum)
    }

    for i in 0..<traningResults.count {
        let t = traningData[i]
        print("\(t[0]), \(t[1]) -- \(backProb.run(input: t)[0])")
    }
}

That is my entire code for the neural network. The code is not really flexible, but I think it is more important to understand the theory of neural networks first; making the code flexible can come later. The problem is that my results are completely wrong. This is what I get:

0.0, 0.0 -- 0.246135 
0.0, 1.0 -- 0.251307 
1.0, 0.0 -- 0.24325 
1.0, 1.0 -- 0.240923 

And this is roughly what I would like to get:

0.0, 0.0 -- 0.000
0.0, 1.0 -- 0.005
1.0, 0.0 -- 0.005
1.0, 1.0 -- 0.992

For comparison, the equivalent Java implementation works fine:

public class ActivationFunction { 

    public static float sigmoid(float x) { 
     return (float) (1/(1 + Math.exp(-x))); 
    } 

    public static float dSigmoid(float x) { 
     return x*(1-x); // because the output is the sigmoid(x) !!! we dont have to apply it twice 
    } 
} 

public class NeuralNetConstants { 

    private NeuralNetConstants() { 

    } 

    public static final float LEARNING_RATE = 0.3f; 
    public static final float MOMENTUM = 0.6f; 
    public static final int ITERATIONS = 100000; 
} 

public class Layer { 

    private float[] output; 
    private float[] input; 
    private float[] weights; 
    private float[] dWeights; 
    private Random random; 

    public Layer(int inputSize, int outputSize) { 
     output = new float[outputSize]; 
     input = new float[inputSize + 1]; 
     weights = new float[(1 + inputSize) * outputSize]; 
     dWeights = new float[weights.length]; 
     this.random = new Random(); 
     initWeights(); 
    } 

    public void initWeights() { 
     for (int i = 0; i < weights.length; i++) { 
      weights[i] = (random.nextFloat() - 0.5f) * 4f; 
     } 
    } 

    public float[] run(float[] inputArray) { 

     System.arraycopy(inputArray, 0, input, 0, inputArray.length); 
     input[input.length - 1] = 1; // bias 
     int offset = 0; 

     for (int i = 0; i < output.length; i++) { 
      for (int j = 0; j < input.length; j++) { 
       output[i] += weights[offset + j] * input[j]; 
      } 
      output[i] = ActivationFunction.sigmoid(output[i]); 
      offset += input.length; 
     } 

     return Arrays.copyOf(output, output.length); 
    } 

    public float[] train(float[] error, float learningRate, float momentum) { 

     int offset = 0; 
     float[] nextError = new float[input.length]; 

     for (int i = 0; i < output.length; i++) { 

      float delta = error[i] * ActivationFunction.dSigmoid(output[i]); 
      for (int j = 0; j < input.length; j++) { 
       int previousWeightIndex = offset + j; 
       nextError[j] = nextError[j] + weights[previousWeightIndex] * delta; 
       float dw = input[j] * delta * learningRate; 
       weights[previousWeightIndex] += dWeights[previousWeightIndex] * momentum + dw; 
       dWeights[previousWeightIndex] = dw; 
      } 

      offset += input.length; 
     } 

     return nextError; 
    } 
} 

public class BackpropNeuralNetwork { 

    private Layer[] layers; 

    public BackpropNeuralNetwork(int inputSize, int hiddenSize, int outputSize) { 
     layers = new Layer[2]; 
     layers[0] = new Layer(inputSize, hiddenSize); 
     layers[1] = new Layer(hiddenSize, outputSize); 
    } 

    public Layer getLayer(int index) { 
     return layers[index]; 
    } 

    public float[] run(float[] input) { 
     float[] inputActivation = input; 
     for (int i = 0; i < layers.length; i++) { 
      inputActivation = layers[i].run(inputActivation); 
     } 
     return inputActivation; 
    } 

    public void train(float[] input, float[] targetOutput, float learningRate, float momentum) { 

     float[] calculatedOutput = run(input); 
     float[] error = new float[calculatedOutput.length]; 

     for (int i = 0; i < error.length; i++) { 
      error[i] = targetOutput[i] - calculatedOutput[i]; 
     } 

     for (int i = layers.length - 1; i >= 0; i--) { 
      error = layers[i].train(error, learningRate, momentum); 
     } 
    } 
} 

public class NeuralNetwork { 

    /** 
    * @param args the command line arguments 
    */ 
    public static void main(String[] args) { 
     float[][] trainingData = new float[][] {
       new float[] { 0, 0 },
       new float[] { 0, 1 },
       new float[] { 1, 0 },
       new float[] { 1, 1 }
     };

     float[][] trainingResults = new float[][] {
       new float[] { 0 },
       new float[] { 0 },
       new float[] { 0 },
       new float[] { 1 }
     };

     BackpropNeuralNetwork backpropagationNeuralNetworks = new BackpropNeuralNetwork(2, 3, 1);

     for (int iterations = 0; iterations < NeuralNetConstants.ITERATIONS; iterations++) { 

      for (int i = 0; i < trainingResults.length; i++) { 
       backpropagationNeuralNetworks.train(trainingData[i], trainingResults[i], 
         NeuralNetConstants.LEARNING_RATE, NeuralNetConstants.MOMENTUM); 
      } 

      System.out.println(); 
      for (int i = 0; i < trainingResults.length; i++) { 
       float[] t = trainingData[i]; 
       System.out.printf("%d epoch\n", iterations + 1); 
       System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], backpropagationNeuralNetworks.run(t)[0]); 
      } 
     } 
    } 

} 

Please add details about what you expect the network to predict and what exactly the problem is. –


@MatiasValdenegro In response to your comment, I have updated my question. – BilalReffas

Answer


You are initializing your weights incorrectly. You create a single random value and then reuse it for every weight. What you want is a fresh random value for each weight in the array. Replace

self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)

with:

self.weights = (0..<(1 + inputSize) * outputSize).map { _ in
    return (-2.0...2.0).random()
}
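
As an aside, and assuming you can move to Swift 4.2 or later, the standard library already ships a random API for floating-point ranges, so the custom ClosedRange extension would not be needed at all:

self.weights = (0..<(1 + inputSize) * outputSize).map { _ in
    Float.random(in: -2.0...2.0) // one independent draw per weight
}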

Besides that: consider overwriting only the first elements of input in the Layer.run method, so the bias slot at the end is preserved. So instead of

input = inputArray 

you should do this:

for (i, e) in inputArray.enumerated() {
    self.input[i] = e
}
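
Putting both suggested fixes together, the affected parts of Layer might look like the sketch below. Note that zeroing output[i] before accumulating is an extra fix of mine that appears in neither the question nor the answer; without it, activations from a previous run call leak into the next one.

init(inputSize: Int, outputSize: Int) {
    self.output = [Float](repeating: 0, count: outputSize)
    self.input = [Float](repeating: 0, count: inputSize + 1)
    // a fresh random value for every weight, instead of one value repeated
    self.weights = (0..<(1 + inputSize) * outputSize).map { _ in
        (-2.0...2.0).random()
    }
    self.dWeights = [Float](repeating: 0, count: weights.count)
}

public func run(inputArray: [Float]) -> [Float] {
    // copy only the actual inputs; the last slot stays reserved for the bias
    for (i, e) in inputArray.enumerated() {
        input[i] = e
    }
    input[input.count - 1] = 1 // bias

    var offset = 0
    for i in 0..<output.count {
        output[i] = 0 // reset the accumulator from the previous call (extra fix, see above)
        for j in 0..<input.count {
            output[i] += weights[offset + j] * input[j]
        }
        output[i] = ActivationFunction.sigmoid(x: output[i])
        offset += input.count
    }
    return output
}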

Thank you, you made my day! – BilalReffas
