2017-04-24 48 views
3

在Java中加載sklearn模型——模型是使用python中的DNNClassifier創建的

目標是在Java中打開一個在python中用tensorflow.contrib.learn.DNNClassifier創建/訓練的模型。

目前的主要問題是要知道在java會話運行方法中給出的「張量」的名稱。

我有這段Python測試代碼:

from __future__ import division, print_function, absolute_import 
import tensorflow as tf 
import pandas as pd 
import tensorflow.contrib.learn as learn 
import numpy as np 
from sklearn import metrics 
from sklearn.cross_validation import train_test_split 
from tensorflow.contrib import layers 
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils 
from tensorflow.python.ops import array_ops 
from tensorflow.python.framework import dtypes 
from tensorflow.python.util.compat import as_text 

print(tf.VERSION)

# Zero-based position of the label column in the CSV; every other column is
# treated as an input feature.
TARGET_COLUMN_INDEX = 35

df = pd.read_csv('../NNNormalizeData-out.csv')

# Split the column names into feature columns and the single label column.
inputs = [name for idx, name in enumerate(df.columns)
          if idx != TARGET_COLUMN_INDEX]
target = [name for idx, name in enumerate(df.columns)
          if idx == TARGET_COLUMN_INDEX]

# Features are cast to float32 and labels to int32.
# NOTE(review): `target` is already a list, so `[target]` passes a nested
# list to as_matrix -- confirm this is intended for the pandas version in use.
total_inputs = df.as_matrix(inputs).astype(np.float32)
total_output = df.as_matrix([target]).astype(np.int32)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
train_inputs, test_inputs, train_output, test_output = train_test_split(
    total_inputs, total_output, test_size=0.2, random_state=42)

# One unnamed real-valued column whose width equals the number of input
# features in the training matrix.
n_features = train_inputs.shape[1]
feature_columns = [
    layers.real_valued_column("", dimension=n_features, dtype=tf.float32)
]

classifier = learn.DNNClassifier(
    hidden_units=[10, 20, 5],
    n_classes=5,
    feature_columns=feature_columns,
)

classifier.fit(train_inputs, train_output, steps=100)

# Export the trained model as a text-format SavedModel under "test" so that
# it can be loaded from Java.
feature_spec = layers.create_feature_spec_for_parsing(feature_columns)
tfrecord_serving_input_fn = learn.build_parsing_serving_input_fn(feature_spec)
classifier.export_savedmodel(
    export_dir_base="test",
    serving_input_fn=tfrecord_serving_input_fn,
    as_text=True,
)


# Measure accuracy on the held-out test split.
pred = list(classifier.predict(test_inputs, as_iterable=True))
score = metrics.accuracy_score(test_output, pred)
print("Final score: {}".format(score))

# Individual hand-written samples, each a single row of 35 feature values.
sample_1 = np.array([[0.37671986791414125,0.28395908337619136,-0.0966095873607713,-1.0,0.06891621389763203,-0.09716678086712205,0.726029084013637,4.984689881073479E-4,-0.30296253267499107,-0.16192917054985334,0.04820256230479658,0.4951319883569152,0.5269983894210499,-0.2560313828048315,-0.3710980821053321,-0.4845867212612598,-0.8647234314469595,-0.6491591208322198,-1.0,-0.5004549422844073,-0.9880910165770813,0.5540293108747256,0.5625990251930839,0.7420121698556554,0.5445551415657979,0.4644276850235627,0.7316976292340245,0.636690006814346,0.16486621649984112,-0.0466018967678159,0.5261100063227044,0.6256168612312738,-0.544295484930702,0.379125782517193,0.6959368575211544]], dtype=float)
sample_2 = np.array([[1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085]], dtype=float)

# Predicted class for sample_2; the message names the sample that is
# actually being classified.
pred = list(classifier.predict(sample_2, as_iterable=True))
print("Prediction for sample_2 is:{} ".format(pred))

# Per-class probabilities for the same sample.
pred = list(classifier.predict_proba(sample_2, as_iterable=True))
print("Prediction for sample_2 is:{} ".format(pred))

運行後會創建一個saved_model.pbtxt文件。

我試圖用下面的Java代碼加載該模型:

/**
 * Loads the exported SavedModel and tries to run one 35-value sample through it.
 *
 * This version feeds a float tensor to "input_example_tensor" and is the code
 * that produces the reported TensorFlowException.
 */
public class HelloTF { 
    public static void main(String[] args) throws Exception { 
     // Load the SavedModel directory using the "serve" tag.
     SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve"); 
     Session s = bundle.session(); 

     double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085}; 
     // Narrow the sample from double to float element by element.
     float [] inputfloat=new float[inputDouble.length]; 
     for(int i=0;i<inputfloat.length;i++) 
     { 
      inputfloat[i]=(float)inputDouble[i]; 
     } 
     // Rank-1 float tensor of shape [35].
     Tensor inputTensor = Tensor.create(new long[] {35}, FloatBuffer.wrap(inputfloat)); 

     // NOTE: the reported exception says "input_example_tensor" is declared as
     // DT_STRING, while the tensor fed here holds floats.
     Tensor result = s.runner() 
       .feed("input_example_tensor", inputTensor) 
       .fetch("dnn/multi_class_head/predictions/probabilities") 
       .run().get(0); 


     // Copy the fetched values into a 5-element array and print each one,
     // tracking the index of the largest value seen.
     float[] m = new float[5]; 
     float[] vector = result.copyTo(m); 
     float maxVal = 0; 
     int inc = 0; 
     int predict = -1; 
     for(float val : vector) 
     { 
      System.out.println(val+" "); 
      if(val > maxVal) { 
       predict = inc; 
       maxVal = val; 
      } 
      inc++; 
     } 
     // Index of the largest value (stays -1 if no value exceeded 0).
     System.out.println(predict); 



    } 
} 

我在 .run().get(0); 這一行得到以下錯誤:

Exception in thread "main" org.tensorflow.TensorFlowException: Output 0 of type float does not match declared output type string for node _recv_input_example_tensor_0 = _Recv[_output_shapes=[[-1]], client_terminated=true, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=3663984897684684554, tensor_name="input_example_tensor:0", tensor_type=DT_STRING, _device="/job:localhost/replica:0/task:0/cpu:0"]() 
    at org.tensorflow.Session.run(Native Method) 
    at org.tensorflow.Session.access$100(Session.java:48) 
    at org.tensorflow.Session$Runner.runHelper(Session.java:285) 
    at org.tensorflow.Session$Runner.run(Session.java:235) 
    at tensorflow.HelloTF.main(HelloTF.java:35) 
+0

因此,您無法在您的Python代碼中獲得Saver創建可識別的模型?我從Saver生成.data和.meta文件,而不是您提到的.pbtxt。 – demongolem

+0

我已經解決了問題...答案2.另外,我已經能夠加載(在Java中)保存在.pbtxt中的keras模型。訣竅是使用張量板來查看圖形以提取輸入和輸出張量的正確名稱。 ... – rjpg

回答

1

錯誤信息提供了一個線索:模型中名爲"input_example_tensor"的張量預期包含string內容,而你提供的是float值。

根據張量的名稱和你的代碼判斷,我猜測你餵入的張量是在input_fn_utils.py中定義的。該張量會被傳給tf.parse_example() op,而這個op預期的輸入是一個由tf.train.Example協議緩衝區序列化成的字符串所組成的向量。

+0

您可以看到用於在python中創建模型的代碼,輸入定義如下: feature_columns = [tf.contrib.layers.real_valued_column("", dimension=train_inputs.shape[1], dtype=tf.float32)] – rjpg

+0

那麼我該怎麼做?發送一個由逗號分隔的浮點數組成的字符串? – rjpg

+0

如果你的確在使用@mrry指出的內容,那麼就如他所說的那樣,它期望一個序列化的'tf.train.Example'協議緩衝區。如果您使用的是Maven,那麼您可以使用[org.tensorflow:proto](https://search.maven.org/#artifactdetails%7Corg.tensorflow%7Cproto%7C1.1.0-rc2%7Cjar)包來獲取所有協議緩衝區並構建並序列化示例(請參閱[javadoc](http://static.javadoc.io/org.tensorflow/proto/1.1.0-rc2/org/tensorflow/example/Example.html)) – ash

1

好吧,我終於解決了:主要的問題是,在Java中要使用的輸入張量名稱是"dnn/input_from_feature_columns/input_from_feature_columns/concat",而不是"input_example_tensor"。

我是通過在TensorBoard中瀏覽圖形發現這一點的:tensorboard --logdir=D:\python\workspace\autoencoder\src\dnn\ModelSave

這裏是Java代碼:

public class HelloTF { 
public static void main(String[] args) throws Exception { 
    SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve"); 
    Session s = bundle.session(); 

    double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085}; 
    float [] inputfloat=new float[inputDouble.length]; 
    for(int i=0;i<inputfloat.length;i++) 
    { 
     inputfloat[i]=(float)inputDouble[i]; 
    } 
FloatBuffer.wrap(inputfloat)); 
    float[][] data= new float[1][35]; 
    data[0]=inputfloat; 
    Tensor inputTensor=Tensor.create(data); 


    Tensor result = s.runner() 
      .feed("dnn/input_from_feature_columns/input_from_feature_columns/concat", inputTensor) 
      //.feed("input_example_tensor", inputTensor) 
      //.fetch("tensorflow/serving/classify") 
      .fetch("dnn/multi_class_head/predictions/probabilities") 
      //.fetch("dnn/zero_fraction_3/Cast") 
      .run().get(0); 


    float[][] m = new float[1][5]; 
    float[][] vector = result.copyTo(m); 
    float maxVal = 0; 
    int inc = 0; 
    int predict = -1; 
    for(float val : vector[0]) 
    { 
     System.out.println(val+" "); 
     if(val > maxVal) { 
      predict = inc; 
      maxVal = val; 
     } 
     inc++; 
    } 
    System.out.println(predict); 



} 

}

我已經測試的輸出:

Python方面:

Prediction for sample_2 is:[3] 
Prediction for sample_2 is:[array([ 0.17157166, 0.24475774, 0.16158019, 0.24648622, 0.17560424], dtype=float32)] 

Java方面:

0.17157166 
0.24475774 
0.16158019 
0.24648622 
0.17560424 
3 
+0

NNNormalizeData-out.csv是一個包含35個輸入和1個輸出的數據集,輸出有5個類別:「0」,「1」,「2」,「3」,「4」。 它是關於金融時間序列的:0表示強烈下跌,1表示輕微下跌,2表示中性,3和4 ...現在我可以專注於模型本身了:-) – rjpg

0

在Tensorflow 1.1上,如果不使用feed("input_example_tensor", inputTensor),我會得到一個錯誤。

但是我發現example.proto可以作爲「input_example_tensor」來提供,雖然它花了很多時間來弄清楚如何爲串行化協議緩衝區創建字符串張量。

這就是我創建inputTensor的方法。

// Build the tf.train.Example that will be serialized and fed to the model.
org.tensorflow.example.Example.Builder example = org.tensorflow.example.Example.newBuilder();
/* set some features to example... */

// Wrap the serialized proto bytes in a tensor; its shape is not [1] yet.
byte[] serializedExample = example.build().toByteArray();
Tensor exampleTensor = Tensor.create(serializedExample);

// Add three ops to the loaded graph (Placeholder -> Reshape, with a Const
// target shape) so the tensor can be reshaped to [1] before it is fed as
// "input_example_tensor".
Graph graph = bundle.graph();
Output placeholder = graph.opBuilder("Placeholder", "example")
        .setAttr("dtype", exampleTensor.dataType())
        .build()
        .output(0);

Tensor shapeValue = Tensor.create(new long[]{1}, IntBuffer.wrap(new int[]{1}));
Output targetShape = graph.opBuilder("Const", "shape")
        .setAttr("dtype", shapeValue.dataType())
        .setAttr("value", shapeValue)
        .build()
        .output(0);

Output reshaped = graph.opBuilder("Reshape", "output")
        .addInput(placeholder)
        .addInput(targetShape)
        .build()
        .output(0);

// Run the reshape; inputTensor now has shape [1] and is ready to feed.
Tensor inputTensor = s.runner()
        .feed(placeholder, exampleTensor)
        .fetch(reshaped)
        .run()
        .get(0);
0

您在.feed()和.fetch()中的參數應該與您的輸入和輸出數據類型匹配。

你可以看看你的savedmodel.pbtxt文件,其中包含有關您的參數及其輸入/輸出類型的詳細信息。

例如,

我的Java代碼

Tensor result = s.runner() 
     .feed("ParseExample/ParseExample", inputTensor) 
     .fetch("dnn/binary_logistic_head/predictions/probabilities") 
     .run().get(0); 

我的savedModel.pbtxt(其中一部分)

# Graph node for the ParseExample op. Its first input is
# "input_example_tensor"; it has one dense feature and no sparse features.
node { 
    name: "ParseExample/ParseExample" 
    op: "ParseExample" 
    input: "input_example_tensor" 
    input: "ParseExample/ParseExample/names" 
    input: "ParseExample/ParseExample/dense_keys_0" 
    input: "ParseExample/Const" 
    # Number of dense features.
    attr { 
    key: "Ndense" 
    value { 
     i: 1 
    } 
    } 
    # Number of sparse features.
    attr { 
    key: "Nsparse" 
    value { 
     i: 0 
    } 
    } 
    # Element type of the dense feature: float.
    attr { 
    key: "Tdense" 
    value { 
     list { 
     type: DT_FLOAT 
     } 
    } 
    } 
    # Output shape [-1, 2]: unknown batch size, two values per example.
    attr { 
    key: "_output_shapes" 
    value { 
     list { 
     shape { 
      dim { 
      size: -1 
      } 
      dim { 
      size: 2 
      } 
     } 
     } 
    } 
    } 
    # Per-example shape of the dense feature: [2].
    attr { 
    key: "dense_shapes" 
    value { 
     list { 
     shape { 
      dim { 
      size: 2 
      } 
     } 
     } 
    } 
    } 
    # Empty because there are no sparse features.
    attr { 
    key: "sparse_types" 
    value { 
     list { 
     } 
    } 
    } 
} 
    # Output entry "scores": float tensor of shape [-1, 2] named
    # "dnn/binary_logistic_head/predictions/probabilities:0".
    # NOTE(review): this excerpt is not part of the node above; presumably it
    # comes from a signature definition elsewhere in the file -- confirm.
    outputs { 
    key: "scores" 
    value { 
     name: "dnn/binary_logistic_head/predictions/probabilities:0" 
     dtype: DT_FLOAT 
     tensor_shape { 
     dim { 
      size: -1 
     } 
     dim { 
      size: 2 
     } 
     } 
    } 
    } 

它們都與我的數據類型float兼容。

+0

這與另一個答案(含完整代碼)相同...如果你有更大更復雜的模型,很難在.pbtxt文件中找到名稱。使用TensorBoard查看圖形會更容易... – rjpg