0
import os

import numpy as np
import pandas as pd
# Resolve the data set location relative to the current user's home directory.
_home_dir = os.path.expanduser("~")
data_folder = os.path.join(_home_dir, "data", "Ads")
data_filename = os.path.join(data_folder, "ad.data")
def convert_number(x):
    """Parse *x* as a float, mapping unparseable text to NaN.

    Args:
        x: Raw cell value, typically a string read from the CSV file.

    Returns:
        ``float(x)`` when the conversion succeeds, otherwise ``np.nan``.
    """
    try:
        return float(x)
    except ValueError:
        # Placeholder entries such as "?" cannot be parsed as numbers;
        # NaN marks them as missing instead of aborting the load.
        return np.nan
from collections import defaultdict
# pandas only applies a converter to columns that are explicit keys of the
# mapping, and a defaultdict factory is called with no arguments, so
# defaultdict(convert_number) never converted any feature column. Every
# feature column (0..1557) is therefore listed explicitly.
converters = {column: convert_number for column in range(1558)}
# Column 1558 holds the class label: "ad." becomes 1, anything else 0.
converters[1558] = lambda x: 1 if x.strip() == "ad." else 0
ads = pd.read_csv(data_filename, header=None, converters=converters)
# Unparseable cells are NaN after conversion; drop those rows because
# PCA.fit_transform rejects input containing NaN.
ads = ads.dropna()
# Notebook preview of the first five rows (no effect when run as a script).
ads[:5]
# Feature matrix: every column except the label.
x = ads.drop(1558, axis=1).values
# Target vector: the 0/1 label column.
y = ads[1558]
import numpy as np
from sklearn.decomposition import PCA

# Fit a 5-component PCA on the feature matrix and keep the projected data.
pca = PCA(n_components=5)
xd = pca.fit_transform(x)

# Print floats with three decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)
# Notebook display of the variance ratio explained by each component.
pca.explained_variance_ratio_
錯誤:
ValueError Traceback (most recent call last)
<ipython-input-10-f726f2ff6f29> in <module>()
1 from sklearn.decomposition import PCA
2 pca = PCA(n_components=5)
----> 3 xd = pca.fit_transform(x)
4 import numpy as np
5 np.set_printoptions(precision=3,suppress=True)
/home/kongnian/anaconda3/lib/python3.5/site-packages/sklearn/decomposition/pca.py in fit_transform(self, X, y)
239
240 """
--> 241 U, S, V = self._fit(X)
242 U = U[:, :self.n_components_]
243
/home/kongnian/anaconda3/lib/python3.5/site-packages/sklearn/decomposition/pca.py in _fit(self, X)
266 requested.
267 """
--> 268 X = check_array(X)
269 n_samples, n_features = X.shape
270 X = as_float_array(X, copy=self.copy)
/home/kongnian/anaconda3/lib/python3.5/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
371 force_all_finite)
372 else:
--> 373 array = np.array(array, dtype=dtype, order=order, copy=copy)
374
375 if ensure_2d:
ValueError: could not convert string to float: '?'
數據集:從 http://archive.ics.uci.edu/ml/datasets/Internet+Advertisements 下載的廣告數據集。 OS 信息:Linux Ubuntu 4.4.0-40-generic #60-Ubuntu SMP Fri Sep 23 16:45:45 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux
添加 na_values='?' 之後,錯誤的詳細信息: ValueError 異常:無法將字符串轉換爲浮點數:'?' – kongnian
我相信這意味着你的數據集中有帶空格的 ' ?'。參數 na_values 也可以接受列表,所以請嘗試以下內容:na_values=['?', ' ?'] – Etaoin
謝謝!問題已解決。 – kongnian