2017-03-31 61 views
6

大家好,無法透過 Anaconda/Jupyter 取得 MNIST 數據庫

我剛開始接觸 Python/Anaconda/Jupyter/NumPy/pandas 等工具,所以如果這是一個非常愚蠢的問題,請見諒。我試圖透過 Anaconda/Jupyter 來取得 MNIST 數據庫,但每次最後都得到 HTTP 錯誤 500。這真的是服務器問題(如 500 所表示的那樣),還是我做錯了什麼?

輸入在jupyter:

from sklearn.datasets import fetch_mldata 
mnist = fetch_mldata('MNIST original') 

結果:

--------------------------------------------------------------------------- 
    HTTPError         Traceback (most recent call last) 
    <ipython-input-1-15dc285fb373> in <module>() 
      1 from sklearn.datasets import fetch_mldata 
    ----> 2 mnist = fetch_mldata('MNIST original') 

    e:\ProgramData\Anaconda3\lib\site-packages\sklearn\datasets\mldata.py in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home) 
     140   urlname = MLDATA_BASE_URL % quote(dataname) 
     141   try: 
    --> 142    mldata_url = urlopen(urlname) 
     143   except HTTPError as e: 
     144    if e.code == 404: 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context) 
     221  else: 
     222   opener = _opener 
    --> 223  return opener.open(url, data, timeout) 
     224 
     225 def install_opener(opener): 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout) 
     530   for processor in self.process_response.get(protocol, []): 
     531    meth = getattr(processor, meth_name) 
    --> 532    response = meth(req, response) 
     533 
     534   return response 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response) 
     640   if not (200 <= code < 300): 
     641    response = self.parent.error(
    --> 642     'http', request, response, code, msg, hdrs) 
     643 
     644   return response 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args) 
     562    http_err = 0 
     563   args = (dict, proto, meth_name) + args 
    --> 564   result = self._call_chain(*args) 
     565   if result: 
     566    return result 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args) 
     502   for handler in handlers: 
     503    func = getattr(handler, meth_name) 
    --> 504    result = func(*args) 
     505    if result is not None: 
     506     return result 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_302(self, req, fp, code, msg, headers) 
     754   fp.close() 
     755 
    --> 756   return self.parent.open(new, timeout=req.timeout) 
     757 
     758  http_error_301 = http_error_303 = http_error_307 = http_error_302 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout) 
     530   for processor in self.process_response.get(protocol, []): 
     531    meth = getattr(processor, meth_name) 
    --> 532    response = meth(req, response) 
     533 
     534   return response 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response) 
     640   if not (200 <= code < 300): 
     641    response = self.parent.error(
    --> 642     'http', request, response, code, msg, hdrs) 
     643 
     644   return response 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args) 
     568   if http_err: 
     569    args = (dict, 'default', 'http_error_default') + orig_args 
    --> 570    return self._call_chain(*args) 
     571 
     572 # XXX probably also want an abstract factory that knows when it makes 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args) 
     502   for handler in handlers: 
     503    func = getattr(handler, meth_name) 
    --> 504    result = func(*args) 
     505    if result is not None: 
     506     return result 

    e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs) 
     648 class HTTPDefaultErrorHandler(BaseHandler): 
     649  def http_error_default(self, req, fp, code, msg, hdrs): 
    --> 650   raise HTTPError(req.full_url, code, msg, hdrs, fp) 
     651 
     652 class HTTPRedirectHandler(BaseHandler): 

    HTTPError: HTTP Error 500: INTERNAL SERVER ERROR 
+1

錯誤消息返回 HTTP 錯誤 500,這意味着內部服務器錯誤。所以,很可能是服務器端出了問題,建議稍後再試一次。如果您查看網址(URL)mldata.org,您將看到它目前不可用。 – pafede2

回答

2

我也遇到了和你一樣的錯誤。以下是一些不需要此服務器的可能解決方案。

如果您已經安裝tensorflow,您可以通過以下方式獲得MNIST數據:

# Import TensorFlow's MNIST tutorial helper module under a short alias.
import tensorflow.examples.tutorials.mnist.input_data as input_data 
# Load the data sets; "MNIST" is the single argument handed to read_data_sets
# (presumably the local directory used for the data files -- TODO confirm
# against the TensorFlow documentation).
m=input_data.read_data_sets("MNIST") 

這樣一來,例如 len(m.train.images) 的結果就是 55000。

如果你沒有安裝 tensorflow,可以按照這裏(here)的說明來取得這個數據集。

0

這裏是一個下載 MNIST 數據集的備用位置(參考自 https://github.com/ageron/handson-ml/blob/master/03_classification.ipynb ):

from six.moves import urllib
from sklearn.datasets import fetch_mldata

# Try the regular scikit-learn loader first; fall back to a mirrored .mat
# file only when that request fails with an HTTP error.
try:
    mnist = fetch_mldata('MNIST original')
except urllib.error.HTTPError:
    print("Could not download MNIST data from mldata.org, trying alternative...")

    # Alternative method to load MNIST, if mldata.org is down
    from contextlib import closing
    from scipy.io import loadmat
    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    mnist_path = "./mnist-original.mat"
    # closing() releases the HTTP response even if read() raises, and works
    # whether or not the response object is itself a context manager.
    with closing(urllib.request.urlopen(mnist_alternative_url)) as response:
        content = response.read()
    # Open the output file only after the download succeeded, so a failed
    # request does not leave an empty file behind.
    with open(mnist_path, "wb") as f:
        f.write(content)
    mnist_raw = loadmat(mnist_path)
    # Build a plain dict from the .mat contents: "data" is transposed and
    # "target" is the first row of the "label" entry (presumably mirroring
    # the fields of the fetch_mldata result -- verify against callers).
    mnist = {
        "data": mnist_raw["data"].T,
        "target": mnist_raw["label"][0],
        "COL_NAMES": ["label", "data"],
        "DESCR": "mldata.org dataset: mnist-original",
    }
    print("Success!")
0

找到一個很好的解決方案在這裏:https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py

它從 Yann LeCun 的網站(http://yann.lecun.com/exdb/mnist/)下載數據集。

import os

try:
    # Python 3: urlretrieve lives in urllib.request.
    from urllib.request import urlretrieve
except ImportError:
    # Python 2 fallback (the location this snippet originally imported from).
    from urllib import urlretrieve

import numpy as np

def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    """Retrieve ``source + filename`` and store it locally as ``filename``.

    ``filename`` is used both as the suffix appended to ``source`` to form
    the URL and as the local path the retrieved data is written to.
    A progress line is printed before the transfer starts.
    """
    print("Downloading %s" % filename)
    remote_url = source + filename
    urlretrieve(remote_url, filename)

# We then define functions for loading MNIST images and labels. 
# For convenience, they also download the requested files if needed. 
import gzip 

def load_mnist_images(filename):
    """Load a gzip-compressed MNIST image file as a scaled 4-D array.

    If ``filename`` does not exist yet, it is downloaded first. The
    decompressed bytes are read as uint8 values, skipping the first 16
    bytes (presumably the file header of Yann LeCun's binary format).

    Returns an array of shape ``(examples, 1, 28, 28)`` holding the pixel
    bytes divided by 256, i.e. values in the range [0, 255/256].
    """
    if not os.path.exists(filename):
        download(filename)
    with gzip.open(filename, 'rb') as archive:
        raw_bytes = archive.read()
    pixels = np.frombuffer(raw_bytes, np.uint8, offset=16)
    # Shape convention: (examples, channels, rows, columns), one channel.
    images = pixels.reshape(-1, 1, 28, 28)
    # Dividing by 256 (not 255) keeps compatibility with the version
    # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.
    return images / np.float32(256)

def load_mnist_labels(filename):
    """Load a gzip-compressed MNIST label file as a 1-D uint8 array.

    If ``filename`` does not exist yet, it is downloaded first. The
    decompressed bytes are read as uint8 values, skipping the first 8
    bytes (presumably the file header of Yann LeCun's binary format).
    The resulting vector of integer labels is returned unchanged.
    """
    if not os.path.exists(filename):
        download(filename)
    with gzip.open(filename, 'rb') as archive:
        raw_bytes = archive.read()
    labels = np.frombuffer(raw_bytes, np.uint8, offset=8)
    return labels


# Load the training and test images/labels. Each loader call downloads its
# file first when it is not already present at the given relative path, so
# running these lines may fetch up to four files into the current directory.
X_train = load_mnist_images('train-images-idx3-ubyte.gz') 
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz') 
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz') 
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz') 
2

我也有同樣的錯誤,不得不關閉防火牆。在Macbook上,依次轉到系統首選項>安全&隱私>防火牆>關閉防火牆。

2
from sklearn.datasets import fetch_mldata

# Try the regular scikit-learn loader first; on any failure fall back to a
# mirrored copy of the .mat file hosted on GitHub.
try:
    mnist = fetch_mldata('MNIST original')
except Exception:
    # Deliberately broad: this is a best-effort fallback for whatever error
    # the primary download raises.
    from contextlib import closing
    from six.moves import urllib
    from scipy.io import loadmat
    import os

    mnist_dir = os.path.join(".", "datasets")
    mnist_path = os.path.join(mnist_dir, "mnist-original.mat")

    # open(..., "wb") does not create missing parent directories, so make
    # sure ./datasets exists before writing into it.
    if not os.path.isdir(mnist_dir):
        os.makedirs(mnist_dir)

    # download dataset from github.
    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    # closing() releases the HTTP response even if read() raises.
    with closing(urllib.request.urlopen(mnist_alternative_url)) as response:
        content = response.read()
    with open(mnist_path, "wb") as f:
        f.write(content)

    mnist_raw = loadmat(mnist_path)
    # Build a plain dict from the .mat contents: "data" is transposed and
    # "target" is the first row of the "label" entry.
    mnist = {
        "data": mnist_raw["data"].T,
        "target": mnist_raw["label"][0],
        "COL_NAMES": ["label", "data"],
        "DESCR": "mldata.org dataset: mnist-original",
    }
    print("Done!")
+0

這個答案只有代碼。請添加一些解釋和補充說明。 –