2012-11-09 75 views
4

我在 Python 中使用 matplotlib 構建散點圖。從 matplotlib 的圖形中獲取數據

假設我有以下2個數據列表。

X = [1,2,3,4,5]

Y = [6,7,8,9,10]

然後我用X作爲X軸的值、Y作爲Y軸的值來繪製散點圖。所以我會得到一張有5個散點的圖,對吧?

現在的問題是:是否有可能讓這5個點與實際數據建立關聯?例如,當我點擊這5個點中的某一個時,它能告訴我這個點是用哪些原始數據畫出來的嗎?

在此先感謝

+0

對於通過搜索來到這裏的人,也請參考[這個問題](https://stackoverflow.com/questions/7908636/possible-to-make-labels-appear-when-hovering-over-a-point-in-matplotlib)。 – ImportanceOfBeingErnest

回答

14

使用略加修改的 Joe Kington 的 DataCursor:

import matplotlib.pyplot as plt 
import matplotlib.mlab as mlab 
import matplotlib.cbook as cbook 
import numpy as np 

def fmt(x, y):
    """Return a two-line label showing x and y with two decimal places."""
    template = 'x: {x:0.2f}\ny: {y:0.2f}'
    return template.format(x=x, y=y)

class DataCursor(object):
    """A simple data cursor widget for matplotlib artists.

    When one of the registered artists is picked, an annotation box is shown
    at the supplied data point closest to the click, labelled with that
    point's x,y values.

    Adapted from https://stackoverflow.com/a/4674445/190597
    """

    def __init__(self, artists, x=(), y=(), tolerance=5, offsets=(-20, 20),
                 formatter=fmt, display_all=False):
        """Create the data cursor and connect it to the relevant figure.

        "artists" is the matplotlib artist or sequence of artists that will be
            selected.
        "x" and "y" are equal-length sequences holding the coordinates of the
            data points; a click is snapped to the nearest of them. When they
            are empty the clicked coordinates themselves are displayed.
        "tolerance" is the radius (in points) that the mouse click must be
            within to select the artist.
        "offsets" is a tuple of (x,y) offsets in points from the selected
            point to the displayed annotation box.
        "formatter" is a callback function which takes 2 numeric arguments and
            returns a string.
        "display_all" controls whether more than one annotation box will
            be shown if there are multiple axes. Only one will be shown
            per-axis, regardless.
        """
        # One row per data point: column 0 is x, column 1 is y.
        self._points = np.column_stack((x, y))
        self.formatter = formatter
        self.offsets = offsets
        self.display_all = display_all
        # np.iterable is used because matplotlib.cbook.iterable is no longer
        # available in current matplotlib releases.
        if not np.iterable(artists):
            artists = [artists]
        self.artists = artists
        # De-duplicate: several artists may share an axes, several axes a
        # figure.
        self.axes = tuple(set(art.axes for art in self.artists))
        self.figures = tuple(set(ax.figure for ax in self.axes))

        # One (initially hidden) annotation box per axes.
        self.annotations = {}
        for ax in self.axes:
            self.annotations[ax] = self.annotate(ax)

        for artist in self.artists:
            artist.set_picker(tolerance)
        for fig in self.figures:
            # The instance itself is the callback (see __call__).
            fig.canvas.mpl_connect('pick_event', self)

    def annotate(self, ax):
        """Draws and hides the annotation box for the given axis "ax"."""
        annotation = ax.annotate(
            # Placeholder text; it is replaced on the first pick. The
            # formatter must be *called* here: annotate expects a string,
            # not the callback itself.
            self.formatter(0, 0), xy=(0, 0), ha='right',
            xytext=self.offsets, textcoords='offset points', va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0')
            )
        annotation.set_visible(False)
        return annotation

    def snap(self, x, y):
        """Return the value in self._points closest to (x, y).

        Points whose distance evaluates to NaN are ignored. If there is no
        usable point (no data was supplied, or every distance is NaN), the
        coordinates (x, y) are returned unchanged.
        """
        # Squared Euclidean distance from (x, y) to every stored point.
        sq_dist = ((self._points - (x, y))**2).sum(axis=-1)
        try:
            idx = np.nanargmin(sq_dist)
        except ValueError:
            # nanargmin raises ValueError for an empty or all-NaN input.
            return x, y
        return self._points[idx]

    def __call__(self, event):
        """Intended to be called through "mpl_connect"."""
        # This will only be called if the click is within "tolerance" of an
        # artist; the clicked position is then snapped to the nearest
        # supplied data point.
        x, y = event.mouseevent.xdata, event.mouseevent.ydata
        annotation = self.annotations[event.artist.axes]
        if x is None:
            # No data coordinates are available for this click.
            return
        if not self.display_all:
            # Hide any other annotation boxes...
            for ann in self.annotations.values():
                ann.set_visible(False)
        # Update the annotation in the current axis..
        x, y = self.snap(x, y)
        annotation.xy = x, y
        annotation.set_text(self.formatter(x, y))
        annotation.set_visible(True)
        event.canvas.draw()

# Demo: plot five sample points and attach a click-driven data cursor.
x = list(range(1, 6))
y = list(range(6, 11))

fig = plt.figure()
ax = fig.add_subplot(111)
scat = ax.scatter(x, y)
DataCursor(scat, x, y)
plt.show()

產生

enter image description here

您可以點擊任意一個點,氣泡框將顯示該點對應的原始數據值。


我對DataCursor所做的輕微修改是添加了snap方法,它確保顯示的數據點來自原始數據集,而不是鼠標實際點擊的位置。


如果你安裝了SciPy,你可能會更喜歡這個版本的光標,它讓氣泡框跟隨鼠標移動(無需點擊):

import matplotlib.pyplot as plt 
import numpy as np 
import scipy.spatial as spatial 

def fmt(x, y):
    """Return a two-line label showing x and y with two decimal places."""
    template = 'x: {x:0.2f}\ny: {y:0.2f}'
    return template.format(x=x, y=y)

class FollowDotCursor(object):
    """Display the x,y location of the nearest data point.

    As the mouse moves over the figure, a highlight dot and an annotation
    box are moved to the supplied data point closest to the pointer.
    """

    def __init__(self, ax, x, y, tolerance=5, formatter=fmt, offsets=(-20, 20)):
        """Attach the cursor to the axes "ax".

        "ax" is the matplotlib axes the dot and annotation are drawn on.
        "x" and "y" are equal-length sequences of data coordinates. "x" may
            hold values that cannot be cast to float directly; those are
            passed through matplotlib.dates.date2num.
        "tolerance" is stored on the instance but not otherwise used by this
            class.
        "formatter" is a callback function which takes 2 numeric arguments
            and returns a string.
        "offsets" is a tuple of (x,y) offsets in points from the selected
            point to the displayed annotation box.
        """
        try:
            x = np.asarray(x, dtype='float')
        except (TypeError, ValueError):
            # Imported locally: the file's top-level imports do not provide
            # "mdates", so the fallback would otherwise raise NameError.
            import matplotlib.dates as mdates
            x = np.asarray(mdates.date2num(x), dtype='float')
        y = np.asarray(y, dtype='float')
        # One row per data point: column 0 is x, column 1 is y.
        self._points = np.column_stack((x, y))
        self.offsets = offsets
        # Ratio of the y extent to the x extent (1 when x has no extent).
        # np.ptp is used because the ndarray.ptp method is not available in
        # NumPy 2.
        x_extent = np.ptp(x)
        self.scale = np.ptp(y) / x_extent if x_extent else 1
        # Nearest-neighbour lookups are done in the rescaled coordinates.
        self.tree = spatial.cKDTree(self.scaled(self._points))
        self.formatter = formatter
        self.tolerance = tolerance
        self.ax = ax
        self.fig = ax.figure
        self.ax.xaxis.set_label_position('top')
        # Marker that highlights the currently selected data point.
        self.dot = ax.scatter(
            [x.min()], [y.min()], s=130, color='green', alpha=0.7)
        self.annotation = self.setup_annotation()
        # Connect through the axes' own figure rather than pyplot's current
        # figure, which may be a different one. The instance itself is the
        # callback (see __call__).
        self.fig.canvas.mpl_connect('motion_notify_event', self)

    def scaled(self, points):
        """Return "points" with the x coordinate multiplied by self.scale."""
        points = np.asarray(points)
        return points * (self.scale, 1)

    def __call__(self, event):
        """Move the dot and annotation to the data point nearest the mouse.

        Intended to be called through "mpl_connect".
        """
        ax = self.ax
        # event.inaxes is always the current axis. If you use twinx, ax could be
        # a different axis.
        if event.inaxes == ax:
            x, y = event.xdata, event.ydata
        elif event.inaxes is None:
            # The pointer is not over any axes.
            return
        else:
            # Convert the event's display coordinates into this axes' data
            # coordinates.
            inv = ax.transData.inverted()
            x, y = inv.transform([(event.x, event.y)]).ravel()
        annotation = self.annotation
        x, y = self.snap(x, y)
        annotation.xy = x, y
        annotation.set_text(self.formatter(x, y))
        self.dot.set_offsets((x, y))
        event.canvas.draw()

    def setup_annotation(self):
        """Create and return the annotation box, initially with empty text."""
        annotation = self.ax.annotate(
            '', xy=(0, 0), ha='right',
            xytext=self.offsets, textcoords='offset points', va='bottom',
            bbox=dict(
                boxstyle='round,pad=0.5', fc='yellow', alpha=0.75),
            arrowprops=dict(
                arrowstyle='->', connectionstyle='arc3,rad=0'))
        return annotation

    def snap(self, x, y):
        """Return the value in self.tree closest to x, y.

        The lookup uses the p=1 Minkowski distance in the rescaled
        coordinates. If the index returned by the tree is not a valid row of
        self._points, the first data point is returned instead.
        """
        _, idx = self.tree.query(self.scaled((x, y)), k=1, p=1)
        try:
            return self._points[idx]
        except IndexError:
            # IndexError: index out of bounds
            # NOTE(review): presumably the tree reports "no neighbour found"
            # with an index equal to the number of points -- confirm.
            return self._points[0]

# Demo: plot five sample points and attach a cursor that follows the mouse.
x = list(range(1, 6))
y = list(range(6, 11))

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(x, y)
cursor = FollowDotCursor(ax, x, y)
plt.show()

+0

我建議在「idx = np.argmin」那一行使用nanargmin而不是argmin,不過這也是因爲我希望我的代碼對偶爾出現的NaN足夠健壯(在做類似的事情時確實會遇到這種情況)。如果不這樣做,它在任何含有NaN的數組上都會悄無聲息地失敗,因爲argmin會選中NaN,導致註釋無法顯示 :) –

+0

@darkgreen:謝謝!這*確實*是一種改進。 – unutbu