我在Python中使用matplotlib構建散點圖。從matplotlib的圖(plot)中獲取數據
假設我有以下2個數據列表。
X = [1,2,3,4,5]
Y = [6,7,8,9,10]
然後我以X作爲X軸的值、Y作爲Y軸的值繪製散點圖。所以我會得到一張有5個散點的圖,對吧?
現在的問題是:是否有可能把這5個點與實際數據關聯起來?例如,當我點擊這5個點中的某一個時,它能告訴我這個點是由哪些原始數據繪製出來的嗎?
在此先感謝
我在Python中使用matplotlib構建散點圖。從matplotlib的圖(plot)中獲取數據
假設我有以下2個數據列表。
X = [1,2,3,4,5]
Y = [6,7,8,9,10]
然後我以X作爲X軸的值、Y作爲Y軸的值繪製散點圖。所以我會得到一張有5個散點的圖,對吧?
現在的問題是:是否有可能把這5個點與實際數據關聯起來?例如,當我點擊這5個點中的某一個時,它能告訴我這個點是由哪些原始數據繪製出來的嗎?
在此先感謝
使用略加修改的 Joe Kington 的 DataCursor:
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.cbook as cbook
import numpy as np
def fmt(x, y):
    """Return a two-line label showing x and y with two decimal places.

    The result has the form 'x: <x>\\ny: <y>' and is used as the default
    annotation text formatter.
    """
    return 'x: {x:0.2f}\ny: {y:0.2f}'.format(x=x, y=y)
class DataCursor(object):
    # https://stackoverflow.com/a/4674445/190597
    """A simple data cursor widget that displays the x,y location of a
    matplotlib artist when it is selected.

    Clicking (picking) one of the registered artists shows an annotation
    box at the data point in (x, y) that is nearest to the click.
    """

    def __init__(self, artists, x=(), y=(), tolerance=5, offsets=(-20, 20),
                 formatter=fmt, display_all=False):
        """Create the data cursor and connect it to the relevant figure.

        "artists" is the matplotlib artist or sequence of artists that will be
            selected.
        "x" and "y" are the data coordinates the annotation snaps to; they
            are stacked column-wise into an (N, 2) array of points.
        "tolerance" is the radius (in points) that the mouse click must be
            within to select the artist.
        "offsets" is a tuple of (x,y) offsets in points from the selected
            point to the displayed annotation box
        "formatter" is a callback function which takes 2 numeric arguments and
            returns a string
        "display_all" controls whether more than one annotation box will
            be shown if there are multiple axes.  Only one will be shown
            per-axis, regardless.
        """
        self._points = np.column_stack((x, y))
        self.formatter = formatter
        self.offsets = offsets
        self.display_all = display_all
        # np.iterable is used instead of matplotlib.cbook.iterable, which
        # recent matplotlib releases no longer provide.
        if not np.iterable(artists):
            artists = [artists]
        self.artists = artists
        # De-duplicate: several artists may share an axes / a figure.
        self.axes = tuple(set(art.axes for art in self.artists))
        self.figures = tuple(set(ax.figure for ax in self.axes))

        # One (initially hidden) annotation box per axes.
        self.annotations = {}
        for ax in self.axes:
            self.annotations[ax] = self.annotate(ax)

        for artist in self.artists:
            artist.set_picker(tolerance)
        for fig in self.figures:
            fig.canvas.mpl_connect('pick_event', self)

    def annotate(self, ax):
        """Draws and hides the annotation box for the given axis "ax"."""
        # The text starts empty; it is filled in by __call__ on each pick.
        annotation = ax.annotate(
            '', xy=(0, 0), ha='right',
            xytext=self.offsets, textcoords='offset points', va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0')
        )
        annotation.set_visible(False)
        return annotation

    def snap(self, x, y):
        """Return the value in self._points closest to (x, y).

        Rows containing NaN are ignored.  If there is no usable point at
        all (no data was supplied, or every row contains NaN), the input
        coordinates are returned unchanged.
        """
        sq_dist = ((self._points - (x, y)) ** 2).sum(axis=-1)
        # nanargmin raises ValueError on empty or all-NaN input, so handle
        # those cases explicitly instead of failing inside the callback.
        if sq_dist.size == 0 or np.isnan(sq_dist).all():
            return x, y
        idx = np.nanargmin(sq_dist)
        return self._points[idx]

    def __call__(self, event):
        """Intended to be called through "mpl_connect"."""
        # This will only be called if the click is within "tolerance" of
        # an artist, so xdata/ydata are the click position in data coords.
        x, y = event.mouseevent.xdata, event.mouseevent.ydata
        annotation = self.annotations[event.artist.axes]
        if x is not None:
            if not self.display_all:
                # Hide any other annotation boxes...
                for ann in self.annotations.values():
                    ann.set_visible(False)
            # Update the annotation in the current axis, showing the
            # nearest original data point rather than the raw click.
            x, y = self.snap(x, y)
            annotation.xy = x, y
            annotation.set_text(self.formatter(x, y))
            annotation.set_visible(True)
            event.canvas.draw()
# Sample data from the question: five (x, y) pairs.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 9, 10]

# Single axes holding the scatter plot.
fig = plt.figure()
ax = fig.add_subplot(111)
scat = ax.scatter(x, y)

# Attach the click-to-inspect cursor to the scatter artist, then show.
DataCursor(scat, x, y)
plt.show()
產生
您可以點擊任何一個點,氣球(提示框)就會顯示其對應的基礎數據值。
我對DataCursor所做的輕微修改是添加了snap
方法,它確保顯示的數據點來自原始數據集,而不是鼠標實際點擊的位置。
如果你安裝了SciPy,你可能會更喜歡這個版本的光標,它讓氣球跟隨鼠標移動(無需點擊):
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial as spatial
def fmt(x, y):
    """Return a two-line label showing x and y with two decimal places.

    The result has the form 'x: <x>\\ny: <y>' and is used as the default
    annotation text formatter.
    """
    return 'x: {x:0.2f}\ny: {y:0.2f}'.format(x=x, y=y)
class FollowDotCursor(object):
    """Display the x,y location of the nearest data point.

    On every mouse-motion event over the figure, a marker dot and an
    annotation box are moved to the data point nearest to the pointer.
    """

    def __init__(self, ax, x, y, tolerance=5, formatter=fmt, offsets=(-20, 20)):
        """Build the nearest-point lookup and hook up the motion callback.

        "ax" is the axes to draw the dot and annotation on.
        "x" and "y" are the data coordinates; if "x" cannot be converted to
            floats directly it is converted with matplotlib's date2num.
        "tolerance" is stored on the instance but not otherwise used here.
        "formatter" is a callback taking (x, y) and returning the label text.
        "offsets" is the (x, y) offset in points of the annotation box from
            the selected point.
        """
        try:
            x = np.asarray(x, dtype='float')
        except (TypeError, ValueError):
            # Imported here because the date fallback is the only user.
            import matplotlib.dates as mdates
            x = np.asarray(mdates.date2num(x), dtype='float')
        y = np.asarray(y, dtype='float')
        self._points = np.column_stack((x, y))
        self.offsets = offsets
        # Scale x so both axes span a comparable range in the tree.  Fall
        # back to 1 when either range is zero: a zero x-range would divide
        # by zero, and a zero y-range would give a scale of 0 that collapses
        # every x coordinate onto the same value.
        x_range = np.ptp(x)
        y_range = np.ptp(y)
        self.scale = y_range / x_range if x_range and y_range else 1
        self.tree = spatial.cKDTree(self.scaled(self._points))
        self.formatter = formatter
        self.tolerance = tolerance
        self.ax = ax
        self.fig = ax.figure
        # NOTE(review): kept from the original; presumably moves the x-axis
        # label out of the way of the annotation -- confirm it is wanted.
        self.ax.xaxis.set_label_position('top')
        self.dot = ax.scatter(
            [x.min()], [y.min()], s=130, color='green', alpha=0.7)
        self.annotation = self.setup_annotation()
        # Connect on the axes' own figure rather than on whichever figure
        # happens to be current.
        self.fig.canvas.mpl_connect('motion_notify_event', self)

    def scaled(self, points):
        """Return "points" with the x column multiplied by self.scale."""
        points = np.asarray(points)
        return points * (self.scale, 1)

    def __call__(self, event):
        """Motion callback: move the dot and annotation to the nearest point."""
        ax = self.ax
        # event.inaxes is always the current axis. If you use twinx, ax could be
        # a different axis.
        if event.inaxes == ax:
            x, y = event.xdata, event.ydata
        elif event.inaxes is None:
            return
        else:
            # Pointer is over another axes: convert its pixel position into
            # this axes' data coordinates.
            inv = ax.transData.inverted()
            x, y = inv.transform([(event.x, event.y)]).ravel()
        annotation = self.annotation
        x, y = self.snap(x, y)
        annotation.xy = x, y
        annotation.set_text(self.formatter(x, y))
        self.dot.set_offsets((x, y))
        event.canvas.draw()

    def setup_annotation(self):
        """Draw and hide the annotation box."""
        annotation = self.ax.annotate(
            '', xy=(0, 0), ha='right',
            xytext=self.offsets, textcoords='offset points', va='bottom',
            bbox=dict(
                boxstyle='round,pad=0.5', fc='yellow', alpha=0.75),
            arrowprops=dict(
                arrowstyle='->', connectionstyle='arc3,rad=0'))
        return annotation

    def snap(self, x, y):
        """Return the value in self.tree closest to x, y."""
        dist, idx = self.tree.query(self.scaled((x, y)), k=1, p=1)
        try:
            return self._points[idx]
        except IndexError:
            # IndexError: index out of bounds (the query found no
            # neighbour); fall back to the first data point.
            return self._points[0]
# Sample data from the question: five (x, y) pairs.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 9, 10]

# Single axes holding the scatter plot.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(x, y)

# Keep a reference to the hover cursor, then show the figure.
cursor = FollowDotCursor(ax, x, y)
plt.show()
我建議在「idx = np.argmin」這一行中使用nanargmin而不是argmin,不過這也是因爲我喜歡讓自己的代碼能夠應對偶爾出現的NaN(在做類似的事情時確實會遇到這種情況)。如果不這樣做,那麼對於任何含有NaN的數組它都會悄無聲息地失敗,因爲argmin會選中NaN,導致註釋無法顯示 :) –
@darkgreen:謝謝!這*是一種改進。 – unutbu
現在使用mpld3只需幾行代碼就能做到這一點:
不幸的是,[mpld3不再維護](https://github.com/mpld3/mpld3/blob/master/README.md)。 – user1735003
對於通過搜索來到這裏的人,也請參考[這個問題](https://stackoverflow.com/questions/7908636/possible-to-make-labels-appear-when-hovering-over-a-point-in-matplotlib)。 – ImportanceOfBeingErnest