-1
所以我有一個基因芯片的數據集,其中16芯片測量1組織樣本。我想從每個芯片中的每個基因中減去所有芯片上該基因的平均值。所以我按基因分組並計算平均值。現在我想獲取原始PM強度值並從該基因中減去平均值。 因此,我需要將基因列與表中存儲此基因組平均值的索引匹配,然後從PM列中減去此值。熊貓減去從列表值的組平均值
totalgene = genedata.groupby(genedata['GENE']).mean()[['PM','LOGPM']]
genedata['MEANNORM'] = genedata['PM'] - totalgene.ix[genedata['GENE']]['AVGPM']
genedata['MEANNORM'] = genedata['LOGPM'] - totalgene.ix[genedata['GENE']]['AVGLOGPM']
導致錯誤:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-18-08c1bb979f9c> in <module>()
----> 1 genedata['MEANNORM'] = genedata['PM'] - totalgene.ix[genedata['GENE'],'AVGPM']
2 genedata['MEANNORM'] = genedata['LOGPM'] - totalgene.ix[genedata['GENE'],'AVGLOGPM']
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
2417 else:
2418 # set column
-> 2419 self._set_item(key, value)
2420
2421 def _setitem_slice(self, key, value):
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\frame.py in _set_item(self, key, value)
2483
2484 self._ensure_valid_index(value)
-> 2485 value = self._sanitize_column(key, value)
2486 NDFrame._set_item(self, key, value)
2487
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\frame.py in _sanitize_column(self, key, value, broadcast)
2633
2634 if isinstance(value, Series):
-> 2635 value = reindexer(value)
2636
2637 elif isinstance(value, DataFrame):
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\frame.py in reindexer(value)
2625 # duplicate axis
2626 if not value.index.is_unique:
-> 2627 raise e
2628
2629 # other
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\frame.py in reindexer(value)
2620 # GH 4107
2621 try:
-> 2622 value = value.reindex(self.index)._values
2623 except Exception as e:
2624
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\series.py in reindex(self, index, **kwargs)
2360 @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
2361 def reindex(self, index=None, **kwargs):
-> 2362 return super(Series, self).reindex(index=index, **kwargs)
2363
2364 @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\generic.py in reindex(self, *args, **kwargs)
2257 # perform the reindex on the axes
2258 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 2259 fill_value, copy).__finalize__(self)
2260
2261 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
2275 obj = obj._reindex_with_indexers({axis: [new_index, indexer]},
2276 fill_value=fill_value,
-> 2277 copy=copy, allow_dups=False)
2278
2279 return obj
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
2369 fill_value=fill_value,
2370 allow_dups=allow_dups,
-> 2371 copy=copy)
2372
2373 if copy and new_data is self._data:
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\core\internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
3837 # some axes don't allow reindexing with dups
3838 if not allow_dups:
-> 3839 self.axes[axis]._can_reindex(indexer)
3840
3841 if axis >= self.ndim:
C:\Users\timothy\Anaconda3\lib\site-packages\pandas\indexes\base.py in _can_reindex(self, indexer)
2492 # trying to reindex on an axis with duplicates
2493 if not self.is_unique and len(indexer):
-> 2494 raise ValueError("cannot reindex from a duplicate axis")
2495
2496 def reindex(self, target, method=None, level=None, limit=None,
ValueError: cannot reindex from a duplicate axis
,我不知道爲什麼? 有人可以幫忙嗎?
請包括_complete_錯誤信息, – DyZ
完成和道歉:) –
我正在考慮試圖破譯這一點,併爲您提供一些有用的信息......但我無法弄清楚。您可以閱讀[** MCVE **](http://stackoverflow.com/help/mcve)和[** HowToAsk **](http://stackoverflow.com/help/how-to - 問) – piRSquared