我們在運行在1.25GHz的多核linux嵌入式系統中缺少中斷。是否是內核凍結
背景:
- 內核版本:2.6.32.27
- 我們有哪些需要實時性能的用戶空間進程。
- 它們在1ms邊界內運行。
- 也就是說,在1ms內,他們需要完成一組任務,最大可能需要800uS。
- 我們有一個外部組件FPGA,它通過配置爲邊沿觸發中斷的GPIO引腳向多核處理器提供1ms和10ms中斷。
- 這些中斷在內核驅動程序中處理。
軟件體系結構是這樣一種方式,用戶進程在完成其工作之後會對GPIO驅動程序執行ioctl操作。
在此ioctl中,驅動程序會將進程置於wakeup_interruptible狀態。 每當接收到下一個1ms中斷時,ISR都會喚醒該進程。這個循環重複。
1ms和10ms中斷都使用smp_affinity路由到處理器的單個內核。
問題:
- 有時我們會發現一些中斷被錯過。
- (即ISR本身不被調用)。
- 12至20ms後ISR正常打中。
- 我們可以通過分析連續的ISR調用之間的持續時間,並讓計數器增加ISR中的第一件事來理解。
這主要發生在過程級別的高系統負載,並且是隨機的並且很難再現。
我附上了骨骼代碼。
首先,我必須隔離它是硬件還是軟件問題。由於它是一個給予中斷的FPGA,所以我們對硬件沒有太多懷疑。
該內核是否凍結?這是最有可能的情況,因爲cpu週期正在增加。
它可能是由於熱條件導致cpu凍結的一種情況嗎?如果是這樣,那麼cpu週期不會首先增加。
考慮到我們正在使用的內核版本以及此內核版本中可用的性能分析/調試 ,任何指示調試/隔離根本原因的指針都將非常有幫助 。
骨骼代碼:
/* Build time Configuration */
/* Macros */
DECLARE_WAIT_QUEUE_HEAD(wait);
/** Structure Definitions */
/** Global Variables */
gpio_dev_t gpio1msDev, gpio10msDev;
GpioIntProfileSectorData_t GpioSigProfileData[MAX_GPIO_INT_CONSUMERS];
GpioIntProfileSectorData_t *ProfilePtrSector;
GpioIntProfileData_t GpioProfileData;
GpioIntProfileData_t *GpioIntProfilePtr;
CurrentTickProfile_t TimeStamp;
uint64_t ModuleInitDone = 0, FirstTimePIDWrite = 0;
uint64_t PrevCycle = 0, NowCycle = 0;
volatile uint64_t TenMsFlag, OneMsFlag;
uint64_t OneMsCounter;
uint64_t OneMsIsrTime, TenMsIsrTime;
uint64_t OneMsCounter, OneMsTime, TenMsTime, SyncStarted;
uint64_t Prev = 0, Now = 0, DiffTen = 0, DiffOne, SesSyncHappened;
static spinlock_t GpioSyncLock = SPIN_LOCK_UNLOCKED;
static spinlock_t IoctlSyncLock = SPIN_LOCK_UNLOCKED;
uint64_t EventPresent[MAX_GPIO_INT_CONSUMERS];
GpioEvent_t CurrentEvent = KERN_NO_EVENT;
TickSyncSes_t *SyncSesPtr = NULL;
/** Function Declarations */
ssize_t write_pid(struct file *filep, const char __user * buf, size_t count, loff_t * ppos);
long Gpio_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
static const struct file_operations my_fops = {
write:write_pid,
compat_ioctl:Gpio_compat_ioctl,
};
/**
* IOCTL function for GPIO interrupt module
*
* @return
*/
long Gpio_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
int len = 1, status = 0;
uint8_t Instance;
uint64_t *EventPtr;
GpioIntProfileSectorData_t *SectorProfilePtr, *DebugProfilePtr;
GpioEvent_t EventToGive = KERN_NO_EVENT;
pid_t CurrentPid = current->pid;
spin_lock(&IoctlSyncLock); // Take the spinlock
Instance = GetSector(CurrentPid);
SectorProfilePtr = &GpioSigProfileData[Instance];
EventPtr = &EventPresent[Instance];
spin_unlock(&IoctlSyncLock);
if (Instance <= MAX_GPIO_INT_CONSUMERS)
{
switch (cmd)
{
case IOCTL_WAIT_ON_EVENT:
if (*EventPtr)
{
/* Dont block here since this is a case where interrupt has happened
* before process calling the polling API */
*EventPtr = 0;
/* some profiling code */
}
else
{
status = wait_event_interruptible(wait, (*EventPtr == 1));
*EventPtr = 0;
}
/* profiling code */
TimeStamp.CurrentEvent = EventToGive;
len = copy_to_user((char *)arg, (char *)&TimeStamp, sizeof(CurrentTickProfile_t));
break;
default:
break;
}
}
else
{
return -EINVAL;
}
return 0;
}
/**
* Send signals to registered PID's.
*
* @return
*/
static void WakeupWaitQueue(GpioEvent_t Event)
{
int i;
/* some profile code */
CurrentEvent = Event;
// we dont wake up debug app hence "< MAX_GPIO_INT_CONSUMERS" is used in for loop
for (i = 0; i < MAX_GPIO_INT_CONSUMERS; i++)
{
EventPresent[i] = 1;
}
wake_up_interruptible(&wait);
}
/**
* 1ms Interrupt handler
*
* @return
*/
static irqreturn_t gpio_int_handler_1ms(int irq, void *irq_arg)
{
uint64_t reg_read, my_core_num;
unsigned long flags;
GpioEvent_t event = KERN_NO_EVENT;
/* code to clear the interrupt registers */
/************ profiling start************/
NowCycle = get_cpu_cycle();
GpioIntProfilePtr->TotalOneMsInterrupts++;
/* Check the max diff between consecutive interrupts */
if (PrevCycle)
{
DiffOne = NowCycle - PrevCycle;
if (DiffOne > GpioIntProfilePtr->OneMsMaxDiff)
GpioIntProfilePtr->OneMsMaxDiff = DiffOne;
}
PrevCycle = NowCycle;
TimeStamp.OneMsCount++; /* increment the counter */
/* Store the timestamp */
GpioIntProfilePtr->Gpio1msTimeStamp[GpioIntProfilePtr->IndexOne] = NowCycle;
TimeStamp.OneMsTimeStampAtIsr = NowCycle;
GpioIntProfilePtr->IndexOne++;
if (GpioIntProfilePtr->IndexOne == GPIO_PROFILE_ARRAY_SIZE)
GpioIntProfilePtr->IndexOne = 0;
/************ profiling end************/
/*
* Whenever 10ms Interrupt happens we send only one event to the upper layers.
* Hence it is necessary to sync between 1 & 10ms interrupts.
* There is a chance that sometimes 1ms can happen at first and sometimes 10ms.
*
*/
/******** Sync mechanism ***********/
spin_lock_irqsave(&GpioSyncLock, flags); // Take the spinlock
OneMsCounter++;
OneMsTime = NowCycle;
DiffOne = OneMsTime - TenMsTime;
if (DiffOne < MAX_OFFSET_BETWEEN_1_AND_10MS) //ten ms has happened first
{
if (OneMsCounter == 10)
{
event = KERN_BOTH_EVENT;
SyncStarted = 1;
}
else
{
if (SyncStarted)
{
if (OneMsCounter < 10)
{
GpioIntProfilePtr->TickSyncErrAt1msLess++;
}
else if (OneMsCounter > 10)
{
GpioIntProfilePtr->TickSyncErrAt1msMore++;
}
}
}
OneMsCounter = 0;
}
else
{
if (OneMsCounter < 10)
{
if (SyncStarted)
{
event = KERN_ONE_MS_EVENT;
}
}
else if (OneMsCounter > 10)
{
OneMsCounter = 0;
if (SyncStarted)
{
GpioIntProfilePtr->TickSyncErrAt1msMore++;
}
}
}
TimeStamp.SFN = OneMsCounter;
spin_unlock_irqrestore(&GpioSyncLock, flags);
/******** Sync mechanism ***********/
if(event != KERN_NO_EVENT)
WakeupWaitQueue(event);
OneMsIsrTime = get_cpu_cycle() - NowCycle;
if (GpioIntProfilePtr->Max1msIsrTime < OneMsIsrTime)
GpioIntProfilePtr->Max1msIsrTime = OneMsIsrTime;
return IRQ_HANDLED;
}
/**
* 10ms Interrupt handler
*
* @return
*/
static irqreturn_t gpio_int_handler_10ms(int irq, void *irq_arg)
{
uint64_t reg_read, my_core_num;
unsigned long flags;
GpioEvent_t event = KERN_NO_EVENT;
/* clear the interrupt */
/************ profiling start************/
GpioIntProfilePtr->TotalTenMsInterrupts++;
Now = get_cpu_cycle();
if (Prev)
{
DiffTen = Now - Prev;
if (DiffTen > GpioIntProfilePtr->TenMsMaxDiff)
GpioIntProfilePtr->TenMsMaxDiff = DiffTen;
}
Prev = Now;
TimeStamp.OneMsCount++; /* increment the counter */
TimeStamp.TenMsCount++;
GpioIntProfilePtr->Gpio10msTimeStamp[GpioIntProfilePtr->IndexTen] = Now;
TimeStamp.TenMsTimeStampAtIsr = Now;
//do_gettimeofday(&TimeOfDayAtIsr.TimeAt10MsIsr);
GpioIntProfilePtr->IndexTen++;
if (GpioIntProfilePtr->IndexTen == GPIO_PROFILE_ARRAY_SIZE)
GpioIntProfilePtr->IndexTen = 0;
/************ profiling end************/
/******** Sync mechanism ***********/
spin_lock_irqsave(&GpioSyncLock, flags);
TenMsTime = Now;
DiffTen = TenMsTime - OneMsTime;
if (DiffTen < MAX_OFFSET_BETWEEN_1_AND_10MS) //one ms has happened first
{
if (OneMsCounter == 10)
{
TimeStamp.OneMsTimeStampAtIsr = Now;
event = KERN_BOTH_EVENT;
SyncStarted = 1;
}
OneMsCounter = 0;
}
else
{
if (SyncStarted)
{
if (OneMsCounter < 9)
{
GpioIntProfilePtr->TickSyncErrAt10msLess++;
OneMsCounter = 0;
}
else if (OneMsCounter > 9)
{
GpioIntProfilePtr->TickSyncErrAt10msMore++;
OneMsCounter = 0;
}
}
else
{
if (OneMsCounter != 9)
OneMsCounter = 0;
}
}
TimeStamp.SFN = OneMsCounter;
spin_unlock_irqrestore(&GpioSyncLock, flags);
/******** Sync mechanism ***********/
if(event != KERN_NO_EVENT)
WakeupWaitQueue(event);
TenMsIsrTime = get_cpu_cycle() - Now;
if (GpioIntProfilePtr->Max10msIsrTime < TenMsIsrTime)
GpioIntProfilePtr->Max10msIsrTime = TenMsIsrTime;
return IRQ_HANDLED;
}
感謝您的意見。會照顧到這一點。然而,我所報告的主要問題是中斷錯過了。即下面的計數器沒有增加到cpu週期流逝的時間。 GpioIntProfilePtr-> TotalTenMsInterrupts ++; GpioIntProfilePtr-> TotalOneMsInterrupts ++; – guy