제43강 : Interrupt(III) Race Condition Race Condition 1 Race Condition New IRQ request should guarantee minimum time for previous IRQ processing → Interrupt disables further CPU interrupt local_irq_enable() reenables CPU interrupt CPU CPU CPU CPU shared memory PIC irq_desc[ ] ISR timer IRQ1 Network IRQ2 SCSI IRQ3 IRQm IRQ action g1() g2() Hard Disk Floppy Disk action f1() f2() Many IRQ lines are competing for PIC Interrupt disables PIC ack(irq) reenables PIC operation Many CPUs are competing to access shared variable spin_lock( ) spin_unlock( ) 2 do_IRQ() – critical section CPUi selected do_IRQ() ack() access irq_desc[IRQm] PIC request released from IRQm CPUk selected access irq_desc[IRQm] irq_desc[IRQm] becomes a shared variable do_IRQ() becomes critical section need mutual exclusion by spin_lock() must run atomically should not be preempted in the middle CPU interrupt disabled 3 ISR is not a critical section CPU CPU CPU CPU CPU shared memory PIC irq_desc[ ] ISR timer IRQ1 Network IRQ2 SCSI IRQ3 IRQm IRQ action g1() g2() Hard Disk Floppy Disk action f1() f2() handle_IRQ_event() is not a critical section do_IRQ() handle_IRQ_event() because IRQm requests are serialized at particular CPUk spin_unlock() before entering handle_IRQ_event() CPU interrupt enabled before entering handle_IRQ_event() Multiple CPUs compete for irq_desc[IRQm] irq_desc[IRQm] becomes a shared variable do_IRQ() is a critical section mutual exclusion by spin_lock() do_IRQ() must run atomically should not be preempted in the middle CPU interrupt disabled 4 asmlinkage unsigned int do_IRQ(struct pt_regs regs) { int irq = regs.orig_eax & 0xff; /* get irq vector on stack, put it into local variable */ irq_desc_t *desc = irq_desc + irq; /* pointer to array */ struct irqaction * action; unsigned int status; irq_enter(); kstat_this_cpu.irqs[irq]++; spin_lock(&desc->lock); /* wait if locked */ desc->handler->ack(irq); status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); /* dev not waiting any more */ status 
|= IRQ_PENDING; /* Ack’ed. Need to handle it. */ action = NULL; /* initial value for action == NULL */ if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { action = desc->action; status &= ~IRQ_PENDING; status |= IRQ_INPROGRESS; } desc->status = status; if (unlikely(!action)) goto out; irq_desc[ ] ISR timer IRQ1 Network IRQ2 g1() action /* If nothing has been assigned to action, do nothing */ g2() SCSI IRQ3 Status Handler Lock IRQm action f1() f2() action 5 for (;;) { irqreturn_t action_ret; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, &regs, action); /* call handler */ spin_lock(&desc->lock); if (!noirqdebug) note_interrupt(irq, desc, action_ret); if (likely(!(desc->status & IRQ_PENDING))) /* Re-check PENDING. I cleared before */ break; /* It remains in reset state. Exit do_IRQ() */ desc->status &= ~IRQ_PENDING; /* New arrival. I handle it. Still INPROGRESS */ } desc->status &= ~IRQ_INPROGRESS; /* No longer INPROGRESS */ out: desc->handler->end(irq); irq_desc[ ] ISR timer IRQ1 spin_unlock(&desc->lock); irq_exit(); Network IRQ2 action g1() g2() Status return 1; SCSI IRQ3 Handler action IRQm } f1() Lock f2() action 6 int handle_IRQ_event(unsigned int irq, struct pt_regs *regs, struct irqaction *action) { int status = 1; /* Force the "do bottom halves" bit */ int retval = 0; irq_desc[ ] if (!(action->flags & SA_INTERRUPT)) local_irq_enable(); Network Status Handler do { ISR timer IRQ1 Lock IRQ2 SCSI IRQ3 action g1() g2() action IRQm f1() f2() action status |= action->flags; retval |= action->handler(irq, action->dev_id, regs); action = action->next; ISR is device specific operation } while (action); ISR does not share variable if (status & SA_SAMPLE_RANDOM) ISR is not a critical section add_interrupt_randomness(irq); ISR execution does not have to be atomic local_irq_disable(); This CPU may be interrupted while in ISR return retval; PIC can interrupt this CPU while it is running ISR 7 asmlinkage unsigned int do_IRQ(struct pt_regs regs) { int irq = 
regs.orig_eax & 0xff; irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; irq_enter(); kstat_this_cpu.irqs[irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); status |= IRQ_PENDING; /* signal arrived. Just acked */ action = NULL; if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { action = desc->action; status &= ~IRQ_PENDING; /* we commit to handling */ status |= IRQ_INPROGRESS; /* we’re handling it */ } desc->status = status; if (unlikely(!action)) goto out; for (;;) { irqreturn_t action_ret; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, &regs, action); spin_lock(&desc->lock); if (!noirqdebug) note_interrupt(irq, desc, action_ret); if (likely(!(desc->status & IRQ_PENDING))) break; desc->status &= ~IRQ_PENDING; } Critical Top-Half Interrupt Handler desc->status &= ~IRQ_INPROGRESS; out: desc->handler->end(irq); spin_unlock(&desc->lock); irq_exit(); return 1; } int handle_IRQ_event( ….) 
{ int status = 1; int retval = 0; if (!(action->flags & SA_INTERRUPT)) local_irq_enable(); do { status |= action->flags; retval |= action->handler(irq, action->dev_id, regs); action = action->next; } while (action); if (status & SA_SAMPLE_RANDOM) Non-critical Top-Half add_interrupt_randomness(irq); local_irq_disable(); 8 return retval; } asmlinkage unsigned int do_IRQ(struct pt_regs regs) { int irq = regs.orig_eax & 0xff; irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; If this ISR needs too much for (;;) { irqreturn_t action_ret; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, &regs, action); spin_lock(&desc->lock); time to complete work for this device if (!noirqdebug) note_interrupt(irq, desc, action_ret); this ISR just sets a bit here, meaning if (likely(!(desc->status & IRQ_PENDING))) irq_enter(); “more work needs to be done for this device (Bottom Half required)” break; kstat_this_cpu.irqs[irq]++; desc->status &= ~IRQ_PENDING; This bit is called “ ” } spin_lock(&desc->lock); Then this interrupt handler terminates. desc->status &= ~IRQ_INPROGRESS; desc->handler->end(irq); desc->handler->ack(irq); This bit is processed later by out: spin_unlock(&desc->lock); function irq_exit(); status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); return 1; status |= IRQ_PENDING; /* signal arrived. Just acked */ } action = NULL; int handle_IRQ_event( ….) 
if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { { int status = 1; action = desc->action; int retval = 0; status &= ~IRQ_PENDING; /* we commit to handling */ if (!(action->flags & SA_INTERRUPT)) status |= IRQ_INPROGRESS; /* we’re handling it */ local_irq_enable(); } do { status |= action->flags; irq_desc[ ] retval |= action->handler(irq, action->dev_id, regs); ISR desc->status = status; timer IRQ1 action = action->next; } while (action); Network IRQ2 g1() action if (unlikely(!action)) g2() if (status & SA_SAMPLE_RANDOM) goto out; SCSI IRQ3 action add_interrupt_randomness(irq); local_irq_disable(); IRQm f1() f2() 9 return retval; } soft-irq pending bit do_softirq() Love, Chapter 7 When & who calls do_softirq()? 1. Returning hardware interrupt handler – – before do_IRQ() returns it calls irq_exit() → do_softirq() 2. kernel thread – – Bovet, p. 150 low priority kernel thread called ksoftirqd_CPUn It runs ksoftirqd() function, which calls do_softirq() 3. Any code (such as network subsystem) – checks softirq pending bit and calls do_softirq() 10 Urgent/non-urgent work in TCP/IP PIC IRQ NIC Floppy Disk CPUi selected ack() do_IRQ() update irq_desc[IRQm] move packet from NIC to memory move packet to socket IP TCP ftp 11 Which part is done in which module TCP/IP • [Hardware] -- NIC (Network Interface Card) – receives a packet from network – issues interrupt to CPUN PIC IRQ NIC Floppy Disk • [Top Half] -- Interrupt Handler – Critical Top half (do_IRQ()) – Non-Critical Top half (ISR) Ack, assign a CPU for IRQnetwork Allocates struct sk_buff for packet copy packet from NIC to sk_buff raise “bottom half required” (set bit) • [Bottom Half] -- do_softirq() – delivers packet to IP protocol handler • remote? – forward packet to other host • local? 
– invoke TCP handler – delivers packet to TCP handler • delivers packet to the socket associated with the port k 12 Device requests interrupt PIC selects CPU & sends signal to CPU Top half & Bottom half (TCP/IP example) (1) Interrupt CPU (CPU/PIC is interrupt disabled) (4) exit I.H. Top Half (2) Runs interrupt handler do_IRQ() ISR ack Assign CPU to IRQ Later … Bottom Half (5) kernel executes bottom half (if the bit is set) Assigned CPU runs ISR which copies data from NIC to kernel (3) Schedule bottom halves (i.e. set bottom half bits) IP handler routes packet TCP handler Assembles data Data Insert data into socket Wakeup application 13