/*
       *  linux/kernel/timer.c
       *
       *  Kernel internal timers, kernel timekeeping, basic process system calls
       *
       *  Copyright (C) 1991, 1992  Linus Torvalds
       *
       *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
       *
       *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
       *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
       *              serialize accesses to xtime/lost_ticks).
       *                              Copyright (C) 1998  Andrea Arcangeli
       *  1999-03-10  Improved NTP compatibility by Ulrich Windl
       */
      
      #include <linux/config.h>
      #include <linux/mm.h>
      #include <linux/timex.h>
      #include <linux/delay.h>
      #include <linux/smp_lock.h>
      #include <linux/interrupt.h>
      #include <linux/kernel_stat.h>
      
      #include <asm/uaccess.h>
      
      /*
       * Timekeeping variables
       */
      
      long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */
      
      /* The current time */
      volatile struct timeval xtime __attribute__ ((aligned (16)));
      
      /* Don't completely fail for HZ > 500.  */
      int tickadj = 500/HZ ? : 1;		/* microsecs */
      
      DECLARE_TASK_QUEUE(tq_timer);
      DECLARE_TASK_QUEUE(tq_immediate);
      
      /*
       * phase-lock loop variables
       */
      /* TIME_ERROR prevents overwriting the CMOS clock */
      int time_state = TIME_OK;		/* clock synchronization status	*/
      int time_status = STA_UNSYNC;		/* clock status bits		*/
      long time_offset;			/* time adjustment (us)		*/
      long time_constant = 2;			/* pll time constant		*/
      long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
      long time_precision = 1;		/* clock precision (us)		*/
      long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
      long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
      long time_phase;			/* phase offset (scaled us)	*/
      long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
      					/* frequency offset (scaled ppm)*/
      long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
      long time_reftime;			/* time at last adjustment (s)	*/
      
      long time_adjust;
      long time_adjust_step;
      
      unsigned long event;
      
      extern int do_setitimer(int, struct itimerval *, struct itimerval *);
      
      unsigned long volatile jiffies;
      
      unsigned int * prof_buffer;
      unsigned long prof_len;
      unsigned long prof_shift;
      
      /*
       * Event timer code
       */
      #define TVN_BITS 6
      #define TVR_BITS 8
      #define TVN_SIZE (1 << TVN_BITS)
      #define TVR_SIZE (1 << TVR_BITS)
      #define TVN_MASK (TVN_SIZE - 1)
      #define TVR_MASK (TVR_SIZE - 1)
      
      struct timer_vec {
      	int index;
      	struct list_head vec[TVN_SIZE];
      };
      
      struct timer_vec_root {
      	int index;
      	struct list_head vec[TVR_SIZE];
      };
      
      static struct timer_vec tv5;
      static struct timer_vec tv4;
      static struct timer_vec tv3;
      static struct timer_vec tv2;
      static struct timer_vec_root tv1;
      
      static struct timer_vec * const tvecs[] = {
      	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
      };
      
      #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
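
/*
 * Roughly, these constants describe a hierarchy of timer "wheels":
 * tv1 is the root wheel with TVR_SIZE = 256 one-jiffy buckets, and
 * tv2..tv5 each add TVN_BITS = 6 bits of range at coarser resolution:
 *
 *	tv1: idx      0 .. 2^8  - 1	(bucket width 1 jiffy)
 *	tv2: idx    2^8 .. 2^14 - 1	(bucket width 2^8 jiffies)
 *	tv3: idx   2^14 .. 2^20 - 1	(bucket width 2^14 jiffies)
 *	tv4: idx   2^20 .. 2^26 - 1	(bucket width 2^20 jiffies)
 *	tv5: idx   2^26 .. 2^32 - 1	(bucket width 2^26 jiffies)
 *
 * For example, with HZ = 100 a timer due 1000 jiffies (10 s) from now
 * has idx = 1000, which is >= 2^8 and < 2^14, so internal_add_timer()
 * below files it in tv2.vec[(expires >> 8) & 63].
 */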
      
 106  void init_timervecs (void)
      {
      	int i;
      
 110  	for (i = 0; i < TVN_SIZE; i++) {
 111  		INIT_LIST_HEAD(tv5.vec + i);
 112  		INIT_LIST_HEAD(tv4.vec + i);
 113  		INIT_LIST_HEAD(tv3.vec + i);
 114  		INIT_LIST_HEAD(tv2.vec + i);
      	}
 116  	for (i = 0; i < TVR_SIZE; i++)
 117  		INIT_LIST_HEAD(tv1.vec + i);
      }
      
      static unsigned long timer_jiffies;
      
 122  static inline void internal_add_timer(struct timer_list *timer)
      {
	/*
	 * Interrupts must be disabled (and timerlist_lock held) when
	 * calling this.
	 */
      	unsigned long expires = timer->expires;
      	unsigned long idx = expires - timer_jiffies;
      	struct list_head * vec;
      
 131  	if (idx < TVR_SIZE) {
      		int i = expires & TVR_MASK;
      		vec = tv1.vec + i;
 134  	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
      		int i = (expires >> TVR_BITS) & TVN_MASK;
      		vec = tv2.vec + i;
 137  	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
      		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
      		vec =  tv3.vec + i;
 140  	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
      		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
      		vec = tv4.vec + i;
 143  	} else if ((signed long) idx < 0) {
      		/* can happen if you add a timer with expires == jiffies,
      		 * or you set a timer to go off in the past
      		 */
      		vec = tv1.vec + tv1.index;
 148  	} else if (idx <= 0xffffffffUL) {
      		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
      		vec = tv5.vec + i;
 151  	} else {
      		/* Can only get here on architectures with 64-bit jiffies */
 153  		INIT_LIST_HEAD(&timer->list);
 154  		return;
      	}
      	/*
      	 * Timers are FIFO!
      	 */
      	list_add(&timer->list, vec->prev);
      }
      
      /* Initialize both explicitly - let's try to have them in the same cache line */
      spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
      
      #ifdef CONFIG_SMP
      volatile struct timer_list * volatile running_timer;
      #define timer_enter(t) do { running_timer = t; mb(); } while (0)
      #define timer_exit() do { running_timer = NULL; } while (0)
      #define timer_is_running(t) (running_timer == t)
      #define timer_synchronize(t) while (timer_is_running(t)) barrier()
      #else
      #define timer_enter(t)		do { } while (0)
      #define timer_exit()		do { } while (0)
      #endif
      
 176  void add_timer(struct timer_list *timer)
      {
      	unsigned long flags;
      
 180  	spin_lock_irqsave(&timerlist_lock, flags);
 181  	if (timer_pending(timer))
 182  		goto bug;
      	internal_add_timer(timer);
 184  	spin_unlock_irqrestore(&timerlist_lock, flags);
 185  	return;
      bug:
 187  	spin_unlock_irqrestore(&timerlist_lock, flags);
      	printk("bug: kernel timer added twice at %p.\n",
      			__builtin_return_address(0));
      }
      
 192  static inline int detach_timer (struct timer_list *timer)
      {
 194  	if (!timer_pending(timer))
 195  		return 0;
      	list_del(&timer->list);
 197  	return 1;
      }
      
 200  int mod_timer(struct timer_list *timer, unsigned long expires)
      {
      	int ret;
      	unsigned long flags;
      
 205  	spin_lock_irqsave(&timerlist_lock, flags);
      	timer->expires = expires;
      	ret = detach_timer(timer);
      	internal_add_timer(timer);
 209  	spin_unlock_irqrestore(&timerlist_lock, flags);
 210  	return ret;
      }
      
 213  int del_timer(struct timer_list * timer)
      {
      	int ret;
      	unsigned long flags;
      
 218  	spin_lock_irqsave(&timerlist_lock, flags);
      	ret = detach_timer(timer);
      	timer->list.next = timer->list.prev = NULL;
 221  	spin_unlock_irqrestore(&timerlist_lock, flags);
 222  	return ret;
      }
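
/*
 * A minimal usage sketch (hypothetical driver code, assuming init_timer()
 * and struct timer_list from <linux/timer.h>):
 *
 *	static struct timer_list my_timer;
 *
 *	static void my_timeout(unsigned long data)
 *	{
 *		printk("my_timer expired\n");
 *	}
 *
 *	init_timer(&my_timer);
 *	my_timer.function = my_timeout;
 *	my_timer.data = 0;
 *	my_timer.expires = jiffies + HZ;	(about one second from now)
 *	add_timer(&my_timer);
 *
 * mod_timer(&my_timer, jiffies + 2*HZ) re-arms a possibly pending timer
 * under a single lock acquisition, and del_timer(&my_timer) removes it
 * if it is still queued (on SMP use del_timer_sync() below when the
 * handler must not be running on return).
 */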
      
      #ifdef CONFIG_SMP
      void sync_timers(void)
      {
      	spin_unlock_wait(&global_bh_lock);
      }
      
/*
 * SMP-specific function to delete a periodic timer.
 * The caller must, by some other means, prevent the timer from being
 * restarted.  On return the timer is no longer queued and its handler
 * is not running on any CPU.  The return value is the number of times
 * the timer was detached (for reference counting).
 */
      
      int del_timer_sync(struct timer_list * timer)
      {
      	int ret = 0;
      
      	for (;;) {
      		unsigned long flags;
      		int running;
      
      		spin_lock_irqsave(&timerlist_lock, flags);
      		ret += detach_timer(timer);
		timer->list.next = timer->list.prev = NULL;
      		running = timer_is_running(timer);
      		spin_unlock_irqrestore(&timerlist_lock, flags);
      
      		if (!running)
      			break;
      
      		timer_synchronize(timer);
      	}
      
      	return ret;
      }
      #endif
      
      
 264  static inline void cascade_timers(struct timer_vec *tv)
      {
      	/* cascade all the timers from tv up one level */
      	struct list_head *head, *curr, *next;
      
      	head = tv->vec + tv->index;
      	curr = head->next;
      	/*
      	 * We are removing _all_ timers from the list, so we don't  have to
      	 * detach them individually, just clear the list afterwards.
      	 */
 275  	while (curr != head) {
      		struct timer_list *tmp;
      
      		tmp = list_entry(curr, struct timer_list, list);
      		next = curr->next;
		list_del(curr);	/* not strictly needed: the list is reinitialised below */
      		internal_add_timer(tmp);
      		curr = next;
      	}
 284  	INIT_LIST_HEAD(head);
      	tv->index = (tv->index + 1) & TVN_MASK;
      }
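
/*
 * Cascading in practice (rough example): run_timer_list() below calls
 * this whenever tv1.index wraps back to 0, i.e. every 256 jiffies, to
 * redistribute one tv2 bucket; every 2^14 jiffies one tv3 bucket is
 * redistributed, and so on.  A timer originally filed in tv2 (say,
 * 1000 jiffies ahead) is therefore re-hashed by internal_add_timer()
 * into a one-jiffy tv1 bucket once fewer than 256 jiffies remain, and
 * fires from there.
 */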
      
 288  static inline void run_timer_list(void)
      {
 290  	spin_lock_irq(&timerlist_lock);
 291  	while ((long)(jiffies - timer_jiffies) >= 0) {
      		struct list_head *head, *curr;
 293  		if (!tv1.index) {
      			int n = 1;
 295  			do {
      				cascade_timers(tvecs[n]);
 297  			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
      		}
      repeat:
      		head = tv1.vec + tv1.index;
      		curr = head->next;
 302  		if (curr != head) {
      			struct timer_list *timer;
      			void (*fn)(unsigned long);
      			unsigned long data;
      
      			timer = list_entry(curr, struct timer_list, list);
			fn = timer->function;
			data = timer->data;
      
      			detach_timer(timer);
      			timer->list.next = timer->list.prev = NULL;
 313  			timer_enter(timer);
 314  			spin_unlock_irq(&timerlist_lock);
      			fn(data);
 316  			spin_lock_irq(&timerlist_lock);
 317  			timer_exit();
 318  			goto repeat;
      		}
		++timer_jiffies;
      		tv1.index = (tv1.index + 1) & TVR_MASK;
      	}
 323  	spin_unlock_irq(&timerlist_lock);
      }
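
/*
 * Note that the timer handler runs with timerlist_lock dropped (and,
 * on SMP, with the timer marked by timer_enter()), so a handler may
 * legitimately re-arm its own timer.  A hypothetical periodic poll:
 *
 *	static void poll_hw(unsigned long data)
 *	{
 *		... do the periodic work ...
 *		mod_timer(&poll_timer, jiffies + HZ / 10);
 *	}
 */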
      
      spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
      
 328  void tqueue_bh(void)
      {
      	run_task_queue(&tq_timer);
      }
      
 333  void immediate_bh(void)
      {
      	run_task_queue(&tq_immediate);
      }
      
      /*
       * this routine handles the overflow of the microsecond field
       *
       * The tricky bits of code to handle the accurate clock support
       * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
       * They were originally developed for SUN and DEC kernels.
       * All the kudos should go to Dave for this stuff.
       *
       */
 347  static void second_overflow(void)
      {
          long ltemp;
      
          /* Bump the maxerror field */
          time_maxerror += time_tolerance >> SHIFT_USEC;
 353      if ( time_maxerror > NTP_PHASE_LIMIT ) {
      	time_maxerror = NTP_PHASE_LIMIT;
      	time_status |= STA_UNSYNC;
          }
      
          /*
           * Leap second processing. If in leap-insert state at
           * the end of the day, the system clock is set back one
           * second; if in leap-delete state, the system clock is
           * set ahead one second. The microtime() routine or
     * external clock driver will ensure that reported time
           * is always monotonic. The ugly divides should be
           * replaced.
           */
 367      switch (time_state) {
      
 369      case TIME_OK:
 370  	if (time_status & STA_INS)
      	    time_state = TIME_INS;
 372  	else if (time_status & STA_DEL)
      	    time_state = TIME_DEL;
 374  	break;
      
 376      case TIME_INS:
 377  	if (xtime.tv_sec % 86400 == 0) {
      	    xtime.tv_sec--;
      	    time_state = TIME_OOP;
      	    printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
      	}
 382  	break;
      
 384      case TIME_DEL:
 385  	if ((xtime.tv_sec + 1) % 86400 == 0) {
      	    xtime.tv_sec++;
      	    time_state = TIME_WAIT;
      	    printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
      	}
 390  	break;
      
 392      case TIME_OOP:
      	time_state = TIME_WAIT;
 394  	break;
      
 396      case TIME_WAIT:
 397  	if (!(time_status & (STA_INS | STA_DEL)))
      	    time_state = TIME_OK;
          }
      
          /*
           * Compute the phase adjustment for the next second. In
           * PLL mode, the offset is reduced by a fixed factor
           * times the time constant. In FLL mode the offset is
           * used directly. In either mode, the maximum phase
           * adjustment for each second is clamped so as to spread
           * the adjustment over not more than the number of
           * seconds between updates.
           */
 410      if (time_offset < 0) {
      	ltemp = -time_offset;
 412  	if (!(time_status & STA_FLL))
      	    ltemp >>= SHIFT_KG + time_constant;
 414  	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
      	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
      	time_offset += ltemp;
      	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
 418      } else {
      	ltemp = time_offset;
 420  	if (!(time_status & STA_FLL))
      	    ltemp >>= SHIFT_KG + time_constant;
 422  	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
      	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
      	time_offset -= ltemp;
      	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
          }
      
          /*
           * Compute the frequency estimate and additional phase
           * adjustment due to frequency error for the next
           * second. When the PPS signal is engaged, gnaw on the
           * watchdog counter and update the frequency computed by
           * the pll and the PPS signal.
           */
          pps_valid++;
 436      if (pps_valid == PPS_VALID) {	/* PPS signal lost */
      	pps_jitter = MAXTIME;
      	pps_stabil = MAXFREQ;
      	time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
      			 STA_PPSWANDER | STA_PPSERROR);
          }
          ltemp = time_freq + pps_freq;
 443      if (ltemp < 0)
      	time_adj -= -ltemp >>
      	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 446      else
      	time_adj += ltemp >>
      	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
      
      #if HZ == 100
          /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
           * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
           */
 454      if (time_adj < 0)
      	time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
 456      else
      	time_adj += (time_adj >> 2) + (time_adj >> 5);
      #endif
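
    /*
     * Worked example of the correction above (illustrative numbers):
     * time_adj was computed as if HZ were 1 << SHIFT_HZ = 128, so with
     * HZ == 100 each tick should apply 128/100 = 1.28 times as much;
     * 1 + 1/4 + 1/32 = 1.28125 is close enough.  E.g. time_adj = 1024
     * becomes 1024 + 256 + 32 = 1312, versus the exact 1310.72.
     */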
      }
      
      /* in the NTP reference this is called "hardclock()" */
 462  static void update_wall_time_one_tick(void)
      {
 464  	if ( (time_adjust_step = time_adjust) != 0 ) {
      	    /* We are doing an adjtime thing. 
      	     *
      	     * Prepare time_adjust_step to be within bounds.
      	     * Note that a positive time_adjust means we want the clock
      	     * to run faster.
      	     *
      	     * Limit the amount of the step to be in the range
      	     * -tickadj .. +tickadj
      	     */
 474  	     if (time_adjust > tickadj)
      		time_adjust_step = tickadj;
 476  	     else if (time_adjust < -tickadj)
      		time_adjust_step = -tickadj;
      	     
      	    /* Reduce by this step the amount of time left  */
      	    time_adjust -= time_adjust_step;
      	}
      	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase; once it accumulates to a whole (scaled)
	 * microsecond, move that microsecond into xtime.tv_usec.
	 */
      	time_phase += time_adj;
 488  	if (time_phase <= -FINEUSEC) {
      		long ltemp = -time_phase >> SHIFT_SCALE;
      		time_phase += ltemp << SHIFT_SCALE;
      		xtime.tv_usec -= ltemp;
      	}
 493  	else if (time_phase >= FINEUSEC) {
      		long ltemp = time_phase >> SHIFT_SCALE;
      		time_phase -= ltemp << SHIFT_SCALE;
      		xtime.tv_usec += ltemp;
      	}
      }
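
/*
 * Rough illustration of the phase accumulator above: time_adj and
 * time_phase are kept in "scaled" microseconds, with FINEUSEC
 * (1 << SHIFT_SCALE) representing one microsecond.  If time_adj amounts
 * to, say, a quarter of a microsecond per tick, time_phase crosses
 * FINEUSEC after four ticks, one whole microsecond is moved into
 * xtime.tv_usec, and the remainder stays behind for later ticks.
 */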
      
/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing it this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks).
 */
 507  static void update_wall_time(unsigned long ticks)
      {
 509  	do {
      		ticks--;
      		update_wall_time_one_tick();
 512  	} while (ticks);
      
 514  	if (xtime.tv_usec >= 1000000) {
      	    xtime.tv_usec -= 1000000;
      	    xtime.tv_sec++;
      	    second_overflow();
      	}
      }
      
 521  static inline void do_process_times(struct task_struct *p,
      	unsigned long user, unsigned long system)
      {
      	unsigned long psecs;
      
      	psecs = (p->times.tms_utime += user);
      	psecs += (p->times.tms_stime += system);
 528  	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
      		/* Send SIGXCPU every second.. */
 530  		if (!(psecs % HZ))
      			send_sig(SIGXCPU, p, 1);
      		/* and SIGKILL when we go over max.. */
 533  		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
      			send_sig(SIGKILL, p, 1);
      	}
      }
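
/*
 * Worked example of the limit check above, assuming HZ = 100: with
 * rlim_cur = 2 and rlim_max = 5, SIGXCPU is first sent on the tick
 * where psecs reaches 3 * HZ, and again at every later whole second of
 * accumulated CPU time; once psecs / HZ exceeds 5, SIGKILL is sent on
 * every subsequent tick.
 */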
      
 538  static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
      {
      	unsigned long it_virt = p->it_virt_value;
      
 542  	if (it_virt) {
      		it_virt -= ticks;
 544  		if (!it_virt) {
      			it_virt = p->it_virt_incr;
      			send_sig(SIGVTALRM, p, 1);
      		}
      		p->it_virt_value = it_virt;
      	}
      }
      
 552  static inline void do_it_prof(struct task_struct *p)
      {
      	unsigned long it_prof = p->it_prof_value;
      
 556  	if (it_prof) {
 557  		if (--it_prof == 0) {
      			it_prof = p->it_prof_incr;
      			send_sig(SIGPROF, p, 1);
      		}
      		p->it_prof_value = it_prof;
      	}
      }
      
 565  void update_one_process(struct task_struct *p, unsigned long user,
      			unsigned long system, int cpu)
      {
      	p->per_cpu_utime[cpu] += user;
      	p->per_cpu_stime[cpu] += system;
      	do_process_times(p, user, system);
      	do_it_virt(p, user);
      	do_it_prof(p);
}
      
      /*
       * Called from the timer interrupt handler to charge one tick to the current 
       * process.  user_tick is 1 if the tick is user time, 0 for system.
       */
 579  void update_process_times(int user_tick)
      {
      	struct task_struct *p = current;
      	int cpu = smp_processor_id(), system = user_tick ^ 1;
      
      	update_one_process(p, user_tick, system, cpu);
 585  	if (p->pid) {
 586  		if (--p->counter <= 0) {
      			p->counter = 0;
      			p->need_resched = 1;
      		}
 590  		if (p->nice > 0)
      			kstat.per_cpu_nice[cpu] += user_tick;
 592  		else
      			kstat.per_cpu_user[cpu] += user_tick;
      		kstat.per_cpu_system[cpu] += system;
 595  	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
      		kstat.per_cpu_system[cpu] += system;
      }
      
      /*
       * Nr of active tasks - counted in fixed-point numbers
       */
 602  static unsigned long count_active_tasks(void)
      {
      	struct task_struct *p;
      	unsigned long nr = 0;
      
      	read_lock(&tasklist_lock);
 608  	for_each_task(p) {
      		if ((p->state == TASK_RUNNING ||
 610  		     (p->state & TASK_UNINTERRUPTIBLE)))
      			nr += FIXED_1;
      	}
 613  	read_unlock(&tasklist_lock);
 614  	return nr;
      }
      
      /*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
       * imply that avenrun[] is the standard name for this kind of thing.
       * Nothing else seems to be standardized: the fractional size etc
       * all seem to differ on different machines.
       */
      unsigned long avenrun[3];
      
 625  static inline void calc_load(unsigned long ticks)
      {
      	unsigned long active_tasks; /* fixed-point */
      	static int count = LOAD_FREQ;
      
      	count -= ticks;
 631  	if (count < 0) {
      		count += LOAD_FREQ;
      		active_tasks = count_active_tasks();
      		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
      		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
      		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
      	}
      }
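
/*
 * A sketch of the fixed-point arithmetic behind CALC_LOAD (constants
 * quoted from <linux/sched.h> for illustration: FIXED_1 = 1 << 11,
 * EXP_1 = 1884, LOAD_FREQ = 5*HZ):
 *
 *	avenrun[0] = (avenrun[0]*EXP_1 + active_tasks*(FIXED_1-EXP_1)) >> 11
 *
 * every 5 seconds.  Starting from 0 with one steadily runnable task
 * (active_tasks = FIXED_1 = 2048), the first interval yields
 * 2048 * 164 >> 11 = 164, i.e. a 1-minute load of 164/2048 ~= 0.08,
 * which matches 1 - exp(-5/60); /proc/loadavg converts avenrun[] back
 * to the familiar decimal form.
 */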
      
      /* jiffies at the most recent update of wall time */
      unsigned long wall_jiffies;
      
      /*
 * This rwlock protects us from SMP races while playing with xtime. -arca
       */
      rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
      
 648  static inline void update_times(void)
      {
      	unsigned long ticks;
      
      	/*
      	 * update_times() is run from the raw timer_bh handler so we
      	 * just know that the irqs are locally enabled and so we don't
      	 * need to save/restore the flags of the local CPU here. -arca
      	 */
 657  	write_lock_irq(&xtime_lock);
      
      	ticks = jiffies - wall_jiffies;
 660  	if (ticks) {
      		wall_jiffies += ticks;
      		update_wall_time(ticks);
      	}
 664  	write_unlock_irq(&xtime_lock);
      	calc_load(ticks);
      }
      
 668  void timer_bh(void)
      {
      	update_times();
      	run_timer_list();
      }
      
 674  void do_timer(struct pt_regs *regs)
      {
      	(*(unsigned long *)&jiffies)++;
      #ifndef CONFIG_SMP
      	/* SMP process accounting uses the local APIC timer */
      
      	update_process_times(user_mode(regs));
      #endif
      	mark_bh(TIMER_BH);
 683  	if (TQ_ACTIVE(tq_timer))
      		mark_bh(TQUEUE_BH);
      }
      
      #if !defined(__alpha__) && !defined(__ia64__)
      
      /*
       * For backwards compatibility?  This can be done in libc so Alpha
       * and all newer ports shouldn't need it.
       */
 693  asmlinkage unsigned long sys_alarm(unsigned int seconds)
      {
      	struct itimerval it_new, it_old;
      	unsigned int oldalarm;
      
      	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
      	it_new.it_value.tv_sec = seconds;
      	it_new.it_value.tv_usec = 0;
      	do_setitimer(ITIMER_REAL, &it_new, &it_old);
      	oldalarm = it_old.it_value.tv_sec;
      	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much rather than too little anyway */
 705  	if (it_old.it_value.tv_usec)
      		oldalarm++;
 707  	return oldalarm;
      }
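
/*
 * Example of the rounding above: if a previous alarm had 2.4 s left,
 * it_old.it_value is { 2, 400000 }, and the non-zero tv_usec bumps the
 * return value to 3, so the caller never underestimates the remaining
 * time.
 */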
      
      #endif
      
      #ifndef __alpha__
      
      /*
       * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
       * should be moved into arch/i386 instead?
       */
       
 719  asmlinkage long sys_getpid(void)
      {
	/* This is SMP safe - current->tgid doesn't change */
 722  	return current->tgid;
      }
      
      /*
       * This is not strictly SMP safe: p_opptr could change
       * from under us. However, rather than getting any lock
       * we can use an optimistic algorithm: get the parent
       * pid, and go back and check that the parent is still
       * the same. If it has changed (which is extremely unlikely
       * indeed), we just try again..
       *
       * NOTE! This depends on the fact that even if we _do_
       * get an old value of "parent", we can happily dereference
       * the pointer: we just can't necessarily trust the result
       * until we know that the parent pointer is valid.
       *
       * The "mb()" macro is a memory barrier - a synchronizing
       * event. It also makes sure that gcc doesn't optimize
       * away the necessary memory references.. The barrier doesn't
       * have to have all that strong semantics: on x86 we don't
       * really require a synchronizing instruction, for example.
       * The barrier is more important for code generation than
       * for any real memory ordering semantics (even if there is
       * a small window for a race, using the old pointer is
       * harmless for a while).
       */
 748  asmlinkage long sys_getppid(void)
      {
      	int pid;
      	struct task_struct * me = current;
      	struct task_struct * parent;
      
      	parent = me->p_opptr;
 755  	for (;;) {
      		pid = parent->pid;
#ifdef CONFIG_SMP
      {
      		struct task_struct *old = parent;
      		mb();
      		parent = me->p_opptr;
      		if (old != parent)
      			continue;
      }
      #endif
 766  		break;
      	}
 768  	return pid;
      }
      
 771  asmlinkage long sys_getuid(void)
      {
	/* Only we change this, so it is SMP safe */
 774  	return current->uid;
      }
      
 777  asmlinkage long sys_geteuid(void)
      {
	/* Only we change this, so it is SMP safe */
 780  	return current->euid;
      }
      
 783  asmlinkage long sys_getgid(void)
      {
	/* Only we change this, so it is SMP safe */
 786  	return current->gid;
      }
      
 789  asmlinkage long sys_getegid(void)
      {
	/* Only we change this, so it is SMP safe */
 792  	return  current->egid;
      }
      
      #endif
      
 797  asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
      {
      	struct timespec t;
      	unsigned long expire;
      
 802  	if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
 803  		return -EFAULT;
      
 805  	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
 806  		return -EINVAL;
      
      
      	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
 810  	    current->policy != SCHED_OTHER)
      	{
      		/*
      		 * Short delay requests up to 2 ms will be handled with
      		 * high precision by a busy wait for all real-time processes.
      		 *
		 * It's important on SMP not to do this while holding locks.
      		 */
      		udelay((t.tv_nsec + 999) / 1000);
 819  		return 0;
      	}
      
      	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
      
      	current->state = TASK_INTERRUPTIBLE;
      	expire = schedule_timeout(expire);
      
 827  	if (expire) {
 828  		if (rmtp) {
      			jiffies_to_timespec(expire, &t);
 830  			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
 831  				return -EFAULT;
      		}
 833  		return -EINTR;
      	}
 835  	return 0;
      }
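
/*
 * A rough example of the expiry computation above, assuming HZ = 100
 * (10 ms per jiffy): a 15 ms request converts (rounding up) to 2
 * jiffies, and the extra "+ (t.tv_sec || t.tv_nsec)" jiffy makes it 3,
 * so the task sleeps for more than two full tick periods (> 20 ms) and
 * never less than the time requested, whatever the phase of the
 * current tick.
 */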