Diffstat (limited to 'gcell/src/lib/runtime/spu/gc_spu_jd_queue.c')
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_spu_jd_queue.c | 125
1 file changed, 55 insertions, 70 deletions
diff --git a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
index 0dd165fc00..6fa2d6af06 100644
--- a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
+++ b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
@@ -1,6 +1,6 @@
 /* -*- c++ -*- */
 /*
- * Copyright 2007 Free Software Foundation, Inc.
+ * Copyright 2007,2008 Free Software Foundation, Inc.
  *
  * This file is part of GNU Radio
  *
@@ -22,106 +22,91 @@
 #include "gc_jd_queue.h"
 #include "mutex_lock.h"
 #include "mutex_unlock.h"
+#include "gc_delay.h"
+#include "gc_random.h"
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
 
 extern int gc_sys_tag;
 
-/*
- * ea must be 128-byte aligned, the mutex is in the first int32_t, and
- * it must be safe to write the remaining 124 bytes with anything at
- * all.
- */
-static __inline void _fast_mutex_unlock(mutex_ea_t ea)
+#define INITIAL_BACKOFF    32.0
+#define MAX_BACKOFF     16384.0
+#define RANDOM_WEIGHT       0.2
+
+static float
+next_backoff(float backoff)
 {
-  char _tmp[256];
-  vector signed int *buf
-    = (vector signed int *) ALIGN(_tmp, 128);   // get cache-aligned buffer
+  // exponential with random
+  float t = backoff * 2.0;
+  if (t > MAX_BACKOFF)
+    t = MAX_BACKOFF;
 
-  buf[0] = spu_splats(0);               // the value that unlocks the mutex
+  float r = (RANDOM_WEIGHT * (2.0 * (gc_uniform_deviate() - 0.5)));
+  t = t * (1.0 + r);
 
-  mfc_putlluc(buf, ea, 0, 0);           // unconditional put, no reservation reqd
-  spu_readch(MFC_RdAtomicStat);
+  return t;
 }
 
-
-
 bool
 gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
                     int jd_tag, gc_job_desc_t *item)
 {
-  gc_jd_q_links_t       local_q;
+  int   status;
+  char  _tmp[256];
+  gc_jd_queue_t *local_q =
+    (gc_jd_queue_t *) ALIGN(_tmp, 128);         // get cache-aligned buffer
+
+  float backoff = next_backoff(INITIAL_BACKOFF);
 
-  // Before aquiring the lock, see if it's possible that there's
-  // something in the queue.  Checking in this way makes it easier
-  // for the PPE to insert things, since we're not contending for
-  // the lock unless there is something in the queue.
+  do {
+    // Copy the queue structure in and get a lock line reservation.
+    // (The structure is 128-byte aligned and completely fills a cache-line)
 
-  // copy in the queue structure
-  mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
-  mfc_write_tag_mask(1 << gc_sys_tag);  // the tag we're interested in
-  mfc_read_tag_status_all();            // wait for DMA to complete
+    mfc_getllar(local_q, q, 0, 0);
+    spu_readch(MFC_RdAtomicStat);
 
-  if (local_q.head == 0){               // empty
-    return false;
-  }
+    if (local_q->mutex != 0)            // somebody else has it locked
+      return false;
 
-  // When we peeked, head was non-zero.  Now grab the
-  // lock and do it for real.
+    if (local_q->head == 0)             // the queue is empty
+      return false;
 
-  _mutex_lock(q + offsetof(gc_jd_queue_t, m.mutex));
+    // Try to acquire the lock
 
-  // copy in the queue structure
-  mfc_get(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
-  mfc_write_tag_mask(1 << gc_sys_tag);  // the tag we're interested in
-  mfc_read_tag_status_all();            // wait for DMA to complete
+    local_q->mutex = 1;
+    mfc_putllc(local_q, q, 0, 0);
+    status = spu_readch(MFC_RdAtomicStat);
+
+    if (status != 0){
+      gc_cdelay((int) backoff);
+      backoff = next_backoff(backoff);
+    }
 
-  if (local_q.head == 0){               // empty
-    _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex));
-    return false;
-  }
+  } while (status != 0);
 
+  // we're now holding the lock
+
   // copy in job descriptor at head of queue
-  *item_ea = local_q.head;
+  *item_ea = local_q->head;
 
   // We must use the fence with the jd_tag to ensure that any
   // previously initiated put of a job desc is locally ordered before
   // the get of the new one.
-  mfc_getf(item, local_q.head, sizeof(gc_job_desc_t), jd_tag, 0, 0);
+  mfc_getf(item, local_q->head, sizeof(gc_job_desc_t), jd_tag, 0, 0);
   mfc_write_tag_mask(1 << jd_tag);      // the tag we're interested in
   mfc_read_tag_status_all();            // wait for DMA to complete
 
-  local_q.head = item->sys.next;
+  local_q->head = item->sys.next;
   item->sys.next = 0;
-  if (local_q.head == 0)                // now empty?
-    local_q.tail = 0;
-
+  if (local_q->head == 0)               // now empty?
+    local_q->tail = 0;
 
-  // copy the queue structure back out
-  mfc_put(&local_q, q, sizeof(local_q), gc_sys_tag, 0, 0);
-  mfc_write_tag_mask(1 << gc_sys_tag);  // the tag we're interested in
-  mfc_read_tag_status_all();            // wait for DMA to complete
+  // Copy the queue struct back out and unlock the mutex in one fell swoop.
+  // We use the unconditional put since it's faster and we own the lock.
 
-  // Q: FIXME do we need to order stores in EA or can we just clear the
-  // local copy of the mutex above and blast it out, removing the need
-  // for this explicit unlock?
-  //
-  // A: Manual says it's better to use an atomic op rather than
-  // a normal DMA, and that a putlluc is better than a putllc if
-  // you can use it.
+  local_q->mutex = 0;
+  mfc_putlluc(local_q, q, 0, 0);
+  spu_readch(MFC_RdAtomicStat);
 
-  _fast_mutex_unlock(q + offsetof(gc_jd_queue_t, m.mutex));
   return true;
 }
-
-
-void
-gc_jd_queue_getllar(gc_eaddr_t q)
-{
-  // get reservation that includes the flag in the queue
-  gc_eaddr_t    ea = q + offsetof(gc_jd_queue_t, f.flag);
-
-  char _tmp[256];
-  char *buf = (char *) ALIGN(_tmp, 128);        // get cache-aligned buffer
-
-  mfc_getllar(buf, ALIGN128_EA(ea), 0, 0);
-  spu_readch(MFC_RdAtomicStat);
-}
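The commit replaces the old peek-then-lock dance with a single getllar/putllc transaction plus randomized exponential backoff on contention. The following is a minimal, host-side sketch of that backoff schedule only, under stated assumptions: the SPU helpers gc_uniform_deviate() and gc_cdelay() are stood in for by a rand()-based deviate and a printout of the intended delay, so the sequence can be inspected off-target.

/*
 * Sketch: randomized exponential backoff as in next_backoff() above.
 * uniform_deviate() is a stand-in for gc_uniform_deviate().
 */
#include <stdio.h>
#include <stdlib.h>

#define INITIAL_BACKOFF    32.0f
#define MAX_BACKOFF     16384.0f
#define RANDOM_WEIGHT       0.2f

static float uniform_deviate(void)
{
  return (float) rand() / ((float) RAND_MAX + 1.0f);
}

static float next_backoff(float backoff)
{
  float t = backoff * 2.0f;                 /* exponential growth */
  if (t > MAX_BACKOFF)                      /* clamped at the cap */
    t = MAX_BACKOFF;
  /* jitter by +/- RANDOM_WEIGHT to de-synchronize contending SPEs */
  float r = RANDOM_WEIGHT * (2.0f * (uniform_deviate() - 0.5f));
  return t * (1.0f + r);
}

int main(void)
{
  float b = next_backoff(INITIAL_BACKOFF);
  for (int i = 0; i < 12; i++){             /* print the delay schedule */
    printf("attempt %2d: wait ~%6d cycles\n", i, (int) b);
    b = next_backoff(b);
  }
  return 0;
}

The jitter term matters: without it, SPEs that collide once tend to retry in lockstep and collide again.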
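For readers without SPU background, the lock protocol above can be approximated with C11 atomics: mfc_getllar()/mfc_putllc() are load-linked/store-conditional on a 128-byte line, and compare-and-swap is the closest portable analogue. This is an illustrative sketch only, not the gcell implementation; the queue_t layout and helper names here are hypothetical.

/*
 * Sketch: the getllar/putllc acquire and putlluc release, re-expressed
 * with portable C11 atomics.  CAS stands in for the conditional put.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
  _Atomic uint32_t mutex;            /* 0 = unlocked, 1 = held */
  _Atomic uint64_t head;             /* EA of first job desc, 0 if empty */
} queue_t;

/* One attempt: false means locked-by-other or empty, mirroring the
   early returns inside the do/while loop above. */
static bool try_acquire(queue_t *q)
{
  if (atomic_load(&q->mutex) != 0)   /* somebody else has it locked */
    return false;
  if (atomic_load(&q->head) == 0)    /* the queue is empty */
    return false;
  uint32_t expected = 0;             /* the putllc step: store 1 only if */
  return atomic_compare_exchange_strong(&q->mutex, &expected, 1);
}

static void release(queue_t *q)
{
  /* the mfc_putlluc() step: we own the lock, so an unconditional
     store (no reservation needed) is enough to release it */
  atomic_store(&q->mutex, 0);
}

The plain store in release() mirrors the diff's own rationale: once the lock is held, an unconditional putlluc is cheaper than another conditional putllc.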