summaryrefslogtreecommitdiff
path: root/gcell/src
diff options
context:
space:
mode:
authoreb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>2008-03-24 07:46:47 +0000
committereb <eb@221aa14e-8319-0410-a670-987f0aec2ac5>2008-03-24 07:46:47 +0000
commita0eae4b4a9e0635fbb2a983673d1dd942f150ea7 (patch)
tree5e48b8ac6953d91706fbe601e8a5803a0c903699 /gcell/src
parentb1408be636817de8bfb39f154cc3db0d3736434c (diff)
Fix for gcell correctness/performance problem. Replaces mfc_sync
with appropriate use of tag and fenced get. We could pick up a bit of additional performance by double buffering the local store job descriptor, but that's left for a rainy day. git-svn-id: http://gnuradio.org/svn/gnuradio/trunk@8090 221aa14e-8319-0410-a670-987f0aec2ac5
Diffstat (limited to 'gcell/src')
-rw-r--r--gcell/src/include/spu/gc_jd_queue.h4
-rw-r--r--gcell/src/lib/runtime/spu/gc_main.c7
-rw-r--r--gcell/src/lib/runtime/spu/gc_spu_jd_queue.c11
3 files changed, 13 insertions, 9 deletions
diff --git a/gcell/src/include/spu/gc_jd_queue.h b/gcell/src/include/spu/gc_jd_queue.h
index f1ce1b3bd6..7a6ac2e219 100644
--- a/gcell/src/include/spu/gc_jd_queue.h
+++ b/gcell/src/include/spu/gc_jd_queue.h
@@ -35,12 +35,14 @@ __GC_BEGIN_DECLS
*
* \param[in] q is EA address of queue structure.
* \param[out] item_ea is EA address of item at head of queue.
+ * \param[in] jd_tag is the tag to use to get the LS copy of the item.
* \param[out] item is local store copy of item at head of queue.
* \returns false if the queue is empty, otherwise returns true
* and sets \p item_ea and DMA's job descriptor into \p item
*/
bool
-gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, gc_job_desc_t *item);
+gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
+ int jd_tag, gc_job_desc_t *item);
/*!
diff --git a/gcell/src/lib/runtime/spu/gc_main.c b/gcell/src/lib/runtime/spu/gc_main.c
index ef552f14a8..867a21de8b 100644
--- a/gcell/src/lib/runtime/spu/gc_main.c
+++ b/gcell/src/lib/runtime/spu/gc_main.c
@@ -542,9 +542,6 @@ process_job(gc_eaddr_t jd_ea, gc_job_desc_t *jd)
int tag = ci_tags + ci_idx; // use the current completion tag
mfc_put(jd, jd_ea, sizeof(*jd), tag, 0, 0);
- mfc_sync(tag); // FIXME this makes it work, but is expensive
-
-
// Tell PPE we're done with the job.
//
// We queue these up until we run out of room, or until we can send
@@ -593,7 +590,7 @@ main_loop(void)
// by somebody doing something to the queue. Go look and see
// if there's anything for us.
//
- if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, &jd))
+ if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd))
process_job(jd_ea, &jd);
gc_jd_queue_getllar(spu_args.queue); // get a new reservation
@@ -608,7 +605,7 @@ main_loop(void)
#else
// try to get a job from the job queue
- if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, &jd)){
+ if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, ci_tags + ci_idx, &jd)){
total_jobs++;
gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs);
diff --git a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
index ba4a1b9d22..22752fe68e 100644
--- a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
+++ b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c
@@ -26,7 +26,8 @@
extern int gc_sys_tag;
bool
-gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, gc_job_desc_t *item)
+gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
+ int jd_tag, gc_job_desc_t *item)
{
gc_jd_queue_t local_q;
@@ -61,8 +62,12 @@ gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, gc_job_desc_t *item)
// copy in job descriptor at head of queue
*item_ea = local_q.head;
- mfc_get(item, local_q.head, sizeof(gc_job_desc_t), gc_sys_tag, 0, 0);
- mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in
+
+ // We must use the fence with the jd_tag to ensure that any
+ // previously initiated put of a job desc is locally ordered before
+ // the get of the new one.
+ mfc_getf(item, local_q.head, sizeof(gc_job_desc_t), jd_tag, 0, 0);
+ mfc_write_tag_mask(1 << jd_tag); // the tag we're interested in
mfc_read_tag_status_all(); // wait for DMA to complete
local_q.head = item->sys.next;