summaryrefslogtreecommitdiff
path: root/gcell/include
diff options
context:
space:
mode:
Diffstat (limited to 'gcell/include')
-rw-r--r--gcell/include/Makefile.am24
-rw-r--r--gcell/include/gcell/Makefile.am42
-rw-r--r--gcell/include/gcell/compiler.h45
-rw-r--r--gcell/include/gcell/gc_aligned_alloc.h52
-rw-r--r--gcell/include/gcell/gc_atomic.h29
-rw-r--r--gcell/include/gcell/gc_cdefs.h34
-rw-r--r--gcell/include/gcell/gc_declare_proc.h64
-rw-r--r--gcell/include/gcell/gc_jd_queue.h52
-rw-r--r--gcell/include/gcell/gc_jd_queue_data.h52
-rw-r--r--gcell/include/gcell/gc_jd_stack.h70
-rw-r--r--gcell/include/gcell/gc_job_desc.h213
-rw-r--r--gcell/include/gcell/gc_job_desc_private.h39
-rw-r--r--gcell/include/gcell/gc_job_manager.h286
-rw-r--r--gcell/include/gcell/gc_logging.h166
-rw-r--r--gcell/include/gcell/gc_mbox.h52
-rw-r--r--gcell/include/gcell/gc_spu_args.h60
-rw-r--r--gcell/include/gcell/gc_types.h63
-rw-r--r--gcell/include/gcell/gcp_fft_1d_r2.h64
-rw-r--r--gcell/include/gcell/memory_barrier.h64
-rw-r--r--gcell/include/gcell/spu/Makefile.am30
-rw-r--r--gcell/include/gcell/spu/fft_1d.h103
-rw-r--r--gcell/include/gcell/spu/fft_1d_r2.h529
-rw-r--r--gcell/include/gcell/spu/gc_delay.h27
-rw-r--r--gcell/include/gcell/spu/gc_jd_queue.h53
-rw-r--r--gcell/include/gcell/spu/gc_random.h32
-rw-r--r--gcell/include/gcell/spu/gc_spu_macs.h380
-rw-r--r--gcell/include/gcell/spu/libfft.h113
27 files changed, 2738 insertions, 0 deletions
diff --git a/gcell/include/Makefile.am b/gcell/include/Makefile.am
new file mode 100644
index 0000000000..c96c6f0a34
--- /dev/null
+++ b/gcell/include/Makefile.am
@@ -0,0 +1,24 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+SUBDIRS = gcell
+
diff --git a/gcell/include/gcell/Makefile.am b/gcell/include/gcell/Makefile.am
new file mode 100644
index 0000000000..03255e516b
--- /dev/null
+++ b/gcell/include/gcell/Makefile.am
@@ -0,0 +1,42 @@
+#
+# Copyright 2007,2008 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+SUBDIRS = spu
+
+gcellinclude_HEADERS = \
+ compiler.h \
+ gc_aligned_alloc.h \
+ gc_atomic.h \
+ gc_cdefs.h \
+ gc_declare_proc.h \
+ gc_job_manager.h \
+ gc_jd_queue_data.h \
+ gc_jd_queue.h \
+ gc_jd_stack.h \
+ gc_job_desc.h \
+ gc_job_desc_private.h \
+ gc_logging.h \
+ gc_mbox.h \
+ gc_spu_args.h \
+ gc_types.h \
+ gcp_fft_1d_r2.h \
+ memory_barrier.h
diff --git a/gcell/include/gcell/compiler.h b/gcell/include/gcell/compiler.h
new file mode 100644
index 0000000000..d1adcd1290
--- /dev/null
+++ b/gcell/include/gcell/compiler.h
@@ -0,0 +1,45 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_COMPILER_H
+#define INCLUDED_GCELL_COMPILER_H
+
+/*!
+ * \brief Compiler specific hackery. These are for GCC.
+ */
+
+#define _AL8 __attribute__((aligned (8)))
+#define _AL16 __attribute__((aligned (16)))
+#define _AL128 __attribute__((aligned (128)))
+
+#define _UNUSED __attribute__((unused))
+
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+
+#endif /* INCLUDED_GCELL_COMPILER_H */
diff --git a/gcell/include/gcell/gc_aligned_alloc.h b/gcell/include/gcell/gc_aligned_alloc.h
new file mode 100644
index 0000000000..bdc21c278a
--- /dev/null
+++ b/gcell/include/gcell/gc_aligned_alloc.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GC_ALIGNED_ALLOC_H
+#define INCLUDED_GC_ALIGNED_ALLOC_H
+
+#include <boost/shared_ptr.hpp>
+
+/*!
+ * \brief Return pointer to chunk of storage of size size bytes.
+ * The allocation will be aligned to an \p alignment boundary.
+ *
+ * \param size is the number of bytes to allocate
+ * \param alignment is the minimum storage alignment in bytes; must be a power of 2.
+ *
+ * Throws if can't allocate memory. The storage should be freed
+ * with "free" when done. The memory is initialized to zero.
+ */
+void *
+gc_aligned_alloc(size_t size, size_t alignment = 128);
+
+/*!
+ * \brief Return boost::shared_ptr to chunk of storage of size size bytes.
+ * The allocation will be aligned to an \p alignment boundary.
+ *
+ * \param size is the number of bytes to allocate
+ * \param alignment is the minimum storage alignment in bytes; must be a power of 2.
+ *
+ * Throws if can't allocate memory. The storage should be freed
+ * with "free" when done. The memory is initialized to zero.
+ */
+boost::shared_ptr<void>
+gc_aligned_alloc_sptr(size_t size, size_t alignment = 128);
+
+#endif /* INCLUDED_GC_ALIGNED_ALLOC_H */
diff --git a/gcell/include/gcell/gc_atomic.h b/gcell/include/gcell/gc_atomic.h
new file mode 100644
index 0000000000..51d38af482
--- /dev/null
+++ b/gcell/include/gcell/gc_atomic.h
@@ -0,0 +1,29 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_GC_ATOMIC_H
+#define INCLUDED_GCELL_GC_ATOMIC_H
+
+#include <stdint.h>
+
+typedef uint32_t gc_atomic_t;
+
+
+#endif /* INCLUDED_GCELL_GC_ATOMIC_H */
diff --git a/gcell/include/gcell/gc_cdefs.h b/gcell/include/gcell/gc_cdefs.h
new file mode 100644
index 0000000000..0c5fc4ad88
--- /dev/null
+++ b/gcell/include/gcell/gc_cdefs.h
@@ -0,0 +1,34 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_GC_CDEFS_H
+#define INCLUDED_GCELL_GC_CDEFS_H
+
+/* C++ needs to know that types and declarations are C, not C++. */
+#ifdef __cplusplus
+# define __GC_BEGIN_DECLS extern "C" {
+# define __GC_END_DECLS }
+#else
+# define __GC_BEGIN_DECLS
+# define __GC_END_DECLS
+#endif
+
+#endif /* INCLUDED_GCELL_GC_CDEFS_H */
diff --git a/gcell/include/gcell/gc_declare_proc.h b/gcell/include/gcell/gc_declare_proc.h
new file mode 100644
index 0000000000..ba77e0fb05
--- /dev/null
+++ b/gcell/include/gcell/gc_declare_proc.h
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_GC_DECLARE_PROC_H
+#define INCLUDED_GCELL_GC_DECLARE_PROC_H
+
+#include <stdint.h>
+#include <gcell/gc_job_desc.h>
+
+/*
+ * This is C, not C++ code...
+ *
+ * ...and is used by both PPE and SPE code
+ */
+__GC_BEGIN_DECLS
+
+#define GC_PROC_DEF_SECTION ".gcell.proc_def"
+
+typedef struct gc_proc_def {
+#if defined(__SPU__)
+ gc_spu_proc_t proc;
+#else
+ uint32_t proc;
+#endif
+ char name[28];
+} _AL16 gc_proc_def_t;
+
+
+#if defined(__SPU__)
+/*!
+ * \brief Tell gcell about a SPU procedure
+ *
+ * \param _proc_ pointer to function (gc_spu_proc_t)
+ * \param _name_ the name of the procedure ("quoted string")
+ *
+ * This macro registers the given procedure with the gcell runtime.
+ * From the PPE, use gc_job_manager::lookup_proc to map \p _name_ to a gc_proc_id_t
+ */
+#define GC_DECLARE_PROC(_proc_, _name_) \
+static struct gc_proc_def \
+ _GCPD_ ## _proc_ __attribute__((section(GC_PROC_DEF_SECTION), used)) = \
+ { _proc_, _name_ }
+#endif
+
+__GC_END_DECLS
+
+#endif /* INCLUDED_GCELL_GC_DECLARE_PROC_H */
diff --git a/gcell/include/gcell/gc_jd_queue.h b/gcell/include/gcell/gc_jd_queue.h
new file mode 100644
index 0000000000..50777a394c
--- /dev/null
+++ b/gcell/include/gcell/gc_jd_queue.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_GC_JD_QUEUE_H
+#define INCLUDED_GCELL_GC_JD_QUEUE_H
+
+#include <gcell/gc_jd_queue_data.h>
+
+__GC_BEGIN_DECLS
+
+/*!
+ * \brief Initialize the queue to empty.
+ */
+void
+gc_jd_queue_init(gc_jd_queue_t *q);
+
+
+/*!
+ * \brief Add \p item to the tail of \p q.
+ */
+void
+gc_jd_queue_enqueue(gc_jd_queue_t *q, gc_job_desc_t *item);
+
+
+/*!
+ * \brief Remove and return item at head of queue, or 0 if queue is empty
+ */
+gc_job_desc_t *
+gc_jd_queue_dequeue(gc_jd_queue_t *q);
+
+__GC_END_DECLS
+
+
+#endif /* INCLUDED_GCELL_GC_JD_QUEUE_H */
diff --git a/gcell/include/gcell/gc_jd_queue_data.h b/gcell/include/gcell/gc_jd_queue_data.h
new file mode 100644
index 0000000000..819b3712ee
--- /dev/null
+++ b/gcell/include/gcell/gc_jd_queue_data.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_GC_JD_QUEUE_DATA_H
+#define INCLUDED_GCELL_GC_JD_QUEUE_DATA_H
+
+#include <gcell/gc_types.h>
+#include <gcell/gc_job_desc.h>
+
+__GC_BEGIN_DECLS
+
+/*!
+ * \brief (Lock free someday...) queue for job descriptors
+ *
+ * This is the main data structure shared between PPEs and SPEs.
+ * It is used to enqueue work for SPEs. SPEs or PPEs may enqueue
+ * work. SPE's dequeue from here.
+ *
+ * FIXME make it lock free ;) For now, use a spin lock.
+ *
+ * (Fills a single cache line)
+ */
+typedef struct gc_jd_queue
+{
+ gc_eaddr_t head _AL16;
+ gc_eaddr_t tail _AL16;
+ uint32_t mutex _AL16; // libsync mutex (spin lock)
+} _AL128 gc_jd_queue_t;
+
+__GC_END_DECLS
+
+#endif /* INCLUDED_GCELL_GC_JD_QUEUE_DATA_H */
+
+
diff --git a/gcell/include/gcell/gc_jd_stack.h b/gcell/include/gcell/gc_jd_stack.h
new file mode 100644
index 0000000000..9eab4e4020
--- /dev/null
+++ b/gcell/include/gcell/gc_jd_stack.h
@@ -0,0 +1,70 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_GC_JD_STACK_H
+#define INCLUDED_GCELL_GC_JD_STACK_H
+
+#include <gcell/gc_types.h>
+#include <gcell/gc_job_desc.h>
+
+__GC_BEGIN_DECLS
+
+/*!
+ * \brief Lock free stack for job descriptors (used for free list)
+ *
+ * This is aligned to a cache line, and fills the cache line,
+ * to avoid inadvertently losing reservations created with
+ * the load-and-reserve instructions.
+ */
+
+typedef struct gc_jd_stack
+{
+ gc_eaddr_t top;
+
+ // pad out to a full cache line
+ uint8_t _pad[128 - sizeof(gc_eaddr_t)];
+} _AL128 gc_jd_stack_t;
+
+
+/*!
+ * \brief Initialize the stack to empty.
+ */
+void
+gc_jd_stack_init(gc_jd_stack_t *stack);
+
+
+/*!
+ * \brief Add \p item to the top of \p stack.
+ */
+void
+gc_jd_stack_push(gc_jd_stack_t *stack, gc_job_desc_t *item);
+
+
+/*!
+ * \brief pop and return top item on stack, or 0 if stack is empty
+ */
+gc_job_desc_t *
+gc_jd_stack_pop(gc_jd_stack_t *stack);
+
+__GC_END_DECLS
+
+
+#endif /* INCLUDED_GCELL_GC_JD_STACK_H */
diff --git a/gcell/include/gcell/gc_job_desc.h b/gcell/include/gcell/gc_job_desc.h
new file mode 100644
index 0000000000..5ff99e2e28
--- /dev/null
+++ b/gcell/include/gcell/gc_job_desc.h
@@ -0,0 +1,213 @@
+/* -*- c -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_GC_JOB_DESC_H
+#define INCLUDED_GCELL_GC_JOB_DESC_H
+
+/*!
+ * This file contains the structures that are used to describe how to
+ * call "jobs" that execute on the SPEs. A "job" is a task, or piece of
+ * work that you want to run on an SPE.
+ *
+ * There is code running in the SPE that knows how to interpret
+ * these job descriptions. Thus, in most cases, the overhead
+ * of invoking these is very low.
+ *
+ * The whole "job idea" is SPE centric. At first pass,
+ * the PPE will be constructing jobs and enqueing them.
+ * However, there is nothing in the implementation that
+ * prohibits SPEs from creating their own jobs in the
+ * future. Also, there is nothing prohibiting SPE-to-SPE
+ * DMA's.
+ *
+ * SPE's dequeue and "pull" jobs to themselves, do the work, then
+ * notify the entity that submitted the job.
+ */
+
+#include <gcell/gc_types.h>
+#include <gcell/gc_job_desc_private.h>
+
+/*
+ * This is C, not C++ code...
+ *
+ * ...and is used by both PPE and SPE code
+ */
+__GC_BEGIN_DECLS
+
+
+//! opaque ID that specifies which code to invoke on the SPE
+typedef uint32_t gc_proc_id_t;
+#define GCP_UNKNOWN_PROC ((gc_proc_id_t) -1)
+
+
+//! final job status
+typedef enum {
+ JS_OK,
+ JS_SHUTTING_DOWN, // job mananger is shutting down
+ JS_TOO_MANY_CLIENTS, // too many client threads
+ JS_UNKNOWN_PROC, // didn't recognize the procedure ID
+ JS_BAD_DIRECTION, // EA arg has invalid direction
+ JS_BAD_EAH, // not all EA args have the same high 32 address bits
+ JS_BAD_N_DIRECT, // too many direct args
+ JS_BAD_N_EA, // too many EA args
+ JS_ARGS_TOO_LONG, // total length of EA args exceeds limit
+ JS_BAD_JUJU, // misc problem: you're having a bad day
+ JS_BAD_JOB_DESC, // gc_job_desc was not allocated using mgr->alloc_job_desc()
+
+} gc_job_status_t;
+
+#define MAX_ARGS_DIRECT 8 // maximum number of args passed using "direct" method
+#define MAX_ARGS_EA 8 // maximum number of args passed via EA memory (dma)
+
+/*
+ * We support two classes of arguments,
+ * "direct", which are contained in the gc_job_desc_args and
+ * "EA", which are copied in/out according to info in gc_job_desc_args
+ */
+
+/*!
+ * \brief Tag type of "direct" argument
+ */
+typedef enum {
+ GCT_S32,
+ GCT_U32,
+ GCT_S64,
+ GCT_U64,
+ GCT_FLOAT,
+ GCT_DOUBLE,
+ GCT_FLT_CMPLX,
+ GCT_DBL_CMPLX,
+ GCT_EADDR,
+
+} gc_tag_t;
+
+
+/*!
+ * \brief union for passing "direct" argument
+ */
+typedef union gc_arg_union
+{
+ int32_t s32;
+ uint32_t u32;
+ int64_t s64;
+ uint64_t u64;
+ float f;
+ double d;
+ //float complex cf; // 64-bits (C99)
+ //double complex cd; // 128-bits (C99)
+ gc_eaddr_t ea; // 64-bits
+} _AL8 gc_arg_union_t;
+
+
+/*!
+ * \brief "direct" input or output arguments
+ */
+typedef struct gc_job_direct_args
+{
+ uint32_t nargs; // # of "direct" args
+ gc_tag_t tag[MAX_ARGS_DIRECT] _AL16; // type of direct arg[i]
+ gc_arg_union_t arg[MAX_ARGS_DIRECT] _AL16; // direct argument values
+
+} _AL16 gc_job_direct_args_t;
+
+
+// specifies direction for args passed in EA memory
+
+#define GCJD_DMA_GET 0x01 // in to SPE
+#define GCJD_DMA_PUT 0x02 // out from SPE
+
+/*!
+ * \brief Description of args passed in EA memory.
+ * These are DMA'd between EA and LS as specified.
+ */
+typedef struct gc_job_ea_arg {
+ //! EA address of buffer
+ gc_eaddr_t ea_addr;
+
+ //! GC_JD_DMA_* get arg or put arg
+ uint32_t direction;
+
+ //! number of bytes to get
+ uint32_t get_size;
+
+ //! number of bytes to put
+ uint32_t put_size;
+
+#if defined(__SPU__)
+ //! local store address (filled in by SPU runtime)
+ void *ls_addr;
+ uint32_t _pad[2];
+#else
+ uint32_t _pad[3];
+#endif
+
+} _AL16 gc_job_ea_arg_t;
+
+
+typedef struct gc_job_ea_args {
+ uint32_t nargs;
+ gc_job_ea_arg_t arg[MAX_ARGS_EA];
+
+} _AL16 gc_job_ea_args_t;
+
+
+/*!
+ * \brief "job description" that is DMA'd to/from the SPE.
+ */
+typedef struct gc_job_desc
+{
+ gc_job_desc_private_t sys; // internals
+ gc_job_status_t status; // what happened (output)
+ gc_proc_id_t proc_id; // specifies which procedure to run
+ gc_job_direct_args_t input; // direct args to SPE
+ gc_job_direct_args_t output; // direct args from SPE
+ gc_job_ea_args_t eaa; // args passed via EA memory
+
+} _AL128 gc_job_desc_t;
+
+
+/*!
+ * type of procedure invoked on spu
+ */
+typedef void (*gc_spu_proc_t)(const gc_job_direct_args_t *input,
+ gc_job_direct_args_t *output,
+ const gc_job_ea_args_t *eaa);
+
+#if !defined(__SPU__)
+
+static inline gc_job_desc_t *
+ea_to_jdp(gc_eaddr_t ea)
+{
+ return (gc_job_desc_t *) ea_to_ptr(ea);
+}
+
+static inline gc_eaddr_t
+jdp_to_ea(gc_job_desc_t *item)
+{
+ return ptr_to_ea(item);
+}
+
+#endif
+
+
+__GC_END_DECLS
+
+#endif /* INCLUDED_GCELL_GC_JOB_DESC_H */
diff --git a/gcell/include/gcell/gc_job_desc_private.h b/gcell/include/gcell/gc_job_desc_private.h
new file mode 100644
index 0000000000..fa831a88ed
--- /dev/null
+++ b/gcell/include/gcell/gc_job_desc_private.h
@@ -0,0 +1,39 @@
+/* -*- c -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_GC_JOB_DESC_PRIVATE_H
+#define INCLUDED_GCELL_GC_JOB_DESC_PRIVATE_H
+
+// #include <libsync.h>
+
+/*!
+ * \brief Implementation details we'd like to hide from the user.
+ */
+typedef struct gc_job_desc_private
+{
+ gc_eaddr_t next; // used to implement job queue and free list
+ uint16_t job_id;
+ uint16_t client_id;
+ uint32_t direction_union; // union of all gc_job_ea_arg.direction fields
+} gc_job_desc_private_t;
+
+#endif /* INCLUDED_GCELL_GC_JOB_PRIVATE_H */
+
diff --git a/gcell/include/gcell/gc_job_manager.h b/gcell/include/gcell/gc_job_manager.h
new file mode 100644
index 0000000000..67abce7ed0
--- /dev/null
+++ b/gcell/include/gcell/gc_job_manager.h
@@ -0,0 +1,286 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GC_JOB_MANAGER_H
+#define INCLUDED_GC_JOB_MANAGER_H
+
+#include <boost/utility.hpp>
+#include <boost/shared_ptr.hpp>
+#include <vector>
+#include <string>
+#include <stdexcept>
+#include <libspe2.h>
+#include "gc_job_desc.h"
+
+class gc_job_manager;
+typedef boost::shared_ptr<gc_job_manager> gc_job_manager_sptr;
+typedef boost::shared_ptr<spe_program_handle_t> spe_program_handle_sptr;
+typedef boost::shared_ptr<gc_job_desc> gc_job_desc_sptr;
+
+/*!
+ * \brief Return a boost::shared_ptr to an spe_program_handle_t
+ *
+ * \param filename is the name of the SPE ELF executable to open.
+ *
+ * Calls spe_image_open to open the file. If successful returns a
+ * boost::shared_ptr that will call spe_image_close when it's time to
+ * free the object.
+ *
+ * Returns the equivalent of the NULL pointer if the file cannot be
+ * opened, or if it's not an SPE ELF object file.
+ *
+ * \sa gc_program_handle_from_address
+ */
+spe_program_handle_sptr
+gc_program_handle_from_filename(const std::string &filename);
+
+/*!
+ * \brief Return a boost::shared_ptr to an spe_program_handle_t
+ *
+ * \param handle is a non-zero pointer to an embedded SPE image.
+ *
+ * If successful returns a boost::shared_ptr that does nothing when
+ * it's time to free the object.
+ *
+ * \sa gc_program_handle_from_filename
+ */
+spe_program_handle_sptr
+gc_program_handle_from_address(spe_program_handle_t *handle);
+
+/*!
+ * \brief map gc_job_status_t into a string
+ */
+const std::string
+gc_job_status_string(gc_job_status_t status);
+
+/*
+ * \brief Options that configure the job_manager.
+ * The default values are reasonable.
+ */
+struct gc_jm_options {
+ unsigned int max_jobs; // max # of job descriptors in system
+ unsigned int max_client_threads; // max # of client threads of job manager
+ unsigned int nspes; // how many SPEs shall we use? 0 -> all of them
+ bool gang_schedule; // shall we gang schedule?
+ bool use_affinity; // shall we try for affinity (FIXME not implmented)
+ bool enable_logging; // shall we log SPE events?
+ uint32_t log2_nlog_entries; // log2 of number of log entries (default is 12 == 4k)
+ spe_program_handle_sptr program_handle; // program to load into SPEs
+
+ gc_jm_options() :
+ max_jobs(0), max_client_threads(0), nspes(0),
+ gang_schedule(false), use_affinity(false),
+ enable_logging(false), log2_nlog_entries(12)
+ {
+ }
+
+ gc_jm_options(spe_program_handle_sptr program_handle_,
+ unsigned int nspes_ = 0) :
+ max_jobs(0), max_client_threads(0), nspes(nspes_),
+ gang_schedule(false), use_affinity(false),
+ enable_logging(false), log2_nlog_entries(12),
+ program_handle(program_handle_)
+ {
+ }
+};
+
+enum gc_wait_mode {
+ GC_WAIT_ANY,
+ GC_WAIT_ALL,
+};
+
+/*
+ * exception classes
+ */
+class gc_exception : public std::runtime_error
+{
+public:
+ gc_exception(const std::string &msg);
+};
+
+class gc_unknown_proc : public gc_exception
+{
+public:
+ gc_unknown_proc(const std::string &msg);
+};
+
+class gc_bad_alloc : public gc_exception
+{
+public:
+ gc_bad_alloc(const std::string &msg);
+};
+
+class gc_bad_align : public gc_exception
+{
+public:
+ gc_bad_align(const std::string &msg);
+};
+
+class gc_bad_submit : public gc_exception
+{
+public:
+ gc_bad_submit(const std::string &name, gc_job_status_t status);
+};
+
+/*
+ * \brief Create an instance of the job manager
+ */
+gc_job_manager_sptr
+gc_make_job_manager(const gc_jm_options *options = 0);
+
+
+/*!
+ * \brief Abstract class that manages SPE jobs.
+ *
+ * There is typically a single instance derived from this class.
+ * It is safe to call its methods from any thread.
+ */
+class gc_job_manager : boost::noncopyable
+{
+public:
+ gc_job_manager(const gc_jm_options *options = 0);
+
+ virtual ~gc_job_manager();
+
+ /*!
+ * Stop accepting new jobs. Wait for existing jobs to complete.
+ * Return all managed SPE's to the system.
+ */
+ virtual bool shutdown() = 0;
+
+ /*!
+ * \brief Return number of SPE's currently allocated to job manager.
+ */
+ virtual int nspes() const = 0;
+
+ /*!
+ * \brief Return a pointer to a properly aligned job descriptor,
+ * or throws gc_bad_alloc if there are none available.
+ */
+ virtual gc_job_desc *alloc_job_desc() = 0;
+
+ /*
+ *! Free a job descriptor previously allocated with alloc_job_desc()
+ *
+ * \param[in] jd pointer to job descriptor to free.
+ */
+ virtual void free_job_desc(gc_job_desc *jd) = 0;
+
+ /*!
+ * \brief Submit a job for asynchronous processing on an SPE.
+ *
+ * \param[in] jd pointer to job description
+ *
+ * The caller must not read or write the job description
+ * or any of the memory associated with any indirect arguments
+ * until after calling wait_job.
+ *
+ * \returns true iff the job was successfully enqueued.
+ * If submit_job returns false, check jd->status for additional info.
+ */
+ virtual bool submit_job(gc_job_desc *jd) = 0;
+
+ /*!
+ * \brief Wait for job to complete.
+ *
+ * A thread may only wait for jobs which it submitted.
+ *
+ * \returns true if sucessful, else false.
+ */
+ virtual bool
+ wait_job(gc_job_desc *jd) = 0;
+
+ /*!
+ * \brief wait for 1 or more jobs to complete.
+ *
+ * \param[input] njobs is the length of arrays \p jd and \p done.
+ * \param[input] jd are the jobs that are to be waited for.
+ * \param[output] done indicates whether the corresponding job is complete.
+ * \param[input] mode indicates whether to wait for ALL or ANY of the jobs
+ * in \p jd to complete.
+ *
+ * A thread may only wait for jobs which it submitted.
+ *
+ * \returns number of jobs completed, or -1 if error.
+ * The caller must examine the status field of each job to confirm
+ * successful completion of the job.
+ */
+ virtual int
+ wait_jobs(unsigned int njobs,
+ gc_job_desc *jd[], bool done[], gc_wait_mode mode) = 0;
+
+ /*!
+ * Return the maximum number of bytes of EA arguments that may be
+ * copied to or from the SPE in a single job. The limit applies
+ * independently to the "get" and "put" args.
+ * \sa gc_job_desc_t, gc_job_ea_args_t
+ */
+ virtual int ea_args_maxsize() = 0;
+
+ /*!
+ * Return gc_proc_id_t associated with spu procedure \p proc_name if one
+ * exists, otherwise throws gc_unknown_proc.
+ */
+ virtual gc_proc_id_t lookup_proc(const std::string &proc_name) = 0;
+
+ /*!
+ * Return a vector of all known spu procedure names.
+ */
+ virtual std::vector<std::string> proc_names() = 0;
+
+ virtual void set_debug(int debug);
+ virtual int debug();
+
+ /* ----- static methods ----- */
+
+ /*!
+ * \brief Set the singleton gc_job_manager instance.
+ * \param mgr is the job manager instance.
+ *
+ * The singleton is weakly held, thus the caller must maintain
+ * a reference to the mgr for the duration. (If we held the
+ * manager strongly, the destructor would never be called, and the
+ * resources (SPEs) would not be returned.) Bottom line: the
+ * caller is responsible for life-time management.
+ */
+ static void set_singleton(gc_job_manager_sptr mgr);
+
+ /*!
+ * \brief Retrieve the singleton gc_job_manager instance.
+ *
+ * Returns the singleton gc_job_manager instance or raises
+ * boost::bad_weak_ptr if the singleton is empty.
+ */
+ static gc_job_manager_sptr singleton();
+
+ /*!
+ * \brief return a boost::shared_ptr to a job descriptor.
+ */
+ static gc_job_desc_sptr make_jd_sptr(gc_job_manager_sptr mgr, gc_job_desc *jd);
+
+ /*!
+ * \brief allocate a job descriptor and return a boost::shared_ptr to it.
+ */
+ static gc_job_desc_sptr alloc_job_desc(gc_job_manager_sptr mgr);
+};
+
+
+#endif /* INCLUDED_GC_JOB_MANAGER_H */
diff --git a/gcell/include/gcell/gc_logging.h b/gcell/include/gcell/gc_logging.h
new file mode 100644
index 0000000000..9cc6fa77bd
--- /dev/null
+++ b/gcell/include/gcell/gc_logging.h
@@ -0,0 +1,166 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_GC_LOGGING_H
+#define INCLUDED_GCELL_GC_LOGGING_H
+
+#include <gcell/gc_types.h>
+#include <string.h>
+
+__GC_BEGIN_DECLS
+
+typedef struct gc_log {
+ gc_eaddr_t base; // gc_log_entry_t * (16 byte aligned)
+ uint32_t nentries; // number of entries (power-of-2)
+} gc_log_t;
+
+typedef struct gc_log_entry {
+ uint32_t seqno; // monotonic sequence number
+ uint32_t timestamp; // decrementer value (wraps every 53s on PS3)
+ uint16_t subsystem; // 0 to 255 reserved for system, user gets 256 and up
+ uint16_t event;
+ uint32_t info[5];
+} _AL16 gc_log_entry_t;
+
+#define GCL_SS_SYS 0 // lowest system reserved subsystem
+#define GCL_SS_USER 256 // lowest user reserved subsystem
+
+
+/*
+ * The resulting log files can be displayed using using:
+ *
+ * $ od -t x4 -w32 spu_log.00 | less
+ */
+
+
+#if defined(__SPU__)
+
+/*!
+ * System fills in seqno and timestamp. User is responsible for the rest.
+ */
+
+void _gc_log_write(gc_log_entry_t entry);
+
+#ifdef ENABLE_GC_LOGGING
+#define gc_log_write(entry) _gc_log_write(entry)
+#else
+#define gc_log_write(entry) do { } while (0)
+#endif
+
+inline static void
+gc_log_write0(int subsystem, int event)
+{
+ gc_log_entry_t e;
+ e.subsystem = subsystem;
+ e.event = event;
+ e.info[0] = 0;
+ e.info[1] = 0;
+ e.info[2] = 0;
+ e.info[3] = 0;
+ e.info[4] = 0;
+ gc_log_write(e);
+}
+
+inline static void
+gc_log_write1(int subsystem, int event,
+ uint32_t info0)
+{
+ gc_log_entry_t e;
+ e.subsystem = subsystem;
+ e.event = event;
+ e.info[0] = info0;
+ e.info[1] = 0;
+ e.info[2] = 0;
+ e.info[3] = 0;
+ e.info[4] = 0;
+ gc_log_write(e);
+}
+
+inline static void
+gc_log_write2(int subsystem, int event,
+ uint32_t info0, uint32_t info1)
+{
+ gc_log_entry_t e;
+ e.subsystem = subsystem;
+ e.event = event;
+ e.info[0] = info0;
+ e.info[1] = info1;
+ e.info[2] = 0;
+ e.info[3] = 0;
+ e.info[4] = 0;
+ gc_log_write(e);
+}
+
+inline static void
+gc_log_write3(int subsystem, int event,
+ uint32_t info0, uint32_t info1, uint32_t info2)
+{
+ gc_log_entry_t e;
+ e.subsystem = subsystem;
+ e.event = event;
+ e.info[0] = info0;
+ e.info[1] = info1;
+ e.info[2] = info2;
+ e.info[3] = 0;
+ e.info[4] = 0;
+ gc_log_write(e);
+}
+
+inline static void
+gc_log_write4(int subsystem, int event,
+ uint32_t info0, uint32_t info1, uint32_t info2, uint32_t info3)
+{
+ gc_log_entry_t e;
+ e.subsystem = subsystem;
+ e.event = event;
+ e.info[0] = info0;
+ e.info[1] = info1;
+ e.info[2] = info2;
+ e.info[3] = info3;
+ e.info[4] = 0;
+ gc_log_write(e);
+}
+
+inline static void
+gc_log_write5(int subsystem, int event,
+ uint32_t info0, uint32_t info1, uint32_t info2, uint32_t info3, uint32_t info4)
+{
+ gc_log_entry_t e;
+ e.subsystem = subsystem;
+ e.event = event;
+ e.info[0] = info0;
+ e.info[1] = info1;
+ e.info[2] = info2;
+ e.info[3] = info3;
+ e.info[4] = info4;
+ gc_log_write(e);
+}
+
+/*!
+ * One time initialization called by system runtime
+ */
+void
+_gc_log_init(gc_log_t log_info);
+
+#endif
+
+__GC_END_DECLS
+
+#endif /* INCLUDED_GCELL_GC_LOGGING_H */
diff --git a/gcell/include/gcell/gc_mbox.h b/gcell/include/gcell/gc_mbox.h
new file mode 100644
index 0000000000..1d577ff8f4
--- /dev/null
+++ b/gcell/include/gcell/gc_mbox.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_GC_MBOX_H
+#define INCLUDED_GCELL_GC_MBOX_H
+
+/*
+ * The PPE and SPE exchange a few 32-bit messages via mailboxes.
+ * All have a 4 bit opcode in the high bits.
+ *
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | op | arg |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ */
+
+#define MK_MBOX_MSG(cmd, args) ((((cmd) & 0xf) << 28) | ((args) & 0x0fffffff))
+#define MBOX_MSG_OP(msg) (((msg) >> 28) & 0xf)
+#define MBOX_MSG_ARG(msg) ((msg) & 0x0fffffff)
+
+// PPE to SPE (sent via SPE Read Inbound Mailbox)
+
+#define OP_EXIT 0x0 // exit now
+#define OP_GET_SPU_BUFSIZE 0x1
+
+// SPE to PPE (sent via SPE Write Outbound Interrupt Mailbox)
+
+#define OP_JOBS_DONE 0x2 // arg is 0 or 1, indicating which
+ // gc_completion_info_t contains the info
+#define OP_SPU_BUFSIZE 0x3 // arg is max number of bytes
+
+
+#endif /* INCLUDED_GCELL_GC_MBOX_H */
diff --git a/gcell/include/gcell/gc_spu_args.h b/gcell/include/gcell/gc_spu_args.h
new file mode 100644
index 0000000000..3719bac222
--- /dev/null
+++ b/gcell/include/gcell/gc_spu_args.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_GC_SPU_ARGS_H
+#define INCLUDED_GCELL_GC_SPU_ARGS_H
+
+#include <gcell/gc_types.h>
+#include <gcell/gc_logging.h>
+
+// args passed to SPE at initialization time
+
+typedef struct gc_spu_args {
+ gc_eaddr_t queue; // address of job queue (gc_jd_queue_t *)
+ gc_eaddr_t comp_info[2]; // completion info (gc_comp_info_t *)
+ uint32_t spu_idx; // which spu we are: [0,nspus-1]
+ uint32_t nspus; // number of spus we're using
+ uint32_t proc_def_ls_addr; // LS addr of proc_def table
+ uint32_t nproc_defs; // number of proc_defs in table
+ gc_log_t log; // logging info
+} _AL16 gc_spu_args_t;
+
+
+#define GC_CI_NJOBS 62 // makes gc_comp_info 1 cache line long
+
+/*!
+ * \brief Used to return info to PPE on which jobs are completed.
+ *
+ * When each SPE is initalized, it is passed EA pointers to two of
+ * these structures. The SPE uses these to communicate which jobs
+ * that it has worked on are complete. The SPE notifies the PPE by
+ * sending an OP_JOBS_DONE message (see gc_mbox.h) with an argument of
+ * 0 or 1, indicating which of the two comp_info's to examine. The
+ * SPE sets the in_use flag to 1 before DMA'ing to the PPE. When the
+ * PPE is done with the structure, it must clear the in_use field to
+ * let the SPE know it can begin using it again.
+ */
+typedef struct gc_comp_info {
+ uint16_t in_use; // set by SPE, cleared by PPE when it's finished
+ uint16_t ncomplete; // number of valid job_id's
+ uint16_t job_id[GC_CI_NJOBS]; // job_id's of completed jobs
+} _AL128 gc_comp_info_t;
+
+#endif /* INCLUDED_GCELL_GC_SPU_ARGS_H */
diff --git a/gcell/include/gcell/gc_types.h b/gcell/include/gcell/gc_types.h
new file mode 100644
index 0000000000..b75bcd8d70
--- /dev/null
+++ b/gcell/include/gcell/gc_types.h
@@ -0,0 +1,63 @@
+/* -*- c -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_GC_TYPES_H
+#define INCLUDED_GCELL_GC_TYPES_H
+
+#include <stdint.h>
+#include <gcell/gc_cdefs.h>
+#include <gcell/compiler.h>
+
+__GC_BEGIN_DECLS
+
+#ifndef __cplusplus
+typedef int bool;
+#define true 1
+#define false 0
+#endif
+
+/*!
+ * \brief 64-bit integer type representing an effective address (EA)
+ *
+ * This type is always 64-bits, regardless of whether we're
+ * running in 32 or 64-bit mode.
+ */
+typedef uint64_t gc_eaddr_t;
+
+#if !defined(__SPU__)
+static inline void *
+ea_to_ptr(gc_eaddr_t ea)
+{
+ // in 32-bit mode we're tossing the top 32-bits.
+ return (void *) (uintptr_t) ea;
+}
+
+static inline gc_eaddr_t
+ptr_to_ea(void *p)
+{
+ // two steps to avoid compiler warning in 32-bit mode.
+ return (gc_eaddr_t) (uintptr_t) p;
+}
+#endif
+
+__GC_END_DECLS
+
+#endif /* INCLUDED_GCELL_GC_TYPES_H */
diff --git a/gcell/include/gcell/gcp_fft_1d_r2.h b/gcell/include/gcell/gcp_fft_1d_r2.h
new file mode 100644
index 0000000000..c1d331833b
--- /dev/null
+++ b/gcell/include/gcell/gcp_fft_1d_r2.h
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCP_FFT_1D_R2_H
+#define INCLUDED_GCP_FFT_1D_R2_H
+
+#include <gcell/gc_job_manager.h>
+#include <complex>
+
+/*!
+ * \brief Submit a job that computes the forward or inverse FFT.
+ *
+ * \param mgr is the job manager instance
+ * \param log2_fft_length is the log2 of the fft_length (4 <= x <= 12).
+ * \param forward is true to compute the forward transform, else the inverse.
+ * \param shift indicates if an "fftshift" should be applied to the output data
+ * \param out is the fft_length output from FFT (must be 16-byte aligned).
+ * \param in is the fft_length input to FFT (must be 16-byte aligned).
+ * \param twiddle is fft_length/4 twiddle factor input to FFT (must be 16-byte aligned).
+ * \param window is the window to be applied to the input data.
+ * The window length must be either 0 or fft_length (must be 16-byte aligned).
+ *
+ * Returns a shared_ptr to a job descriptor which should be passed to wait_job*.
+ * Throws an exception in the event of a problem.
+ * This uses the FFTW conventions for scaling. That is, neither the forward nor inverse
+ * are scaled by 1/fft_length.
+ */
+gc_job_desc_sptr
+gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr,
+ unsigned int log2_fft_length,
+ bool forward,
+ bool shift,
+ std::complex<float> *out,
+ const std::complex<float> *in,
+ const std::complex<float> *twiddle,
+ const float *window);
+
+/*!
+ * \brief Compute twiddle factors
+ *
+ * \param log2_fft_length is the log2 of the fft_length.
+ * \param W is fft_length/4 twiddle factor output (must be 16-byte aligned).
+ */
+void
+gcp_fft_1d_r2_twiddle(unsigned int log2_fft_length, std::complex<float> *W);
+
+#endif /* INCLUDED_GCP_FFT_1D_R2_H */
diff --git a/gcell/include/gcell/memory_barrier.h b/gcell/include/gcell/memory_barrier.h
new file mode 100644
index 0000000000..4a1f870003
--- /dev/null
+++ b/gcell/include/gcell/memory_barrier.h
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_MEMORY_BARRIER_H
+#define INCLUDED_GCELL_MEMORY_BARRIER_H
+
+/*
+ * powerpc memory barriers
+ *
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory). The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * smp_mb() prevents loads and stores being reordered across this point.
+ * smp_rmb() prevents loads being reordered across this point.
+ * smp_wmb() prevents stores being reordered across this point.
+ *
+ * We have to use the sync instructions for smp_mb(), since lwsync
+ * doesn't order loads with respect to previous stores. Lwsync is
+ * fine for smp_rmb(), though. For smp_wmb(), we use eieio since it
+ * is only used to order updates to system memory.
+ *
+ * For details, see "PowerPC Virtual Environment Architecture, Book
+ * II". Especially Chapter 1, "Storage Model" and Chapter 3, "Storage
+ * Control Instructions." (site:ibm.com)
+ */
+
+static inline void smp_mb(void)
+{
+ __asm__ volatile ("sync" : : : "memory");
+}
+
+static inline void smp_rmb(void)
+{
+ __asm__ volatile ("lwsync" : : : "memory");
+}
+
+static inline void smp_wmb(void)
+{
+ __asm__ volatile ("eieio" : : : "memory");
+}
+
+
+#endif /* INCLUDED_GCELL_MEMORY_BARRIER_H */
diff --git a/gcell/include/gcell/spu/Makefile.am b/gcell/include/gcell/spu/Makefile.am
new file mode 100644
index 0000000000..58816819d0
--- /dev/null
+++ b/gcell/include/gcell/spu/Makefile.am
@@ -0,0 +1,30 @@
+#
+# Copyright 2007,2008 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+include $(top_srcdir)/Makefile.common
+
+gcellspuinclude_HEADERS = \
+ fft_1d.h \
+ fft_1d_r2.h \
+ gc_delay.h \
+ gc_jd_queue.h \
+ gc_random.h \
+ gc_spu_macs.h \
+ libfft.h
diff --git a/gcell/include/gcell/spu/fft_1d.h b/gcell/include/gcell/spu/fft_1d.h
new file mode 100644
index 0000000000..355b84bf1f
--- /dev/null
+++ b/gcell/include/gcell/spu/fft_1d.h
@@ -0,0 +1,103 @@
+/* -------------------------------------------------------------- */
+/* (C)Copyright 2001,2007, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment, Incorporated, */
+/* Toshiba Corporation, */
+/* */
+/* All Rights Reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the */
+/* following conditions are met: */
+/* */
+/* - Redistributions of source code must retain the above copyright*/
+/* notice, this list of conditions and the following disclaimer. */
+/* */
+/* - Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* - Neither the name of IBM Corporation nor the names of its */
+/* contributors may be used to endorse or promote products */
+/* derived from this software without specific prior written */
+/* permission. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
+/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
+/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
+/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
+/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
+/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
+/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
+/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
+/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#ifndef _FFT_1D_H_
+#define _FFT_1D_H_ 1
+
+#include <spu_intrinsics.h>
+
+/* BIT_SWAP - swaps up to 16 bits of the integer _i according to the
+ * pattern specified by _pat.
+ */
+#define BIT_SWAP(_i, _pat) spu_extract(spu_gather(spu_shuffle(spu_maskb(_i), _pat, _pat)), 0)
+
+
+#ifndef MAX_FFT_1D_SIZE
+#define MAX_FFT_1D_SIZE 8192
+#endif
+
+#ifndef INV_SQRT_2
+#define INV_SQRT_2 0.7071067811865
+#endif
+
+
+/* The following macro, FFT_1D_BUTTERFLY, performs a 4 way SIMD basic butterfly
+ * operation. The inputs are in parallel arrays (seperate real and imaginary
+ * vectors).
+ *
+ * p --------------------------> P = p + q*Wi
+ * \ /
+ * \ /
+ * \ /
+ * \/
+ * /\
+ * / \
+ * / \
+ * ____ / \
+ * q --| Wi |-----------------> Q = p - q*Wi
+ * ----
+ */
+
+#define FFT_1D_BUTTERFLY(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \
+ vector float _qw_re, _qw_im; \
+ \
+ _qw_re = spu_msub(_q_re, _W_re, spu_mul(_q_im, _W_im)); \
+ _qw_im = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \
+ _P_re = spu_add(_p_re, _qw_re); \
+ _P_im = spu_add(_p_im, _qw_im); \
+ _Q_re = spu_sub(_p_re, _qw_re); \
+ _Q_im = spu_sub(_p_im, _qw_im); \
+}
+
+
+/* FFT_1D_BUTTERFLY_HI is equivalent to FFT_1D_BUTTERFLY with twiddle factors (W_im, -W_re)
+ */
+#define FFT_1D_BUTTERFLY_HI(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \
+ vector float _qw_re, _qw_im; \
+ \
+ _qw_re = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \
+ _qw_im = spu_msub(_q_im, _W_im, spu_mul(_q_re, _W_re)); \
+ _P_re = spu_add(_p_re, _qw_re); \
+ _P_im = spu_add(_p_im, _qw_im); \
+ _Q_re = spu_sub(_p_re, _qw_re); \
+ _Q_im = spu_sub(_p_im, _qw_im); \
+}
+
+#endif /* _FFT_1D_H_ */
diff --git a/gcell/include/gcell/spu/fft_1d_r2.h b/gcell/include/gcell/spu/fft_1d_r2.h
new file mode 100644
index 0000000000..a51cbc341d
--- /dev/null
+++ b/gcell/include/gcell/spu/fft_1d_r2.h
@@ -0,0 +1,529 @@
+/* -------------------------------------------------------------- */
+/* (C)Copyright 2001,2007, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment, Incorporated, */
+/* Toshiba Corporation, */
+/* */
+/* All Rights Reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the */
+/* following conditions are met: */
+/* */
+/* - Redistributions of source code must retain the above copyright*/
+/* notice, this list of conditions and the following disclaimer. */
+/* */
+/* - Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* - Neither the name of IBM Corporation nor the names of its */
+/* contributors may be used to endorse or promote products */
+/* derived from this software without specific prior written */
+/* permission. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
+/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
+/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
+/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
+/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
+/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
+/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
+/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
+/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+#ifndef _FFT_1D_R2_H_
+#define _FFT_1D_R2_H_ 1
+
+#include "fft_1d.h"
+
+/* fft_1d_r2
+ * ---------
+ * Performs a single precision, complex Fast Fourier Transform using
+ * the DFT (Discrete Fourier Transform) with radix-2 decimation in time.
+ * The input <in> is an array of complex numbers of length (1<<log2_size)
+ * entries. The result is returned in the array of complex numbers specified
+ * by <out>. Note: This routine can support an in-place transformation
+ * by specifying <in> and <out> to be the same array.
+ *
+ * This implementation utilizes the Cooley-Tukey algorithm consisting
+ * of <log2_size> stages. The basic operation is the butterfly.
+ *
+ * p --------------------------> P = p + q*Wi
+ * \ /
+ * \ /
+ * \ /
+ * \/
+ * /\
+ * / \
+ * / \
+ * ____ / \
+ * q --| Wi |-----------------> Q = p - q*Wi
+ * ----
+ *
+ * This routine also requires pre-computed twiddle values, W. W is an
+ * array of single precision complex numbers of length 1<<(log2_size-2)
+ * and is computed as follows:
+ *
+ * for (i=0; i<n/4; i++)
+ * W[i].real = cos(i * 2*PI/n);
+ * W[i].imag = -sin(i * 2*PI/n);
+ * }
+ *
+ * This array actually only contains the first half of the twiddle
+ * factors. Due for symmetry, the second half of the twiddle factors
+ * are implied and equal:
+ *
+ * for (i=0; i<n/4; i++)
+ * W[i+n/4].real = W[i].imag = sin(i * 2*PI/n);
+ * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n);
+ * }
+ *
+ * Further symmetry allows one to generate the twiddle factor table
+ * using half the number of trig computations as follows:
+ *
+ * W[0].real = 1.0;
+ * W[0].imag = 0.0;
+ * for (i=1; i<n/4; i++)
+ * W[i].real = cos(i * 2*PI/n);
+ * W[n/4 - i].imag = -W[i].real;
+ * }
+ *
+ * The complex numbers are packed into quadwords as follows:
+ *
+ * quadword complex
+ * array element array elements
+ * -----------------------------------------------------
+ * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 |
+ * -----------------------------------------------------
+ *
+ */
+
+
+static __inline void _fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size)
+{
+ int i, j, k;
+ int stage, offset;
+ int i_rev;
+ int n, n_2, n_4, n_8, n_16, n_3_16;
+ int w_stride, w_2stride, w_3stride, w_4stride;
+ int stride, stride_2, stride_4, stride_3_4;
+ vector float *W0, *W1, *W2, *W3;
+ vector float *re0, *re1, *re2, *re3;
+ vector float *im0, *im1, *im2, *im3;
+ vector float *in0, *in1, *in2, *in3, *in4, *in5, *in6, *in7;
+ vector float *out0, *out1, *out2, *out3;
+ vector float tmp0, tmp1;
+ vector float w0_re, w0_im, w1_re, w1_im;
+ vector float w0, w1, w2, w3;
+ vector float src_lo0, src_lo1, src_lo2, src_lo3;
+ vector float src_hi0, src_hi1, src_hi2, src_hi3;
+ vector float dst_lo0, dst_lo1, dst_lo2, dst_lo3;
+ vector float dst_hi0, dst_hi1, dst_hi2, dst_hi3;
+ vector float out_re_lo0, out_re_lo1, out_re_lo2, out_re_lo3;
+ vector float out_im_lo0, out_im_lo1, out_im_lo2, out_im_lo3;
+ vector float out_re_hi0, out_re_hi1, out_re_hi2, out_re_hi3;
+ vector float out_im_hi0, out_im_hi1, out_im_hi2, out_im_hi3;
+ vector float re_lo0, re_lo1, re_lo2, re_lo3;
+ vector float im_lo0, im_lo1, im_lo2, im_lo3;
+ vector float re_hi0, re_hi1, re_hi2, re_hi3;
+ vector float im_hi0, im_hi1, im_hi2, im_hi3;
+ vector float pq_lo0, pq_lo1, pq_lo2, pq_lo3;
+ vector float pq_hi0, pq_hi1, pq_hi2, pq_hi3;
+ vector float re[MAX_FFT_1D_SIZE/4], im[MAX_FFT_1D_SIZE/4]; /* real & imaginary working arrays */
+ vector float ppmm = (vector float) { 1.0f, 1.0f, -1.0f, -1.0f};
+ vector float pmmp = (vector float) { 1.0f, -1.0f, -1.0f, 1.0f};
+ vector unsigned char reverse;
+ vector unsigned char shuf_lo = (vector unsigned char) {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 16,17,18,19, 20,21,22,23};
+ vector unsigned char shuf_hi = (vector unsigned char) {
+ 8, 9,10,11, 12,13,14,15,
+ 24,25,26,27, 28,29,30,31};
+ vector unsigned char shuf_0202 = (vector unsigned char) {
+ 0, 1, 2, 3, 8, 9,10,11,
+ 0, 1, 2, 3, 8, 9,10,11};
+ vector unsigned char shuf_1313 = (vector unsigned char) {
+ 4, 5, 6, 7, 12,13,14,15,
+ 4, 5, 6, 7, 12,13,14,15};
+ vector unsigned char shuf_0303 = (vector unsigned char) {
+ 0, 1, 2, 3, 12,13,14,15,
+ 0, 1, 2, 3, 12,13,14,15};
+ vector unsigned char shuf_1212 = (vector unsigned char) {
+ 4, 5, 6, 7, 8, 9,10,11,
+ 4, 5, 6, 7, 8, 9,10,11};
+ vector unsigned char shuf_0415 = (vector unsigned char) {
+ 0, 1, 2, 3, 16,17,18,19,
+ 4, 5, 6, 7, 20,21,22,23};
+ vector unsigned char shuf_2637 = (vector unsigned char) {
+ 8, 9,10,11, 24,25,26,27,
+ 12,13,14,15,28,29,30,31};
+ vector unsigned char shuf_0246 = (vector unsigned char) {
+ 0, 1, 2, 3, 8, 9,10,11,
+ 16,17,18,19,24,25,26,27};
+ vector unsigned char shuf_1357 = (vector unsigned char) {
+ 4, 5, 6, 7, 12,13,14,15,
+ 20,21,22,23,28,29,30,31};
+
+ n = 1 << log2_size;
+ n_2 = n >> 1;
+ n_4 = n >> 2;
+ n_8 = n >> 3;
+ n_16 = n >> 4;
+
+ n_3_16 = n_8 + n_16;
+
+ /* Compute a byte reverse shuffle pattern to be used to produce
+ * an address bit swap.
+ */
+ reverse = spu_or(spu_slqwbyte(spu_splats((unsigned char)0x80), log2_size),
+ spu_rlmaskqwbyte(((vec_uchar16){15,14,13,12, 11,10,9,8,
+ 7, 6, 5, 4, 3, 2,1,0}),
+ log2_size-16));
+
+ /* Perform the first 3 stages of the FFT. These stages differs from
+ * other stages in that the inputs are unscrambled and the data is
+ * reformated into parallel arrays (ie, seperate real and imaginary
+ * arrays). The term "unscramble" means the bit address reverse the
+ * data array. In addition, the first three stages have simple twiddle
+ * weighting factors.
+ * stage 1: (1, 0)
+ * stage 2: (1, 0) and (0, -1)
+ * stage 3: (1, 0), (0.707, -0.707), (0, -1), (-0.707, -0.707)
+ *
+ * The arrays are processed as two halves, simultaneously. The lo (first
+ * half) and hi (second half). This is done because the scramble
+ * shares source value between each half of the output arrays.
+ */
+ i = 0;
+ i_rev = 0;
+
+ in0 = in;
+ in1 = in + n_8;
+ in2 = in + n_16;
+ in3 = in + n_3_16;
+
+ in4 = in + n_4;
+ in5 = in1 + n_4;
+ in6 = in2 + n_4;
+ in7 = in3 + n_4;
+
+ re0 = re;
+ re1 = re + n_8;
+ im0 = im;
+ im1 = im + n_8;
+
+ w0_re = (vector float) { 1.0f, INV_SQRT_2, 0.0f, -INV_SQRT_2};
+ w0_im = (vector float) { 0.0f, -INV_SQRT_2, -1.0f, -INV_SQRT_2};
+
+ do {
+ src_lo0 = in0[i_rev];
+ src_lo1 = in1[i_rev];
+ src_lo2 = in2[i_rev];
+ src_lo3 = in3[i_rev];
+
+ src_hi0 = in4[i_rev];
+ src_hi1 = in5[i_rev];
+ src_hi2 = in6[i_rev];
+ src_hi3 = in7[i_rev];
+
+ /* Perform scramble.
+ */
+ dst_lo0 = spu_shuffle(src_lo0, src_hi0, shuf_lo);
+ dst_hi0 = spu_shuffle(src_lo0, src_hi0, shuf_hi);
+ dst_lo1 = spu_shuffle(src_lo1, src_hi1, shuf_lo);
+ dst_hi1 = spu_shuffle(src_lo1, src_hi1, shuf_hi);
+ dst_lo2 = spu_shuffle(src_lo2, src_hi2, shuf_lo);
+ dst_hi2 = spu_shuffle(src_lo2, src_hi2, shuf_hi);
+ dst_lo3 = spu_shuffle(src_lo3, src_hi3, shuf_lo);
+ dst_hi3 = spu_shuffle(src_lo3, src_hi3, shuf_hi);
+
+ /* Perform the stage 1 butterfly. The multiplier constant, ppmm,
+ * is used to control the sign of the operands since a single
+ * quadword contains both of P and Q valule of the butterfly.
+ */
+ pq_lo0 = spu_madd(ppmm, dst_lo0, spu_rlqwbyte(dst_lo0, 8));
+ pq_hi0 = spu_madd(ppmm, dst_hi0, spu_rlqwbyte(dst_hi0, 8));
+ pq_lo1 = spu_madd(ppmm, dst_lo1, spu_rlqwbyte(dst_lo1, 8));
+ pq_hi1 = spu_madd(ppmm, dst_hi1, spu_rlqwbyte(dst_hi1, 8));
+ pq_lo2 = spu_madd(ppmm, dst_lo2, spu_rlqwbyte(dst_lo2, 8));
+ pq_hi2 = spu_madd(ppmm, dst_hi2, spu_rlqwbyte(dst_hi2, 8));
+ pq_lo3 = spu_madd(ppmm, dst_lo3, spu_rlqwbyte(dst_lo3, 8));
+ pq_hi3 = spu_madd(ppmm, dst_hi3, spu_rlqwbyte(dst_hi3, 8));
+
+ /* Perfrom the stage 2 butterfly. For this stage, the
+ * inputs pq are still interleaved (p.real, p.imag, q.real,
+ * q.imag), so we must first re-order the data into
+ * parallel arrays as well as perform the reorder
+ * associated with the twiddle W[n/4], which equals
+ * (0, -1).
+ *
+ * ie. (A, B) * (0, -1) => (B, -A)
+ */
+ re_lo0 = spu_madd(ppmm,
+ spu_shuffle(pq_lo1, pq_lo1, shuf_0303),
+ spu_shuffle(pq_lo0, pq_lo0, shuf_0202));
+ im_lo0 = spu_madd(pmmp,
+ spu_shuffle(pq_lo1, pq_lo1, shuf_1212),
+ spu_shuffle(pq_lo0, pq_lo0, shuf_1313));
+
+ re_lo1 = spu_madd(ppmm,
+ spu_shuffle(pq_lo3, pq_lo3, shuf_0303),
+ spu_shuffle(pq_lo2, pq_lo2, shuf_0202));
+ im_lo1 = spu_madd(pmmp,
+ spu_shuffle(pq_lo3, pq_lo3, shuf_1212),
+ spu_shuffle(pq_lo2, pq_lo2, shuf_1313));
+
+
+ re_hi0 = spu_madd(ppmm,
+ spu_shuffle(pq_hi1, pq_hi1, shuf_0303),
+ spu_shuffle(pq_hi0, pq_hi0, shuf_0202));
+ im_hi0 = spu_madd(pmmp,
+ spu_shuffle(pq_hi1, pq_hi1, shuf_1212),
+ spu_shuffle(pq_hi0, pq_hi0, shuf_1313));
+
+ re_hi1 = spu_madd(ppmm,
+ spu_shuffle(pq_hi3, pq_hi3, shuf_0303),
+ spu_shuffle(pq_hi2, pq_hi2, shuf_0202));
+ im_hi1 = spu_madd(pmmp,
+ spu_shuffle(pq_hi3, pq_hi3, shuf_1212),
+ spu_shuffle(pq_hi2, pq_hi2, shuf_1313));
+
+
+ /* Perform stage 3 butterfly.
+ */
+ FFT_1D_BUTTERFLY(re0[0], im0[0], re0[1], im0[1], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY(re1[0], im1[0], re1[1], im1[1], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+
+ re0 += 2;
+ re1 += 2;
+ im0 += 2;
+ im1 += 2;
+
+ i += 8;
+ i_rev = BIT_SWAP(i, reverse) / 2;
+ } while (i < n_2);
+
+ /* Process stages 4 to log2_size-2
+ */
+ for (stage=4, stride=4; stage<log2_size-1; stage++, stride += stride) {
+ w_stride = n_2 >> stage;
+ w_2stride = n >> stage;
+ w_3stride = w_stride + w_2stride;
+ w_4stride = w_2stride + w_2stride;
+
+ W0 = W;
+ W1 = W + w_stride;
+ W2 = W + w_2stride;
+ W3 = W + w_3stride;
+
+ stride_2 = stride >> 1;
+ stride_4 = stride >> 2;
+ stride_3_4 = stride_2 + stride_4;
+
+ re0 = re; im0 = im;
+ re1 = re + stride_2; im1 = im + stride_2;
+ re2 = re + stride_4; im2 = im + stride_4;
+ re3 = re + stride_3_4; im3 = im + stride_3_4;
+
+ for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) {
+ /* Compute the twiddle factors
+ */
+ w0 = W0[offset];
+ w1 = W1[offset];
+ w2 = W2[offset];
+ w3 = W3[offset];
+
+ tmp0 = spu_shuffle(w0, w2, shuf_0415);
+ tmp1 = spu_shuffle(w1, w3, shuf_0415);
+
+ w0_re = spu_shuffle(tmp0, tmp1, shuf_0415);
+ w0_im = spu_shuffle(tmp0, tmp1, shuf_2637);
+
+ j = i;
+ k = i + stride;
+ do {
+ re_lo0 = re0[j]; im_lo0 = im0[j];
+ re_lo1 = re1[j]; im_lo1 = im1[j];
+
+ re_hi0 = re2[j]; im_hi0 = im2[j];
+ re_hi1 = re3[j]; im_hi1 = im3[j];
+
+ re_lo2 = re0[k]; im_lo2 = im0[k];
+ re_lo3 = re1[k]; im_lo3 = im1[k];
+
+ re_hi2 = re2[k]; im_hi2 = im2[k];
+ re_hi3 = re3[k]; im_hi3 = im3[k];
+
+ FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+
+ FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im);
+
+ j += 2 * stride;
+ k += 2 * stride;
+ } while (j < n_4);
+ }
+ }
+
+ /* Process stage log2_size-1. This is identical to the stage processing above
+ * except for this stage the inner loop is only executed once so it is removed
+ * entirely.
+ */
+ w_stride = n_2 >> stage;
+ w_2stride = n >> stage;
+ w_3stride = w_stride + w_2stride;
+ w_4stride = w_2stride + w_2stride;
+
+ stride_2 = stride >> 1;
+ stride_4 = stride >> 2;
+
+ stride_3_4 = stride_2 + stride_4;
+
+ re0 = re; im0 = im;
+ re1 = re + stride_2; im1 = im + stride_2;
+ re2 = re + stride_4; im2 = im + stride_4;
+ re3 = re + stride_3_4; im3 = im + stride_3_4;
+
+ for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) {
+ /* Compute the twiddle factors
+ */
+ w0 = W[offset];
+ w1 = W[offset + w_stride];
+ w2 = W[offset + w_2stride];
+ w3 = W[offset + w_3stride];
+
+ tmp0 = spu_shuffle(w0, w2, shuf_0415);
+ tmp1 = spu_shuffle(w1, w3, shuf_0415);
+
+ w0_re = spu_shuffle(tmp0, tmp1, shuf_0415);
+ w0_im = spu_shuffle(tmp0, tmp1, shuf_2637);
+
+ j = i;
+ k = i + stride;
+
+ re_lo0 = re0[j]; im_lo0 = im0[j];
+ re_lo1 = re1[j]; im_lo1 = im1[j];
+
+ re_hi0 = re2[j]; im_hi0 = im2[j];
+ re_hi1 = re3[j]; im_hi1 = im3[j];
+
+ re_lo2 = re0[k]; im_lo2 = im0[k];
+ re_lo3 = re1[k]; im_lo3 = im1[k];
+
+ re_hi2 = re2[k]; im_hi2 = im2[k];
+ re_hi3 = re3[k]; im_hi3 = im3[k];
+
+ FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+
+ FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im);
+ }
+
+
+ /* Process the final stage (stage log2_size). For this stage,
+ * reformat the data from parallel arrays back into
+ * interleaved arrays,storing the result into <in>.
+ *
+ * This loop has been manually unrolled by 2 to improve
+ * dual issue rates and reduce stalls. This unrolling
+ * forces a minimum FFT size of 32.
+ */
+ re0 = re;
+ re1 = re + n_8;
+ re2 = re + n_16;
+ re3 = re + n_3_16;
+
+ im0 = im;
+ im1 = im + n_8;
+ im2 = im + n_16;
+ im3 = im + n_3_16;
+
+ out0 = out;
+ out1 = out + n_4;
+ out2 = out + n_8;
+ out3 = out1 + n_8;
+
+ i = n_16;
+
+ do {
+ /* Fetch the twiddle factors
+ */
+ w0 = W[0];
+ w1 = W[1];
+ w2 = W[2];
+ w3 = W[3];
+
+ W += 4;
+
+ w0_re = spu_shuffle(w0, w1, shuf_0246);
+ w0_im = spu_shuffle(w0, w1, shuf_1357);
+ w1_re = spu_shuffle(w2, w3, shuf_0246);
+ w1_im = spu_shuffle(w2, w3, shuf_1357);
+
+ /* Fetch the butterfly inputs, reals and imaginaries
+ */
+ re_lo0 = re0[0]; im_lo0 = im0[0];
+ re_lo1 = re1[0]; im_lo1 = im1[0];
+ re_lo2 = re0[1]; im_lo2 = im0[1];
+ re_lo3 = re1[1]; im_lo3 = im1[1];
+
+ re_hi0 = re2[0]; im_hi0 = im2[0];
+ re_hi1 = re3[0]; im_hi1 = im3[0];
+ re_hi2 = re2[1]; im_hi2 = im2[1];
+ re_hi3 = re3[1]; im_hi3 = im3[1];
+
+ re0 += 2; im0 += 2;
+ re1 += 2; im1 += 2;
+ re2 += 2; im2 += 2;
+ re3 += 2; im3 += 2;
+
+ /* Perform the butterflys
+ */
+ FFT_1D_BUTTERFLY (out_re_lo0, out_im_lo0, out_re_lo1, out_im_lo1, re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY (out_re_lo2, out_im_lo2, out_re_lo3, out_im_lo3, re_lo2, im_lo2, re_lo3, im_lo3, w1_re, w1_im);
+
+ FFT_1D_BUTTERFLY_HI(out_re_hi0, out_im_hi0, out_re_hi1, out_im_hi1, re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
+ FFT_1D_BUTTERFLY_HI(out_re_hi2, out_im_hi2, out_re_hi3, out_im_hi3, re_hi2, im_hi2, re_hi3, im_hi3, w1_re, w1_im);
+
+ /* Interleave the results and store them into the output buffers (ie,
+ * the original input buffers.
+ */
+ out0[0] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_0415);
+ out0[1] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_2637);
+ out0[2] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_0415);
+ out0[3] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_2637);
+
+ out1[0] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_0415);
+ out1[1] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_2637);
+ out1[2] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_0415);
+ out1[3] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_2637);
+
+ out2[0] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_0415);
+ out2[1] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_2637);
+ out2[2] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_0415);
+ out2[3] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_2637);
+
+ out3[0] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_0415);
+ out3[1] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_2637);
+ out3[2] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_0415);
+ out3[3] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_2637);
+
+ out0 += 4;
+ out1 += 4;
+ out2 += 4;
+ out3 += 4;
+
+ i -= 2;
+ } while (i);
+}
+
+#endif /* _FFT_1D_R2_H_ */
diff --git a/gcell/include/gcell/spu/gc_delay.h b/gcell/include/gcell/spu/gc_delay.h
new file mode 100644
index 0000000000..e995b3a946
--- /dev/null
+++ b/gcell/include/gcell/spu/gc_delay.h
@@ -0,0 +1,27 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007,2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_SPU_GC_DELAY_H
+#define INCLUDED_GCELL_SPU_GC_DELAY_H
+
+void gc_udelay(unsigned int usecs);
+void gc_cdelay(unsigned int cpu_cycles);
+
+#endif /* INCLUDED_GCELL_SPU_GC_DELAY_H */
diff --git a/gcell/include/gcell/spu/gc_jd_queue.h b/gcell/include/gcell/spu/gc_jd_queue.h
new file mode 100644
index 0000000000..ce1977c941
--- /dev/null
+++ b/gcell/include/gcell/spu/gc_jd_queue.h
@@ -0,0 +1,53 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2007 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GCELL_SPU_GC_JD_QUEUE_H
+#define INCLUDED_GCELL_SPU_GC_JD_QUEUE_H
+
+#include <gcell/gc_jd_queue_data.h>
+
+/*
+ * Declarations for SPU side of job queue interface
+ */
+
+__GC_BEGIN_DECLS
+
+/*!
+ * \brief Remove and return item at head of queue.
+ *
+ * \param[in] q is EA address of queue structure.
+ * \param[out] item_ea is EA address of item at head of queue.
+ * \param[in] jd_tag is the tag to use to get the LS copy of the item.
+ * \param[out] item is local store copy of item at head of queue.
+ * \returns false if the queue is empty, otherwise returns true
+ * and sets \p item_ea and DMA's job descriptor into \p item
+ *
+ * If return is false, we're holding a lock-line reservation that
+ * covers the queue.
+ */
+bool
+gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea,
+ int jd_tag, gc_job_desc_t *item);
+
+__GC_END_DECLS
+
+
+#endif /* INCLUDED_GCELL_SPU_GC_JD_QUEUE_H */
diff --git a/gcell/include/gcell/spu/gc_random.h b/gcell/include/gcell/spu/gc_random.h
new file mode 100644
index 0000000000..f51b187d4a
--- /dev/null
+++ b/gcell/include/gcell/spu/gc_random.h
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GCELL_SPU_GC_RANDOM_H
+#define INCLUDED_GCELL_SPU_GC_RANDOM_H
+
+/*!
+ * \brief Return a uniformly distributed value in the range [0, 1.0)
+ * (Linear congruential generator. YMMV. Caveat emptor.)
+ */
+
+float gc_uniform_deviate(void);
+void gc_set_seed(int seed);
+
+#endif /* INCLUDED_GCELL_SPU_GC_RANDOM_H */
diff --git a/gcell/include/gcell/spu/gc_spu_macs.h b/gcell/include/gcell/spu/gc_spu_macs.h
new file mode 100644
index 0000000000..0d7dc99781
--- /dev/null
+++ b/gcell/include/gcell/spu/gc_spu_macs.h
@@ -0,0 +1,380 @@
+/* -*- asm -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef INCLUDED_GC_SPU_MACS_H
+#define INCLUDED_GC_SPU_MACS_H
+
+/*
+ * This file contains a set of macros that are generally useful when
+ * coding in SPU assembler
+ *
+ * Note that the multi-instruction macros in here may overwrite
+ * registers 77, 78, and 79 without warning.
+ */
+
+/*
+ * defines for all registers
+ */
+#define r0 $0
+#define r1 $1
+#define r2 $2
+#define r3 $3
+#define r4 $4
+#define r5 $5
+#define r6 $6
+#define r7 $7
+#define r8 $8
+#define r9 $9
+#define r10 $10
+#define r11 $11
+#define r12 $12
+#define r13 $13
+#define r14 $14
+#define r15 $15
+#define r16 $16
+#define r17 $17
+#define r18 $18
+#define r19 $19
+#define r20 $20
+#define r21 $21
+#define r22 $22
+#define r23 $23
+#define r24 $24
+#define r25 $25
+#define r26 $26
+#define r27 $27
+#define r28 $28
+#define r29 $29
+#define r30 $30
+#define r31 $31
+#define r32 $32
+#define r33 $33
+#define r34 $34
+#define r35 $35
+#define r36 $36
+#define r37 $37
+#define r38 $38
+#define r39 $39
+#define r40 $40
+#define r41 $41
+#define r42 $42
+#define r43 $43
+#define r44 $44
+#define r45 $45
+#define r46 $46
+#define r47 $47
+#define r48 $48
+#define r49 $49
+#define r50 $50
+#define r51 $51
+#define r52 $52
+#define r53 $53
+#define r54 $54
+#define r55 $55
+#define r56 $56
+#define r57 $57
+#define r58 $58
+#define r59 $59
+#define r60 $60
+#define r61 $61
+#define r62 $62
+#define r63 $63
+#define r64 $64
+#define r65 $65
+#define r66 $66
+#define r67 $67
+#define r68 $68
+#define r69 $69
+#define r70 $70
+#define r71 $71
+#define r72 $72
+#define r73 $73
+#define r74 $74
+#define r75 $75
+#define r76 $76
+#define r77 $77
+#define r78 $78
+#define r79 $79
+#define r80 $80
+#define r81 $81
+#define r82 $82
+#define r83 $83
+#define r84 $84
+#define r85 $85
+#define r86 $86
+#define r87 $87
+#define r88 $88
+#define r89 $89
+#define r90 $90
+#define r91 $91
+#define r92 $92
+#define r93 $93
+#define r94 $94
+#define r95 $95
+#define r96 $96
+#define r97 $97
+#define r98 $98
+#define r99 $99
+#define r100 $100
+#define r101 $101
+#define r102 $102
+#define r103 $103
+#define r104 $104
+#define r105 $105
+#define r106 $106
+#define r107 $107
+#define r108 $108
+#define r109 $109
+#define r110 $110
+#define r111 $111
+#define r112 $112
+#define r113 $113
+#define r114 $114
+#define r115 $115
+#define r116 $116
+#define r117 $117
+#define r118 $118
+#define r119 $119
+#define r120 $120
+#define r121 $121
+#define r122 $122
+#define r123 $123
+#define r124 $124
+#define r125 $125
+#define r126 $126
+#define r127 $127
+
+
+#define lr r0 // link register
+#define sp r1 // stack pointer
+ // r2 is environment pointer for langs that need it (ALGOL)
+
+#define retval r3 // return values are passed in regs starting at r3
+
+#define arg1 r3 // args are passed in regs starting at r3
+#define arg2 r4
+#define arg3 r5
+#define arg4 r6
+#define arg5 r7
+#define arg6 r8
+#define arg7 r9
+#define arg8 r10
+#define arg9 r11
+#define arg10 r12
+
+// r3 - r74 are volatile (caller saves)
+// r74 - r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog)
+// r80 - r127 are non-volatile (callee-saves)
+
+// scratch registers reserved for use by the macros in this file.
+
+#define _gc_t0 r79
+#define _gc_t1 r78
+#define _gc_t2 r77
+
+/*
+ * ----------------------------------------------------------------
+ * pseudo ops
+ * ----------------------------------------------------------------
+ */
+#define PROC_ENTRY(name) \
+ .text; \
+ .p2align 4; \
+ .global name; \
+ .type name, @function; \
+name:
+
+/*
+ * ----------------------------------------------------------------
+ * aliases for common operations
+ * ----------------------------------------------------------------
+ */
+
+// Move register (even pipe, 2 cycles)
+#define MR(rt, ra) or rt, ra, ra;
+
+// Move register (odd pipe, 4 cycles)
+#define LMR(rt, ra) rotqbyi rt, ra, 0;
+
+// return
+#define RETURN() bi lr;
+
+// hint for a return
+#define HINT_RETURN(ret_label) hbr ret_label, lr;
+
+// return if zero
+#define BRZ_RETURN(rt) biz rt, lr;
+
+// return if not zero
+#define BRNZ_RETURN(rt) binz rt, lr;
+
+// return if halfword zero
+#define BRHZ_RETURN(rt) bihz rt, lr;
+
+// return if halfword not zero
+#define BRHNZ_RETURN(rt) bihnz rt, lr;
+
+
+/*
+ * ----------------------------------------------------------------
+ * modulo like things for constant moduli that are powers of 2
+ * ----------------------------------------------------------------
+ */
+
+// rt = ra & (pow2 - 1)
+#define MODULO(rt, ra, pow2) \
+ andi rt, ra, (pow2)-1;
+
+// rt = pow2 - (ra & (pow2 - 1))
+#define MODULO_NEG(rt, ra, pow2) \
+ andi rt, ra, (pow2)-1; \
+ sfi rt, rt, (pow2);
+
+// rt = ra & -(pow2)
+#define ROUND_DOWN(rt, ra, pow2) \
+ andi rt, ra, -(pow2);
+
+// rt = (ra + (pow2 - 1)) & -(pow2)
+#define ROUND_UP(rt, ra, pow2) \
+ ai rt, ra, (pow2)-1; \
+ andi rt, rt, -(pow2);
+
+/*
+ * ----------------------------------------------------------------
+ * Splat - replicate a particular slot into all slots
+ * Altivec analogs...
+ * ----------------------------------------------------------------
+ */
+
+// replicate byte from slot s [0,15]
+#define VSPLTB(rt, ra, s) \
+ ilh _gc_t0, (s)*0x0101; \
+ shufb rt, ra, ra, _gc_t0;
+
+// replicate halfword from slot s [0,7]
+#define VSPLTH(rt, ra, s) \
+ ilh _gc_t0, 2*(s)*0x0101 + 0x0001; \
+ shufb rt, ra, ra, _gc_t0;
+
+// replicate word from slot s [0,3]
+#define VSPLTW(rt, ra, s) \
+ iluh _gc_t0, 4*(s)*0x0101 + 0x0001; \
+ iohl _gc_t0, 4*(s)*0x0101 + 0x0203; \
+ shufb rt, ra, ra, _gc_t0;
+
+// replicate double from slot s [0,1]
+#define VSPLTD(rt, ra, s) \
+ /* sp is always 16-byte aligned */ \
+ cdd _gc_t0, 8(sp); /* 0x10111213 14151617 00010203 04050607 */ \
+ rotqbyi rt, ra, ra, (s) << 3; /* rotate double into preferred slot */ \
+ shufb rt, rt, rt, _gc_t0;
+
+/*
+ * ----------------------------------------------------------------
+ * lots of min/max variations...
+ *
+ * On a slot by slot basis, compute the min or max
+ *
+ * U - unsigned, else signed
+ * B,H,{} - byte, halfword, word
+ * F float
+ * ----------------------------------------------------------------
+ */
+
+#define MIN_SELB(rt, ra, rb, rc) selb rt, ra, rb, rc;
+#define MAX_SELB(rt, ra, rb, rc) selb rt, rb, ra, rc;
+
+ // words
+
+#define MIN(rt, ra, rb) \
+ cgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define MAX(rt, ra, rb) \
+ cgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMIN(rt, ra, rb) \
+ clgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define UMAX(rt, ra, rb) \
+ clgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+ // bytes
+
+#define MINB(rt, ra, rb) \
+ cgtb _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define MAXB(rt, ra, rb) \
+ cgtb _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMINB(rt, ra, rb) \
+ clgtb _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define UMAXB(rt, ra, rb) \
+ clgtb _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+ // halfwords
+
+#define MINH(rt, ra, rb) \
+ cgth _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define MAXH(rt, ra, rb) \
+ cgth _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMINH(rt, ra, rb) \
+ clgth _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define UMAXH(rt, ra, rb) \
+ clgth _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+ // floats
+
+#define FMIN(rt, ra, rb) \
+ fcgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define FMAX(rt, ra, rb) \
+ fcgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+// Ignoring the sign, select the values with the minimum magnitude
+#define FMINMAG(rt, ra, rb) \
+ fcmgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+// Ignoring the sign, select the values with the maximum magnitude
+#define FMAXMAG(rt, ra, rb) \
+ fcmgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+
+#endif /* INCLUDED_GC_SPU_MACS_H */
diff --git a/gcell/include/gcell/spu/libfft.h b/gcell/include/gcell/spu/libfft.h
new file mode 100644
index 0000000000..dd387be0c2
--- /dev/null
+++ b/gcell/include/gcell/spu/libfft.h
@@ -0,0 +1,113 @@
+/* -------------------------------------------------------------- */
+/* (C)Copyright 2008 Free Software Foundation, Inc. */
+/* (C)Copyright 2001,2007, */
+/* International Business Machines Corporation, */
+/* Sony Computer Entertainment, Incorporated, */
+/* Toshiba Corporation, */
+/* */
+/* All Rights Reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the */
+/* following conditions are met: */
+/* */
+/* - Redistributions of source code must retain the above copyright*/
+/* notice, this list of conditions and the following disclaimer. */
+/* */
+/* - Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* - Neither the name of IBM Corporation nor the names of its */
+/* contributors may be used to endorse or promote products */
+/* derived from this software without specific prior written */
+/* permission. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
+/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
+/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
+/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
+/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
+/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
+/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
+/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
+/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+/* -------------------------------------------------------------- */
+/* PROLOG END TAG zYx */
+
+#ifndef INCLUDED_LIBFFT_H
+#define INCLUDED_LIBFFT_H
+
+// must be defined before inclusion of fft_1d_r2.h
+#define MAX_FFT_1D_SIZE 4096
+
+/* fft_1d_r2
+ * ---------
+ * Performs a single precision, complex Fast Fourier Transform using
+ * the DFT (Discrete Fourier Transform) with radix-2 decimation in time.
+ * The input <in> is an array of complex numbers of length (1<<log2_size)
+ * entries. The result is returned in the array of complex numbers specified
+ * by <out>. Note: This routine can support an in-place transformation
+ * by specifying <in> and <out> to be the same array.
+ *
+ * This implementation utilizes the Cooley-Tukey algorithm consisting
+ * of <log2_size> stages. The basic operation is the butterfly.
+ *
+ * p --------------------------> P = p + q*Wi
+ * \ /
+ * \ /
+ * \ /
+ * \/
+ * /\
+ * / \
+ * / \
+ * ____ / \
+ * q --| Wi |-----------------> Q = p - q*Wi
+ * ----
+ *
+ * This routine also requires pre-computed twiddle values, W. W is an
+ * array of single precision complex numbers of length 1<<(log2_size-2)
+ * and is computed as follows:
+ *
+ * for (i=0; i<n/4; i++)
+ * W[i].real = cos(i * 2*PI/n);
+ * W[i].imag = -sin(i * 2*PI/n);
+ * }
+ *
+ * This array actually only contains the first half of the twiddle
+ * factors. Due for symmetry, the second half of the twiddle factors
+ * are implied and equal:
+ *
+ * for (i=0; i<n/4; i++)
+ * W[i+n/4].real = W[i].imag = sin(i * 2*PI/n);
+ * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n);
+ * }
+ *
+ * Further symmetry allows one to generate the twiddle factor table
+ * using half the number of trig computations as follows:
+ *
+ * W[0].real = 1.0;
+ * W[0].imag = 0.0;
+ * for (i=1; i<n/4; i++)
+ * W[i].real = cos(i * 2*PI/n);
+ * W[n/4 - i].imag = -W[i].real;
+ * }
+ *
+ * The complex numbers are packed into quadwords as follows:
+ *
+ * quadword complex
+ * array element array elements
+ * -----------------------------------------------------
+ * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 |
+ * -----------------------------------------------------
+ *
+ */
+
+void fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size);
+
+#endif /* INCLUDED_LIBFFT_H */