diff options
Diffstat (limited to 'gcell/include')
27 files changed, 2738 insertions, 0 deletions
diff --git a/gcell/include/Makefile.am b/gcell/include/Makefile.am new file mode 100644 index 0000000000..c96c6f0a34 --- /dev/null +++ b/gcell/include/Makefile.am @@ -0,0 +1,24 @@ +# +# Copyright 2008 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +include $(top_srcdir)/Makefile.common + +SUBDIRS = gcell + diff --git a/gcell/include/gcell/Makefile.am b/gcell/include/gcell/Makefile.am new file mode 100644 index 0000000000..03255e516b --- /dev/null +++ b/gcell/include/gcell/Makefile.am @@ -0,0 +1,42 @@ +# +# Copyright 2007,2008 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +include $(top_srcdir)/Makefile.common + +SUBDIRS = spu + +gcellinclude_HEADERS = \ + compiler.h \ + gc_aligned_alloc.h \ + gc_atomic.h \ + gc_cdefs.h \ + gc_declare_proc.h \ + gc_job_manager.h \ + gc_jd_queue_data.h \ + gc_jd_queue.h \ + gc_jd_stack.h \ + gc_job_desc.h \ + gc_job_desc_private.h \ + gc_logging.h \ + gc_mbox.h \ + gc_spu_args.h \ + gc_types.h \ + gcp_fft_1d_r2.h \ + memory_barrier.h diff --git a/gcell/include/gcell/compiler.h b/gcell/include/gcell/compiler.h new file mode 100644 index 0000000000..d1adcd1290 --- /dev/null +++ b/gcell/include/gcell/compiler.h @@ -0,0 +1,45 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_COMPILER_H +#define INCLUDED_GCELL_COMPILER_H + +/*! + * \brief Compiler specific hackery. These are for GCC. 
+ */ + +#define _AL8 __attribute__((aligned (8))) +#define _AL16 __attribute__((aligned (16))) +#define _AL128 __attribute__((aligned (128))) + +#define _UNUSED __attribute__((unused)) + +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) +#endif + + +#endif /* INCLUDED_GCELL_COMPILER_H */ diff --git a/gcell/include/gcell/gc_aligned_alloc.h b/gcell/include/gcell/gc_aligned_alloc.h new file mode 100644 index 0000000000..bdc21c278a --- /dev/null +++ b/gcell/include/gcell/gc_aligned_alloc.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GC_ALIGNED_ALLOC_H +#define INCLUDED_GC_ALIGNED_ALLOC_H + +#include <boost/shared_ptr.hpp> + +/*! + * \brief Return pointer to chunk of storage of size size bytes. + * The allocation will be aligned to an \p alignment boundary. + * + * \param size is the number of bytes to allocate + * \param alignment is the minimum storage alignment in bytes; must be a power of 2. + * + * Throws if can't allocate memory. The storage should be freed + * with "free" when done. 
The memory is initialized to zero. + */ +void * +gc_aligned_alloc(size_t size, size_t alignment = 128); + +/*! + * \brief Return boost::shared_ptr to chunk of storage of size size bytes. + * The allocation will be aligned to an \p alignment boundary. + * + * \param size is the number of bytes to allocate + * \param alignment is the minimum storage alignment in bytes; must be a power of 2. + * + * Throws if can't allocate memory. The storage should be freed + * with "free" when done. The memory is initialized to zero. + */ +boost::shared_ptr<void> +gc_aligned_alloc_sptr(size_t size, size_t alignment = 128); + +#endif /* INCLUDED_GC_ALIGNED_ALLOC_H */ diff --git a/gcell/include/gcell/gc_atomic.h b/gcell/include/gcell/gc_atomic.h new file mode 100644 index 0000000000..51d38af482 --- /dev/null +++ b/gcell/include/gcell/gc_atomic.h @@ -0,0 +1,29 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#ifndef INCLUDED_GCELL_GC_ATOMIC_H +#define INCLUDED_GCELL_GC_ATOMIC_H + +#include <stdint.h> + +typedef uint32_t gc_atomic_t; + + +#endif /* INCLUDED_GCELL_GC_ATOMIC_H */ diff --git a/gcell/include/gcell/gc_cdefs.h b/gcell/include/gcell/gc_cdefs.h new file mode 100644 index 0000000000..0c5fc4ad88 --- /dev/null +++ b/gcell/include/gcell/gc_cdefs.h @@ -0,0 +1,34 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_GC_CDEFS_H +#define INCLUDED_GCELL_GC_CDEFS_H + +/* C++ needs to know that types and declarations are C, not C++. */ +#ifdef __cplusplus +# define __GC_BEGIN_DECLS extern "C" { +# define __GC_END_DECLS } +#else +# define __GC_BEGIN_DECLS +# define __GC_END_DECLS +#endif + +#endif /* INCLUDED_GCELL_GC_CDEFS_H */ diff --git a/gcell/include/gcell/gc_declare_proc.h b/gcell/include/gcell/gc_declare_proc.h new file mode 100644 index 0000000000..ba77e0fb05 --- /dev/null +++ b/gcell/include/gcell/gc_declare_proc.h @@ -0,0 +1,64 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GCELL_GC_DECLARE_PROC_H +#define INCLUDED_GCELL_GC_DECLARE_PROC_H + +#include <stdint.h> +#include <gcell/gc_job_desc.h> + +/* + * This is C, not C++ code... + * + * ...and is used by both PPE and SPE code + */ +__GC_BEGIN_DECLS + +#define GC_PROC_DEF_SECTION ".gcell.proc_def" + +typedef struct gc_proc_def { +#if defined(__SPU__) + gc_spu_proc_t proc; +#else + uint32_t proc; +#endif + char name[28]; +} _AL16 gc_proc_def_t; + + +#if defined(__SPU__) +/*! + * \brief Tell gcell about a SPU procedure + * + * \param _proc_ pointer to function (gc_spu_proc_t) + * \param _name_ the name of the procedure ("quoted string") + * + * This macro registers the given procedure with the gcell runtime. 
+ * From the PPE, use gc_job_manager::lookup_proc to map \p _name_ to a gc_proc_id_t + */ +#define GC_DECLARE_PROC(_proc_, _name_) \ +static struct gc_proc_def \ + _GCPD_ ## _proc_ __attribute__((section(GC_PROC_DEF_SECTION), used)) = \ + { _proc_, _name_ } +#endif + +__GC_END_DECLS + +#endif /* INCLUDED_GCELL_GC_DECLARE_PROC_H */ diff --git a/gcell/include/gcell/gc_jd_queue.h b/gcell/include/gcell/gc_jd_queue.h new file mode 100644 index 0000000000..50777a394c --- /dev/null +++ b/gcell/include/gcell/gc_jd_queue.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_GC_JD_QUEUE_H +#define INCLUDED_GCELL_GC_JD_QUEUE_H + +#include <gcell/gc_jd_queue_data.h> + +__GC_BEGIN_DECLS + +/*! + * \brief Initialize the queue to empty. + */ +void +gc_jd_queue_init(gc_jd_queue_t *q); + + +/*! + * \brief Add \p item to the tail of \p q. + */ +void +gc_jd_queue_enqueue(gc_jd_queue_t *q, gc_job_desc_t *item); + + +/*! 
+ * \brief Remove and return item at head of queue, or 0 if queue is empty + */ +gc_job_desc_t * +gc_jd_queue_dequeue(gc_jd_queue_t *q); + +__GC_END_DECLS + + +#endif /* INCLUDED_GCELL_GC_JD_QUEUE_H */ diff --git a/gcell/include/gcell/gc_jd_queue_data.h b/gcell/include/gcell/gc_jd_queue_data.h new file mode 100644 index 0000000000..819b3712ee --- /dev/null +++ b/gcell/include/gcell/gc_jd_queue_data.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007,2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_GC_JD_QUEUE_DATA_H +#define INCLUDED_GCELL_GC_JD_QUEUE_DATA_H + +#include <gcell/gc_types.h> +#include <gcell/gc_job_desc.h> + +__GC_BEGIN_DECLS + +/*! + * \brief (Lock free someday...) queue for job descriptors + * + * This is the main data structure shared between PPEs and SPEs. + * It is used to enqueue work for SPEs. SPEs or PPEs may enqueue + * work. SPE's dequeue from here. + * + * FIXME make it lock free ;) For now, use a spin lock. 
+ * + * (Fills a single cache line) + */ +typedef struct gc_jd_queue +{ + gc_eaddr_t head _AL16; + gc_eaddr_t tail _AL16; + uint32_t mutex _AL16; // libsync mutex (spin lock) +} _AL128 gc_jd_queue_t; + +__GC_END_DECLS + +#endif /* INCLUDED_GCELL_GC_JD_QUEUE_DATA_H */ + + diff --git a/gcell/include/gcell/gc_jd_stack.h b/gcell/include/gcell/gc_jd_stack.h new file mode 100644 index 0000000000..9eab4e4020 --- /dev/null +++ b/gcell/include/gcell/gc_jd_stack.h @@ -0,0 +1,70 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_GC_JD_STACK_H +#define INCLUDED_GCELL_GC_JD_STACK_H + +#include <gcell/gc_types.h> +#include <gcell/gc_job_desc.h> + +__GC_BEGIN_DECLS + +/*! + * \brief Lock free stack for job descriptors (used for free list) + * + * This is aligned to a cache line, and fills the cache line, + * to avoid inadvertently losing reservations created with + * the load-and-reserve instructions. + */ + +typedef struct gc_jd_stack +{ + gc_eaddr_t top; + + // pad out to a full cache line + uint8_t _pad[128 - sizeof(gc_eaddr_t)]; +} _AL128 gc_jd_stack_t; + + +/*! + * \brief Initialize the stack to empty. + */ +void +gc_jd_stack_init(gc_jd_stack_t *stack); + + +/*! 
+ * \brief Add \p item to the top of \p stack. + */ +void +gc_jd_stack_push(gc_jd_stack_t *stack, gc_job_desc_t *item); + + +/*! + * \brief pop and return top item on stack, or 0 if stack is empty + */ +gc_job_desc_t * +gc_jd_stack_pop(gc_jd_stack_t *stack); + +__GC_END_DECLS + + +#endif /* INCLUDED_GCELL_GC_JD_STACK_H */ diff --git a/gcell/include/gcell/gc_job_desc.h b/gcell/include/gcell/gc_job_desc.h new file mode 100644 index 0000000000..5ff99e2e28 --- /dev/null +++ b/gcell/include/gcell/gc_job_desc.h @@ -0,0 +1,213 @@ +/* -*- c -*- */ +/* + * Copyright 2007,2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_GC_JOB_DESC_H +#define INCLUDED_GCELL_GC_JOB_DESC_H + +/*! + * This file contains the structures that are used to describe how to + * call "jobs" that execute on the SPEs. A "job" is a task, or piece of + * work that you want to run on an SPE. + * + * There is code running in the SPE that knows how to interpret + * these job descriptions. Thus, in most cases, the overhead + * of invoking these is very low. + * + * The whole "job idea" is SPE centric. At first pass, + * the PPE will be constructing jobs and enqueuing them. 
+ * However, there is nothing in the implementation that + * prohibits SPEs from creating their own jobs in the + * future. Also, there is nothing prohibiting SPE-to-SPE + * DMA's. + * + * SPE's dequeue and "pull" jobs to themselves, do the work, then + * notify the entity that submitted the job. + */ + +#include <gcell/gc_types.h> +#include <gcell/gc_job_desc_private.h> + +/* + * This is C, not C++ code... + * + * ...and is used by both PPE and SPE code + */ +__GC_BEGIN_DECLS + + +//! opaque ID that specifies which code to invoke on the SPE +typedef uint32_t gc_proc_id_t; +#define GCP_UNKNOWN_PROC ((gc_proc_id_t) -1) + + +//! final job status +typedef enum { + JS_OK, + JS_SHUTTING_DOWN, // job manager is shutting down + JS_TOO_MANY_CLIENTS, // too many client threads + JS_UNKNOWN_PROC, // didn't recognize the procedure ID + JS_BAD_DIRECTION, // EA arg has invalid direction + JS_BAD_EAH, // not all EA args have the same high 32 address bits + JS_BAD_N_DIRECT, // too many direct args + JS_BAD_N_EA, // too many EA args + JS_ARGS_TOO_LONG, // total length of EA args exceeds limit + JS_BAD_JUJU, // misc problem: you're having a bad day + JS_BAD_JOB_DESC, // gc_job_desc was not allocated using mgr->alloc_job_desc() + +} gc_job_status_t; + +#define MAX_ARGS_DIRECT 8 // maximum number of args passed using "direct" method +#define MAX_ARGS_EA 8 // maximum number of args passed via EA memory (dma) + +/* + * We support two classes of arguments, + * "direct", which are contained in the gc_job_desc_args and + * "EA", which are copied in/out according to info in gc_job_desc_args + */ + +/*! + * \brief Tag type of "direct" argument + */ +typedef enum { + GCT_S32, + GCT_U32, + GCT_S64, + GCT_U64, + GCT_FLOAT, + GCT_DOUBLE, + GCT_FLT_CMPLX, + GCT_DBL_CMPLX, + GCT_EADDR, + +} gc_tag_t; + + +/*! 
+ * \brief union for passing "direct" argument + */ +typedef union gc_arg_union +{ + int32_t s32; + uint32_t u32; + int64_t s64; + uint64_t u64; + float f; + double d; + //float complex cf; // 64-bits (C99) + //double complex cd; // 128-bits (C99) + gc_eaddr_t ea; // 64-bits +} _AL8 gc_arg_union_t; + + +/*! + * \brief "direct" input or output arguments + */ +typedef struct gc_job_direct_args +{ + uint32_t nargs; // # of "direct" args + gc_tag_t tag[MAX_ARGS_DIRECT] _AL16; // type of direct arg[i] + gc_arg_union_t arg[MAX_ARGS_DIRECT] _AL16; // direct argument values + +} _AL16 gc_job_direct_args_t; + + +// specifies direction for args passed in EA memory + +#define GCJD_DMA_GET 0x01 // in to SPE +#define GCJD_DMA_PUT 0x02 // out from SPE + +/*! + * \brief Description of args passed in EA memory. + * These are DMA'd between EA and LS as specified. + */ +typedef struct gc_job_ea_arg { + //! EA address of buffer + gc_eaddr_t ea_addr; + + //! GCJD_DMA_* get arg or put arg + uint32_t direction; + + //! number of bytes to get + uint32_t get_size; + + //! number of bytes to put + uint32_t put_size; + +#if defined(__SPU__) + //! local store address (filled in by SPU runtime) + void *ls_addr; + uint32_t _pad[2]; +#else + uint32_t _pad[3]; +#endif + +} _AL16 gc_job_ea_arg_t; + + +typedef struct gc_job_ea_args { + uint32_t nargs; + gc_job_ea_arg_t arg[MAX_ARGS_EA]; + +} _AL16 gc_job_ea_args_t; + + +/*! + * \brief "job description" that is DMA'd to/from the SPE. + */ +typedef struct gc_job_desc +{ + gc_job_desc_private_t sys; // internals + gc_job_status_t status; // what happened (output) + gc_proc_id_t proc_id; // specifies which procedure to run + gc_job_direct_args_t input; // direct args to SPE + gc_job_direct_args_t output; // direct args from SPE + gc_job_ea_args_t eaa; // args passed via EA memory + +} _AL128 gc_job_desc_t; + + +/*! 
+ * type of procedure invoked on spu + */ +typedef void (*gc_spu_proc_t)(const gc_job_direct_args_t *input, + gc_job_direct_args_t *output, + const gc_job_ea_args_t *eaa); + +#if !defined(__SPU__) + +static inline gc_job_desc_t * +ea_to_jdp(gc_eaddr_t ea) +{ + return (gc_job_desc_t *) ea_to_ptr(ea); +} + +static inline gc_eaddr_t +jdp_to_ea(gc_job_desc_t *item) +{ + return ptr_to_ea(item); +} + +#endif + + +__GC_END_DECLS + +#endif /* INCLUDED_GCELL_GC_JOB_DESC_H */ diff --git a/gcell/include/gcell/gc_job_desc_private.h b/gcell/include/gcell/gc_job_desc_private.h new file mode 100644 index 0000000000..fa831a88ed --- /dev/null +++ b/gcell/include/gcell/gc_job_desc_private.h @@ -0,0 +1,39 @@ +/* -*- c -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_GC_JOB_DESC_PRIVATE_H +#define INCLUDED_GCELL_GC_JOB_DESC_PRIVATE_H + +// #include <libsync.h> + +/*! + * \brief Implementation details we'd like to hide from the user. 
+ */ +typedef struct gc_job_desc_private +{ + gc_eaddr_t next; // used to implement job queue and free list + uint16_t job_id; + uint16_t client_id; + uint32_t direction_union; // union of all gc_job_ea_arg.direction fields +} gc_job_desc_private_t; + +#endif /* INCLUDED_GCELL_GC_JOB_DESC_PRIVATE_H */ + diff --git a/gcell/include/gcell/gc_job_manager.h b/gcell/include/gcell/gc_job_manager.h new file mode 100644 index 0000000000..67abce7ed0 --- /dev/null +++ b/gcell/include/gcell/gc_job_manager.h @@ -0,0 +1,286 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007,2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GC_JOB_MANAGER_H +#define INCLUDED_GC_JOB_MANAGER_H + +#include <boost/utility.hpp> +#include <boost/shared_ptr.hpp> +#include <vector> +#include <string> +#include <stdexcept> +#include <libspe2.h> +#include "gc_job_desc.h" + +class gc_job_manager; +typedef boost::shared_ptr<gc_job_manager> gc_job_manager_sptr; +typedef boost::shared_ptr<spe_program_handle_t> spe_program_handle_sptr; +typedef boost::shared_ptr<gc_job_desc> gc_job_desc_sptr; + +/*! + * \brief Return a boost::shared_ptr to an spe_program_handle_t + * + * \param filename is the name of the SPE ELF executable to open. 
+ * + * Calls spe_image_open to open the file. If successful returns a + * boost::shared_ptr that will call spe_image_close when it's time to + * free the object. + * + * Returns the equivalent of the NULL pointer if the file cannot be + * opened, or if it's not an SPE ELF object file. + * + * \sa gc_program_handle_from_address + */ +spe_program_handle_sptr +gc_program_handle_from_filename(const std::string &filename); + +/*! + * \brief Return a boost::shared_ptr to an spe_program_handle_t + * + * \param handle is a non-zero pointer to an embedded SPE image. + * + * If successful returns a boost::shared_ptr that does nothing when + * it's time to free the object. + * + * \sa gc_program_handle_from_filename + */ +spe_program_handle_sptr +gc_program_handle_from_address(spe_program_handle_t *handle); + +/*! + * \brief map gc_job_status_t into a string + */ +const std::string +gc_job_status_string(gc_job_status_t status); + +/*! + * \brief Options that configure the job_manager. + * The default values are reasonable. + */ +struct gc_jm_options { + unsigned int max_jobs; // max # of job descriptors in system + unsigned int max_client_threads; // max # of client threads of job manager + unsigned int nspes; // how many SPEs shall we use? 0 -> all of them + bool gang_schedule; // shall we gang schedule? + bool use_affinity; // shall we try for affinity (FIXME not implemented) + bool enable_logging; // shall we log SPE events? 
+ uint32_t log2_nlog_entries; // log2 of number of log entries (default is 12 == 4k) + spe_program_handle_sptr program_handle; // program to load into SPEs + + gc_jm_options() : + max_jobs(0), max_client_threads(0), nspes(0), + gang_schedule(false), use_affinity(false), + enable_logging(false), log2_nlog_entries(12) + { + } + + gc_jm_options(spe_program_handle_sptr program_handle_, + unsigned int nspes_ = 0) : + max_jobs(0), max_client_threads(0), nspes(nspes_), + gang_schedule(false), use_affinity(false), + enable_logging(false), log2_nlog_entries(12), + program_handle(program_handle_) + { + } +}; + +enum gc_wait_mode { + GC_WAIT_ANY, + GC_WAIT_ALL, +}; + +/* + * exception classes + */ +class gc_exception : public std::runtime_error +{ +public: + gc_exception(const std::string &msg); +}; + +class gc_unknown_proc : public gc_exception +{ +public: + gc_unknown_proc(const std::string &msg); +}; + +class gc_bad_alloc : public gc_exception +{ +public: + gc_bad_alloc(const std::string &msg); +}; + +class gc_bad_align : public gc_exception +{ +public: + gc_bad_align(const std::string &msg); +}; + +class gc_bad_submit : public gc_exception +{ +public: + gc_bad_submit(const std::string &name, gc_job_status_t status); +}; + +/* + * \brief Create an instance of the job manager + */ +gc_job_manager_sptr +gc_make_job_manager(const gc_jm_options *options = 0); + + +/*! + * \brief Abstract class that manages SPE jobs. + * + * There is typically a single instance derived from this class. + * It is safe to call its methods from any thread. + */ +class gc_job_manager : boost::noncopyable +{ +public: + gc_job_manager(const gc_jm_options *options = 0); + + virtual ~gc_job_manager(); + + /*! + * Stop accepting new jobs. Wait for existing jobs to complete. + * Return all managed SPE's to the system. + */ + virtual bool shutdown() = 0; + + /*! + * \brief Return number of SPE's currently allocated to job manager. + */ + virtual int nspes() const = 0; + + /*! 
+ * \brief Return a pointer to a properly aligned job descriptor, + * or throws gc_bad_alloc if there are none available. + */ + virtual gc_job_desc *alloc_job_desc() = 0; + + /*! + * \brief Free a job descriptor previously allocated with alloc_job_desc() + * + * \param[in] jd pointer to job descriptor to free. + */ + virtual void free_job_desc(gc_job_desc *jd) = 0; + + /*! + * \brief Submit a job for asynchronous processing on an SPE. + * + * \param[in] jd pointer to job description + * + * The caller must not read or write the job description + * or any of the memory associated with any indirect arguments + * until after calling wait_job. + * + * \returns true iff the job was successfully enqueued. + * If submit_job returns false, check jd->status for additional info. + */ + virtual bool submit_job(gc_job_desc *jd) = 0; + + /*! + * \brief Wait for job to complete. + * + * A thread may only wait for jobs which it submitted. + * + * \returns true if successful, else false. + */ + virtual bool + wait_job(gc_job_desc *jd) = 0; + + /*! + * \brief wait for 1 or more jobs to complete. + * + * \param[in] njobs is the length of arrays \p jd and \p done. + * \param[in] jd are the jobs that are to be waited for. + * \param[out] done indicates whether the corresponding job is complete. + * \param[in] mode indicates whether to wait for ALL or ANY of the jobs + * in \p jd to complete. + * + * A thread may only wait for jobs which it submitted. + * + * \returns number of jobs completed, or -1 if error. + * The caller must examine the status field of each job to confirm + * successful completion of the job. + */ + virtual int + wait_jobs(unsigned int njobs, + gc_job_desc *jd[], bool done[], gc_wait_mode mode) = 0; + + /*! + * Return the maximum number of bytes of EA arguments that may be + * copied to or from the SPE in a single job. The limit applies + * independently to the "get" and "put" args. 
+ * \sa gc_job_desc_t, gc_job_ea_args_t + */ + virtual int ea_args_maxsize() = 0; + + /*! + * Return gc_proc_id_t associated with spu procedure \p proc_name if one + * exists, otherwise throws gc_unknown_proc. + */ + virtual gc_proc_id_t lookup_proc(const std::string &proc_name) = 0; + + /*! + * Return a vector of all known spu procedure names. + */ + virtual std::vector<std::string> proc_names() = 0; + + virtual void set_debug(int debug); + virtual int debug(); + + /* ----- static methods ----- */ + + /*! + * \brief Set the singleton gc_job_manager instance. + * \param mgr is the job manager instance. + * + * The singleton is weakly held, thus the caller must maintain + * a reference to the mgr for the duration. (If we held the + * manager strongly, the destructor would never be called, and the + * resources (SPEs) would not be returned.) Bottom line: the + * caller is responsible for life-time management. + */ + static void set_singleton(gc_job_manager_sptr mgr); + + /*! + * \brief Retrieve the singleton gc_job_manager instance. + * + * Returns the singleton gc_job_manager instance or raises + * boost::bad_weak_ptr if the singleton is empty. + */ + static gc_job_manager_sptr singleton(); + + /*! + * \brief return a boost::shared_ptr to a job descriptor. + */ + static gc_job_desc_sptr make_jd_sptr(gc_job_manager_sptr mgr, gc_job_desc *jd); + + /*! + * \brief allocate a job descriptor and return a boost::shared_ptr to it. + */ + static gc_job_desc_sptr alloc_job_desc(gc_job_manager_sptr mgr); +}; + + +#endif /* INCLUDED_GC_JOB_MANAGER_H */ diff --git a/gcell/include/gcell/gc_logging.h b/gcell/include/gcell/gc_logging.h new file mode 100644 index 0000000000..9cc6fa77bd --- /dev/null +++ b/gcell/include/gcell/gc_logging.h @@ -0,0 +1,166 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GCELL_GC_LOGGING_H +#define INCLUDED_GCELL_GC_LOGGING_H + +#include <gcell/gc_types.h> +#include <string.h> + +__GC_BEGIN_DECLS + +typedef struct gc_log { + gc_eaddr_t base; // gc_log_entry_t * (16 byte aligned) + uint32_t nentries; // number of entries (power-of-2) +} gc_log_t; + +typedef struct gc_log_entry { + uint32_t seqno; // monotonic sequence number + uint32_t timestamp; // decrementer value (wraps every 53s on PS3) + uint16_t subsystem; // 0 to 255 reserved for system, user gets 256 and up + uint16_t event; + uint32_t info[5]; +} _AL16 gc_log_entry_t; + +#define GCL_SS_SYS 0 // lowest system reserved subsystem +#define GCL_SS_USER 256 // lowest user reserved subsystem + + +/* + * The resulting log files can be displayed using: + * + * $ od -t x4 -w32 spu_log.00 | less + */ + + +#if defined(__SPU__) + +/*! + * System fills in seqno and timestamp. User is responsible for the rest.
+ */ + +void _gc_log_write(gc_log_entry_t entry); + +#ifdef ENABLE_GC_LOGGING +#define gc_log_write(entry) _gc_log_write(entry) +#else +#define gc_log_write(entry) do { } while (0) +#endif + +inline static void +gc_log_write0(int subsystem, int event) +{ + gc_log_entry_t e; + e.subsystem = subsystem; + e.event = event; + e.info[0] = 0; + e.info[1] = 0; + e.info[2] = 0; + e.info[3] = 0; + e.info[4] = 0; + gc_log_write(e); +} + +inline static void +gc_log_write1(int subsystem, int event, + uint32_t info0) +{ + gc_log_entry_t e; + e.subsystem = subsystem; + e.event = event; + e.info[0] = info0; + e.info[1] = 0; + e.info[2] = 0; + e.info[3] = 0; + e.info[4] = 0; + gc_log_write(e); +} + +inline static void +gc_log_write2(int subsystem, int event, + uint32_t info0, uint32_t info1) +{ + gc_log_entry_t e; + e.subsystem = subsystem; + e.event = event; + e.info[0] = info0; + e.info[1] = info1; + e.info[2] = 0; + e.info[3] = 0; + e.info[4] = 0; + gc_log_write(e); +} + +inline static void +gc_log_write3(int subsystem, int event, + uint32_t info0, uint32_t info1, uint32_t info2) +{ + gc_log_entry_t e; + e.subsystem = subsystem; + e.event = event; + e.info[0] = info0; + e.info[1] = info1; + e.info[2] = info2; + e.info[3] = 0; + e.info[4] = 0; + gc_log_write(e); +} + +inline static void +gc_log_write4(int subsystem, int event, + uint32_t info0, uint32_t info1, uint32_t info2, uint32_t info3) +{ + gc_log_entry_t e; + e.subsystem = subsystem; + e.event = event; + e.info[0] = info0; + e.info[1] = info1; + e.info[2] = info2; + e.info[3] = info3; + e.info[4] = 0; + gc_log_write(e); +} + +inline static void +gc_log_write5(int subsystem, int event, + uint32_t info0, uint32_t info1, uint32_t info2, uint32_t info3, uint32_t info4) +{ + gc_log_entry_t e; + e.subsystem = subsystem; + e.event = event; + e.info[0] = info0; + e.info[1] = info1; + e.info[2] = info2; + e.info[3] = info3; + e.info[4] = info4; + gc_log_write(e); +} + +/*! 
+ * One time initialization called by system runtime + */ +void +_gc_log_init(gc_log_t log_info); + +#endif + +__GC_END_DECLS + +#endif /* INCLUDED_GCELL_GC_LOGGING_H */ diff --git a/gcell/include/gcell/gc_mbox.h b/gcell/include/gcell/gc_mbox.h new file mode 100644 index 0000000000..1d577ff8f4 --- /dev/null +++ b/gcell/include/gcell/gc_mbox.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007,2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GCELL_GC_MBOX_H +#define INCLUDED_GCELL_GC_MBOX_H + +/* + * The PPE and SPE exchange a few 32-bit messages via mailboxes. + * All have a 4 bit opcode in the high bits. 
+ * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | op | arg | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +#define MK_MBOX_MSG(cmd, args) ((((cmd) & 0xf) << 28) | ((args) & 0x0fffffff)) +#define MBOX_MSG_OP(msg) (((msg) >> 28) & 0xf) +#define MBOX_MSG_ARG(msg) ((msg) & 0x0fffffff) + +// PPE to SPE (sent via SPE Read Inbound Mailbox) + +#define OP_EXIT 0x0 // exit now +#define OP_GET_SPU_BUFSIZE 0x1 + +// SPE to PPE (sent via SPE Write Outbound Interrupt Mailbox) + +#define OP_JOBS_DONE 0x2 // arg is 0 or 1, indicating which + // gc_comp_info_t contains the info +#define OP_SPU_BUFSIZE 0x3 // arg is max number of bytes + + +#endif /* INCLUDED_GCELL_GC_MBOX_H */ diff --git a/gcell/include/gcell/gc_spu_args.h b/gcell/include/gcell/gc_spu_args.h new file mode 100644 index 0000000000..3719bac222 --- /dev/null +++ b/gcell/include/gcell/gc_spu_args.h @@ -0,0 +1,60 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */ +#ifndef INCLUDED_GCELL_GC_SPU_ARGS_H +#define INCLUDED_GCELL_GC_SPU_ARGS_H + +#include <gcell/gc_types.h> +#include <gcell/gc_logging.h> + +// args passed to SPE at initialization time + +typedef struct gc_spu_args { + gc_eaddr_t queue; // address of job queue (gc_jd_queue_t *) + gc_eaddr_t comp_info[2]; // completion info (gc_comp_info_t *) + uint32_t spu_idx; // which spu we are: [0,nspus-1] + uint32_t nspus; // number of spus we're using + uint32_t proc_def_ls_addr; // LS addr of proc_def table + uint32_t nproc_defs; // number of proc_defs in table + gc_log_t log; // logging info +} _AL16 gc_spu_args_t; + + +#define GC_CI_NJOBS 62 // makes gc_comp_info 1 cache line long + +/*! + * \brief Used to return info to PPE on which jobs are completed. + * + * When each SPE is initialized, it is passed EA pointers to two of + * these structures. The SPE uses these to communicate which jobs + * that it has worked on are complete. The SPE notifies the PPE by + * sending an OP_JOBS_DONE message (see gc_mbox.h) with an argument of + * 0 or 1, indicating which of the two comp_info's to examine. The + * SPE sets the in_use flag to 1 before DMA'ing to the PPE. When the + * PPE is done with the structure, it must clear the in_use field to + * let the SPE know it can begin using it again. + */ +typedef struct gc_comp_info { + uint16_t in_use; // set by SPE, cleared by PPE when it's finished + uint16_t ncomplete; // number of valid job_id's + uint16_t job_id[GC_CI_NJOBS]; // job_id's of completed jobs +} _AL128 gc_comp_info_t; + +#endif /* INCLUDED_GCELL_GC_SPU_ARGS_H */ diff --git a/gcell/include/gcell/gc_types.h b/gcell/include/gcell/gc_types.h new file mode 100644 index 0000000000..b75bcd8d70 --- /dev/null +++ b/gcell/include/gcell/gc_types.h @@ -0,0 +1,63 @@ +/* -*- c -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc.
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_GC_TYPES_H +#define INCLUDED_GCELL_GC_TYPES_H + +#include <stdint.h> +#include <gcell/gc_cdefs.h> +#include <gcell/compiler.h> + +__GC_BEGIN_DECLS + +#ifndef __cplusplus +typedef int bool; +#define true 1 +#define false 0 +#endif + +/*! + * \brief 64-bit integer type representing an effective address (EA) + * + * This type is always 64-bits, regardless of whether we're + * running in 32 or 64-bit mode. + */ +typedef uint64_t gc_eaddr_t; + +#if !defined(__SPU__) +static inline void * +ea_to_ptr(gc_eaddr_t ea) +{ + // in 32-bit mode we're tossing the top 32-bits. + return (void *) (uintptr_t) ea; +} + +static inline gc_eaddr_t +ptr_to_ea(void *p) +{ + // two steps to avoid compiler warning in 32-bit mode. + return (gc_eaddr_t) (uintptr_t) p; +} +#endif + +__GC_END_DECLS + +#endif /* INCLUDED_GCELL_GC_TYPES_H */ diff --git a/gcell/include/gcell/gcp_fft_1d_r2.h b/gcell/include/gcell/gcp_fft_1d_r2.h new file mode 100644 index 0000000000..c1d331833b --- /dev/null +++ b/gcell/include/gcell/gcp_fft_1d_r2.h @@ -0,0 +1,64 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GCP_FFT_1D_R2_H +#define INCLUDED_GCP_FFT_1D_R2_H + +#include <gcell/gc_job_manager.h> +#include <complex> + +/*! + * \brief Submit a job that computes the forward or inverse FFT. + * + * \param mgr is the job manager instance + * \param log2_fft_length is the log2 of the fft_length (4 <= x <= 12). + * \param forward is true to compute the forward transform, else the inverse. + * \param shift indicates if an "fftshift" should be applied to the output data + * \param out is the fft_length output from FFT (must be 16-byte aligned). + * \param in is the fft_length input to FFT (must be 16-byte aligned). + * \param twiddle is fft_length/4 twiddle factor input to FFT (must be 16-byte aligned). + * \param window is the window to be applied to the input data. + * The window length must be either 0 or fft_length (must be 16-byte aligned). + * + * Returns a shared_ptr to a job descriptor which should be passed to wait_job*. + * Throws an exception in the event of a problem. + * This uses the FFTW conventions for scaling. That is, neither the forward nor inverse + * are scaled by 1/fft_length. 
+ */ +gc_job_desc_sptr +gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr, + unsigned int log2_fft_length, + bool forward, + bool shift, + std::complex<float> *out, + const std::complex<float> *in, + const std::complex<float> *twiddle, + const float *window); + +/*! + * \brief Compute twiddle factors + * + * \param log2_fft_length is the log2 of the fft_length. + * \param W is fft_length/4 twiddle factor output (must be 16-byte aligned). + */ +void +gcp_fft_1d_r2_twiddle(unsigned int log2_fft_length, std::complex<float> *W); + +#endif /* INCLUDED_GCP_FFT_1D_R2_H */ diff --git a/gcell/include/gcell/memory_barrier.h b/gcell/include/gcell/memory_barrier.h new file mode 100644 index 0000000000..4a1f870003 --- /dev/null +++ b/gcell/include/gcell/memory_barrier.h @@ -0,0 +1,64 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_MEMORY_BARRIER_H +#define INCLUDED_GCELL_MEMORY_BARRIER_H + +/* + * powerpc memory barriers + * + * The sync instruction guarantees that all memory accesses initiated + * by this processor have been performed (with respect to all other + * mechanisms that access memory). 
The eieio instruction is a barrier + * providing an ordering (separately) for (a) cacheable stores and (b) + * loads and stores to non-cacheable memory (e.g. I/O devices). + * + * smp_mb() prevents loads and stores being reordered across this point. + * smp_rmb() prevents loads being reordered across this point. + * smp_wmb() prevents stores being reordered across this point. + * + * We have to use the sync instructions for smp_mb(), since lwsync + * doesn't order loads with respect to previous stores. Lwsync is + * fine for smp_rmb(), though. For smp_wmb(), we use eieio since it + * is only used to order updates to system memory. + * + * For details, see "PowerPC Virtual Environment Architecture, Book + * II". Especially Chapter 1, "Storage Model" and Chapter 3, "Storage + * Control Instructions." (site:ibm.com) + */ + +static inline void smp_mb(void) +{ + __asm__ volatile ("sync" : : : "memory"); +} + +static inline void smp_rmb(void) +{ + __asm__ volatile ("lwsync" : : : "memory"); +} + +static inline void smp_wmb(void) +{ + __asm__ volatile ("eieio" : : : "memory"); +} + + +#endif /* INCLUDED_GCELL_MEMORY_BARRIER_H */ diff --git a/gcell/include/gcell/spu/Makefile.am b/gcell/include/gcell/spu/Makefile.am new file mode 100644 index 0000000000..58816819d0 --- /dev/null +++ b/gcell/include/gcell/spu/Makefile.am @@ -0,0 +1,30 @@ +# +# Copyright 2007,2008 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +include $(top_srcdir)/Makefile.common + +gcellspuinclude_HEADERS = \ + fft_1d.h \ + fft_1d_r2.h \ + gc_delay.h \ + gc_jd_queue.h \ + gc_random.h \ + gc_spu_macs.h \ + libfft.h diff --git a/gcell/include/gcell/spu/fft_1d.h b/gcell/include/gcell/spu/fft_1d.h new file mode 100644 index 0000000000..355b84bf1f --- /dev/null +++ b/gcell/include/gcell/spu/fft_1d.h @@ -0,0 +1,103 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _FFT_1D_H_ +#define _FFT_1D_H_ 1 + +#include <spu_intrinsics.h> + +/* BIT_SWAP - swaps up to 16 bits of the integer _i according to the + * pattern specified by _pat. + */ +#define BIT_SWAP(_i, _pat) spu_extract(spu_gather(spu_shuffle(spu_maskb(_i), _pat, _pat)), 0) + + +#ifndef MAX_FFT_1D_SIZE +#define MAX_FFT_1D_SIZE 8192 +#endif + +#ifndef INV_SQRT_2 +#define INV_SQRT_2 0.7071067811865 +#endif + + +/* The following macro, FFT_1D_BUTTERFLY, performs a 4 way SIMD basic butterfly + * operation. The inputs are in parallel arrays (separate real and imaginary + * vectors).
+ * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + */ + +#define FFT_1D_BUTTERFLY(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \ + vector float _qw_re, _qw_im; \ + \ + _qw_re = spu_msub(_q_re, _W_re, spu_mul(_q_im, _W_im)); \ + _qw_im = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \ + _P_re = spu_add(_p_re, _qw_re); \ + _P_im = spu_add(_p_im, _qw_im); \ + _Q_re = spu_sub(_p_re, _qw_re); \ + _Q_im = spu_sub(_p_im, _qw_im); \ +} + + +/* FFT_1D_BUTTERFLY_HI is equivalent to FFT_1D_BUTTERFLY with twiddle factors (W_im, -W_re) + */ +#define FFT_1D_BUTTERFLY_HI(_P_re, _P_im, _Q_re, _Q_im, _p_re, _p_im, _q_re, _q_im, _W_re, _W_im) { \ + vector float _qw_re, _qw_im; \ + \ + _qw_re = spu_madd(_q_re, _W_im, spu_mul(_q_im, _W_re)); \ + _qw_im = spu_msub(_q_im, _W_im, spu_mul(_q_re, _W_re)); \ + _P_re = spu_add(_p_re, _qw_re); \ + _P_im = spu_add(_p_im, _qw_im); \ + _Q_re = spu_sub(_p_re, _qw_re); \ + _Q_im = spu_sub(_p_im, _qw_im); \ +} + +#endif /* _FFT_1D_H_ */ diff --git a/gcell/include/gcell/spu/fft_1d_r2.h b/gcell/include/gcell/spu/fft_1d_r2.h new file mode 100644 index 0000000000..a51cbc341d --- /dev/null +++ b/gcell/include/gcell/spu/fft_1d_r2.h @@ -0,0 +1,529 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. 
*/ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifndef _FFT_1D_R2_H_ +#define _FFT_1D_R2_H_ 1 + +#include "fft_1d.h" + +/* fft_1d_r2 + * --------- + * Performs a single precision, complex Fast Fourier Transform using + * the DFT (Discrete Fourier Transform) with radix-2 decimation in time. + * The input <in> is an array of complex numbers of length (1<<log2_size) + * entries. The result is returned in the array of complex numbers specified + * by <out>. Note: This routine can support an in-place transformation + * by specifying <in> and <out> to be the same array. 
+ * + * This implementation utilizes the Cooley-Tukey algorithm consisting + * of <log2_size> stages. The basic operation is the butterfly. + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + * + * This routine also requires pre-computed twiddle values, W. W is an + * array of single precision complex numbers of length 1<<(log2_size-2) + * and is computed as follows: + * + * for (i=0; i<n/4; i++) { + * W[i].real = cos(i * 2*PI/n); + * W[i].imag = -sin(i * 2*PI/n); + * } + * + * This array actually only contains the first half of the twiddle + * factors. Due to symmetry, the second half of the twiddle factors + * are implied and equal: + * + * for (i=0; i<n/4; i++) { + * W[i+n/4].real = W[i].imag = -sin(i * 2*PI/n); + * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n); + * } + * + * Further symmetry allows one to generate the twiddle factor table + * using half the number of trig computations as follows: + * + * W[0].real = 1.0; + * W[0].imag = 0.0; + * for (i=1; i<n/4; i++) { + * W[i].real = cos(i * 2*PI/n); + * W[n/4 - i].imag = -W[i].real; + * } + * + * The complex numbers are packed into quadwords as follows: + * + * quadword complex + * array element array elements + * ----------------------------------------------------- + * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 | + * ----------------------------------------------------- + * + */ + + +static __inline void _fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size) +{ + int i, j, k; + int stage, offset; + int i_rev; + int n, n_2, n_4, n_8, n_16, n_3_16; + int w_stride, w_2stride, w_3stride, w_4stride; + int stride, stride_2, stride_4, stride_3_4; + vector float *W0, *W1, *W2, *W3; + vector float *re0, *re1, *re2, *re3; + vector float *im0, *im1, *im2, *im3; + vector float *in0, *in1, *in2, *in3, *in4, *in5, *in6, *in7; + vector float *out0, *out1, *out2, *out3; +
vector float tmp0, tmp1; + vector float w0_re, w0_im, w1_re, w1_im; + vector float w0, w1, w2, w3; + vector float src_lo0, src_lo1, src_lo2, src_lo3; + vector float src_hi0, src_hi1, src_hi2, src_hi3; + vector float dst_lo0, dst_lo1, dst_lo2, dst_lo3; + vector float dst_hi0, dst_hi1, dst_hi2, dst_hi3; + vector float out_re_lo0, out_re_lo1, out_re_lo2, out_re_lo3; + vector float out_im_lo0, out_im_lo1, out_im_lo2, out_im_lo3; + vector float out_re_hi0, out_re_hi1, out_re_hi2, out_re_hi3; + vector float out_im_hi0, out_im_hi1, out_im_hi2, out_im_hi3; + vector float re_lo0, re_lo1, re_lo2, re_lo3; + vector float im_lo0, im_lo1, im_lo2, im_lo3; + vector float re_hi0, re_hi1, re_hi2, re_hi3; + vector float im_hi0, im_hi1, im_hi2, im_hi3; + vector float pq_lo0, pq_lo1, pq_lo2, pq_lo3; + vector float pq_hi0, pq_hi1, pq_hi2, pq_hi3; + vector float re[MAX_FFT_1D_SIZE/4], im[MAX_FFT_1D_SIZE/4]; /* real & imaginary working arrays */ + vector float ppmm = (vector float) { 1.0f, 1.0f, -1.0f, -1.0f}; + vector float pmmp = (vector float) { 1.0f, -1.0f, -1.0f, 1.0f}; + vector unsigned char reverse; + vector unsigned char shuf_lo = (vector unsigned char) { + 0, 1, 2, 3, 4, 5, 6, 7, + 16,17,18,19, 20,21,22,23}; + vector unsigned char shuf_hi = (vector unsigned char) { + 8, 9,10,11, 12,13,14,15, + 24,25,26,27, 28,29,30,31}; + vector unsigned char shuf_0202 = (vector unsigned char) { + 0, 1, 2, 3, 8, 9,10,11, + 0, 1, 2, 3, 8, 9,10,11}; + vector unsigned char shuf_1313 = (vector unsigned char) { + 4, 5, 6, 7, 12,13,14,15, + 4, 5, 6, 7, 12,13,14,15}; + vector unsigned char shuf_0303 = (vector unsigned char) { + 0, 1, 2, 3, 12,13,14,15, + 0, 1, 2, 3, 12,13,14,15}; + vector unsigned char shuf_1212 = (vector unsigned char) { + 4, 5, 6, 7, 8, 9,10,11, + 4, 5, 6, 7, 8, 9,10,11}; + vector unsigned char shuf_0415 = (vector unsigned char) { + 0, 1, 2, 3, 16,17,18,19, + 4, 5, 6, 7, 20,21,22,23}; + vector unsigned char shuf_2637 = (vector unsigned char) { + 8, 9,10,11, 24,25,26,27, + 
12,13,14,15,28,29,30,31}; + vector unsigned char shuf_0246 = (vector unsigned char) { + 0, 1, 2, 3, 8, 9,10,11, + 16,17,18,19,24,25,26,27}; + vector unsigned char shuf_1357 = (vector unsigned char) { + 4, 5, 6, 7, 12,13,14,15, + 20,21,22,23,28,29,30,31}; + + n = 1 << log2_size; + n_2 = n >> 1; + n_4 = n >> 2; + n_8 = n >> 3; + n_16 = n >> 4; + + n_3_16 = n_8 + n_16; + + /* Compute a byte reverse shuffle pattern to be used to produce + * an address bit swap. + */ + reverse = spu_or(spu_slqwbyte(spu_splats((unsigned char)0x80), log2_size), + spu_rlmaskqwbyte(((vec_uchar16){15,14,13,12, 11,10,9,8, + 7, 6, 5, 4, 3, 2,1,0}), + log2_size-16)); + + /* Perform the first 3 stages of the FFT. These stages differs from + * other stages in that the inputs are unscrambled and the data is + * reformated into parallel arrays (ie, seperate real and imaginary + * arrays). The term "unscramble" means the bit address reverse the + * data array. In addition, the first three stages have simple twiddle + * weighting factors. + * stage 1: (1, 0) + * stage 2: (1, 0) and (0, -1) + * stage 3: (1, 0), (0.707, -0.707), (0, -1), (-0.707, -0.707) + * + * The arrays are processed as two halves, simultaneously. The lo (first + * half) and hi (second half). This is done because the scramble + * shares source value between each half of the output arrays. + */ + i = 0; + i_rev = 0; + + in0 = in; + in1 = in + n_8; + in2 = in + n_16; + in3 = in + n_3_16; + + in4 = in + n_4; + in5 = in1 + n_4; + in6 = in2 + n_4; + in7 = in3 + n_4; + + re0 = re; + re1 = re + n_8; + im0 = im; + im1 = im + n_8; + + w0_re = (vector float) { 1.0f, INV_SQRT_2, 0.0f, -INV_SQRT_2}; + w0_im = (vector float) { 0.0f, -INV_SQRT_2, -1.0f, -INV_SQRT_2}; + + do { + src_lo0 = in0[i_rev]; + src_lo1 = in1[i_rev]; + src_lo2 = in2[i_rev]; + src_lo3 = in3[i_rev]; + + src_hi0 = in4[i_rev]; + src_hi1 = in5[i_rev]; + src_hi2 = in6[i_rev]; + src_hi3 = in7[i_rev]; + + /* Perform scramble. 
+ */ + dst_lo0 = spu_shuffle(src_lo0, src_hi0, shuf_lo); + dst_hi0 = spu_shuffle(src_lo0, src_hi0, shuf_hi); + dst_lo1 = spu_shuffle(src_lo1, src_hi1, shuf_lo); + dst_hi1 = spu_shuffle(src_lo1, src_hi1, shuf_hi); + dst_lo2 = spu_shuffle(src_lo2, src_hi2, shuf_lo); + dst_hi2 = spu_shuffle(src_lo2, src_hi2, shuf_hi); + dst_lo3 = spu_shuffle(src_lo3, src_hi3, shuf_lo); + dst_hi3 = spu_shuffle(src_lo3, src_hi3, shuf_hi); + + /* Perform the stage 1 butterfly. The multiplier constant, ppmm, + * is used to control the sign of the operands since a single + * quadword contains both of P and Q valule of the butterfly. + */ + pq_lo0 = spu_madd(ppmm, dst_lo0, spu_rlqwbyte(dst_lo0, 8)); + pq_hi0 = spu_madd(ppmm, dst_hi0, spu_rlqwbyte(dst_hi0, 8)); + pq_lo1 = spu_madd(ppmm, dst_lo1, spu_rlqwbyte(dst_lo1, 8)); + pq_hi1 = spu_madd(ppmm, dst_hi1, spu_rlqwbyte(dst_hi1, 8)); + pq_lo2 = spu_madd(ppmm, dst_lo2, spu_rlqwbyte(dst_lo2, 8)); + pq_hi2 = spu_madd(ppmm, dst_hi2, spu_rlqwbyte(dst_hi2, 8)); + pq_lo3 = spu_madd(ppmm, dst_lo3, spu_rlqwbyte(dst_lo3, 8)); + pq_hi3 = spu_madd(ppmm, dst_hi3, spu_rlqwbyte(dst_hi3, 8)); + + /* Perfrom the stage 2 butterfly. For this stage, the + * inputs pq are still interleaved (p.real, p.imag, q.real, + * q.imag), so we must first re-order the data into + * parallel arrays as well as perform the reorder + * associated with the twiddle W[n/4], which equals + * (0, -1). + * + * ie. 
(A, B) * (0, -1) => (B, -A) + */ + re_lo0 = spu_madd(ppmm, + spu_shuffle(pq_lo1, pq_lo1, shuf_0303), + spu_shuffle(pq_lo0, pq_lo0, shuf_0202)); + im_lo0 = spu_madd(pmmp, + spu_shuffle(pq_lo1, pq_lo1, shuf_1212), + spu_shuffle(pq_lo0, pq_lo0, shuf_1313)); + + re_lo1 = spu_madd(ppmm, + spu_shuffle(pq_lo3, pq_lo3, shuf_0303), + spu_shuffle(pq_lo2, pq_lo2, shuf_0202)); + im_lo1 = spu_madd(pmmp, + spu_shuffle(pq_lo3, pq_lo3, shuf_1212), + spu_shuffle(pq_lo2, pq_lo2, shuf_1313)); + + + re_hi0 = spu_madd(ppmm, + spu_shuffle(pq_hi1, pq_hi1, shuf_0303), + spu_shuffle(pq_hi0, pq_hi0, shuf_0202)); + im_hi0 = spu_madd(pmmp, + spu_shuffle(pq_hi1, pq_hi1, shuf_1212), + spu_shuffle(pq_hi0, pq_hi0, shuf_1313)); + + re_hi1 = spu_madd(ppmm, + spu_shuffle(pq_hi3, pq_hi3, shuf_0303), + spu_shuffle(pq_hi2, pq_hi2, shuf_0202)); + im_hi1 = spu_madd(pmmp, + spu_shuffle(pq_hi3, pq_hi3, shuf_1212), + spu_shuffle(pq_hi2, pq_hi2, shuf_1313)); + + + /* Perform stage 3 butterfly. + */ + FFT_1D_BUTTERFLY(re0[0], im0[0], re0[1], im0[1], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY(re1[0], im1[0], re1[1], im1[1], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + + re0 += 2; + re1 += 2; + im0 += 2; + im1 += 2; + + i += 8; + i_rev = BIT_SWAP(i, reverse) / 2; + } while (i < n_2); + + /* Process stages 4 to log2_size-2 + */ + for (stage=4, stride=4; stage<log2_size-1; stage++, stride += stride) { + w_stride = n_2 >> stage; + w_2stride = n >> stage; + w_3stride = w_stride + w_2stride; + w_4stride = w_2stride + w_2stride; + + W0 = W; + W1 = W + w_stride; + W2 = W + w_2stride; + W3 = W + w_3stride; + + stride_2 = stride >> 1; + stride_4 = stride >> 2; + stride_3_4 = stride_2 + stride_4; + + re0 = re; im0 = im; + re1 = re + stride_2; im1 = im + stride_2; + re2 = re + stride_4; im2 = im + stride_4; + re3 = re + stride_3_4; im3 = im + stride_3_4; + + for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) { + /* Compute the twiddle factors + */ + w0 = W0[offset]; + w1 = W1[offset]; + 
w2 = W2[offset]; + w3 = W3[offset]; + + tmp0 = spu_shuffle(w0, w2, shuf_0415); + tmp1 = spu_shuffle(w1, w3, shuf_0415); + + w0_re = spu_shuffle(tmp0, tmp1, shuf_0415); + w0_im = spu_shuffle(tmp0, tmp1, shuf_2637); + + j = i; + k = i + stride; + do { + re_lo0 = re0[j]; im_lo0 = im0[j]; + re_lo1 = re1[j]; im_lo1 = im1[j]; + + re_hi0 = re2[j]; im_hi0 = im2[j]; + re_hi1 = re3[j]; im_hi1 = im3[j]; + + re_lo2 = re0[k]; im_lo2 = im0[k]; + re_lo3 = re1[k]; im_lo3 = im1[k]; + + re_hi2 = re2[k]; im_hi2 = im2[k]; + re_hi3 = re3[k]; im_hi3 = im3[k]; + + FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + + FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im); + + j += 2 * stride; + k += 2 * stride; + } while (j < n_4); + } + } + + /* Process stage log2_size-1. This is identical to the stage processing above + * except for this stage the inner loop is only executed once so it is removed + * entirely. 
+ */ + w_stride = n_2 >> stage; + w_2stride = n >> stage; + w_3stride = w_stride + w_2stride; + w_4stride = w_2stride + w_2stride; + + stride_2 = stride >> 1; + stride_4 = stride >> 2; + + stride_3_4 = stride_2 + stride_4; + + re0 = re; im0 = im; + re1 = re + stride_2; im1 = im + stride_2; + re2 = re + stride_4; im2 = im + stride_4; + re3 = re + stride_3_4; im3 = im + stride_3_4; + + for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) { + /* Compute the twiddle factors + */ + w0 = W[offset]; + w1 = W[offset + w_stride]; + w2 = W[offset + w_2stride]; + w3 = W[offset + w_3stride]; + + tmp0 = spu_shuffle(w0, w2, shuf_0415); + tmp1 = spu_shuffle(w1, w3, shuf_0415); + + w0_re = spu_shuffle(tmp0, tmp1, shuf_0415); + w0_im = spu_shuffle(tmp0, tmp1, shuf_2637); + + j = i; + k = i + stride; + + re_lo0 = re0[j]; im_lo0 = im0[j]; + re_lo1 = re1[j]; im_lo1 = im1[j]; + + re_hi0 = re2[j]; im_hi0 = im2[j]; + re_hi1 = re3[j]; im_hi1 = im3[j]; + + re_lo2 = re0[k]; im_lo2 = im0[k]; + re_lo3 = re1[k]; im_lo3 = im1[k]; + + re_hi2 = re2[k]; im_hi2 = im2[k]; + re_hi3 = re3[k]; im_hi3 = im3[k]; + + FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + + FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im); + } + + + /* Process the final stage (stage log2_size). For this stage, + * reformat the data from parallel arrays back into + * interleaved arrays,storing the result into <in>. + * + * This loop has been manually unrolled by 2 to improve + * dual issue rates and reduce stalls. This unrolling + * forces a minimum FFT size of 32. 
+ */ + re0 = re; + re1 = re + n_8; + re2 = re + n_16; + re3 = re + n_3_16; + + im0 = im; + im1 = im + n_8; + im2 = im + n_16; + im3 = im + n_3_16; + + out0 = out; + out1 = out + n_4; + out2 = out + n_8; + out3 = out1 + n_8; + + i = n_16; + + do { + /* Fetch the twiddle factors + */ + w0 = W[0]; + w1 = W[1]; + w2 = W[2]; + w3 = W[3]; + + W += 4; + + w0_re = spu_shuffle(w0, w1, shuf_0246); + w0_im = spu_shuffle(w0, w1, shuf_1357); + w1_re = spu_shuffle(w2, w3, shuf_0246); + w1_im = spu_shuffle(w2, w3, shuf_1357); + + /* Fetch the butterfly inputs, reals and imaginaries + */ + re_lo0 = re0[0]; im_lo0 = im0[0]; + re_lo1 = re1[0]; im_lo1 = im1[0]; + re_lo2 = re0[1]; im_lo2 = im0[1]; + re_lo3 = re1[1]; im_lo3 = im1[1]; + + re_hi0 = re2[0]; im_hi0 = im2[0]; + re_hi1 = re3[0]; im_hi1 = im3[0]; + re_hi2 = re2[1]; im_hi2 = im2[1]; + re_hi3 = re3[1]; im_hi3 = im3[1]; + + re0 += 2; im0 += 2; + re1 += 2; im1 += 2; + re2 += 2; im2 += 2; + re3 += 2; im3 += 2; + + /* Perform the butterflys + */ + FFT_1D_BUTTERFLY (out_re_lo0, out_im_lo0, out_re_lo1, out_im_lo1, re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); + FFT_1D_BUTTERFLY (out_re_lo2, out_im_lo2, out_re_lo3, out_im_lo3, re_lo2, im_lo2, re_lo3, im_lo3, w1_re, w1_im); + + FFT_1D_BUTTERFLY_HI(out_re_hi0, out_im_hi0, out_re_hi1, out_im_hi1, re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); + FFT_1D_BUTTERFLY_HI(out_re_hi2, out_im_hi2, out_re_hi3, out_im_hi3, re_hi2, im_hi2, re_hi3, im_hi3, w1_re, w1_im); + + /* Interleave the results and store them into the output buffers (ie, + * the original input buffers. 
+ */ + out0[0] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_0415); + out0[1] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_2637); + out0[2] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_0415); + out0[3] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_2637); + + out1[0] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_0415); + out1[1] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_2637); + out1[2] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_0415); + out1[3] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_2637); + + out2[0] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_0415); + out2[1] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_2637); + out2[2] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_0415); + out2[3] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_2637); + + out3[0] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_0415); + out3[1] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_2637); + out3[2] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_0415); + out3[3] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_2637); + + out0 += 4; + out1 += 4; + out2 += 4; + out3 += 4; + + i -= 2; + } while (i); +} + +#endif /* _FFT_1D_R2_H_ */ diff --git a/gcell/include/gcell/spu/gc_delay.h b/gcell/include/gcell/spu/gc_delay.h new file mode 100644 index 0000000000..e995b3a946 --- /dev/null +++ b/gcell/include/gcell/spu/gc_delay.h @@ -0,0 +1,27 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007,2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GCELL_SPU_GC_DELAY_H +#define INCLUDED_GCELL_SPU_GC_DELAY_H + +void gc_udelay(unsigned int usecs); +void gc_cdelay(unsigned int cpu_cycles); + +#endif /* INCLUDED_GCELL_SPU_GC_DELAY_H */ diff --git a/gcell/include/gcell/spu/gc_jd_queue.h b/gcell/include/gcell/spu/gc_jd_queue.h new file mode 100644 index 0000000000..ce1977c941 --- /dev/null +++ b/gcell/include/gcell/spu/gc_jd_queue.h @@ -0,0 +1,53 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GCELL_SPU_GC_JD_QUEUE_H +#define INCLUDED_GCELL_SPU_GC_JD_QUEUE_H + +#include <gcell/gc_jd_queue_data.h> + +/* + * Declarations for SPU side of job queue interface + */ + +__GC_BEGIN_DECLS + +/*! + * \brief Remove and return item at head of queue. + * + * \param[in] q is EA address of queue structure. + * \param[out] item_ea is EA address of item at head of queue. + * \param[in] jd_tag is the tag to use to get the LS copy of the item. 
+ * \param[out] item is local store copy of item at head of queue. + * \returns false if the queue is empty, otherwise returns true + * and sets \p item_ea and DMA's job descriptor into \p item + * + * If return is false, we're holding a lock-line reservation that + * covers the queue. + */ +bool +gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, + int jd_tag, gc_job_desc_t *item); + +__GC_END_DECLS + + +#endif /* INCLUDED_GCELL_SPU_GC_JD_QUEUE_H */ diff --git a/gcell/include/gcell/spu/gc_random.h b/gcell/include/gcell/spu/gc_random.h new file mode 100644 index 0000000000..f51b187d4a --- /dev/null +++ b/gcell/include/gcell/spu/gc_random.h @@ -0,0 +1,32 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GCELL_SPU_GC_RANDOM_H +#define INCLUDED_GCELL_SPU_GC_RANDOM_H + +/*! + * \brief Return a uniformly distributed value in the range [0, 1.0) + * (Linear congruential generator. YMMV. Caveat emptor.) 
+ */ + +float gc_uniform_deviate(void); +void gc_set_seed(int seed); + +#endif /* INCLUDED_GCELL_SPU_GC_RANDOM_H */ diff --git a/gcell/include/gcell/spu/gc_spu_macs.h b/gcell/include/gcell/spu/gc_spu_macs.h new file mode 100644 index 0000000000..0d7dc99781 --- /dev/null +++ b/gcell/include/gcell/spu/gc_spu_macs.h @@ -0,0 +1,380 @@ +/* -*- asm -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef INCLUDED_GC_SPU_MACS_H +#define INCLUDED_GC_SPU_MACS_H + +/* + * This file contains a set of macros that are generally useful when + * coding in SPU assembler + * + * Note that the multi-instruction macros in here may overwrite + * registers 77, 78, and 79 without warning. 
+ */ + +/* + * defines for all registers + */ +#define r0 $0 +#define r1 $1 +#define r2 $2 +#define r3 $3 +#define r4 $4 +#define r5 $5 +#define r6 $6 +#define r7 $7 +#define r8 $8 +#define r9 $9 +#define r10 $10 +#define r11 $11 +#define r12 $12 +#define r13 $13 +#define r14 $14 +#define r15 $15 +#define r16 $16 +#define r17 $17 +#define r18 $18 +#define r19 $19 +#define r20 $20 +#define r21 $21 +#define r22 $22 +#define r23 $23 +#define r24 $24 +#define r25 $25 +#define r26 $26 +#define r27 $27 +#define r28 $28 +#define r29 $29 +#define r30 $30 +#define r31 $31 +#define r32 $32 +#define r33 $33 +#define r34 $34 +#define r35 $35 +#define r36 $36 +#define r37 $37 +#define r38 $38 +#define r39 $39 +#define r40 $40 +#define r41 $41 +#define r42 $42 +#define r43 $43 +#define r44 $44 +#define r45 $45 +#define r46 $46 +#define r47 $47 +#define r48 $48 +#define r49 $49 +#define r50 $50 +#define r51 $51 +#define r52 $52 +#define r53 $53 +#define r54 $54 +#define r55 $55 +#define r56 $56 +#define r57 $57 +#define r58 $58 +#define r59 $59 +#define r60 $60 +#define r61 $61 +#define r62 $62 +#define r63 $63 +#define r64 $64 +#define r65 $65 +#define r66 $66 +#define r67 $67 +#define r68 $68 +#define r69 $69 +#define r70 $70 +#define r71 $71 +#define r72 $72 +#define r73 $73 +#define r74 $74 +#define r75 $75 +#define r76 $76 +#define r77 $77 +#define r78 $78 +#define r79 $79 +#define r80 $80 +#define r81 $81 +#define r82 $82 +#define r83 $83 +#define r84 $84 +#define r85 $85 +#define r86 $86 +#define r87 $87 +#define r88 $88 +#define r89 $89 +#define r90 $90 +#define r91 $91 +#define r92 $92 +#define r93 $93 +#define r94 $94 +#define r95 $95 +#define r96 $96 +#define r97 $97 +#define r98 $98 +#define r99 $99 +#define r100 $100 +#define r101 $101 +#define r102 $102 +#define r103 $103 +#define r104 $104 +#define r105 $105 +#define r106 $106 +#define r107 $107 +#define r108 $108 +#define r109 $109 +#define r110 $110 +#define r111 $111 +#define r112 $112 +#define r113 $113 
+#define r114 $114 +#define r115 $115 +#define r116 $116 +#define r117 $117 +#define r118 $118 +#define r119 $119 +#define r120 $120 +#define r121 $121 +#define r122 $122 +#define r123 $123 +#define r124 $124 +#define r125 $125 +#define r126 $126 +#define r127 $127 + + +#define lr r0 // link register +#define sp r1 // stack pointer + // r2 is environment pointer for langs that need it (ALGOL) + +#define retval r3 // return values are passed in regs starting at r3 + +#define arg1 r3 // args are passed in regs starting at r3 +#define arg2 r4 +#define arg3 r5 +#define arg4 r6 +#define arg5 r7 +#define arg6 r8 +#define arg7 r9 +#define arg8 r10 +#define arg9 r11 +#define arg10 r12 + +// r3 - r74 are volatile (caller saves) +// r75 - r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog) +// r80 - r127 are non-volatile (callee-saves) + +// scratch registers reserved for use by the macros in this file (all within the r75 - r79 scratch range). + +#define _gc_t0 r79 +#define _gc_t1 r78 +#define _gc_t2 r77 + +/* + * ---------------------------------------------------------------- + * pseudo ops + * ---------------------------------------------------------------- + */ +#define PROC_ENTRY(name) \ + .text; \ + .p2align 4; \ + .global name; \ + .type name, @function; \ +name: + +/* + * ---------------------------------------------------------------- + * aliases for common operations + * ---------------------------------------------------------------- + */ + +// Move register (even pipe, 2 cycles) +#define MR(rt, ra) or rt, ra, ra; + +// Move register (odd pipe, 4 cycles) +#define LMR(rt, ra) rotqbyi rt, ra, 0; + +// return +#define RETURN() bi lr; + +// hint for a return +#define HINT_RETURN(ret_label) hbr ret_label, lr; + +// return if zero +#define BRZ_RETURN(rt) biz rt, lr; + +// return if not zero +#define BRNZ_RETURN(rt) binz rt, lr; + +// return if halfword zero +#define BRHZ_RETURN(rt) bihz rt, lr; + +// return if halfword not zero +#define BRHNZ_RETURN(rt) bihnz rt, lr; + + +/* + * 
---------------------------------------------------------------- + * modulo like things for constant moduli that are powers of 2 + * ---------------------------------------------------------------- + */ + +// rt = ra & (pow2 - 1) +#define MODULO(rt, ra, pow2) \ + andi rt, ra, (pow2)-1; + +// rt = pow2 - (ra & (pow2 - 1)) +#define MODULO_NEG(rt, ra, pow2) \ + andi rt, ra, (pow2)-1; \ + sfi rt, rt, (pow2); + +// rt = ra & -(pow2) +#define ROUND_DOWN(rt, ra, pow2) \ + andi rt, ra, -(pow2); + +// rt = (ra + (pow2 - 1)) & -(pow2) +#define ROUND_UP(rt, ra, pow2) \ + ai rt, ra, (pow2)-1; \ + andi rt, rt, -(pow2); + +/* + * ---------------------------------------------------------------- + * Splat - replicate a particular slot into all slots + * Altivec analogs... (each macro builds its shufb pattern in _gc_t0) + * ---------------------------------------------------------------- + */ + +// replicate byte from slot s [0,15] +#define VSPLTB(rt, ra, s) \ + ilh _gc_t0, (s)*0x0101; \ + shufb rt, ra, ra, _gc_t0; + +// replicate halfword from slot s [0,7] +#define VSPLTH(rt, ra, s) \ + ilh _gc_t0, 2*(s)*0x0101 + 0x0001; \ + shufb rt, ra, ra, _gc_t0; + +// replicate word from slot s [0,3] (ilhu sets the upper halfword of each pattern word, iohl ORs in the lower) +#define VSPLTW(rt, ra, s) \ + ilhu _gc_t0, 4*(s)*0x0101 + 0x0001; \ + iohl _gc_t0, 4*(s)*0x0101 + 0x0203; \ + shufb rt, ra, ra, _gc_t0; + +// replicate double from slot s [0,1] +#define VSPLTD(rt, ra, s) \ + /* sp is always 16-byte aligned */ \ + cdd _gc_t0, 8(sp); /* 0x10111213 14151617 00010203 04050607 */ \ + rotqbyi rt, ra, (s) << 3; /* rotate double into preferred slot */ \ + shufb rt, rt, rt, _gc_t0; + +/* + * ---------------------------------------------------------------- + * lots of min/max variations... 
+ * + * On a slot by slot basis, compute the min or max + * + * U - unsigned, else signed + * B,H,{} - byte, halfword, word + * F float + * ---------------------------------------------------------------- + */ + +#define MIN_SELB(rt, ra, rb, rc) selb rt, ra, rb, rc; /* takes rb where the rc mask bits are set, else ra */ +#define MAX_SELB(rt, ra, rb, rc) selb rt, rb, ra, rc; /* same select with ra and rb swapped */ + + // words (every macro below leaves its compare mask in the scratch register _gc_t0) + +#define MIN(rt, ra, rb) \ + cgt _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +#define MAX(rt, ra, rb) \ + cgt _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + +#define UMIN(rt, ra, rb) \ + clgt _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +#define UMAX(rt, ra, rb) \ + clgt _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + + // bytes + +#define MINB(rt, ra, rb) \ + cgtb _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +#define MAXB(rt, ra, rb) \ + cgtb _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + +#define UMINB(rt, ra, rb) \ + clgtb _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +#define UMAXB(rt, ra, rb) \ + clgtb _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + + // halfwords + +#define MINH(rt, ra, rb) \ + cgth _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +#define MAXH(rt, ra, rb) \ + cgth _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + +#define UMINH(rt, ra, rb) \ + clgth _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +#define UMAXH(rt, ra, rb) \ + clgth _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + + // floats + +#define FMIN(rt, ra, rb) \ + fcgt _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +#define FMAX(rt, ra, rb) \ + fcgt _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + +// Ignoring the sign, select the values with the minimum magnitude +#define FMINMAG(rt, ra, rb) \ + fcmgt _gc_t0, ra, rb; \ + MIN_SELB(rt, ra, rb, _gc_t0) + +// Ignoring the sign, select the values with the maximum magnitude +#define FMAXMAG(rt, ra, rb) \ + fcmgt _gc_t0, ra, rb; \ + MAX_SELB(rt, ra, rb, _gc_t0) + + +#endif /* INCLUDED_GC_SPU_MACS_H */ diff --git a/gcell/include/gcell/spu/libfft.h 
b/gcell/include/gcell/spu/libfft.h new file mode 100644 index 0000000000..dd387be0c2 --- /dev/null +++ b/gcell/include/gcell/spu/libfft.h @@ -0,0 +1,113 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2008 Free Software Foundation, Inc. */ +/* (C)Copyright 2001,2007, */ +/* International Business Machines Corporation, */ +/* Sony Computer Entertainment, Incorporated, */ +/* Toshiba Corporation, */ +/* */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ + +#ifndef INCLUDED_LIBFFT_H +#define INCLUDED_LIBFFT_H + +// must be defined before inclusion of fft_1d_r2.h +#define MAX_FFT_1D_SIZE 4096 + +/* fft_1d_r2 + * --------- + * Performs a single precision, complex Fast Fourier Transform using + * the DFT (Discrete Fourier Transform) with radix-2 decimation in time. + * The input <in> is an array of complex numbers of length (1<<log2_size) + * entries. The result is returned in the array of complex numbers specified + * by <out>. Note: This routine can support an in-place transformation + * by specifying <in> and <out> to be the same array. + * + * This implementation utilizes the Cooley-Tukey algorithm consisting + * of <log2_size> stages. The basic operation is the butterfly. + * + * p --------------------------> P = p + q*Wi + * \ / + * \ / + * \ / + * \/ + * /\ + * / \ + * / \ + * ____ / \ + * q --| Wi |-----------------> Q = p - q*Wi + * ---- + * + * This routine also requires pre-computed twiddle values, W. W is an + * array of single precision complex numbers of length 1<<(log2_size-2) + * and is computed as follows (n = 1<<log2_size): + * + * for (i=0; i<n/4; i++) { + * W[i].real = cos(i * 2*PI/n); + * W[i].imag = -sin(i * 2*PI/n); + * } + * + * This array actually only contains the first half of the twiddle + * factors. 
Due to symmetry, the second half of the twiddle factors + * are implied and equal: + * + * for (i=0; i<n/4; i++) { + * W[i+n/4].real = W[i].imag = -sin(i * 2*PI/n); + * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n); + * } + * + * Further symmetry allows one to generate the twiddle factor table + * using half the number of trig computations as follows: + * + * W[0].real = 1.0; + * W[0].imag = 0.0; + * for (i=1; i<n/4; i++) { + * W[i].real = cos(i * 2*PI/n); + * W[n/4 - i].imag = -W[i].real; + * } + * + * The complex numbers are packed into quadwords as follows: + * + * quadword complex + * array element array elements + * ----------------------------------------------------- + * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 | + * ----------------------------------------------------- + * + */ + +void fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size); + +#endif /* INCLUDED_LIBFFT_H */ |