#ifndef __PDLTHREAD_H
#define __PDLTHREAD_H
/*
 * Broadcast-level flag bits, one bit each.
 * NOTE(review): presumably these are the values kept in
 * pdl_broadcast.gflags -- confirm against the users of that field.
 */
#define PDL_BROADCAST_MAGICKED    (1 << 0)
#define PDL_BROADCAST_MAGICK_BUSY (1 << 1)
#define PDL_BROADCAST_INITIALIZED (1 << 2)

/*
 * X-macro over the flag names above: expands to X(flag) once per flag,
 * in declaration order, with nothing between the expansions.
 */
#define PDL_LIST_FLAGS_PDLBROADCAST(X) \
  X(PDL_BROADCAST_MAGICKED)            \
  X(PDL_BROADCAST_MAGICK_BUSY)         \
  X(PDL_BROADCAST_INITIALIZED)
/* Magic number stored in pdl_broadcast.magicno by PDL_BRC_SETMAGIC. */
#define PDL_BRC_MAGICNO 0x92314764

/*
 * Hands `it`, the magic number and the label "BROADCAST" to
 * PDL_CHKMAGIC_GENERAL (defined elsewhere; its behaviour on a mismatch is
 * not visible from this header).
 */
#define PDL_BRC_CHKMAGIC(it) PDL_CHKMAGIC_GENERAL(it, PDL_BRC_MAGICNO, "BROADCAST")

/*
 * Stamp `it` (pointer to a struct with a `magicno` member) with the magic
 * number. The whole assignment is parenthesized so the macro behaves as a
 * single expression wherever it is used; the value of the expression is
 * the stored magic number.
 */
#define PDL_BRC_SETMAGIC(it) ((it)->magicno = PDL_BRC_MAGICNO)
/* XXX To avoid mallocs, these should also have "default" values */
/*
 * Bookkeeping for a broadcast loop: per-dimension sizes and counters,
 * per-pdl offsets and increments, plus the mag_* parameters describing
 * how one dimension is divided among pthreads.
 */
typedef struct pdl_broadcast {
  /* NOTE(review): not described in this header -- confirm role with users */
  struct pdl_transvtable *transvtable;
  unsigned int magicno; /* PDL_BRC_SETMAGIC stores PDL_BRC_MAGICNO here */
  int gflags;           /* Flags about this struct */

  PDL_Indx ndims;       /* Number of dimensions broadcasted over */
  PDL_Indx nimpl;       /* How many of those dimensions are implicit */
  PDL_Indx npdls;       /* Number of pdls involved */
  PDL_Indx nextra;      /* NOTE(review): meaning not documented here */

  PDL_Indx *inds;       /* Current index in each of the dimensions */
  PDL_Indx *dims;       /* Size of each of the dimensions */
  PDL_Indx *offs;       /* Offset for each of the pdls */
  PDL_Indx *incs;       /* npdls * ndims array of increments, addressed as
                           incs[d*npdls + p] (see PDL_BRC_INC). Fast because
                           of constant indices for first loops */
  PDL_Indx *realdims;   /* realdims for each pdl (e.g., specified by PP
                           signature) */
  pdl **pdls;           /* Indexed per pdl (pdls[j] in PDL_BRC_THR_OFFSET) */
  char *flags;          /* Per-pdl flags, e.g. PDL_BROADCAST_TEMP */

  PDL_Indx mag_nth;     /* magicked thread dim */
  PDL_Indx mag_nthpdl;  /* magicked ndarray */
  PDL_Indx mag_nthr;    /* number of threads */
  PDL_Indx mag_skip;    /* first pthread to skip if remainder, 0=none */
  PDL_Indx mag_stride;  /* the base size to stride, without adding 1 if
                           before drop */
  /*
   * Start offset of pthread `thr` (zero-based) along the magicked dim:
   *
   *     offset = thr*t + MIN(thr,k)    // see macro PDL_BRC_OFFSET
   *
   * with t = mag_stride and k = mag_skip, i.e. consecutive offsets are
   * t+1 apart for thr < k and t apart from thr = k onwards.
   *
   *      **
   *     t****
   *      ****
   *      ****
   *      --k--->thr (zero-based)
   *     t=3 (mag_stride)
   *     k=2 (mag_skip)
   *     offsets=[0,4,8,11,14]
   *
   *     t****
   *      ****
   *      ****
   *      k----->thr (zero-based)
   *     t=3 (mag_stride)
   *     k=0 (mag_skip)
   *     offsets=[0,3,6,9,12]
   */
} pdl_broadcast;
/*
 * Start offset of pthread `thr` along the magicked dimension:
 * thr * mag_stride, plus one for every earlier pthread below mag_skip.
 * `broadcast` is a pointer to a struct with mag_stride and mag_skip
 * members. Note that `thr` is evaluated more than once.
 */
#define PDL_BRC_OFFSET(thr, broadcast) \
  ( (thr) * ((broadcast)->mag_stride) \
    + PDLMIN((thr), (broadcast)->mag_skip) )
/*
 * Element of the flat increments array `incs` for pdl number `p` in
 * dimension `d`, where each dimension occupies `npdls` consecutive
 * entries. Expands to an lvalue.
 */
#define PDL_BRC_INC(incs, npdls, p, d) \
  ((incs)[(p) + (npdls) * (d)])
/*
 * Offset for pdl number `j` as seen by pthread `thr`:
 * PDL_REPROFFS of that pdl, plus
 *   - 0 when thr is 0;
 *   - thr times the pdl's last dimincs entry when the pdl's per-pdl flag
 *     has PDL_BROADCAST_TEMP set;
 *   - otherwise PDL_BRC_OFFSET(thr, broadcast) times the pdl's increment
 *     in the magicked dimension (mag_nth).
 * `broadcast` is a pointer to a pdl_broadcast. Every parameter is
 * parenthesized where operator precedence could matter, so expression
 * arguments such as `&brc` or `a + b` expand correctly. Arguments are
 * still evaluated more than once, so they must be free of side effects.
 */
#define PDL_BRC_THR_OFFSET(broadcast, thr, j) \
  (PDL_REPROFFS((broadcast)->pdls[j]) + ( \
    !(thr) ? 0 : \
    PDL_BISTEMP((broadcast)->flags[j]) ? (thr) * (broadcast)->pdls[j]->dimincs[(broadcast)->pdls[j]->ndims-1] : \
    PDL_BRC_OFFSET(thr, broadcast) * PDL_BRC_INC((broadcast)->incs, (broadcast)->npdls, j, (broadcast)->mag_nth) \
  ))
/*
 * Advance the broadcast counters by one step, odometer-style, beginning
 * at dimension `nth`.
 *
 *   npdls  number of entries in offsp and of increments per dimension
 *   offsp  per-pdl offsets, updated in place
 *   nth    first dimension to step
 *   ndims  one past the last dimension to step
 *   incs   increments, laid out as incs[dim*npdls + pdl]
 *   dims   size of each dimension
 *   inds   current index in each dimension, updated in place
 *
 * Returns 1 as soon as some dimension's index could be incremented without
 * reaching its size; returns 0 when every dimension from nth up to ndims-1
 * wrapped around (their indices are then all 0 again).
 */
static inline int pdl_broadcast_nd_step(
  PDL_Indx npdls, PDL_Indx *offsp,
  PDL_Indx nth, PDL_Indx ndims, PDL_Indx *incs, PDL_Indx *dims, PDL_Indx *inds
) {
  PDL_Indx dim = nth;
  while (dim < ndims) {
    const PDL_Indx row = dim * npdls; /* start of this dim's increments */
    PDL_Indx p;

    /* Move every pdl one element forward along this dimension. */
    for (p = 0; p < npdls; p++) {
      offsp[p] += incs[row + p];
    }

    inds[dim]++;
    if (inds[dim] < dims[dim]) {
      return 1; /* no carry needed: still inside this dimension */
    }

    /* Wrapped: rewind index and offsets, then carry into the next dim. */
    inds[dim] = 0;
    for (p = 0; p < npdls; p++) {
      offsp[p] -= incs[row + p] * dims[dim];
    }
    dim++;
  }
  return 0;
}
/* Broadcast per pdl flags */
#define PDL_BROADCAST_TEMP (1 << 1)

/*
 * Non-zero when `flag` has the PDL_BROADCAST_TEMP bit set, 0 otherwise.
 * The argument is parenthesized so that expressions with lower precedence
 * than `&` (e.g. `a | b`) are tested as a whole.
 */
#define PDL_BISTEMP(flag) ((flag) & PDL_BROADCAST_TEMP)
/* __PDLTHREAD_H */
#endif