systems-model-code

package sysmodel

import (
    "math"
    "math/bits"
    "math/cmplx"

    "github.com/mjibson/go-dsp/fft"
    "gonum.org/v1/gonum/stat/distuv"
)

// Distribution - stripped down representation of a probability distribution
type Distribution []float64

// Percentile - calculates `p`th percentile of a distribution
func (d0 Distribution) Percentile(p float64) int {
    var m float64
    for i := 0; i < len(d0); i++ {
        m += d0[i]
        if m >= p {
            return i
        }
    }
    return len(d0) - 1
}

// Sequential - calculates distribution of sum of `n` sequential executions
// Uses FFT and IFFT to avoid O(n^2) polynomial multiplication...
func (d Distribution) Sequential(n float64) Distribution {

    // NOTE: tried a _super_ aggressive optimization that truncates
    // at (1e-6)^(1/n); breaks down when n is too large, but pretty good
    // otherwise
    var (
        k        = math.Pow(1e-6, 1.0/n)
        truncPos = float64(d.Percentile(1 - k))
        olen     = 1 << int(math.Ceil(math.Log2(n*truncPos)))
        distPad  = make(Distribution, olen)
    )

    copy(distPad, d)
    fwdFFT := fft.FFTReal(distPad)
    prod := make([]complex128, olen)
    for i := 0; i < len(fwdFFT); i++ {
        prod[i] = cmplx.Pow(fwdFFT[i], complex(n, 0))
    }

    for i, coef := range fft.IFFT(prod) {
        distPad[i] = real(coef)
    }
    return distPad
}

// ParallelCalls - calculates distribution of `k`th longest of `n` parallel executions
// Avoids FFT by _pretending_ the calculation is on U(0, 1) which has Beta distributed order
// statistics. Then transforming result back to the space of `d0`. Still must calculate
// `invRegBeta` internally :(
func (d0 Distribution) Parallel(n, k float64) Distribution {

    var (
        dparallel  = make(Distribution, len(d0))
        B          = distuv.Beta{Alpha: k, Beta: n + 1 - k}
        q          = d0[0]
        pCDF, cCDF = B.CDF(0.0), B.CDF(q)
    )

    for i := 1; i < len(d0); i++ {
        dparallel[i-1] = cCDF - pCDF
        q += d0[i]
        pCDF, cCDF = cCDF, B.CDF(q)
    }
    return dparallel
}

// Add - calculates distribution of sequential execution of two base distributions
// Uses FFT and IFFT to avoid O(n^2) polynomial multiplication...
func (d0 Distribution) Add(d1 Distribution) Distribution {

    var (
        k        = math.Pow(1e-6, 0.5)
        trunc0   = d0.Percentile(1 - k)
        trunc1   = d1.Percentile(1 - k)
        olen     = 1 << bits.Len64(uint64(trunc0+trunc1+1))
        extDist0 = make(Distribution, olen)
        extDist1 = make(Distribution, olen)
    )

    copy(extDist0, d0)
    copy(extDist1, d1)

    fwdFFT0, fwdFFT1 := fft.FFTReal(extDist0), fft.FFTReal(extDist1)
    prod := make([]complex128, olen)
    for i := 0; i < olen; i++ {
        prod[i] = fwdFFT0[i] * fwdFFT1[i]
    }

    for i, coef := range fft.IFFT(prod) {
        extDist0[i] = real(coef)
    }
    return extDist0
}

// ParallelMixed - calculates distribution of `k`th longest of `m` parallel executions. As
// these distributions  aren't identical, we can't Beta transform as in `Parallel`. Computes
// Poisson-Binomial coefficients, and runs in O(m^2 * n) time (?). Not bad.
func ParallelMixed(ds []Distribution, k int) Distribution {

    var longest, n = 0, len(ds)
    for i := 0; i < n; i++ {
        if len(ds[i]) > longest {
            longest = len(ds[i])
        }
    }

    // allocate space for all `n` distributions (assuming all have length == `longest`).
    // compute the CDFs of the distributions online and interleave the results. contiguous
    // slice w. len `n` starting at any `k` s.t. `k%n=0` represents the CDFs at `k//n`.
    // not really essential, saves 50-70% (?) by avoiding bopping around indices...
    cDists := make([]float64, longest*len(ds))
    for i := 0; i < n; i++ {
        for j, val := range ds[i] {
            if j > 0 {
                cDists[j*n+i] = val + cDists[(j-1)*n+i]
            } else {
                cDists[j*n+i] = val
            }
        }
    }

    var cur, prev float64
    oDist := make(Distribution, longest)
    prev = directConvolution(cDists[0:n], k)
    for thresh := 1; thresh < longest; thresh++ {
        cur = directConvolution(cDists[n*thresh:(n*thresh+n)], k)
        oDist[thresh] = cur - prev
        prev = cur
    }
    return oDist
}

// directConvolution - implements an algo. from "A simple and fast method for computing the Poisson
// binomial distribution function" (https://doi.org/10.1016/j.csda.2018.01.007).
func directConvolution(ps []float64, m int) float64 {
    pmf := make([]float64, len(ps)+1)
    npmf := make([]float64, len(ps)+1)
    var p, pComplement float64
    pmf[0] = 1

    // almost line-for-line translation of the Wikipedia pseudo-code of the algorithm
    // devised in the paper above.
    // see: https://en.wikipedia.org/wiki/Poisson_binomial_distribution#Probability_Mass_Function
    for i := 1; i <= len(ps); i++ {
        p, pComplement = ps[i-1], 1-ps[i-1]
        npmf[0] = pComplement * pmf[0]
        npmf[i] = p * pmf[i-1]
        for k := 1; k < i; k++ {
            npmf[k] = p*pmf[k-1] + pComplement*pmf[k]
        }
        copy(pmf, npmf)
        clear(npmf)
    }

    // a little wasteful - we can compute ALL order statistics in a single pass...
    // instead just return sum of prob. up to requested index `m`
    var o float64
    for i := 0; i < m; i++ {
        o += pmf[i]
    }
    return 1 - o
}

// Composition - executes a Distributin, `db` a total of `Z` times
// with attempts (`Z`) distributed by Z ~ da.
func Composition(da Distribution, db Distribution) Distribution {

    var (
        n        = float64(len(da))
        k        = math.Pow(1e-6, 1.0/n)
        truncPos = float64(db.Percentile(1 - k))
        olen     = 1 << int(math.Ceil(math.Log2(n*truncPos)))
        oDist    = make(Distribution, olen)
        distPad  = make(Distribution, olen)
        prod     = make([]complex128, olen)
        delta    = 1.0
    )

    copy(distPad, db)
    for numRetry, pRetry := range da {
        if (pRetry < 1e-4) && (numRetry != 0) {
            delta++
            continue
        }

        fwdFFT := fft.FFTReal(distPad)
        if numRetry == 0 {
            copy(prod, fwdFFT)
        } else {
            // accumulate delta && use cmplx.Pow to skip iterations on large jumps
            if delta > 1 {
                for i, j := range fwdFFT {
                    prod[i] *= cmplx.Pow(j, complex(delta, 0))
                }
            }

            // else use regular multiplication...
            if delta == 1 {
                for i, j := range fwdFFT {
                    prod[i] *= j
                }
            }
        }

        for i, coef := range fft.IFFT(prod) {
            oDist[i] += pRetry * real(coef)
        }
        delta = 1.0
    }
    return oDist
}