package sysmodel
import (
"math"
"math/bits"
"math/cmplx"
"github.com/mjibson/go-dsp/fft"
"gonum.org/v1/gonum/stat/distuv"
)
// Distribution - stripped down representation of a probability distribution
type Distribution []float64
// Percentile - calculates `p`th percentile of a distribution
func (d0 Distribution) Percentile(p float64) int {
var m float64
for i := 0; i < len(d0); i++ {
m += d0[i]
if m >= p {
return i
}
}
return len(d0) - 1
}
// Sequential - calculates distribution of sum of `n` sequential executions
// Uses FFT and IFFT to avoid O(n^2) polynomial multiplication...
func (d Distribution) Sequential(n float64) Distribution {
// NOTE: tried a _super_ aggressive optimization that truncates
// at (1e-6)^(1/n); breaks down when n is too large, but pretty good
// otherwise
var (
k = math.Pow(1e-6, 1.0/n)
truncPos = float64(d.Percentile(1 - k))
olen = 1 << int(math.Ceil(math.Log2(n*truncPos)))
distPad = make(Distribution, olen)
)
copy(distPad, d)
fwdFFT := fft.FFTReal(distPad)
prod := make([]complex128, olen)
for i := 0; i < len(fwdFFT); i++ {
prod[i] = cmplx.Pow(fwdFFT[i], complex(n, 0))
}
for i, coef := range fft.IFFT(prod) {
distPad[i] = real(coef)
}
return distPad
}
// ParallelCalls - calculates distribution of `k`th longest of `n` parallel executions
// Avoids FFT by _pretending_ the calculation is on U(0, 1) which has Beta distributed order
// statistics. Then transforming result back to the space of `d0`. Still must calculate
// `invRegBeta` internally :(
func (d0 Distribution) Parallel(n, k float64) Distribution {
var (
dparallel = make(Distribution, len(d0))
B = distuv.Beta{Alpha: k, Beta: n + 1 - k}
q = d0[0]
pCDF, cCDF = B.CDF(0.0), B.CDF(q)
)
for i := 1; i < len(d0); i++ {
dparallel[i-1] = cCDF - pCDF
q += d0[i]
pCDF, cCDF = cCDF, B.CDF(q)
}
return dparallel
}
// Add - calculates distribution of sequential execution of two base distributions
// Uses FFT and IFFT to avoid O(n^2) polynomial multiplication...
func (d0 Distribution) Add(d1 Distribution) Distribution {
var (
k = math.Pow(1e-6, 0.5)
trunc0 = d0.Percentile(1 - k)
trunc1 = d1.Percentile(1 - k)
olen = 1 << bits.Len64(uint64(trunc0+trunc1+1))
extDist0 = make(Distribution, olen)
extDist1 = make(Distribution, olen)
)
copy(extDist0, d0)
copy(extDist1, d1)
fwdFFT0, fwdFFT1 := fft.FFTReal(extDist0), fft.FFTReal(extDist1)
prod := make([]complex128, olen)
for i := 0; i < olen; i++ {
prod[i] = fwdFFT0[i] * fwdFFT1[i]
}
for i, coef := range fft.IFFT(prod) {
extDist0[i] = real(coef)
}
return extDist0
}
// ParallelMixed - calculates distribution of `k`th longest of `m` parallel executions. As
// these distributions aren't identical, we can't Beta transform as in `Parallel`. Computes
// Poisson-Binomial coefficients, and runs in O(m^2 * n) time (?). Not bad.
func ParallelMixed(ds []Distribution, k int) Distribution {
var longest, n = 0, len(ds)
for i := 0; i < n; i++ {
if len(ds[i]) > longest {
longest = len(ds[i])
}
}
// allocate space for all `n` distributions (assuming all have length == `longest`).
// compute the CDFs of the distributions online and interleave the results. contiguous
// slice w. len `n` starting at any `k` s.t. `k%n=0` represents the CDFs at `k//n`.
// not really essential, saves 50-70% (?) by avoiding bopping around indices...
cDists := make([]float64, longest*len(ds))
for i := 0; i < n; i++ {
for j, val := range ds[i] {
if j > 0 {
cDists[j*n+i] = val + cDists[(j-1)*n+i]
} else {
cDists[j*n+i] = val
}
}
}
var cur, prev float64
oDist := make(Distribution, longest)
prev = directConvolution(cDists[0:n], k)
for thresh := 1; thresh < longest; thresh++ {
cur = directConvolution(cDists[n*thresh:(n*thresh+n)], k)
oDist[thresh] = cur - prev
prev = cur
}
return oDist
}
// directConvolution - implements an algo. from "A simple and fast method for computing the Poisson
// binomial distribution function" (https://doi.org/10.1016/j.csda.2018.01.007).
func directConvolution(ps []float64, m int) float64 {
pmf := make([]float64, len(ps)+1)
npmf := make([]float64, len(ps)+1)
var p, pComplement float64
pmf[0] = 1
// almost line-for-line translation of the Wikipedia pseudo-code of the algorithm
// devised in the paper above.
// see: https://en.wikipedia.org/wiki/Poisson_binomial_distribution#Probability_Mass_Function
for i := 1; i <= len(ps); i++ {
p, pComplement = ps[i-1], 1-ps[i-1]
npmf[0] = pComplement * pmf[0]
npmf[i] = p * pmf[i-1]
for k := 1; k < i; k++ {
npmf[k] = p*pmf[k-1] + pComplement*pmf[k]
}
copy(pmf, npmf)
clear(npmf)
}
// a little wasteful - we can compute ALL order statistics in a single pass...
// instead just return sum of prob. up to requested index `m`
var o float64
for i := 0; i < m; i++ {
o += pmf[i]
}
return 1 - o
}
// Composition - executes a Distributin, `db` a total of `Z` times
// with attempts (`Z`) distributed by Z ~ da.
func Composition(da Distribution, db Distribution) Distribution {
var (
n = float64(len(da))
k = math.Pow(1e-6, 1.0/n)
truncPos = float64(db.Percentile(1 - k))
olen = 1 << int(math.Ceil(math.Log2(n*truncPos)))
oDist = make(Distribution, olen)
distPad = make(Distribution, olen)
prod = make([]complex128, olen)
delta = 1.0
)
copy(distPad, db)
for numRetry, pRetry := range da {
if (pRetry < 1e-4) && (numRetry != 0) {
delta++
continue
}
fwdFFT := fft.FFTReal(distPad)
if numRetry == 0 {
copy(prod, fwdFFT)
} else {
// accumulate delta && use cmplx.Pow to skip iterations on large jumps
if delta > 1 {
for i, j := range fwdFFT {
prod[i] *= cmplx.Pow(j, complex(delta, 0))
}
}
// else use regular multiplication...
if delta == 1 {
for i, j := range fwdFFT {
prod[i] *= j
}
}
}
for i, coef := range fft.IFFT(prod) {
oDist[i] += pRetry * real(coef)
}
delta = 1.0
}
return oDist
}