teenyverse - Scientific group life cycle

Algebraic model

Model Sketch

There are research groups \(G\) with a number of non-programmers \(n\) and programmers \(p\). In a data-driven world, we assume that learning to code confers a large benefit to programmers over non-programmers such that \(\alpha \ll \beta\). There is a constant rate \(\mu\) of influx into research groups of students who do not know how to code. There is a cost of learning to code \(c(n,p)\), which depends on the number of programmers and non-programmers within the group. We assume that programmers and non-programmers have different graduation rates, \(\nu_p\) and \(\nu_n\), with \(\nu_p > \nu_n\).

We model the group life cycle with the following master equation:

\[\begin{align*} \frac{d}{dt}G_{n,p} &= \mu(G_{n-1,p} - G_{n,p}) + \nu_n \Big((n+1)G_{n+1,p}-nG_{n,p}\Big) \\ &+ \Big[ \tau_g(n+1,p-1)\big(1-c(n+1, p-1)\big)G_{n+1,p-1} - \tau_g(n,p)G_{n,p} \Big] \\ &+ \nu_p\Big((p+1)G_{n,p+1} - pG_{n,p} \Big) \\ &+ \tau_g(n+1,p)\,c(n+1,p)\,G_{n+1,p} \end{align*}\]

Learning to code confers a collective benefit on individuals \(\tau_g(n,p; \alpha, \beta) \propto \frac{\bar{Z}_{n,p}}{Z_{n,p}}\), where

\[\log(Z_{n,p}) \sim \alpha * n + \beta * p\] \[\log(\bar{Z}_{n,p}) \sim \alpha (n-1) +\beta (c * p + (1-c)(p+1))\]

We can think of \(\bar{Z}_{n,p}\) as the potential benefits over \(Z_{n,p}\). Reorganizing the terms, we get:

\[\begin{align*} \log[\tau_g(n,p; \alpha, \beta)] &= \alpha (n-1) +\beta (c * p + (1-c)(p+1)) - (\alpha * n + \beta * p) \\ &= -\alpha + \beta(1-c) \end{align*}\]

Note that \(\tau_g\) ends up being a function of \(n, p\) through the cost function: \[c(n,p) = c_0*e^{-\frac{p}{n}}\]

You can explore both functions below:

Cost function

// Cost of learning to code for n non-coders and i coders:
// c_0 scaled by exp(-i/n), so the cost equals c_0 when i is 0.
function cost_prog(n, i, c_0) {
  const coderRatio = i / n;
  return c_0 * Math.exp(-coderRatio);
}
// Alternative cost: c_0 scaled by exp(-i/(n+i)), i.e. the exponent uses the
// share of coders in the whole group instead of the coder/non-coder ratio.
function cost_prog2(n, i, c_0) {
  const coderShare = i / (n + i);
  return c_0 * Math.exp(-coderShare);
}

max_gr_size = 20
viewof N = Inputs.range([1, max_gr_size], {value: 20, step: 1, label: "group Size"})
viewof coder = Inputs.range([0, (N-1)], {value: 10, step: 1, label: "# coder"})
viewof c_0 = Inputs.range([0, 1], {value: 0.95, step: 0.01, label: "c₀"})
viewof nc = Inputs.range([1, N], {value: (N-coder), step: 1., label: "# non-coder", disabled:true})

c(n,p) = c₀ * exp(-p/n)
c(n,p) = c₀ * exp(-p/(n+p))

// Non-coder count implied by the two sliders.
non_coder = N - coder
// Candidate coder counts 0 .. N-1.
xs = Array.from({length: N}, (_, k) => k);
// Cost for each candidate coder count, with the non-coder count held fixed.
ys = xs.map(k => cost_prog(non_coder, k, c_0))


// Line chart of the cost values in ys.
// NOTE(review): lineY places points at the array index of ys (0 .. N-1), which
// is the coder count, while the axis label reads p/n; confirm the intended label.
Plot.plot({
    height: 400,
    width: 450,
    grid: true,
    x: { label: "p/n →" },
    y: { label: "↑ c(n,p)", domain: [0, 1] },
    marks: [Plot.lineY(ys)]
})

\(p/n\) \(\Rightarrow\) / =

\(c(n,p)\) = (c=1 means that non-coders always fail to learn to code; c=0 means non-coders always succeed)

Non-programmers can still learn to code when \(p=0\) because of \(c_0\)

I would expect a bigger difference when we go from no programmers in the team to one programmer.

// Candidate coder counts 0 .. N-1 for the alternative cost function.
x2s = Array.from({length: N}, (_, k) => k);
// Alternative cost (cost_prog2) for each candidate coder count.
y2s = x2s.map(k => cost_prog2(non_coder, k, c_0))

// Line chart of the alternative cost values in y2s.
// NOTE(review): points sit at the array index of y2s (the coder count), while
// the axis label reads p/n; confirm the intended label.
Plot.plot({
    height: 400,
    width: 450,
    grid: true,
    x: { label: "p/n →" },
    y: { label: "↑ c(n,p)", domain: [0, 1] },
    marks: [Plot.lineY(y2s)]
})

Group benefits

// Group benefit τ = exp(-α + β(1 - c)), where c = cost_prog(n, i, c_0).
//   n     - number of non-coders
//   i     - number of coders
//   alpha - benefit weight α
//   beta  - benefit weight β
//   c_0   - baseline cost passed to cost_prog; defaults to 1, the value that
//           was previously hard-coded, so existing four-argument calls are
//           unchanged. Pass the c₀ slider value (or 0.95, as in the Julia
//           model) to keep the chart consistent with the rest of the notebook.
function tau(n, i, alpha, beta, c_0 = 1) {
    const c = cost_prog(n, i, c_0)
    return Math.exp(-alpha + beta*(1 - c))
}

tau_max_gr_size = 20
viewof tau_alpha = Inputs.range([2, 4], {value: 1., step: 1, label: "α", format: x => 10**-x})
viewof tau_beta = Inputs.range([1, 3], {value: 1., step: 1, label: "β", format: x => 10**-x})
viewof tau_N = Inputs.range([0, tau_max_gr_size], {value: 20, step: 1, label: "group Size"})
viewof tau_coder = Inputs.range([1, tau_max_gr_size], {value: 10, step: 1, label: "# coder"})
viewof tau_nc = Inputs.range([1, max_gr_size], {value: (tau_N-tau_coder), step: 1., label: "# non-coder", disabled:true})

τ(n,p) 1

// Non-coder count implied by the τ-section sliders.
tau_non_coder = tau_N - tau_coder
// Candidate coder counts 0 .. tau_N-1.
tau_xs = Array.from({length: tau_N}, (_, k) => k);
// Group benefit for each candidate coder count; the sliders store exponents,
// so α and β are recovered as 10^-x.
tau_ys = tau_xs.map(k => {
  const a = 10**-tau_alpha
  const b = 10**-tau_beta
  return tau(tau_non_coder, k, a, b)
})

// Line chart of the group-benefit values in tau_ys.
// NOTE(review): points sit at the array index of tau_ys (the coder count),
// while the axis label reads p/n; confirm the intended label.
Plot.plot({
    height: 400,
    width: 450,
    grid: true,
    x: { label: "p/n →" },
    y: { label: "↑ τ(α,β;n,p)" },
    marks: [Plot.lineY(tau_ys)]
})

\(p/n\) \(\Rightarrow\) / =

Julia model

Initialization scheme

"""
    initialize_u0(; N::Int=20)

Return a uniform initial state as an `ArrayPartition` of `N + 1` vectors, each of
length `N + 1`. The extra slot accounts for the zero count. Every entry equals
`1 / (N + 1)^2`, so the entries sum to one.
"""
function initialize_u0(; N::Int=20)
  side = N + 1                    # add a slot for the zeroth case
  weight = 1 / (side * side)      # same mass in every cell
  rows = [fill(weight, side) for _ in 1:side]
  return ArrayPartition(Tuple(rows))
end

# Model parameters. The prose above calls νₙ and νₚ graduation rates.
μ  = 0.001   # inflow of new students, who arrive as non-coders
νₙ = 0.01    # death rate of non-coders
νₚ = 0.05    # death rate of coders
α  = 0.01    # benefits for non-coders
β  = 0.1     # benefits for coders
p  = [μ, νₙ, νₚ, α, β]   # same order in which life_cycle_research_groups! unpacks it

# Initial condition and integration window.
n = 9
u₀ = initialize_u0(N=n)
tspan = (0.0, 1000.0)

# Cost function: 0.95 scaled by exp(-i/n) for n non-coders and i coders.
function c(n, i)
  return 0.95 * exp(-i / n)
end

# Group benefits: exp(-α + β(1 - c(n, i))).
function τ(n, i, α, β)
  return exp(-α + β * (1 - c(n, i)))
end

"""
    life_cycle_research_groups!(du, u, p, t)

In-place right-hand side of the research-group master equation.

`u` and `du` are accessed as `u.x[n][i]`: `n` indexes the number of non-coders and `i`
the number of coders, both shifted by one so that index 1 stands for a count of zero.
`p` is unpacked as `(μ, νₙ, νₚ, α, β)`. `t` is unused. Every entry of `du` is
overwritten. The file-level functions `c` (cost) and `τ` (group benefits) are called.

The state dimensions are read from `u` itself, so the function no longer depends on
the global `u₀`. Per-state tracing is emitted through `@debug` and stays silent unless
debug logging is enabled.
"""
function life_cycle_research_groups!(du, u, p, t)
  G = u
  N = length(u.x)          # number of non-coder states
  P = length(first(u.x))   # number of coder states, taken from the state being integrated
  μ, νₙ, νₚ, α, β = p

  for n=1:N, i=1:P
    @debug "life_cycle_research_groups!" n i value=G.x[n][i]
    coder, non_coder = i-1, n-1   # we distinguish indices from actual values.

    du.x[n][i] = 0

    # 1st term (gain): a new non-coder joins a group that had one fewer non-coder.
    non_coder > 0 && ( du.x[n][i] += μ*(G.x[n-1][i]) )

    # Losses that apply to every state: a non-coder or a coder leaves.
    du.x[n][i] -= νₙ*non_coder*G.x[n][i]
    du.x[n][i] -= νₚ*coder*G.x[n][i]

    # The state with one more coder does not exist for i == P.
    # NOTE(review): the τ loss below is skipped for i == P, while the 6th term still
    # feeds row n from row n+1 at i == P; confirm this boundary treatment is intended.
    if i < P
      # We don't want to pass non_coder = 0 to τ().
      non_coder > 0 && ( du.x[n][i] -= τ(non_coder, coder, α, β)*G.x[n][i] )  # 4th term
      du.x[n][i] += νₚ*(coder+1)*G.x[n][i+1]                                   # 5th term
    end

    # The state with one more non-coder does not exist for n == N.
    if n < N
      du.x[n][i] -= μ*G.x[n][i]                                                # 1st term
      du.x[n][i] += τ(non_coder+1, coder, α, β)*(c(non_coder+1, coder))*G.x[n+1][i]  # 6th term
      du.x[n][i] += νₙ*(non_coder+1)*G.x[n+1][i]                               # 2nd term
      # 3rd term: a non-coder in state (non_coder+1, coder-1) becomes a coder.
      coder > 0 && ( du.x[n][i] += τ(non_coder+1, coder-1, α, β)*(1-c(non_coder+1, coder-1))*G.x[n+1][i-1] )
    end
  end
  return nothing
end

Output

// Load the attached data.json.
data = FileAttachment("data.json").json()
// Split every key of data on "_" into its parts. The plot cell rebuilds keys
// as N_μ_νₙ_νₚ_α_β, so the parts are presumably those values in that order.
p = Object.keys(data).map(d => d.split("_"))

// [min, max] of the i-th key part across all keys, parsed as floats.
minmax = (p, i) => d3.extent(p.map(d => parseFloat(d[i])))

// One slider per key part; each range is taken from the values found in the keys.
// NOTE(review): the plot cell looks up data by the exact string form of these
// values, so each step must land on values present in data.json; confirm the
// step sizes and the string-typed defaults against the data.
viewof N_    = Inputs.range(minmax(p,0), {step: 1, label: "N", value:"4"})
viewof mu    = Inputs.range(minmax(p,1), {step: 0.03, label: "μ", value:"0.0001"})
viewof nu_n  = Inputs.range(minmax(p,2), {step: 0.05, label: "νₙ", value:"0.01"})
viewof nu_p  = Inputs.range(minmax(p,3), {step: 0.1,  label: "νₚ", value:"0.05"})
viewof alpha = Inputs.range(minmax(p,4), {step: 0.15, label: "α", value:"0.01"})
viewof beta  = Inputs.range(minmax(p,5), {step: 0.05, label: "β", value:"0.1"})

Plot

f = (x) => x.toPrecision() ? x.toPrecision(2) : x

Plot.plot({
  x: {type:"log"},
  y: {domain: [0,1]},
  color: {scheme: "reds", type: "ordinal", legend: true},
  marks: [
    Plot.line(
      data[`${N_}_${mu}_${nu_n}_${nu_p}_${alpha}_${beta}`], {
        x: 'timesteps', y: "value", stroke: "N"
        }),
    Plot.dot(
      data[`${N_}_${mu}_${nu_n}_${nu_p}_${alpha}_${beta}`], {
        x: 'timesteps', y: "value", stroke: "N"
        })
  ]
})

Algebraic model

Julia model

Output

Takeaways: