-0.1450763155960872
2026-03-11
module Forward

"""
    Dual{T}

A dual number for forward-mode automatic differentiation: `v` holds the
value and `dv` holds the derivative of that value with respect to the
input variable being differentiated.
"""
struct Dual{T}
    v::T
    dv::T
end

# Seed constructor: treat `x` as the variable of differentiation, so
# its derivative with respect to itself is one.
Dual(x::T) where {T} = Dual(x, one(x))

import Base: +, sin, exp, *, /

# Addition. The original only defined `scalar + Dual`; the symmetric
# `Dual + scalar` and `Dual + Dual` methods are added so expressions
# like `x + 2.0` and `x + x` also differentiate correctly.
(+)(a::T, x::Dual{T}) where {T} = Dual(a + x.v, x.dv)
(+)(x::Dual{T}, a::T) where {T} = Dual(x.v + a, x.dv)
(+)(x::Dual, y::Dual) = Dual(x.v + y.v, x.dv + y.dv)

# Product rule: d(yx) = x dy + y dx.
function (*)(y::Dual, x::Dual)
    Dual(y.v * x.v, x.v * y.dv + x.dv * y.v)
end

# Quotient rule: d(x/y) = dx/y - x dy / y².
function (/)(x::Dual, y::Dual)
    Dual(x.v / y.v, x.dv / y.v - x.v * y.dv / y.v^2)
end

# Chain rule for elementary functions.
exp(x::Dual) = Dual(exp(x.v), exp(x.v) * x.dv)
sin(x::Dual) = Dual(sin(x.v), cos(x.v) * x.dv)

"""
    fdx(f, x)

Evaluate `f` and its derivative at `x` using forward-mode automatic
differentiation. Returns the tuple `(f(x), f'(x))`.
"""
function fdx(f, x)
    out = f(Dual(x))
    (out.v, out.dv)
end

end # module Forward
Forward.fdx(f, 2.0)
(0.10839091026481387, -0.14507631594729084)
ForwardDiff.jl
using Distributions, BenchmarkTools
"""
    simulate_logit(observations, β)

Simulate data from a logit model with coefficient vector `β`. Regressors
are standard normal; outcomes are `true` when the latent index plus a
Logistic shock is nonnegative. Returns a named tuple `(y = ..., x = ...)`.
"""
function simulate_logit(observations, β)
    X = randn(observations, length(β))
    shocks = rand(Logistic(), observations)
    outcomes = (X * β .+ shocks) .>= 0.0
    return (y = outcomes, x = X)
end
"""
    logit_likelihood(β, y, x)

Log-likelihood of a logit model: `y` are Boolean outcomes, `x` the
regressor matrix, and `β` the coefficient vector.
"""
function logit_likelihood(β, y, x)
    # Success probability for each observation under the logistic link.
    probs = [cdf(Logistic(), xb) for xb in x * β]
    # Bernoulli log-likelihood: log(p) where y is true, log(1 - p) otherwise.
    return sum(log.(ifelse.(y, probs, 1.0 .- probs)))
end
# Simulation setup: n observations, k regressors, true coefficients all one.
n = 500
k = 3
β0 = ones(k)
(y,x) = simulate_logit(n,β0)
import ForwardDiff
@btime ∇L = ForwardDiff.gradient(b->logit_likelihood(b,y,x),β0)
  7.116 μs (18 allocations: 47.60 KiB)
3-element Vector{Float64}:
-4.544200516511661
0.8780749182470913
6.355796397761692
'znver5' is not a recognized processor for this target (ignoring processor)
'znver5' is not a recognized processor for this target (ignoring processor)
'znver5' is not a recognized processor for this target (ignoring processor)
'znver5' is not a recognized processor for this target (ignoring processor)
'znver5' is not a recognized processor for this target (ignoring processor)
'znver5' is not a recognized processor for this target (ignoring processor)
10.921 μs (20 allocations: 24.06 KiB)
3-element Vector{Float64}:
-1.4048486780873116e6
271458.61712383264
1.9649071679487631e6
6.364 μs (36 allocations: 48.09 KiB)
6.830 μs (55 allocations: 48.70 KiB)
(derivs = ([-4.5442005165116655, 0.8780749182470942, 6.35579639776169], nothing, nothing), val = -248.8578363670095)
Enzyme.API.runtimeActivity!(true) works around some errors. The three downsides above may be better now than two years ago, when I first wrote this slide.
# Move the simulated data into Reactant arrays so the likelihood can be
# compiled and differentiated through Reactant.
import Reactant
using Test
xr = Reactant.to_rarray(x)
# y is converted from Bool to Int before transfer — presumably because the
# traced likelihood uses `y` as an exponent; confirm against logit_likelihood_r.
yr = Reactant.to_rarray(Int.(y))
βr = Reactant.to_rarray(β0)
"""
    logit_likelihood_r(β, y, x)

Logit log-likelihood written with only Base operations (no Distributions
calls) so it can be traced and compiled by Reactant.
"""
function logit_likelihood_r(β, y, x)
    # Logistic cdf spelled out with exp.
    sigmoid(z) = 1 / (1 + exp(-z))
    probs = map(sigmoid, x * β)
    # Bernoulli log-likelihood in "power" form: log(p^y * (1 - p)^(1 - y)).
    return sum(t -> log(t[1]^t[2] * (1.0 - t[1])^(1 - t[2])), zip(probs, y))
end
# Sanity check: the Base-only likelihood agrees with the Distributions version.
@test logit_likelihood_r(β0, y, x) ≈ logit_likelihood(β0, y, x)
# Compile the reverse-mode gradient through Reactant.
# NOTE(review): `Enzyme` and `Const` are used here but not imported in this
# chunk — presumably `using Enzyme` appears elsewhere in the file; confirm.
∇llreact = Reactant.@compile Enzyme.gradient(Enzyme.ReverseWithPrimal,logit_likelihood_r, βr, Const(yr), Const(xr))
@btime ∇llreact(Enzyme.ReverseWithPrimal, logit_likelihood_r, βr, Const(yr), Const(xr))
Precompiling packages... 2484.2 ms ✓ Graphs 36300.9 ms ✓ Mooncake 2 dependencies successfully precompiled in 39 seconds. 64 already precompiled. Precompiling packages... 873.7 ms ✓ Mooncake → MooncakeLogExpFunctionsExt 1190.4 ms ✓ Mooncake → MooncakeSpecialFunctionsExt 2 dependencies successfully precompiled in 1 seconds. 75 already precompiled. Precompiling packages... 9809.4 ms ✓ Mooncake → MooncakeDistributionsExt 1 dependency successfully precompiled in 10 seconds. 100 already precompiled.
  18.175 μs (63 allocations: 41.34 KiB)
(-248.85783636700947, (Mooncake.NoTangent(), [-4.544200516511669, 0.8780749182470928, 6.355796397761697]))
log((((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) ^ ys) * ((1 - (1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1))) ^ (1 - ys))))
1×3 Matrix{FastDifferentiation.Node}:
(-(((((1 / (((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) ^ ys) * ((1 - (1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1))) ^ (1 - ys)))) * ((((1 - (1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1))) ^ (1 - ys)) * (ys * ((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) ^ (ys - 1)))) + -((((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) ^ ys) * ((1 - ys) * ((1 - (1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1))) ^ ((1 - ys) - 1))))))) * -(((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)))) * exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))))) * x1) … (-(((((1 / (((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) ^ ys) * ((1 - (1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1))) ^ (1 - ys)))) * ((((1 - (1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1))) ^ (1 - ys)) * (ys * ((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) ^ (ys - 1)))) + -((((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) ^ ys) * ((1 - ys) * ((1 - (1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1))) ^ ((1 - ys) - 1))))))) * -(((1 / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)) / (exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))) + 1)))) * exp(-((((x1 * β1) + (x2 * β2)) + (x3 * β3)))))) * x3)
Precompiling packages... 794.4 ms ✓ DifferentiationInterface → DifferentiationInterfaceMooncakeExt 1 dependency successfully precompiled in 1 seconds. 71 already precompiled. Precompiling packages... 832.0 ms ✓ DifferentiationInterface → DifferentiationInterfaceForwardDiffExt 1 dependency successfully precompiled in 1 seconds. 22 already precompiled.
3-element Vector{Float64}:
-4.544200516511671
0.8780749182470924
6.355796397761696
ReverseDiff.jl is a tape-based reverse mode package.
It is long lived and well tested, but it has some limitations. Importantly, code must be generic, and mutation of arrays is not allowed.
Tracker is a tape based reverse mode package. It was the default autodiff package in Flux before being replaced by Zygote. No longer under active development, but solid and still used.
Diffractor is an automatic differentiation package in development. It was once hoped to be the future of AD in Julia, but has been delayed. It plans to have both forward and reverse mode, but only forward mode is available so far.