major update, thread safety, improved efficiency

2026-06-13 02:02:14 -07:00 · 2021-06-11 15:50:25 +02:00 · 2021-06-11 15:50:25 +02:00 · b1303b9b79
commit b1303b9b79
parent f06635b64b
10 changed files with 693 additions and 350 deletions
--- a/src/WignerSymbols.jl
+++ b/src/WignerSymbols.jl
@ -2,16 +2,24 @@ __precompile__(true)
 module WignerSymbols
 export δ, Δ, clebschgordan, wigner3j, wigner6j, racahV, racahW, HalfInteger

-using Base.GMP.MPZ
 using HalfIntegers
 using RationalRoots
+using LRUCache
 const RRBig = RationalRoot{BigInt}
 import RationalRoots: _convert

+include("growinglist.jl")
 include("primefactorization.jl")
+convert(BigInt, primefactorial(401)) # trigger compilation and generate some fixed data

-const Wigner3j = Dict{Tuple{UInt,UInt,UInt,Int,Int},Tuple{Rational{BigInt},Rational{BigInt}}}()
-const Wigner6j = Dict{NTuple{6,UInt},Tuple{Rational{BigInt},Rational{BigInt}}}()
+const Key3j = Tuple{UInt,UInt,UInt,Int,Int}
+const Key6j = NTuple{6,UInt}
+
+# const Wigner3j = Dict{Key3j,Tuple{Rational{BigInt},Rational{BigInt}}}()
+# const Wigner6j = Dict{Key6j,Tuple{Rational{BigInt},Rational{BigInt}}}()
+#
+const Wigner3j = LRU{Key3j,Tuple{Rational{BigInt},Rational{BigInt}}}(; maxsize = 10^6)
+const Wigner6j = LRU{Key6j,Tuple{Rational{BigInt},Rational{BigInt}}}(; maxsize = 10^6)

 # check integerness and correctness of (j,m) angular momentum
 ϵ(j, m) = (abs(m) <= j && ishalfinteger(j) && isinteger(j-m) && isinteger(j+m))
@ -44,7 +52,8 @@ function Δ(T::Type{<:Real}, j₁, j₂, j₃)
        return zero(T)
    end
    n, d = Δ²(j₁, j₂, j₃)
-    return convert(T, signedroot(RationalRoot{BigInt}, n//d))
+    r = Base.unsafe_rational(n, d)
+    return convert(T, signedroot(RationalRoot{BigInt}, r))
 end

 """
@ -64,6 +73,11 @@ function wigner3j(T::Type{<:Real}, j₁, j₂, j₃, m₁, m₂, m₃ = -m₁-m
    for (jᵢ,mᵢ) in ((j₁, m₁), (j₂, m₂), (j₃, m₃))
        ϵ(jᵢ, mᵢ) || throw(DomainError((jᵢ, mᵢ), "invalid combination (jᵢ, mᵢ)"))
    end
+    return _wigner3j(T, HalfInteger.((j₁, j₂, j₃, m₁, m₂, m₃))...)
+end
+
+function _wigner3j(T::Type{<:Real}, j₁::HalfInteger, j₂::HalfInteger, j₃::HalfInteger,
+                                    m₁::HalfInteger, m₂::HalfInteger, m₃::HalfInteger)
    # check triangle condition and m₁+m₂+m₃ == 0
    if !δ(j₁, j₂, j₃) || !iszero(m₁+m₂+m₃)
        return zero(T)
@ -74,9 +88,9 @@ function wigner3j(T::Type{<:Real}, j₁, j₂, j₃, m₁, m₂, m₃ = -m₁-m
    # TODO: do we also want to use Regge symmetries?
    α₁ = convert(Int, j₂ - m₁ - j₃ ) # can be negative
    α₂ = convert(Int, j₁ + m₂ - j₃ ) # can be negative
-    β₁ = convert(Int, j₁ + j₂ - j₃ )
-    β₂ = convert(Int, j₁ - m₁ )
-    β₃ = convert(Int, j₂ + m₂ )
+    β₁ = convert(UInt, j₁ + j₂ - j₃ )
+    β₂ = convert(UInt, j₁ - m₁ )
+    β₃ = convert(UInt, j₂ + m₂ )

    # extra sign in definition: α₁ - α₂ = j₁ + m₂ - j₂ + m₁ = j₁ - j₂ + m₃
    sgn = isodd(α₁ - α₂) ? -sgn : sgn
@ -90,8 +104,10 @@ function wigner3j(T::Type{<:Real}, j₁, j₂, j₃, m₁, m₂, m₃ = -m₁-m

        snum, rnum = splitsquare(s1n*s2n)
        sden, rden = splitsquare(s1d)
-        s = convert(BigInt, snum) // convert(BigInt, sden)
-        r = convert(BigInt, rnum) // convert(BigInt, rden)
+        snum, sden = divgcd!(snum, sden)
+        rnum, rden = divgcd!(rnum, rden)
+        s = Base.unsafe_rational(convert(BigInt, snum), convert(BigInt, sden))
+        r = Base.unsafe_rational(convert(BigInt, rnum), convert(BigInt, rden))
        s *= compute3jseries(β₁, β₂, β₃, α₁, α₂)
        Wigner3j[(β₁, β₂, β₃, α₁, α₂)] = (r,s)
    end
@ -151,7 +167,11 @@ function wigner6j(T::Type{<:Real}, j₁, j₂, j₃, j₄, j₅, j₆)
    for jᵢ in (j₁, j₂, j₃, j₄, j₅, j₆)
        (ishalfinteger(jᵢ) && jᵢ >= zero(jᵢ)) || throw(DomainError("invalid jᵢ", jᵢ))
    end
+    return _wigner6j(T, HalfInteger.((j₁, j₂, j₃, j₄, j₅, j₆))...)
+end

+function _wigner6j(T::Type{<:Real}, j₁::HalfInteger, j₂::HalfInteger, j₃::HalfInteger,
+                                    j₄::HalfInteger, j₅::HalfInteger, j₆::HalfInteger)
    α̂₁ = (j₁, j₂, j₃)
    α̂₂ = (j₁, j₆, j₅)
    α̂₃ = (j₂, j₄, j₆)
@ -186,10 +206,10 @@ function wigner6j(T::Type{<:Real}, j₁, j₂, j₃, j₄, j₅, j₆)

        snum, rnum = splitsquare(n₁ * n₂ * n₃ * n₄)
        sden, rden = splitsquare(d₁ * d₂ * d₃ * d₄)
-        snu, sden = divgcd!(snum, sden)
-        rnu, rden = divgcd!(rnum, rden)
-        s = convert(BigInt, snum) // convert(BigInt, sden)
-        r = convert(BigInt, rnum) // convert(BigInt, rden)
+        snum, sden = divgcd!(snum, sden)
+        rnum, rden = divgcd!(rnum, rden)
+        s = Base.unsafe_rational(convert(BigInt, snum), convert(BigInt, sden))
+        r = Base.unsafe_rational(convert(BigInt, rnum), convert(BigInt, rden))
        s *= compute6jseries(β₁, β₂, β₃, α₁, α₂, α₃, α₄)

        Wigner6j[(β₁, β₂, β₃, α₁, α₂, α₃)] = (r, s)
@ -223,12 +243,13 @@ end
 # squared triangle coefficient
 function Δ²(j₁, j₂, j₃)
    # also checks the triangle conditions by converting to unsigned integer:
-    n1 = primefactorial( convert(UInt, + j₁ + j₂ - j₃) )
+    n1 = copy(primefactorial( convert(UInt, + j₁ + j₂ - j₃) ))
    n2 = primefactorial( convert(UInt, + j₁ - j₂ + j₃) )
    n3 = primefactorial( convert(UInt, - j₁ + j₂ + j₃) )
-    d = primefactorial( convert(UInt, j₁ + j₂ + j₃ + 1) )
+    num = mul!(mul!(n1, n2), n3)
+    den = copy(primefactorial( convert(UInt, j₁ + j₂ + j₃ + 1) ))
    # result
-    return (n1*n2*n3), d
+    return divgcd!(num, den)
 end

 # reorder parameters determining the 3j symbol to canonical order:
@ -278,14 +299,30 @@ function compute3jseries(β₁, β₂, β₃, α₁, α₂)
    dens = Vector{T}(undef, length(krange))
    for (i, k) in enumerate(krange)
        num = iseven(k) ? one(T) : -one(T)
-        den = primefactorial(k)*primefactorial(k-α₁)*primefactorial(k-α₂)*
-            primefactorial(β₁-k)*primefactorial(β₂-k)*primefactorial(β₃-k)
-        nums[i], dens[i] = divgcd!(num, den)
+        den = copy(primefactorial(k))
+        den = mul!(mul!(den, primefactorial(k-α₁)), primefactorial(k-α₂))
+        den = mul!(mul!(mul!(den, primefactorial(β₁-k)),
+                                    primefactorial(β₂-k)),
+                                        primefactorial(β₃-k))
+        nums[i], dens[i] = num, den
    end
    den = commondenominator!(nums, dens)
    totalnum = sumlist!(nums)
    totalden = convert(BigInt, den)
-    return totalnum//totalden
+    for n = 1:length(den.powers)
+        p = bigprime(n)
+        while den.powers[n] > 0
+            q, r = divrem(totalnum, p)
+            if iszero(r)
+                totalnum = q
+                den.powers[n] -= 1
+            else
+                break
+            end
+        end
+    end
+    totalden = convert(BigInt, den)
+    return Base.unsafe_rational(totalnum, totalden)
 end

 # compute the sum appearing in the 6j symbol
@ -296,15 +333,32 @@ function compute6jseries(β₁, β₂, β₃, α₁, α₂, α₃, α₄)
    nums = Vector{T}(undef, length(krange))
    dens = Vector{T}(undef, length(krange))
    for (i, k) in enumerate(krange)
-        num = iseven(k) ? primefactorial(k+1) : -primefactorial(k+1)
-        den = primefactorial(k-α₁)*primefactorial(k-α₂)*primefactorial(k-α₃)*
-            primefactorial(k-α₄)*primefactorial(β₁-k)*primefactorial(β₂-k)*primefactorial(β₃-k)
+        num = iseven(k) ? copy(primefactorial(k+1)) : neg!(copy(primefactorial(k+1)))
+        den = copy(primefactorial(k-α₁))
+        den = mul!(mul!(mul!(den, primefactorial(k-α₂)),
+                                    primefactorial(k-α₃)),
+                                        primefactorial(k-α₄))
+        den = mul!(mul!(mul!(den, primefactorial(β₁-k)),
+                                    primefactorial(β₂-k)),
+                                        primefactorial(β₃-k))
        nums[i], dens[i] = divgcd!(num, den)
    end
    den = commondenominator!(nums, dens)
    totalnum = sumlist!(nums)
+    for n = 1:length(den.powers)
+        p = bigprime(n)
+        while den.powers[n] > 0
+            q, r = divrem(totalnum, p)
+            if iszero(r)
+                totalnum = q
+                den.powers[n] -= 1
+            else
+                break
+            end
+        end
+    end
    totalden = convert(BigInt, den)
-    return totalnum//totalden
+    return Base.unsafe_rational(totalnum, totalden)
 end

 end # module
--- a/src/growinglist.jl
+++ b/src/growinglist.jl
@ -0,0 +1,154 @@
+using Base.Threads: Atomic, SpinLock
+
+# ListSegment represents a segment from a GrowingList; it has a list `data` to hold the elements, filled up to `currentlength`, and possibly a reference to the next segment, if it is not the final segment.
+mutable struct ListSegment{T}
+    data::Vector{T}
+    currentlength::Int
+    next::Base.RefValue{ListSegment{T}}
+end
+ListSegment{T}(data::Vector{T}, currentlength::Int) where T =
+    ListSegment{T}(data, currentlength, Ref{ListSegment{T}}())
+
+# Fetch element `i` from the chain of segments starting at `l`. No bounds checking is
+# done: the caller guarantees that element `i` has already been stored.
+function _unsafe_getindex(l::ListSegment, i::Int)
+    segment = l
+    offset = i
+    # hop to the next segment until the remaining offset falls inside the current one
+    while offset > segment.currentlength
+        offset -= segment.currentlength
+        segment = segment.next[]
+    end
+    return getindex(segment.data, offset)
+end
+
+# Return the element at position `n` of the chain of segments starting at `l`, or store
+# `default` there when `n` is the first free position and return `default`. When all
+# existing segments are full, a new segment with room for `newlength` elements (at least
+# one) is appended to the chain.
+# In itself not thread safe: should be protected by the lock in the parent GrowingList.
+function _unsafe_get!(l::ListSegment{T}, n::Int, default, newlength) where T
+    N = length(l.data)
+    if n > N
+        if isassigned(l.next)
+            # position lies beyond this segment: delegate to the next one
+            return _unsafe_get!(l.next[], n - N, default, newlength)
+        else
+            # the new segment must at least hold the element being stored; a requested
+            # length of zero would otherwise fail with a BoundsError on the next line
+            newsegment = Vector{T}(undef, max(newlength, 1))
+            newsegment[1] = default
+            l.next = Ref(ListSegment{T}(newsegment, 1))
+            return default
+        end
+    else
+        if n <= l.currentlength
+            # element already present: plain lookup
+            @inbounds return getindex(l.data, n)
+        else
+            # only the first free slot of the segment can be filled
+            @assert n == l.currentlength+1
+            l.data[n] = default
+            l.currentlength += 1
+            return default
+        end
+    end
+end
+
+"""
+    GrowingList{T} <: AbstractVector{T}
+
+    GrowingList([iter,]; sizehint = max(16, length(iter)), growthfactor = 2.)
+
+A thread safe vector / list data structure where new elements can be added at the back.
+Once an element is set, it cannot be changed or removed. This ensures thread safe
+`getindex` of that element without requiring a lock. The `length` of a `GrowingList`
+instance can also be probed without a lock, but the return value will be a lower bound,
+i.e. the list can already have increased in length at the same time.
+
+New elements can be added using the syntax
+`get!(l::GrowingList, i::Int, value)`
+`get!(value_generator::Callable, l::GrowingList, i::Int)`
+
+where the new element `value` or `value_generator()` will only be added if `i` is
+`length(l)+1`. If multiple tasks or threads try to `get!` the same index `i`, only one of
+them will actually be adding that element. The `value` or `value_generator()` produced by
+the different threads should be the same to avoid unpredictable results.
+
+The list is grown by adding new segments using a linked list data structure. This
+guarantees that existing data never has to move in memory, which is required in order to
+make `getindex` thread safe without a lock.
+"""
+mutable struct GrowingList{T} <: AbstractVector{T}
+    first::ListSegment{T}       # head of the linked list of segments
+    totallength::Atomic{Int}    # number of stored elements, readable without the lock
+    growthfactor::Float64       # determines the size of newly added segments
+    lock::SpinLock              # serializes insertions in `get!`
+    # Build a list from the elements of `iter`. The first segment has room for `sizehint`
+    # elements; elements of `iter` that do not fit are stored in additional segments.
+    function GrowingList{T}(iter;
+                            sizehint = max(16, length(iter)),
+                            growthfactor = 2.) where {T}
+        firstsegment = Vector{T}(undef, sizehint)
+        i = 0
+        next = iterate(iter)
+        # fill the first segment directly
+        @inbounds while i < sizehint && next !== nothing
+            i += 1
+            val, state = next
+            firstsegment[i] = val
+            next = iterate(iter, state)
+        end
+        first = ListSegment{T}(firstsegment, i)
+        # remaining elements are appended one by one; `_unsafe_get!` creates the extra
+        # segments (the 4-argument call has to be `_unsafe_get!`, `_unsafe_getindex`
+        # only accepts a segment and an index)
+        while next !== nothing
+            i += 1
+            val, state = next
+            _unsafe_get!(first, i, val, ceil(Int, (i-1)*growthfactor))
+            next = iterate(iter, state)
+        end
+        return new{T}(first, Atomic{Int}(i), growthfactor, SpinLock())
+    end
+end
+GrowingList(v::Vector{T}; sizehint = max(16, length(v)), growthfactor = 2.) where {T} =
+    GrowingList{T}(v; sizehint = sizehint, growthfactor = growthfactor)
+
+GrowingList{T}(; sizehint = 16, growthfactor = 2.) where {T} =
+    GrowingList{T}((); sizehint = sizehint, growthfactor = growthfactor)
+
+GrowingList(; sizehint = 16, growthfactor = 2.) =
+    GrowingList{Any}((); sizehint = sizehint, growthfactor = growthfactor)
+
+Base.length(l::GrowingList) = l.totallength[]
+Base.size(l::GrowingList) = (length(l),)
+
+@inline function Base.getindex(l::GrowingList, n::Int)
+    @boundscheck checkbounds(l, n)
+    return _unsafe_getindex(l.first, n)
+end
+
+# Return element `n` of `l` if it already exists; otherwise store `default` at position
+# `n`, which then has to be the next free position `length(l)+1`, and return `default`.
+# Throws an `ArgumentError` if `n` lies beyond the next free position.
+function Base.get!(l::GrowingList, n::Int, default)
+    # fast path without lock for elements that are already present
+    n <= l.totallength[] && return _unsafe_getindex(l.first, n)
+    lock(l.lock)
+    try
+        len = l.totallength[]
+        if n <= len # try again, maybe already ok now
+            return _unsafe_getindex(l.first, n)
+        elseif n == len+1
+            _unsafe_get!(l.first, n, default, ceil(Int, (l.growthfactor-1)*len))
+            # publish the new length only after the element has been stored
+            Base.Threads.atomic_add!(l.totallength, 1)
+            return default
+        else
+            throw(ArgumentError("can only insert new element at next index: $(len+1)"))
+        end
+    finally
+        # release the lock on every exit path, including exceptions
+        unlock(l.lock)
+    end
+end
+
+# Return element `n` of `l` if it already exists; otherwise store the result of
+# `default()` at position `n`, which then has to be the next free position `length(l)+1`,
+# and return that result. Throws an `ArgumentError` if `n` lies beyond the next free
+# position.
+function Base.get!(default::Base.Callable, l::GrowingList, n::Int)
+    # fast path without lock for elements that are already present
+    n <= l.totallength[] && return _unsafe_getindex(l.first, n)
+    # evaluate the generator before taking the lock, so the lock is only held briefly
+    v = default()
+    lock(l.lock)
+    try
+        len = l.totallength[]
+        if n <= len # try again, maybe already ok now
+            return _unsafe_getindex(l.first, n)
+        elseif n == len+1
+            _unsafe_get!(l.first, n, v, ceil(Int, (l.growthfactor-1)*len))
+            # publish the new length only after the element has been stored
+            Base.Threads.atomic_add!(l.totallength, 1)
+            return v
+        else
+            throw(ArgumentError("can only insert new element at next index: $(len+1)"))
+        end
+    finally
+        # release the lock on every exit path, including exceptions
+        unlock(l.lock)
+    end
+end
--- a/src/primefactorization.jl
+++ b/src/primefactorization.jl
@ -1,15 +1,15 @@
 using Primes: isprime
 import Base.divgcd

-const primetable =
-    [2,3,5]
-const factortable =
-    [UInt8[], UInt8[1], UInt8[0,1], UInt8[2], UInt8[0,0,1]]
-const factorialtable =
-    [UInt32[], UInt32[], UInt32[1], UInt32[1,1], UInt32[3,1], UInt32[3,1,1]]
-const bigprimetable =
-    [[big(2)], [big(3)], [big(5)]]
-const bigone = Ref{BigInt}(big(1))
+using Base.GMP.MPZ
+
+const primetable = GrowingList([2, 3]; sizehint = 256)
+const factortable = GrowingList([UInt8[], UInt8[1], UInt8[0,1]]; sizehint = 1024)
+const factorialtable = GrowingList([UInt32[], UInt32[1], UInt32[1,1]]; sizehint = 1024)
+const bigprimetable = GrowingList([GrowingList([big(2)]; sizehint = 512),
+                                    GrowingList([big(3)]; sizehint = 256)];
+                                    sizehint = 256)
+const bigone = big(1)

 # Make a prime iterator
 struct PrimeIterator
@ -22,51 +22,71 @@ Base.eltype(::PrimeIterator) = Int

 # Get the `n`th prime; store all primes up to the `n`th if not yet available
 function prime(n::Int)
-    p = last(primetable)
-    while length(primetable) < n
+    # start from the last prime that is present in both tables
+    k = min(length(primetable), length(bigprimetable))
+    p = primetable[k]
+    while k < n
         p = p + 2
         while !isprime(p)
             p += 2
         end
-        push!(primetable, p)
-        push!(bigprimetable, [big(p)])
+        k += 1
+        # these lines do not get but set new elements; provided no other task did so earlier
+        get!(primetable, k, p)
+        get!(bigprimetable, k, GrowingList([big(p)]; sizehint = 256))
+        # other threads might have inserted additional entries,
+        # make sure they are finished with both primetable and bigprimetable
+        k = min(length(primetable), length(bigprimetable))
+        # resynchronize `p` with the refreshed `k`: the next candidate has to be the
+        # successor of entry `k`, otherwise a stale `p` ends up at the wrong index
+        p = primetable[k]
     end
-    @inbounds return primetable[n]
+    return primetable[n]
 end

 Base.iterate(::PrimeIterator, n = 1) = prime(n), n+1

 # get primes and their powers as `BigInt`, also cache all results
 function bigprime(n::Integer, e::Integer=1)
-    e == 0 && return bigone[]
+    e == 0 && return bigone
    p = prime(n) # triggers computation of prime(n) if necessary
-    @inbounds l = length(bigprimetable[n])
+    powerlist = bigprimetable[n]
+    l = length(powerlist)
    @inbounds while l < e
        # compute next prime power as approximate square of existing results
-        k = (l+1)>>1
-        push!(bigprimetable[n], bigprimetable[n][k]*bigprimetable[n][l+1-k])
        l += 1
+        k = l>>1
+        get!(powerlist, l, powerlist[k]*powerlist[l-k])
+        l = length(powerlist) # other threads might have inserted more powers
    end
-    @inbounds return bigprimetable[n][e]
+    @inbounds return powerlist[e]
 end

 # A custom `Integer` subtype to store an integer as its prime factorization
-struct PrimeFactorization{U<:Unsigned} <: Integer
+# mutable to allow in place update of sign
+mutable struct PrimeFactorization{U<:Unsigned} <: Integer
    powers::Vector{U}
    sign::Int8
+    PrimeFactorization{U}(powers::Vector, sign = one(Int8)) where {U<:Unsigned} =
+        new{U}(convert(Vector{U}, powers), sign)
+end
+# convenience constructor: normalizes powers to have last entry nonzero
+PrimeFactorization(powers::Vector{U}, sign = one(Int8)) where {U<:Unsigned} =
+    PrimeFactorization{U}(_normalize_powers!(powers), sign)
+
+function _normalize_powers!(v::Vector{<:Integer})
+    i = findlast(!iszero, v)
+    l = ifelse(i === nothing, 0, i)
+    l < length(v) && resize!(v, l)
+    return v
 end
-PrimeFactorization(powers::Vector{U}) where {U<:Unsigned} =
-    PrimeFactorization{U}(powers, one(Int8))

 # define our own factor function, returning an instance of PrimeFactorization
 function primefactor(n::Integer)
-    iszero(n) && return PrimeFactorization(UInt8[], zero(Int8))
+    iszero(n) && return PrimeFactorization{UInt8}(UInt8[], zero(Int8))
    sn = n < 0 ? -one(Int8) : one(Int8)
    n = abs(n)
    m = length(factortable)
    while m < abs(n)
        m += 1
-        powers = UInt8[] # should be sufficient for all integers up to 2^255
+        powers = UInt8[]
+        # should be sufficient for all integers up to 2^255
        a = m
        for p in primes()
            f = 0
@ -79,16 +99,18 @@ function primefactor(n::Integer)
            push!(powers, f)
            a == 1 && break
        end
-        push!(factortable, powers)
+        get!(factortable, m, powers)
+        m = length(factortable) # other threads may have inserted other entries
    end
-    @inbounds return PrimeFactorization(copy(factortable[n]), sn)
+    @inbounds return PrimeFactorization{UInt8}(factortable[n], sn)
 end

 function primefactorial(n::Integer)
-    n < 0 && throw(DomainError(n))
-    m = length(factorialtable)-1
+    n < 0 && throw(DomainError(n,"primefactorial only works for non-negative numbers"))
+    n <= 1 && return PrimeFactorization{UInt32}(UInt32[], one(Int8))
+    m = length(factorialtable)
    @inbounds while m < n
-        prevfactorial = factorialtable[m+1]
+        prevfactorial = factorialtable[m]
        m += 1
        f = primefactor(m).powers
        powers = copy(prevfactorial)
@ -98,36 +120,48 @@ function primefactorial(n::Integer)
        for k = 1:length(f)
            powers[k] += f[k]
        end
-        push!(factorialtable, powers)
+        get!(factorialtable, m, powers)
+        m = length(factorialtable)  # other threads may have inserted other entries
    end
-    @inbounds return PrimeFactorization(copy(factorialtable[n+1]))
+    @inbounds return PrimeFactorization{UInt32}(factorialtable[n])
 end

 # Methods for PrimeFactorization:
 Base.copy(a::PrimeFactorization) = PrimeFactorization(copy(a.powers), a.sign)
+function Base.copy!(c::PrimeFactorization, a::PrimeFactorization)
+    c.sign = a.sign
+    copy!(c.powers, a.powers)
+    return c
+end

 Base.one(::Type{PrimeFactorization{U}}) where {U<:Unsigned} =
-    PrimeFactorization(Vector{U}())
+    PrimeFactorization{U}(Vector{U}(), one(Int8))
 Base.zero(::Type{PrimeFactorization{U}}) where {U<:Unsigned} =
-    PrimeFactorization(Vector{U}(), zero(Int8))
+    PrimeFactorization{U}(Vector{U}(), zero(Int8))
+
+one!(c::PrimeFactorization) = (c.sign = one(Int8); empty!(c.powers); return c)
+zero!(c::PrimeFactorization) = (c.sign = zero(Int8); empty!(c.powers); return c)

 Base.promote_rule(P::Type{<:PrimeFactorization},::Type{<:Integer}) = P
-Base.promote_rule(P::Type{<:PrimeFactorization},::Type{BigInt}) = BigInt
+Base.promote_rule(::Type{<:PrimeFactorization},::Type{BigInt}) = BigInt
+Base.promote_rule(::Type{PrimeFactorization{U1}},
+                    ::Type{PrimeFactorization{U2}}) where {U1<:Unsigned, U2<:Unsigned} = PrimeFactorization{promote_type(U1, U2)}

 Base.convert(P::Type{<:PrimeFactorization}, n::Integer) = convert(P, primefactor(n))
-function Base.convert(::Type{BigInt}, a::PrimeFactorization)
-    A = one(BigInt)
+function _convert!(x::BigInt, a::PrimeFactorization)
+    MPZ.set!(x, bigone)
    for (n, e) in enumerate(a.powers)
        if !iszero(e)
-            MPZ.mul!(A, bigprime(n, e))
+            MPZ.mul!(x, bigprime(n, e))
        end
    end
-    return a.sign < 0 ? MPZ.neg!(A) : A
+    return a.sign < 0 ? MPZ.neg!(x) : x
 end
+Base.convert(::Type{BigInt}, a::PrimeFactorization) = _convert!(one(BigInt), a)
 Base.convert(::Type{PrimeFactorization{U}}, a::PrimeFactorization{U}) where {U<:Unsigned} =
    a
 Base.convert(::Type{PrimeFactorization{U}}, a::PrimeFactorization) where {U<:Unsigned} =
-    PrimeFactorization(convert(Vector{U}, a.powers), a.sign)
+    PrimeFactorization{U}(convert(Vector{U}, a.powers), a.sign)

 Base.:(==)(a::PrimeFactorization, b::PrimeFactorization) =
    a.powers == b.powers && a.sign == b.sign
@ -138,7 +172,8 @@ function Base.:<(a::PrimeFactorization, b::PrimeFactorization)
        return <(-b, -a)
    else
        ag, bg = divgcd(a, b)
-        if length(ag.powers) <= length(bg.powers) &&
+        ag == bg && return false
+        if length(ag.powers) <= length(bg.powers)
                all(k->ag.powers[k]<bg.powers[k], 1:length(ag.powers))
            return true
        else
@ -151,35 +186,107 @@ end
 # Addition and subtraction will require conversion to BigInt
 Base.sign(a::PrimeFactorization) = a.sign

-Base.:-(a::PrimeFactorization) = PrimeFactorization(a.powers, -a.sign)
-function Base.:*(a::PrimeFactorization{T}, b::PrimeFactorization{T}) where {T}
-    if a.sign == 0
-        return a
-    elseif b.sign ==0
-        return b
+neg!(a::PrimeFactorization) = (a.sign = -a.sign; return a)
+
+function mul!(c::PrimeFactorization, a::PrimeFactorization, b::PrimeFactorization)
+    if a.sign == 0 || b.sign == 0
+        zero!(c)
    else
-        return PrimeFactorization(_vadd!(copy(a.powers), b.powers), a.sign*b.sign)
+        c.sign = a.sign * b.sign
+        la = length(a.powers)
+        lb = length(b.powers)
+        lc = max(la, lb)
+        lc === length(c.powers) || resize!(c.powers, lc)
+        @inbounds for k = 1:min(la,lb)
+            c.powers[k] = +(a.powers[k], b.powers[k])
+        end
+        if c !== a
+            @inbounds for k = lb+1:la
+                c.powers[k] = a.powers[k]
+            end
+        end
+        @inbounds for k = la+1:lb
+            c.powers[k] = b.powers[k]
+        end
    end
+    return c
 end
-function Base.gcd(a::PrimeFactorization{T}, b::PrimeFactorization{T}) where {T}
-    if a.sign == 0
-        return b
-    elseif b.sign ==0
-        return a
+# unlike div, this one errors if the a is not divisible by b
+function divexact!(c::PrimeFactorization, a::PrimeFactorization, b::PrimeFactorization)
+    if iszero(a.sign)
+        zero!(c)
+    elseif iszero(b.sign)
+        throw(DivideError())
    else
-        return PrimeFactorization(_vmin!(copy(a.powers), b.powers))
+        c.sign = a.sign * b.sign
+        la = length(a.powers)
+        lb = length(b.powers)
+        if lb > la
+            throw(DivideError())
+        end
+        lc = la
+        if lb == lc
+            while lc > 0 && a.powers[lc] == b.powers[lc]
+                lc -= 1
+            end
+        end
+        lc == length(c.powers) || resize!(c.powers, lc)
+        @inbounds for k = 1:min(lb, lc)
+            if b.powers[k] > a.powers[k]
+                throw(DivideError())
+            end
+            c.powers[k] = a.powers[k] - b.powers[k]
+        end
+        if c !== a
+            @inbounds for k = lb+1:lc
+                c.powers[k] = a.powers[k]
+            end
+        end
    end
+    return c
 end
-function Base.lcm(a::PrimeFactorization{T}, b::PrimeFactorization{T}) where {T}
+function gcd!(c::PrimeFactorization, a::PrimeFactorization, b::PrimeFactorization)
    if a.sign == 0
-        return a
+        copy!(c.powers, b.powers)
    elseif b.sign ==0
-        return b
+        copy!(c.powers, a.powers)
    else
-        return PrimeFactorization(_vmax!(copy(a.powers), b.powers))
+        c.sign = one(Int8)
+        la = length(a.powers)
+        lb = length(b.powers)
+        lc = min(la, lb)
+        lc === length(c.powers) || resize!(c.powers, lc)
+        @inbounds for k = 1:lc
+            c.powers[k] = min(a.powers[k], b.powers[k])
+        end
    end
+    c.sign = one(Int8)
+    return c
+end
+function lcm!(c::PrimeFactorization, a::PrimeFactorization, b::PrimeFactorization)
+    if a.sign == 0 || b.sign == 0
+        return zero!(c)
+    else
+        c.sign = one(Int8)
+        la = length(a.powers)
+        lb = length(b.powers)
+        lc = max(la, lb)
+        lc === length(c.powers) || resize!(c.powers, lc)
+        @inbounds for k = 1:min(la,lb)
+            c.powers[k] = max(a.powers[k], b.powers[k])
+        end
+        if c !== a
+            @inbounds for k = lb+1:la
+                c.powers[k] = a.powers[k]
+            end
+        end
+        @inbounds for k = la+1:lb
+            c.powers[k] = b.powers[k]
+        end
+    end
+    c.sign = one(Int8)
+    return c
 end
-Base.divgcd(a::PrimeFactorization, b::PrimeFactorization) = divgcd!(copy(a), copy(b))
 function divgcd!(a::PrimeFactorization, b::PrimeFactorization)
    af, bf = a.powers, b.powers
    for k = 1:min(length(af), length(bf))
@ -187,26 +294,50 @@ function divgcd!(a::PrimeFactorization, b::PrimeFactorization)
        af[k] -= gk
        bf[k] -= gk
    end
-    while length(af) > 0 && iszero(last(af))
-        pop!(af)
-    end
-    while length(bf) > 0 && iszero(last(bf))
-        pop!(bf)
-    end
+    _normalize_powers!(a.powers)
+    _normalize_powers!(b.powers)
    return a, b
 end

+mul!(a::PrimeFactorization, b::PrimeFactorization) = mul!(a, a, b)
+divexact!(a::PrimeFactorization, b::PrimeFactorization) = divexact!(a, a, b)
+gcd!(a::PrimeFactorization, b::PrimeFactorization) = gcd!(a, a, b)
+lcm!(a::PrimeFactorization, b::PrimeFactorization) = lcm!(a, a, b)
+
+Base.:-(a::PrimeFactorization) = neg!(copy(a))
+function Base.:*(a::PrimeFactorization, b::PrimeFactorization)
+    P = promote_type(typeof(a), typeof(b))
+    if length(a.powers) >= length(b.powers)
+        return typeof(a) == P ? mul!(copy(a), b) : mul!(convert(P, a), b)
+    else
+        return typeof(b) == P ? mul!(copy(b), a) : mul!(convert(P, b), a)
+    end
+end
+function Base.lcm(a::PrimeFactorization, b::PrimeFactorization)
+    P = promote_type(typeof(a), typeof(b))
+    if length(a.powers) >= length(b.powers)
+        return typeof(a) == P ? lcm!(copy(a), b) : lcm!(convert(P, a), b)
+    else
+        return typeof(b) == P ? lcm!(copy(b), a) : lcm!(convert(P, b), a)
+    end
+end
+# Greatest common divisor of two prime factorizations; the result has positive sign.
+function Base.gcd(a::PrimeFactorization, b::PrimeFactorization)
+    P = promote_type(typeof(a), typeof(b))
+    # accumulate into a copy of the shorter factorization: `gcd!` truncates its result
+    # to the shorter of the two power lists
+    if length(a.powers) <= length(b.powers)
+        g = typeof(a) == P ? gcd!(copy(a), b) : gcd!(convert(P, a), b)
+    else
+        g = typeof(b) == P ? gcd!(copy(b), a) : gcd!(convert(P, b), a)
+    end
+    # `gcd!` can leave trailing zero powers; strip them so that `==` (which compares
+    # the power vectors) keeps working on the result
+    _normalize_powers!(g.powers)
+    return g
+end
+Base.divgcd(a::PrimeFactorization, b::PrimeFactorization) = divgcd!(copy(a), copy(b))
+# no promotion necessary, should be smaller than a
+divexact(a::PrimeFactorization, b::PrimeFactorization) = divexact!(copy(a), b)
+
 # split `a::PrimeFactorization` into a square `s` and a remainder `r`, such that
 # `a = s^2 * r` and the powers in the prime factorization of `r` are zero or one
 function splitsquare(a::PrimeFactorization)
    r = PrimeFactorization(map(p->convert(UInt8, isodd(p)), a.powers), a.sign)
-    while length(r.powers) > 0 && iszero(last(r.powers))
-        pop!(r.powers)
-    end
    s = PrimeFactorization(map(p->(p>>1), a.powers))
-    while length(s.powers) > 0 && iszero(last(s.powers))
-        pop!(s.powers)
-    end
    return s, r
 end

@ -215,13 +346,13 @@ end
 function commondenominator!(nums::Vector{P}, dens::Vector{P}) where {P<:PrimeFactorization}
    isempty(nums) && return one(P)
    # accumulate lcm of denominator
-    den = PrimeFactorization(copy(dens[1].powers))
+    den = copy(dens[1])
    for i = 2:length(dens)
-        _vmax!(den.powers, dens[i].powers)
+        lcm!(den, dens[i])
    end
    # rescale numerators
    for i = 1:length(nums)
-        _vsub!(_vadd!(nums[i].powers, den.powers), dens[i].powers)
+        divexact!(mul!(nums[i], den), dens[i])
    end
    return den
 end
@ -229,68 +360,17 @@ end
 # auxiliary function to compute sums of a list of PrimeFactorizations as quickly as possible
 function sumlist!(list::Vector{<:PrimeFactorization}, ind = 1:length(list))
    # first compute gcd to take out common factors
-    g = PrimeFactorization(copy(list[ind[1]].powers))
+    g = copy(list[ind[1]])
    for k in ind
-        _vmin!(g.powers, list[k].powers)
+        gcd!(g, list[k])
    end
    for k in ind
-        _vsub!(list[k].powers, g.powers)
+        divexact!(list[k], g)
    end
-    L = length(ind)
-    if L > 32
-        l = L >> 1
-        s = sumlist!(list, first(ind).+(0:l-1)) + sumlist!(list, first(ind).+(l:L-1))
-    else
-        # do sum
-        s = big(0)
-        for k in ind
-            MPZ.add!(s, convert(BigInt, list[k]))
-        end
+    s = big(0)
+    i = big(1)
+    for p in list
+        MPZ.add!(s, _convert!(i, p))
    end
-    return MPZ.mul!(s, convert(BigInt, g))
-end
-
-# Mutating vector methods that also grow and shrink as required
-function _vmin!(af::Vector{U}, bf::Vector{U}) where {U<:Unsigned}
-    while length(af) > length(bf)
-        pop!(af)
-    end
-    @inbounds for k = 1:length(af)
-        af[k] = min(af[k], bf[k])
-    end
-    while length(af) > 0 && iszero(last(af))
-        pop!(af)
-    end
-    return af
-end
-function _vmax!(af::Vector{U}, bf::Vector{U}) where {U<:Unsigned}
-    while length(bf) > length(af)
-        push!(af, zero(U))
-    end
-    @inbounds for k = 1:length(bf)
-        af[k] = max(af[k], bf[k])
-    end
-    return af
-end
-function _vadd!(af::Vector{U}, bf::Vector{U}) where {U<:Unsigned}
-    while length(bf) > length(af)
-        push!(af, zero(U))
-    end
-    @inbounds for k = 1:length(bf)
-        af[k] = +(af[k], bf[k])
-    end
-    return af
-end
-function _vsub!(af::Vector{U}, bf::Vector{U}) where {U<:Unsigned}
-    if length(bf) > length(af)
-        throw(OverflowError())
-    end
-    @inbounds for k = 1:length(bf)
-        bf[k] > af[k] && throw(OverflowError())
-        af[k] -= bf[k]
-    end
-    while length(af) > 0 && iszero(last(af))
-        pop!(af)
-    end
-    return af
+    return MPZ.mul!(s, _convert!(i, g))
 end