2
\$\begingroup\$

I want to factor random values of n that range from uint.MinValue to uint.MaxValue as efficiently as possible. It is my understanding that wheel factorization is about as good as it gets and this is my attempt at a 2, 3, 5 implementation.

public static IEnumerable<T> EnumeratePrimeFactors<T>(this T value) where T : IBinaryInteger<T>, IUnsignedNumber<T> {
    if (BinaryIntegerConstants<T>.Four > value) { yield break; }
    if (BinaryIntegerConstants<T>.Five == value) { yield break; }
    if (BinaryIntegerConstants<T>.Seven == value) { yield break; }
    if (BinaryIntegerConstants<T>.Eleven == value) { yield break; }
    if (BinaryIntegerConstants<T>.Thirteen == value) { yield break; }

    var index = value;

    while (T.Zero == (index & T.One)/* enumerate factors of 2 */) {
        yield return BinaryIntegerConstants<T>.Two;

        index >>= 1;
    }
    while (T.Zero == (index % BinaryIntegerConstants<T>.Three)) { // enumerate factors of 3
        yield return BinaryIntegerConstants<T>.Three;

        index /= BinaryIntegerConstants<T>.Three;
    }
    while (T.Zero == (index % BinaryIntegerConstants<T>.Five)/* enumerate factors of 5 */) {
        yield return BinaryIntegerConstants<T>.Five;

        index /= BinaryIntegerConstants<T>.Five;
    }
    while (T.Zero == (index % BinaryIntegerConstants<T>.Seven)/* enumerate factors of 7 */) {
        yield return BinaryIntegerConstants<T>.Seven;

        index /= BinaryIntegerConstants<T>.Seven;
    }
    while (T.Zero == (index % BinaryIntegerConstants<T>.Eleven)/* enumerate factors of 11 */) {
        yield return BinaryIntegerConstants<T>.Eleven;

        index /= BinaryIntegerConstants<T>.Eleven;
    }
    while (T.Zero == (index % BinaryIntegerConstants<T>.Thirteen)/* enumerate factors of 13 */) {
        yield return BinaryIntegerConstants<T>.Thirteen;

        index /= BinaryIntegerConstants<T>.Thirteen;
    }

    var factor = BinaryIntegerConstants<T>.Seventeen;
    var limit = index.SquareRoot();

    if (factor <= limit) {
        do {
            while (T.Zero == (index % factor)/* enumerate factors of (30k - 13) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Two;

            while (T.Zero == (index % factor)/* enumerate factors of (30k - 11) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Four;

            while (T.Zero == (index % factor)/* enumerate factors of (30k - 7) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Six;

            while (T.Zero == (index % factor)/* enumerate factors of (30k - 1) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Two;

            while (T.Zero == (index % factor)/* enumerate factors of (30k + 1) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Six;

            while (T.Zero == (index % factor)/* enumerate factors of (30k + 7) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Four;

            while (T.Zero == (index % factor)/* enumerate factors of (30k + 11) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Two;

            while (T.Zero == (index % factor)/* enumerate factors of (30k + 13) */) {
                yield return factor;

                index /= factor;
            }

            factor += BinaryIntegerConstants<T>.Four;
            limit = index.SquareRoot();
        } while (factor <= limit);
    }

    if ((index != T.One) && (index != value)) {
        yield return index;
    }
}

Helpers:

public static class BinaryIntegerConstants<T> where T : IBinaryInteger<T>
{
    public static T Eleven    { get; }
    public static T Five      { get; }
    public static T Four      { get; }
    public static T Seven     { get; }
    public static T Seventeen { get; }
    public static T Six       { get; }
    public static int Size    { get; }
    public static T Thirteen  { get; }
    public static T Three     { get; }
    public static T Two       { get; }

    static BinaryIntegerConstants() {
        var size = int.CreateChecked(value: T.PopCount(value: T.AllBitsSet));

        Eleven    = T.CreateTruncating(value: 11U);
        Five      = T.CreateTruncating(value: 5U);
        Four      = T.CreateTruncating(value: 4U);
        Six       = T.CreateTruncating(value: 6U);
        Seven     = T.CreateTruncating(value: 7U);
        Seventeen = T.CreateTruncating(value: 17U);
        Size      = int.CreateChecked(value: size);
        Thirteen  = T.CreateTruncating(value: 13U);
        Three     = T.CreateTruncating(value: 3U);
        Two       = T.CreateTruncating(value: 2U);
    }
}
public static class BinaryIntegerFunctions
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static T As<T>(this bool value) where T : IBinaryInteger<T> =>
        T.CreateTruncating(value: Unsafe.As<bool, byte>(source: ref value));
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static T IsGreaterThan<T>(this T value, T other) where T : IBinaryInteger<T> =>
        (value > other).As<T>();

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static T MostSignificantBit<T>(this T value) where T : IBinaryInteger<T> =>
        (T.CreateTruncating(value: BinaryIntegerConstants<T>.Size) - T.LeadingZeroCount(value: value));
    public static T SquareRoot<T>(this T value) where T : IBinaryInteger<T>, IUnsignedNumber<T> {
        return BinaryIntegerConstants<T>.Size switch {
#if !FORCE_SOFTWARE_SQRT
            8 => T.CreateTruncating(value: ((uint)MathF.Sqrt(x: uint.CreateTruncating(value: value)))),
            16 => T.CreateTruncating(value: ((uint)MathF.Sqrt(x: uint.CreateTruncating(value: value)))),
            32 => T.CreateTruncating(value: ((uint)Math.Sqrt(d: uint.CreateTruncating(value: value)))),
            64 => T.CreateTruncating(value: Sqrt(value: ulong.CreateTruncating(value: value))),
#endif
            _ => SoftwareImplementation(value: value),
        };

        /*
             Credit goes to njuffa for providing a reference implementation:
                 https://stackoverflow.com/a/31149161/1186165

             Notes:
                 - This implementation of the algorithm runs in constant time, based on the size of T.
                 - Ignoring the loop that is entered when the size of T exceeds 64, all branches get eliminated during JIT compilation.
         */
        static T SoftwareImplementation(T value) {
            var msb = int.CreateTruncating(value: value.MostSignificantBit());
            var msbIsOdd = (msb & 1);
            var m = ((msb + 1) >> 1);
            var mMinusOne = (m - 1);
            var mPlusOne = (m + 1);
            var x = (T.One << mMinusOne);
            var y = (x - (value >> (mPlusOne - msbIsOdd)));
            var z = y;

            x += x;

            if (BinaryIntegerConstants<T>.Size > 8) {
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
            }

            if (BinaryIntegerConstants<T>.Size > 16) {
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
            }

            if (BinaryIntegerConstants<T>.Size > 32) {
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
                y = (((y * y) >> mPlusOne) + z);
            }

            if (BinaryIntegerConstants<T>.Size > 64) {
                var i = T.CreateTruncating(value: (BinaryIntegerConstants<T>.Size >> 3));

                do {
                    i -= (T.One << 3);
                    y = (((y * y) >> mPlusOne) + z);
                    y = (((y * y) >> mPlusOne) + z);
                    y = (((y * y) >> mPlusOne) + z);
                    y = (((y * y) >> mPlusOne) + z);
                    y = (((y * y) >> mPlusOne) + z);
                    y = (((y * y) >> mPlusOne) + z);
                    y = (((y * y) >> mPlusOne) + z);
                    y = (((y * y) >> mPlusOne) + z);
                } while (i != T.Zero);
            }

            y = (x - y);
            x = T.CreateTruncating(value: msbIsOdd);
            y -= BinaryIntegerConstants<T>.Size switch {
                8 => (x * ((y * T.CreateChecked(value: 5UL)) >> 4)),
                16 => (x * ((y * T.CreateChecked(value: 75UL)) >> 8)),
                32 => (x * ((y * T.CreateChecked(value: 19195UL)) >> 16)),
                64 => (x * ((y * T.CreateChecked(value: 1257966796UL)) >> 32)),
                128 => (x * ((y * T.CreateChecked(value: 5402926248376769403UL)) >> 64)),
                _ => throw new NotSupportedException(), // TODO: Research a way to calculate the proper constant at runtime.
            };
            x = (T.One << (BinaryIntegerConstants<T>.Size - 1));
            y -= (value - (y * y)).IsGreaterThan(other: x);

            if (BinaryIntegerConstants<T>.Size > 8) {
                y -= (value - (y * y)).IsGreaterThan(other: x);
                y -= (value - (y * y)).IsGreaterThan(other: x);
            }

            if (BinaryIntegerConstants<T>.Size > 32) {
                y -= (value - (y * y)).IsGreaterThan(other: x);
                y -= (value - (y * y)).IsGreaterThan(other: x);
                y -= (value - (y * y)).IsGreaterThan(other: x);
            }

            return (y & (T.AllBitsSet >> 1));
        }
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static uint Sqrt(ulong value) {
            var x = ((uint)Math.Sqrt(d: unchecked((long)value)));
            var y = (unchecked(((ulong)x) * x) > value).As<uint>(); // ((x * x) > value) ? 1 : 0
            var z = ((uint)(value >> 63)); // (64 == value.MostSignificantBit()) ? 1 : 0

            return unchecked(x - (y | z));
        }
    }
}
\$\endgroup\$
7
  • 1
    \$\begingroup\$ If you’re using a lot of / and % operations, your sieve is not going to be very fast. I would understand wheel factorization to mean, picking the product of the first k primes (2, 6, 30, 210, ...) and then quickly looking up the result modulo that wheel number. Alternatively, generating a repeating sequence of intervals to skip over to check only those values that are not divisible by any small prime. \$\endgroup\$
    – Davislor
    Commented Jul 15, 2023 at 0:35
  • \$\begingroup\$ The goal here is complete factorization and so the repetition is unfortunately necessary in order to extract the correct set of factors (see the link in above). I have refactored the code many times but this particular form gives accurate results and ends up benchmarking better than anything else I've tried. If you have a way to improve, please feel free to post an answer. \$\endgroup\$ Commented Jul 15, 2023 at 0:59
  • \$\begingroup\$ Okay, then at a glance, you probably want a lookup table where a single % operation gives you all the prime factors up to k, by the Chinese Remainder Theorem. You might check multiplicity by dividing by the product of the factors and repeating; that’s a number of repetitions equal to the highest multiplicity of any wheel prime. \$\endgroup\$
    – Davislor
    Commented Jul 15, 2023 at 1:18
  • \$\begingroup\$ @Kittoes0124 Why so few code comments? Code is likely clear for someone who has "refactored the code many times", yet not for others. \$\endgroup\$ Commented Sep 30, 2023 at 14:35
  • 1
    \$\begingroup\$ @chux-ReinstateMonica 1) Sorry about that, is a bad habit. 2) IIRC, yes, I did validate for all 32-bit values; also, as I understand things, the maths is such that overflow should be impossible. 3) The wheel implementation doesn't start until the main loop body. 7, 11, and 13 are factored out along with 2, 3, and 5 merely to reduce the number of mod/div operations required in the main loop. \$\endgroup\$ Commented Oct 1, 2023 at 15:03

2 Answers 2

2
\$\begingroup\$

IMHO, code needs more comments.

  • "wheel factorization" deserves in code explanation or in code link.

  • Text like "factor random values of n that range from uint.MinValue to uint.MaxValue" is a good comment for the file.

  • Functions deserve something to describe themselves. I am not looking for a book but a sentence or two.

  • Repeated comments like enumerate factors of N serve little value. One would have been sufficient.

Level of commenting is a balance and quite subjective, yet here there is not enough.

Width

Consider a narrower width for commenting to improve presentation.

// Instead of 
while (T.Zero == (index % BinaryIntegerConstants<T>.Thirteen)/* enumerate factors of 13 */) {

// Perhaps
/* enumerate factors of 13 */
while (T.Zero == (index % BinaryIntegerConstants<T>.Thirteen)) {

From

 Notes:
             - This implementation of the algorithm runs in constant time, based on the size of T.
             - Ignoring the loop that is entered when the size of T exceeds 64, all branches get eliminated during JIT compilation.

To

 Notes:
   - This implementation of the algorithm runs in constant time,
     based on the size of T.
   - Ignoring the loop that is entered when the size of T exceeds 64,
     all branches get eliminated during JIT compilation.
\$\endgroup\$
1
\$\begingroup\$

It’s been a while since I solved a problem in C# using wheel factorization.

If I understand the problem statement here correctly, what you really want to do with wheel factorization to find all the small prime factors in constant time, with only a single % operation (by far the slowest ALU math operation on most CPUs). So, you choose a product of the first k primes as your wheel value (that is, a value in the sequence 2, 6, 30, 210 ...), take the number to be factored modulo the wheel value, and determine all prime factors up to k by the Chinese Remainder Theorem. Since you say you also need their multiplicities, you then divide by the product of the known prime factors and repeat, a number of times equal to the highest multiplicity of any small prime.

Therefore, the data structure you probably want to use is an array of ArraySegment<T> (far preferable to Vector<T> for this), with a number of elements equal to your wheel value. Each entry contains a list of the prime factors of that number, such that if n%wheel_val == i and n > 0, Wheel[i] lists the primes that you can immediately deduce are factors of n. So, Wheel[1] is empty, Wheel[2] references [2], Wheel[3] references [3], Wheel[4] references [2], Wheel[6] references [2,3], and Wheel[18] also references [2,3]. Wheel[0] is always the complete set of wheel primes. (Recall: if your wheel is 30, 30 + 2 is divisible by 2, 30 + 6 is divisible by 2 and 3, and 30 + 15 is divisible by 3 and 5, but 30 + 4 is not divisible by 4, only 2. So you must not store multiplicity in this table.) You could also square your wheel value to be able to remove two levels of multiplicity per iteration. If the wheel is large enough, the cost of memory access might (or might not) come out ahead on time complexity to multiple trial divisions.

You might also want to leave out powers of 2, since it is very easy to check those by counting trailing zeroes. That way, you end up with a modulus of 15, 105, 1155, or so on. You could then halve the size of your array again, by storing only the odd values and dividing all even modular residues by 2, making your array four times smaller in all.

You would either append each run’s new factors at every step and sort at the end, or merge your sorted list of factors with the sorted list of new factors to produce a sorted list. Since the next step is to divide by the product of the factors you just looked up, you might want to store that in a table as well.

It’s possible to build this array at runtime, but it would be better to compile it statically as constants known at compile time. This would be a good candidate for procedurally-generated source code.

I’m not seeing any advantage to what you’re doing right now; you aren’t using repeated additions to skip most items in a sequence that you know must be composite, or sieve a range of primes. You want to factorize single arbitrary numbers in unknown order.

\$\endgroup\$

Not the answer you're looking for? Browse other questions tagged or ask your own question.