6
\$\begingroup\$

I have solved Project Euler's problem number 47, which reads:

The first two consecutive numbers to have two distinct prime factors are:

  • 14 = 2 × 7
  • 15 = 3 × 5

The first three consecutive numbers to have three distinct prime factors are:

  • 644 = 22 × 7 × 23
  • 645 = 3 × 5 × 43
  • 646 = 2 × 17 × 19.

Find the first four consecutive integers to have four distinct prime factors each. What is the first of these numbers?

The correct answer is :

134043.

I used some sort of memoization to work out the problem but the performance is still pretty bad: 5.4–5.5 seconds. The trick in this question here is that the prime factors can actually be on some power which if evaluated doesn't result in a prime number but if the base is prime it's fine: e.g., 22 = 4.

Here is my solution:

public class Problem47 : IProblem
{
    public int ID => 47;
    public string Condition => ProblemsConditions.ProblemConditions[ID];

    //Key - value to power
    //Value - factorized value
    private readonly Dictionary<int, bool> passedNumbers = new Dictionary<int, bool>();
    private readonly Dictionary<int, int> factorizedPowers = new Dictionary<int, int>();
    private readonly HashSet<int> primes = new HashSet<int>();

    private const int primeFactorCount = 4;

    public ProblemOutput Solve()
    {
        Stopwatch sw = Stopwatch.StartNew();
        for (int i = 2 * 3 * 5 * 7; ; i++)
        {
            int[] numbers =
            {
                i, i + 1, i + 2, i + 3
            };
            int skipAmount = 0;
            foreach (int n in numbers)
            {
                bool value;
                if (passedNumbers.TryGetValue(n, out value))
                {
                    if (!value)
                    {
                        skipAmount = n - (i - 1);
                    }
                }
                else
                {
                    passedNumbers.Add(n, HasNPrimeFactors(n, primeFactorCount));
                    if (!passedNumbers[n])
                    {
                        skipAmount = n - (i - 1);
                    }
                }
            }
            if (skipAmount != 0)
            {
                i += skipAmount - 1;
                continue;
            }
            sw.Stop();
            return new ProblemOutput(sw.ElapsedMilliseconds, numbers[numbers.Length - primeFactorCount].ToString());
        }
    }

    private bool HasNPrimeFactors(int input, int n)
    {
        Dictionary<int, int> factors = new Dictionary<int, int>();
        for (int i = 2; input > 1 ; i++)
        {
            if (input % i == 0)
            {
                if (primes.Contains(i) || IsPrime(i))
                {
                    if (!primes.Contains(i))
                    {
                        primes.Add(i);
                    }
                    if (factorizedPowers.ContainsValue(i))
                    {
                        int maximizedPower = GetMaximizedFactor(input, i);
                        input /= maximizedPower;
                        if (factors.ContainsKey(i))
                        {
                            factors[i] += GetPowerOfValue(maximizedPower, i);
                        }
                        else
                        {
                            factors.Add(i, GetPowerOfValue(maximizedPower, i));
                        }
                    }
                    else
                    {
                        input /= i;
                        if (factors.ContainsKey(i))
                        {
                            factors[i]++;
                            factorizedPowers.Add((int) Math.Pow(i, factors[i]), i);
                        }
                        else
                        {
                            factors.Add(i, 1);
                        }
                    }
                }
            }
            if (i > input)
            {
                i = 1;
            }
        }
        return factors.Count == n;
    }

    private int GetMaximizedFactor(int input, int value)
    {
        var matches = factorizedPowers.Where(x => x.Value == value && input % x.Key == 0);
        return matches.Any() ? matches.Max().Key : value;
    }

    private static int GetPowerOfValue(int input, int value)
    {
        int count = 0;
        while (input > 1)
        {
            input /= value;
            count++;
        }
        return count;
    }

    private bool IsPrime(int value)
    {
        if (value < 2) { return false; }
        if (value % 2 == 0) { return value == 2; }
        if (value % 3 == 0) { return value == 3; }
        if (value % 5 == 0) { return value == 5; }
        if (value == 7) { return true; }

        for (int divisor = 7; divisor * divisor <= value; divisor += 30)
        {
            if (value % divisor == 0) { return false; }
            if (value % (divisor + 4) == 0) { return false; }
            if (value % (divisor + 6) == 0) { return false; }
            if (value % (divisor + 10) == 0) { return false; }
            if (value % (divisor + 12) == 0) { return false; }
            if (value % (divisor + 16) == 0) { return false; }
            if (value % (divisor + 22) == 0) { return false; }
            if (value % (divisor + 24) == 0) { return false; }
        }
        return true;
    }
}

Please ignore the inheritance, the ID and Condition properties, and the ProblemOutput class. Those are irrelevant for this question and have no effect on the program whatsoever.

\$\endgroup\$
8
  • 1
    \$\begingroup\$ Why not building up a Sieve of Erastothenes once first for the IsPrime() check? \$\endgroup\$ Commented Jan 3, 2017 at 15:43
  • \$\begingroup\$ Building sieve every time we get a bigger upper-limit sounds more inefficient to me :P \$\endgroup\$
    – Denis
    Commented Jan 3, 2017 at 15:44
  • \$\begingroup\$ You don't need to rebuild the whole sieve every time? \$\endgroup\$ Commented Jan 3, 2017 at 15:45
  • \$\begingroup\$ If you feel like you have a suggestion please write it as an answer, followed by a code example so I can understand what you mean more easily. \$\endgroup\$
    – Denis
    Commented Jan 3, 2017 at 15:48
  • 2
    \$\begingroup\$ @CharlesNRice You need 4 consecutive numbers. 37960, 37961, 37962 are fine, but 37963 is not (it is a prime itself). \$\endgroup\$
    – vnp
    Commented Jan 3, 2017 at 18:08

3 Answers 3

5
\$\begingroup\$

If you're working through the Project Euler questions, you should become familiar with the Seive of Eratoshenes.

Not only is it a very simple way to generate a bunch of prime numbers at once, but it's an algorithm which can be subtly modified to solve other problems.

In this problem, we want to calculate how many prime factors each number in a range has. We've going to be testing a lot of numbers, so it would be great if we can calculate all of the factor counts at once. And we can!

First, pick a safe top number that we're going to test up to. I'll guess that the answer will be a number less than a million.

Create an array of a million zeros.

It starts as an array of zeros, but we want to transform this array so that:

array[i] == count_of_factors(i)

  • Start at 2. array[2] == 0, so therefore 2 is prime. Now we can add 1 to {array[4], array[6], array[8] ... array[999998]}, because they are factors of 2.
  • Move to 3. array[3] == 0, so add 1 to {array[6], array[9], array[12] ... array[999999]}
  • Move to 4. array[4] == 1. (We've already incremented it) Because it is not 0, we know it is not a prime number, so we ignore it.
  • Move to 5. array[5] == 0, so add 1 to {array[10], array[15], array[20], array[25]...array[999995]

And so on. When you're done, every number will either be 0 (if the index is prime), or it will be the count of prime factors of that index. Now it's as simple as finding the first four contiguous fours.

This solution takes around 130 milliseconds:

static int SolveProblem()
{
   int[] factorCounts = new int[1000000];
   for(int i = 2; i < factorCounts.Length; ++i)
   {
      if(factorCounts[i] == 0) // It's Prime
      {
         for(int j = i*2; j < factorCounts.Length; j+=i)
         {
            ++factorCounts[j];
         }
      }
   }

   // Now find the first four contiguous fours
   int contiguousFours = 0;
   for (int i = 210; i < factorCounts.Length; ++i)
   {
      contiguousFours = factorCounts[i] == 4 ? contiguousFours + 1 : 0;
      if(contiguousFours == 4)
      {
         return i - 3;
      }
    }
    return -1;
}

But we can do better than this. The above code has two loops: one which calculates the factor counts and one which searches for the contiguous fours. Lets merge this into one loop: that way it will stop generating factor counts as soon as it finds the fours.

static int SolveProblem()
{
    int[] factorCounts = new int[1000000];
    int contiguousFours = 0;
    for(int i = 2; i < factorCounts.Length; ++i)
    {
        contiguousFours = factorCounts[i] == 4 ? contiguousFours + 1 : 0;
        if (contiguousFours == 4)
        {
            return i - 3;
        }
        if (factorCounts[i] == 0) // It's Prime
        {
            for(int j = i*2; j < factorCounts.Length; j+=i)
            {
                ++factorCounts[j];
            }
        }
    }
    return -1;
}

Now we're down to 85 milliseconds.

\$\endgroup\$
1
  • \$\begingroup\$ U can further improve it, only need to loop till factorCounts/2, and because when at index i, u can check up to i*2 + 1, thus the loop may stop about halfway of the 85 ms version. \$\endgroup\$
    – Eric
    Commented Apr 12, 2023 at 5:39
3
\$\begingroup\$

Your Solve() method looks too complex to me. My (approximate) equivalent looks like:

public int Solve() {

    // 2 * 3 * 5 * 7 = 210.
    for (int i = 210; true; ++i) {
        if (FacCount(i) == 4 &&
            FacCount(i + 1) == 4 &&
            FacCount(i + 2) == 4 &&
            FacCount(i + 3) == 4) {
              return i;
        }
    }
}

Obviously, most of the work is done in FacCount(), which counts the number of distinct prime factors in its parameter:

private int FacCount(int num) {

    int fCount = 0;
    int limit = Isqrt(num);  // Integer square root.

    // Remove factors of 2
    if (num % 2 == 0) {
        ++fCount;
        do {
            num /= 2;
        } while (num % 2 == 0);

        // Reset limit.
        limit = Isqrt(num);
    }

    // Remove other (odd) factors.
    int trialFactor = 3;
    while (trialFactor <= limit) {
        if (num % trialFactor == 0) {
            ++fCount;
            do {
                num /= trialFactor;
            } while (num % trialFactor == 0);
            limit = Isqrt(num);
        }
        trialFactor += 2;
    }

    if (num > 1) { ++fCount; } // Count last factor

    return fCount;

} // end FacCount()

This will run faster if you have a NextPrime() method in your library, based on the Sieve of Eratosthenes. That will avoid having to check odd non-prime trial factors. My Java version runs in about 200 ms using the sieve to pick primes.

There is one helper method, which finds the integer square root. This version uses Newton-Raphson and is taken from CodeCodex.

public int Isqrt(int num) {
    if (0 == num) { return 0; }  // Avoid zero divide
    int n = (num / 2) + 1;       // Initial estimate, never low
    int n1 = (n + (num / n)) / 2;
    while (n1 < n) {
        n = n1;
        n1 = (n + (num / n)) / 2;
    } // end while
    return n;
} // end Isqrt()

My original code was Java, so I may have made a few mistakes in converting it to C#. Check things carefully.

ETA: Following @Heslacher's comments in his answer below, about stepping more than one. Yes, time can be saved, but the step is not constant. The size of step depends on the number of consecutive integers with four different factors found. The search loop could be rewritten:

    // 2 * 3 * 5 * 7 = 210.
    int step = 1;
    for (int i = 210; true; i += step) {
        if (FacCount(i) != 4) {
            step = 1;
        } else if (FacCount(i + 1) != 4) {
            step = 2;
        } else if (FacCount(i + 2) != 4) {
            step = 3;
        } else if (FacCount(i + 3) != 4) {
            step = 4;
        else {
            // Only get here with four consecutive four-counts.
            return i;
        }
    }

This avoids checking a number's factor count twice by stepping to the number after the first non-four factor count. As a disadvantage, the logic of the search is not as clear as with the original version.

\$\endgroup\$
4
  • \$\begingroup\$ You are checking some numbers more than once in Solve. Replacing it with if (FacCount(i) == 4 && FacCount(++i) == 4 && FacCount(++i) == 4 && FacCount(++i) == 4) { return i-3; }, reduce the calls made to FacCount to 133837 from 157361. \$\endgroup\$
    – Xiaoy312
    Commented Jan 4, 2017 at 16:19
  • \$\begingroup\$ True, but I find changing the loop counter in the body of the loop dangerous and not easy to maintain. Given that my code ran well below the 1 minute threshold for Project Euler I didn't bother with further optimisation. \$\endgroup\$
    – rossum
    Commented Jan 4, 2017 at 16:48
  • \$\begingroup\$ It is not dangerous if you are aware of what you are doing. As for maintainability, you just have to document it well. The 1 minute threshold is just there to prevent waste of cpu-time on faulty code or malicious attack and should not be used as a point of reference. As a matter of fact, near 15% of the calls to FacCount is redundant. \$\endgroup\$
    – Xiaoy312
    Commented Jan 4, 2017 at 21:12
  • \$\begingroup\$ You can also write like this to avoid changing the counter within the loop: int found = 0; for (int i = 210; true; ++i) { if (FacCount(i) != 4) { found = 0; continue; } if (++found == 4) { return i - 4 + 1; } } } \$\endgroup\$
    – Xiaoy312
    Commented Jan 4, 2017 at 21:15
1
\$\begingroup\$
    for (int i = 2 * 3 * 5 * 7; ; i++)
    {
        int[] numbers =
        {
            i, i + 1, i + 2, i + 3
        };
        int skipAmount = 0;  
        ...
        ...
        if (skipAmount != 0)
        {
            i += skipAmount - 1;
            continue;
        }

Only a quick note

You are using skipAmount as a flag and to increment the loop counter here. By using a boolean as a flag and just increment the loop counter by 3 you will get the same effect.

Now more of meat

You can speed up your implementation by reversing the numbers array to first check i + 3. If this doesn't has the desired number of prime factors you won't need to check the remaining items in the array any more.

This will result (using poor man benchmarking) in around 2200ms, which isn't anything like @rossum's answer.

Another advantage is that you won't need passedNumbers any more. The loop will boil down to

for (int i = 2 * 3 * 5 * 7; ; i++)
{
    int[] numbers =
    {
        i + 3, i + 2, i + 1, i 
    };

    bool hasFinished = true;

    foreach (int n in numbers)
    {

        if (!HasNPrimeFactors(n, primeFactorCount))
        {
            hasFinished = false;
            break;
        }
    }
    if (hasFinished)
    {
        return numbers[3].ToString();
    }
}  

which as a side effect looks much nicer.


HasNPrimeFactors()

    Dictionary<int, int> factors = new Dictionary<int, int>();
    for (int i = 2; input > 1 ; i++)
    {
        if (input % i == 0)
        {
            if (primes.Contains(i) || IsPrime(i))
            {
                if (!primes.Contains(i))
                {
                    primes.Add(i);
                }  

The second call to primes.Contains(i) can be omitted because the Add() method of a HashSet<T> does almost do the same checks than Contains() but by checking first if !Contains() will do this twice.

Reference Source: AddIfNotPresent() and Contains() please note that Add() just calls AddIfNotPresent()


                if (factorizedPowers.ContainsValue(i))
                {
                    int maximizedPower = GetMaximizedFactor(input, i);
                    input /= maximizedPower;
                    if (factors.ContainsKey(i))
                    {
                        factors[i] += GetPowerOfValue(maximizedPower, i);
                    }
                    else
                    {
                        factors.Add(i, GetPowerOfValue(maximizedPower, i));
                    }
                }
                else
                {
                    input /= i;
                    if (factors.ContainsKey(i))
                    {
                        factors[i]++;
                        factorizedPowers.Add((int) Math.Pow(i, factors[i]), i);
                    }
                    else
                    {
                        factors.Add(i, 1);
                    }
                }

Here you have some code duplication which can be removed by

  • storing the calculated GetPowerOfValue(maximizedPower, i)); into a variable.
  • instead of calling first ContainsKey() you should use TryGetValue() just before the if.

The HasNPrimeFactors() method could then look like

private bool HasNPrimeFactors(int input, int n)
{

    Dictionary<int, int> factors = new Dictionary<int, int>();
    for (int i = 2; input > 1; i++)
    {
        if (input % i == 0)
        {
            if (primes.Contains(i) || IsPrime(i))
            {
                primes.Add(i);

                int factor;
                factors.TryGetValue(i, out factor);

                if (factorizedPowers.ContainsValue(i))
                {
                    int maximizedPower = GetMaximizedFactor(input, i);
                    factors[i] = factor + GetPowerOfValue(maximizedPower, i);

                    input /= maximizedPower;
                }
                else
                {
                    factors[i] = factor++;

                    if (factor > 1)
                    {
                        factorizedPowers.Add((int)Math.Pow(i, factor), i);
                    }

                    input /= i;
                }
            }
        }
        if (i > input)
        {
            i = 1;
        }

    }

    return factors.Count == n;
}

which now runs in ~1750 ms.


Just a little side note regarding the answer/solution posted by @rossum.

In its current form it runs in about ~75ms and produces the correct result. By checking the item of i +3 first like

public int Solve()
{

    // 2 * 3 * 5 * 7 = 210.

    for (int i = 210; true; i++)
    {
        if (FacCount(i + 3) == 4 &&
            FacCount(i + 2) == 4 &&
            FacCount(i + 1) == 4 &&
            FacCount(i) == 4)
        {
            return i;
        }
    }
}

It only takes <50ms.

\$\endgroup\$
2
  • \$\begingroup\$ I don't think you can just skip by 3 like that. Suppose the answer were 211..214. Your loop would skip from 210 to 213 and miss it. That is, unless there was some proof that the sequence had to start from a multiple of 3? \$\endgroup\$
    – JS1
    Commented Jan 21, 2017 at 11:09
  • \$\begingroup\$ @Js1 you are correct. My bad. Edited answer. \$\endgroup\$
    – Heslacher
    Commented Jan 23, 2017 at 5:55

Not the answer you're looking for? Browse other questions tagged or ask your own question.