A simpler Python implementation (based on my previous "Ugly Numbers" solutions but I guess influenced by skimming the answers here). I build the subsets in order of increasing sum and keep a list of subsets built so far. For each number of the whole set, I have a generator that tries to add the number to the subsets built so far. Then I just merge these generators and extend the list with their results.
from heapq import merge
def subsets(wholeset):
yield set()
subsets = [set()]
def add(number):
for subset in subsets:
if not subset or number > max(subset):
yield subset | {number}
for subset in merge(*map(add, wholeset), key=sum):
yield subset
subsets.append(subset)
Demo (Try it online!):
>>> print(*subsets({1, 4, 5, 9}))
set() {1} {4} {1, 4} {5} {1, 5} {4, 5} {9} {1, 4, 5} {1, 9} {9, 4} {1, 4, 9} {9, 5} {1, 5, 9} {9, 4, 5} {1, 4, 5, 9}
Benchmarks with coneyhelixlake's solution, the first case is the case from the question:
First 4096 subsets from random.sample(range(10**6), 64)
11.9 ms ± 0.8 ms subsets
126.7 ms ± 3.4 ms get_next_combs
First 100000 subsets from random.sample(range(10**6), 64)
472.2 ms ± 8.4 ms subsets
2751.2 ms ± 26.1 ms get_next_combs
First 32768 subsets from random.sample(range(10**6), 15)
144.4 ms ± 12.9 ms subsets
493.0 ms ± 16.4 ms get_next_combs
Benchmark code (Try it online!:
def subsets(wholeset):
yield set()
subsets = [set()]
def add(number):
for subset in subsets:
if not subset or number > max(subset):
yield subset | {number}
for subset in merge(*map(add, wholeset), key=sum):
yield subset
subsets.append(subset)
def get_next_combs(N, n=4):
yield set()
A = [0]*len(N)
L = [set()]
S = [0]
j = 1
while any(elem is not None for elem in A):
i_star = np.argmin([S[A[i]] + N[i] if A[i] is not None else np.inf for i in range(0, len(N))])
comb = L[A[i_star]].union((N[i_star],))
L.append(comb)
yield comb
S.append(S[A[i_star]] + N[i_star])
i = A[i_star]
A[i_star] = None
for i_next in range(i+1, len(L)):
if N[i_star] > max(L[i_next]):
A[i_star] = i_next
break
solutions = [subsets, get_next_combs]
from timeit import default_timer as time
import random
from itertools import islice
import numpy as np
from heapq import merge
from statistics import mean, stdev
def consume(iterator, n):
next(islice(iterator, n, n), None)
def test(wholeset, first_n):
print('First', first_n, 'subsets from', wholeset)
wholeset = eval(wholeset)
# Correctness
expect = list(islice(solutions[0](wholeset), first_n))
for solution in solutions[1:]:
result = list(islice(solution(wholeset), first_n))
assert result == expect
# Statistics initialization
times = {solution: [] for solution in solutions}
def stats(solution):
ts = sorted(times[solution])[:5]
return '%6.1f ms ± %4.1f ms ' % (mean(ts) * 1e3, stdev(ts) * 1e3)
# Measure times
for _ in range(10):
random.shuffle(solutions)
for solution in solutions:
start = time()
consume(solution(wholeset), first_n)
end = time()
times[solution].append(end - start)
# Report statistics
for solution in sorted(solutions, key=stats):
print(stats(solution), solution.__name__)
print()
test('random.sample(range(10**6), 64)', 4096)
test('random.sample(range(10**6), 64)', 100000)
test('random.sample(range(10**6), 15)', 2 ** 15)