I've build simple application that allows me to generate all numbers that have desired length and start with prefix.
For example if I specify 12
as prefix and set desired length to 4 it will generate numbers from 1200
to 1299
.
I'm storing all generated numbers in List<int>
then using shuffe method from Jon's answer I'm saving them in pseudo-random order to files, 200000 records per file.
This is my code:
public partial class Form1 : Form
{
private List<int> _prefixes;
private List<int> _results;
private readonly Object _resultsLock = new Object();
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
_prefixes = new List<int>();
_results = new List<int>();
int i = 0;
foreach (string line in AllPrefixes.Lines.Where(l=>!string.IsNullOrWhiteSpace(l) && Int32.TryParse(l, out i)))
{
_prefixes.Add(i);
}
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
_prefixes.AsParallel().ForAll(item =>
{
string prefix = item.ToString();
//here I'm determining how many numbers i must generate?
//can't his be done simpler?
var count = Convert.ToInt32("1".PadRight(9 - prefix.Count() + 1, '0'));
for (int j = 0; j < count; j++)
{
var res = prefix + j.ToString().PadLeft(9 - prefix.Length, '0');
lock (_resultsLock)
{
_results.Add(Convert.ToInt32(res));
}
}
});
stopwatch.Stop();
Debug.WriteLine("Time elapsed (s): {0}", stopwatch.Elapsed.TotalSeconds);
}
//Can't this be done simpler and also parallel?
private void SaveClick(object sender, EventArgs e)
{
const string dir = @"C:\TESTS";
int fileCount = 1;
var file = Path.Combine(dir, string.Format("{0}.csv", fileCount));
var sw = new StreamWriter(file, false);
int i = 0;
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
var rnd = new Random();
foreach (int res in _results.Shuffle(rnd))
{
sw.WriteLine(res);
i++;
if (i % 200000 != 0) continue;
fileCount++;
sw.Close();
file = Path.Combine(dir, string.Format("{0}.csv", fileCount));
sw = new StreamWriter(file, false);
}
sw.Close();
stopwatch.Stop();
Debug.WriteLine("Time elapsed (s): {0}", stopwatch.Elapsed.TotalSeconds);
}
}
For 200 prefixes all 9 digits numbers are generated on my PC in about 80-90 seconds, when I add some more prefixes I get OutOfMemory
exception.
Saving to files takes about 6-8 minutes, probably because I store 200000 results per file and I have more than 100 milion of generated results.
I'd like to optimize this as much as possible, time is priority, but memory usage is most important.
All suggestions are welcome!
First fix (thanks to RobH) - remove AsParallel
private void button1_Click(object sender, EventArgs e)
{
_prefixes = new List<int>();
_results = new List<int>();
int i = 0;
foreach (string line in AllPrefixes.Lines.Where(l=>!string.IsNullOrWhiteSpace(l) && Int32.TryParse(l, out i)))
{
_prefixes.Add(i);
}
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
foreach (var p in _prefixes)
{
string prefix = p.ToString();
var count = Convert.ToInt32("1".PadRight(9 - prefix.Count() + 1, '0'));
for (int j = 0; j < count; j++)
{
var res = prefix + j.ToString().PadLeft(9 - prefix.Length, '0');
_simpleResults.Add(Convert.ToInt32(res));
}
}
stopwatch.Stop();
Debug.WriteLine("Time elapsed (s): {0}", stopwatch.Elapsed.TotalSeconds);
}
This decreased time by 40 seconds (almost 50%), from 80-90, to about 40-50 seconds. Thanks!
AsParallel
and get rid of the locking. \$\endgroup\$123
,1254
,22456
. So they can be different length. \$\endgroup\$