Coming from this answer to Format currency in Bash, I am looking for ways to determine which characters are used as numeric separators.
There are a lot of issues regarding locales and number formatting, for example:
printf '%.5f\n' $(bc -l <<<'4*a(1)')
3.14159
LANG=de_DE printf '%.5f\n' $(bc -l <<<'4*a(1)')
bash: printf: 3.14159265358979323844: invalid number
3,00000
The binary calculator bc
does not seem to handle locales correctly...
In the answer mentioned above, to find the decimal separator (or radix character), I used this:
# int2amount: excerpt showing only how the locale's decimal separator is
# detected; the rest of the function body is elided ("...").
int2amount() {
# TIMEFORMAT=%U limits the report of `time` to the user CPU seconds, i.e. a
# bare number such as 0.000 (or 0,000) written with the locale's radix.
local TIMEFORMAT=%U _decsep
# `time` is a shell keyword whose report goes to the shell's stderr; wrapping
# it in eval lets 2>&1 capture that report so `read` can store it.
read _decsep < <(eval 'time true' 2>&1)
# Delete every digit: what remains is the radix character.
_decsep=${_decsep//[0-9]}
...
}
This works fine:
# pi: print the value of pi with five decimals, using the radix character of
# the current locale.
#
# bc always writes "." as radix while bash's printf only accepts the radix of
# the current locale, so the dot in bc's output is swapped before formatting.
#
# Outputs: the formatted number on stdout.
# Returns: bc's exit status when bc fails, 1 when bc prints nothing,
#          otherwise the status of the final printf.
pi() {
    local TIMEFORMAT=%U _decsep pi

    # With TIMEFORMAT=%U, `time` reports a bare number written with the
    # locale's radix ("0.000" / "0,000"). eval makes the 2>&1 apply to that
    # report; deleting the digits leaves only the radix character.
    read -r _decsep < <(eval 'time true' 2>&1)
    _decsep=${_decsep//[0-9]}

    # Assigned separately from `local` so that a bc failure is not masked by
    # the exit status of `local` itself.
    pi=$(bc -l <<<'4*a(1)') || return
    [[ -n $pi ]] || return 1

    printf '%.5f\n' "${pi/./$_decsep}"
}
pi
3.14159
LANG=de_DE pi
3,14159
But the thousands separator is a lot easier to find:
# Format 1111 with the locale's digit grouping (the ' flag) straight into a
# variable, so no subshell is needed ...
printf -v ts "%'d" 1111
# ... then delete every digit 1: whatever is left is the thousands separator
# (empty when the locale does no grouping).
ts=${ts//1/}
There is no fork, so the system footprint is very light.
So I could imagine, at the beginning of a source file, something like:
# numericSeparators: detect the numeric separators of the current locale.
#
# Globals written:
#   NUM_THO_SEP  thousands separator (empty when the locale does no grouping)
#   NUM_DEC_SEP  decimal separator (radix character)
numericSeparators() {
    # %U: report only the user CPU seconds, i.e. a bare number such as 0.000
    # written with the locale's radix.
    local TIMEFORMAT=%U
    local grouped

    # Thousands separator: format 1111 with digit grouping, drop the digits.
    printf -v grouped "%'d" 1111
    NUM_THO_SEP=${grouped//1/}

    # Decimal separator: capture the report of `time` (eval lets 2>&1 catch
    # it) and drop the digits.
    read NUM_DEC_SEP < <(eval 'time true' 2>&1)
    NUM_DEC_SEP=${NUM_DEC_SEP//[0-9]/}
}
# Detect both separators once, when the file is loaded ...
numericSeparators
# ... then make them read-only for the rest of the script.
declare -r NUM_DEC_SEP
declare -r NUM_THO_SEP
...
But I think <(eval 'time true' 2>&1)
is heavy for this goal. I'm looking for a lighter and/or cleaner way to determine them (ideally both the decimal and thousands separators).
Self-answer
Thanks to dan's answer, my functions become simpler and quicker!
Sample to correct/adapt bc's output:
# pi: print the value of pi with five decimals, using the radix character of
# the current locale.
#
# bc always writes "." as radix while bash's printf only accepts the radix of
# the current locale, so the dot in bc's output is swapped before formatting.
#
# Outputs: the formatted number on stdout.
# Returns: bc's exit status when bc fails, 1 when bc prints nothing,
#          otherwise the status of the final printf.
pi() {
    local _decsep pi

    # Assigned separately from `local` so that a bc failure is not masked by
    # the exit status of `local` itself.
    pi=$(bc -l <<<'4*a(1)') || return
    [[ -n $pi ]] || return 1

    # Formatting 1 with one decimal yields "1.0" or "1,0": the second
    # character is the locale's radix. printf -v avoids any fork.
    printf -v _decsep '%.1f' 1

    printf '%.5f\n' "${pi/./${_decsep:1:1}}"
}
pi
3.14159
LANG=de_DE.UTF-8 pi
3,14159
A small function that sets two variables: NUM_THO_SEP
for the thousands separator and NUM_DEC_SEP
for the decimal separator:
# numericSeparators: detect the numeric separators of the current locale
# using only the printf builtin (no fork).
#
# Globals written:
#   NUM_THO_SEP  thousands separator (empty when the locale does no grouping)
#   NUM_DEC_SEP  decimal separator (radix character)
numericSeparators() {
    local sample second

    # Yields 1<thousands>111<radix>0, or 1111<radix>0 when the locale does
    # no grouping.
    printf -v sample "%'.1f" 1111

    # The radix is the next-to-last character.
    NUM_DEC_SEP=${sample: -2:1}

    # The second character is the thousands separator, unless it is simply
    # the next digit because nothing was inserted.
    second=${sample:1:1}
    if [[ $second == 1 ]]; then
        NUM_THO_SEP=
    else
        NUM_THO_SEP=$second
    fi
}
# Detect the separators for the current locale first.
numericSeparators

# Then show what is detected under several locales, each one in its plain
# and its .UTF8 variant.
for base in C en_US fr_CH fr_FR de_CH de_DE it_CH it_IT; do
    for loctest in "$base" "$base.UTF8"; do
        # The temporary LANG assignment applies to this call only.
        LANG=$loctest numericSeparators
        # Thousands separator is printed twice: raw, then shell-quoted (%q).
        LANG=C printf 'LANG=%-12s thsnd=%-1s \e[2m(%q)\e[0m\e[45G radix=%q\n' \
            "$loctest" "$NUM_THO_SEP" "$NUM_THO_SEP" "$NUM_DEC_SEP"
    done
done
LANG=C thsnd= ('') radix=.
LANG=C.UTF8 thsnd= ('') radix=.
LANG=en_US thsnd=, (\,) radix=.
LANG=en_US.UTF8 thsnd=, (\,) radix=.
LANG=fr_CH thsnd=' (\') radix=.
LANG=fr_CH.UTF8 thsnd=’ ($'\342\200\231') radix=.
LANG=fr_FR thsnd=�($'\240') radix=\,
LANG=fr_FR.UTF8 thsnd= ($'\342\200\257') radix=\,
LANG=de_CH thsnd=' (\') radix=.
LANG=de_CH.UTF8 thsnd=’ ($'\342\200\231') radix=.
LANG=de_DE thsnd=. (.) radix=\,
LANG=de_DE.UTF8 thsnd=. (.) radix=\,
LANG=it_CH thsnd=' (\') radix=.
LANG=it_CH.UTF8 thsnd=’ ($'\342\200\231') radix=.
LANG=it_IT thsnd=. (.) radix=\,
LANG=it_IT.UTF8 thsnd=. (.) radix=\,
Note: As my terminal is UTF-8, it is unable to display the NO-BREAK SPACE that the non-UTF-8 fr_FR locale emits as the single byte $'\240'. This is why it shows a replacement character (�) instead.
locale decimal_point for the decimal point, maybe?
locale is not a builtin, so the system footprint won't be better...
LANG=de_DE printf '%.5f\n' $(LANG=de_DE bc -l <<<'4*a(1)'), maybe?
bash: printf: 3.14159265358979323844: invalid number, then 3,00000. bc ignores the locale while bash does not.