8
\$\begingroup\$

In a bash 3 script for OSX machines, I needed the functional equivalent of the realpath command, complete with support for the --relative-to and --relative-base options.

I would normally just install the homebrew coreutils formula, but I need this script to work when bootstrapping a new machine with no network or XCode available yet. So the code below includes a replacement implementation that's only used when the command is not already installed. Normal usage would be

for resolved_path in $(realpath [--relative-to=...] [--relative-base=...] [one or more paths]); do
    # ...
done

It's meant to be used as a sourced library (. realpathlib.bash), but if run directly with bash realpathlib.bash it runs a simple test suite.

I've been picking up bash best practices as I went along, but would love feedback on all aspects, such as naming conventions and other coding style aspects, techniques, problems I've overlooked, etc.

# shellcheck shell=bash
set -euo pipefail

_contains() {
    # return true if first argument is present in the other arguments
    local elem value

    value="$1"
    shift

    for elem in "$@"; do 
        if [[ $elem == "$value" ]]; then
            return 0
        fi
    done
    return 1
}

_canonicalize_filename_mode() {
    # resolve any symlink targets, GNU readlink -f style
    # where every path component except the last should exist and is
    # resolved if it is a symlink. This is essentially a re-implementation
    # of canonicalize_filename_mode(path, CAN_ALL_BUT_LAST).
    # takes the path to canonicalize as first argument

    local path result component seen
    seen=()
    path="$1"
    result="/"
    if [[ $path != /* ]]; then  # add in current working dir if relative
        result="$PWD"
    fi
    while [[ -n $path ]]; do
        component="${path%%/*}"
        case "$component" in
            '') # empty because it started with /
                path="${path:1}" ;;
            .)  # ./ current directory, do nothing
                path="${path:1}" ;;
            ..) # ../ parent directory
                if [[ $result != "/" ]]; then  # not at the root?
                    result="${result%/*}"      # then remove one element from the path
                fi
                path="${path:2}" ;;
            *)
                # add this component to the result, remove from path
                if [[ $result != */ ]]; then
                    result="$result/"
                fi
                result="$result$component"
                path="${path:${#component}}"
                # element must exist, unless this is the final component
                if [[ $path =~ [^/] && ! -e $result ]]; then
                    echo "$1: No such file or directory" >&2
                    return 1
                fi
                # if the result is a link, prefix it to the path, to continue resolving
                if [[ -L $result ]]; then
                    if _contains "$result" "${seen[@]+"${seen[@]}"}"; then
                        # we've seen this link before, abort
                        echo "$1: Too many levels of symbolic links" >&2
                        return 1
                    fi
                    seen+=("$result")
                    path="$(readlink "$result")$path"
                    if [[ $path = /* ]]; then
                        # if the link is absolute, restart the result from /
                        result="/"
                    elif [[ $result != "/" ]]; then
                        # otherwise remove the basename of the link from the result
                        result="${result%/*}"
                    fi
                elif [[ $path =~ [^/] && ! -d $result ]]; then
                    # otherwise all but the last element must be a dir
                    echo "$1: Not a directory" >&2
                    return 1
                fi
                ;;
        esac
    done
    echo "$result"
}

_realpath() {
    local relative_to relative_base seenerr path

    relative_to=
    relative_base=
    seenerr=

    while [[ $# -gt 0 ]]; do
        case $1 in
            "--relative-to="*)
                relative_to=$(_canonicalize_filename_mode "${1#*=}")
                shift 1;;
            "--relative-base="*)
                relative_base=$(_canonicalize_filename_mode "${1#*=}")
                shift 1;;
            *)
                break;;
        esac
    done

    if [[
        -n $relative_to
        && -n $relative_base
        && ${relative_to#${relative_base}/} == "$relative_to"
    ]]; then
        # relative_to is not a subdir of relative_base -> ignore both
        relative_to=
        relative_base=
    elif [[ -z $relative_to && -n $relative_base ]]; then
        # if relative_to has not been set but relative_base has, then
        # set relative_to from relative_base, simplifies logic later on
        relative_to="$relative_base"
    fi

    for path in "$@"; do
        if ! real=$(_canonicalize_filename_mode "$path"); then
            seenerr=1
            continue
        fi

        # make path relative if so required
        if [[
            -n $relative_to
            && ( # path must not be outside relative_base to be made relative
                -z $relative_base || ${real#${relative_base}/} != "$real"
            )
        ]]; then
            local common_part parentrefs

            common_part="$relative_to"
            parentrefs=
            while [[ ${real#${common_part}/} == "$real" ]]; do
                common_part="$(dirname "$common_part")"
                parentrefs="..${parentrefs:+/$parentrefs}"
            done

            if [[ $common_part != "/" ]]; then
                real="${parentrefs:+${parentrefs}/}${real#${common_part}/}"
            fi
        fi

        echo "$real"
    done
    if [[ $seenerr ]]; then
        return 1
    fi
}

if ! command -v realpath > /dev/null 2>&1; then
    # realpath is not available on OSX unless you install the `coreutils` brew
    realpath() { _realpath "$@"; }
fi

if [[ $0 == "${BASH_SOURCE[0]}" ]]; then
    assert_equal() {
        local result
        while read -r result; do
            if [[ $result != "$1" ]]; then
                echo -e "\033[0;31mFAIL\033[0m"
                echo -e "expected\n\t$1\ngot\n\t$result"
                exit 2
            fi
            shift 1
        done
        # any expected results left over?
        if [[ $# -gt 0 ]]; then
            echo -e "\033[0;31mFAIL\033[0m"
            echo "expected more results"
            printf '\t- %s\n' "$@"
            exit 2
        fi
        echo -e "\033[0;32mOK\033[0m"
    }

    testdir=$(mktemp -d -t "${0##*/}_tests")
    # canonicalize testdir with pwd -P (no .. components, so sufficient)
    pushd "$testdir"
    testdir=$(pwd -P)
    popd >/dev/null

    cleanup() {
        rm -rf "$testdir"
    }
    trap cleanup EXIT

    mkdir -p "$testdir/foo/bar/baz"
    mkdir -p "$testdir/foobar"
    touch "$testdir/target"
    touch "$testdir/foo/target"
    touch "$testdir/foo/bar/target"
    touch "$testdir/foobar/target"
    ln -s "../link" "$testdir/foo/bar/baz/link"
    ln -s "../link" "$testdir/foo/bar/link"
    ln -s "../target" "$testdir/foo/link"
    ln -s "circular2" "$testdir/foo/circular1"
    ln -s "circular1" "$testdir/foo/circular2"
    ln -s "../foo/bar" "$testdir/foobar/dirlink"

    echo -en "chained symlinks:\t"
    _realpath "$testdir/foo/bar/baz/link" \
        | assert_equal "$testdir/target"

    echo -en "circular symlinks:\t"
    { _realpath "$testdir/foo/circular1" 2>&1 || echo "error exit"; } \
        | assert_equal \
            "$testdir/foo/circular1: Too many levels of symbolic links" \
            "error exit"

    echo -en "symlink and .. combo:\t"
    _realpath "$testdir/foobar/dirlink/../target" \
        | assert_equal \
            "$testdir/foo/target"

    echo -en "non-existing path:\t"
    { _realpath "$testdir/nonesuch/foo" 2>&1 || echo "error exit"; } \
        | assert_equal \
            "$testdir/nonesuch/foo: No such file or directory" \
            "error exit"

    echo -en "file as directory:\t"
    { _realpath "$testdir/target/foo" 2>&1 || echo "error exit"; } \
        | assert_equal \
            "$testdir/target/foo: Not a directory" \
            "error exit"

    echo -en "relative paths:\t\t"
    pushd "$testdir/foo" > /dev/null
    _realpath \
            "bar/target" \
            "../target" \
            "target" \
            "$testdir/./foo/../foobar/target" \
        | assert_equal \
            "$testdir/foo/bar/target" \
            "$testdir/target" \
            "$testdir/foo/target" \
            "$testdir/foobar/target"
    popd > /dev/null

    echo -en "relative-base inside:\t"
    _realpath --relative-base="$testdir/foo" "$testdir/foo/bar/target" \
        | assert_equal "bar/target"

    echo -en "relative-base outside:\t"
    _realpath --relative-base="$testdir/foo/bar" "$testdir/foo/target" \
        | assert_equal "$testdir/foo/target"

    echo -en "--r-base name prefix:\t"
    _realpath --relative-base="$testdir/foo" "$testdir/foobar/target" \
        | assert_equal "$testdir/foobar/target"

    echo -en "--r-base extra /-s:\t"
    _realpath --relative-base="$testdir//foo//" "$testdir/foo/target" \
        | assert_equal "target"

    echo -en "--r-base relative:\t"
    pushd "$testdir/foo/bar" > /dev/null
    _realpath --relative-base="../../foo" "$testdir/foo/bar/target" \
        | assert_equal "bar/target"
    popd > /dev/null

    echo -en "multiple --r-base:\t"
    _realpath --relative-base="$testdir/foo" \
            "$testdir/foo/target" \
            "$testdir/target" \
            "$testdir/foo/bar/target" \
        | assert_equal \
            "target" \
            "$testdir/target" \
            "bar/target"

    echo -en "--r-base divergent:\t"
    _realpath --relative-base="$testdir" "/dev/null" \
        | assert_equal "/dev/null"

    echo -en "relative-to inside:\t"
    _realpath --relative-to="$testdir/foo" "$testdir/foo/bar/target" \
        | assert_equal "bar/target"

    echo -en "relative-to outside:\t"
    _realpath --relative-to="$testdir/foo/bar" "$testdir/target" \
        | assert_equal "../../target"

    echo -en "--r-to name prefix:\t"
    _realpath --relative-to="$testdir/foo" "$testdir/foobar/target" \
        | assert_equal "../foobar/target"

    echo -en "--r-to extra /-s:\t"
    _realpath --relative-to="$testdir//foo//" "$testdir/foo/target" \
        | assert_equal "target"

    echo -en "--r-to relative:\t"
    pushd "$testdir/foo" > /dev/null
    _realpath --relative-to="../foobar" "$testdir/foo/target" \
        | assert_equal "../foo/target"
    popd > /dev/null

    echo -en "multiple --r-to:\t"
    _realpath --relative-to="$testdir/foo" \
            "$testdir/foo/target" \
            "$testdir/target" \
            "$testdir/foo/bar/target" \
        | assert_equal \
            "target" \
            "../target" \
            "bar/target"

    echo -en "combined inside both:\t"
    _realpath --relative-base="$testdir" --relative-to="$testdir/foo" "$testdir/foo/bar/target" \
        | assert_equal "bar/target"

    echo -en "combined outside one:\t"
    _realpath --relative-base="$testdir" --relative-to="$testdir/foo/bar" "$testdir/target" \
        | assert_equal "../../target"

    echo -en "combined outside both:\t"
    _realpath --relative-base="$testdir/foo" --relative-to="$testdir/foo/bar" "$testdir/target" \
        | assert_equal "$testdir/target"

    echo -en "multiple combined:\t"
    _realpath --relative-base="$testdir/foo" --relative-to="$testdir/foo/bar" \
            "$testdir/foo/target" "$testdir/target" "$testdir/foo/bar/target" \
        | assert_equal \
            "../target" \
            "$testdir/target" \
            "target"

    echo -en "combined errorcase:\t"
    # -base should be a parent path of -to. If not, the arguments are ignored
    _realpath --relative-base="$testdir/foo/bar" --relative-to="$testdir/foo" "$testdir/foo/bar/target" \
        | assert_equal "$testdir/foo/bar/target"
fi
\$\endgroup\$
5
  • \$\begingroup\$ To me this seems like an XY problem. Why can't you just use the "Multiple installations" section to get homebrew? Ship the install script with the tarball and you can bootstrap from there. \$\endgroup\$
    – Peilonrayz
    Commented May 11, 2020 at 23:23
  • 1
    \$\begingroup\$ @Peilonrayz that’d still require a network connection or a lot more work to package up all dependent bottles: discourse.brew.sh/t/installing-homebrew-without-internet/3321/4. Bootstrapping can’t always rely on a network connection. \$\endgroup\$ Commented May 12, 2020 at 7:28
  • \$\begingroup\$ I wouldn't call 'compiling with a --build-from-source flag once and reusing the same pre-installed package' a lot more work. Seems like a lot less work then writing your own bootstrap ecosystem. With some use of chroot you could probably remove any need for the flag either. \$\endgroup\$
    – Peilonrayz
    Commented May 12, 2020 at 10:15
  • \$\begingroup\$ @Peilonrayz: packaging up and maintaining a minimal network-less homebrew and xcode setup would be more work. This is ~150 lines (not counting tests), and was the only missing tool. \$\endgroup\$ Commented May 12, 2020 at 10:51
  • \$\begingroup\$ I fail to see how this <20 lines script (not counting tests) I have is more work than maintaining a ~150 line script (not counting tests). Takes about 10 minutes to build the package and once built the 382M package runs even without internet. I guess I just lack the ability to do real bootstrapping. \$\endgroup\$
    – Peilonrayz
    Commented May 12, 2020 at 12:44

2 Answers 2

1
\$\begingroup\$
# shellcheck shell=bash

You're using Shellcheck. That makes me happy. :-)

set -euo pipefail

Similarly, this is a good set of options to enable.

    for elem in "$@"; do

in "$@" is redundant here (though it does no harm).

    [[ $elem == "$value" ]]

I'd use plain [ there, and quote both variables, as this is the kind of function I might later want to transplant to a plain POSIX shell script.

    case $1 in
        "--relative-to="*)
            relative_to=$(_canonicalize_filename_mode "${1#*=}")
            shift 1;;
        "--relative-base="*)
            relative_base=$(_canonicalize_filename_mode "${1#*=}")
            shift 1;;

The argument to shift is redundant, and wasn't used in the other function. Try to be consistent!


The tests

... seem happy here:

/tmp/user/1000/242050.sh_tests.CbF6 ~/stackexchange/review
chained symlinks:   [0;32mOK[0m
circular symlinks:  [0;32mOK[0m
symlink and .. combo:   [0;32mOK[0m
non-existing path:  [0;32mOK[0m
file as directory:  [0;32mOK[0m
relative paths:     [0;32mOK[0m
relative-base inside:   [0;32mOK[0m
relative-base outside:  [0;32mOK[0m
--r-base name prefix:   [0;32mOK[0m
--r-base extra /-s: [0;32mOK[0m
--r-base relative:  [0;32mOK[0m
multiple --r-base:  [0;32mOK[0m
--r-base divergent: [0;32mOK[0m
relative-to inside: [0;32mOK[0m
relative-to outside:    [0;32mOK[0m
--r-to name prefix: [0;32mOK[0m
--r-to extra /-s:   [0;32mOK[0m
--r-to relative:    [0;32mOK[0m
multiple --r-to:    [0;32mOK[0m
combined inside both:   [0;32mOK[0m
combined outside one:   [0;32mOK[0m
combined outside both:  [0;32mOK[0m
multiple combined:  [0;32mOK[0m
combined errorcase: [0;32mOK[0m
           echo -e "\033[0;31mFAIL\033[0m"

Please don't hard-code terminal codes in messages like that - you don't know that it will be run in the same terminal type (it looks very ugly in my Emacs compilation buffer). And avoid non-standard echo -e.

On my (Debian) system, mktemp -d -t "${0##*/}_tests" fails:

mktemp: too few X's in template ‘242050.sh_tests’

It's easily fixed:

testdir=$(mktemp -d -t "${0##*/}_tests.XXXX")
pushd "$testdir"
testdir=$(pwd -P)
popd >/dev/null

pushd and popd are good for interactive use, but less well suited to scripts (as evident by the need to discard output). Use plain cd instead:

testdir=$(cd "$testdir" && pwd -P)

I'm surprised not to see any tests with . or .. components in --relative-* options.

I'm also surprised that we don't enter the test directory and provide arguments that are relative rather than absolute.

\$\endgroup\$
1
\$\begingroup\$

Here's a version of the above, with the base realpath bit redone in a simpler way, lots of other tweaks, and most importantly, instead of using an "expected" value for the tests, just run them straight against realpath. It makes it harder to test properly (I used a locally-built install of bash with the same version as on macos, and to test on a macos you'll need to use the original bash with a realpath installed). But the upside of it is that there's no need to go over the tests carefully -- since it should just follow whatever realpath is doing directly.

#!/usr/bin/env bash
set -eu

# based on https://codereview.stackexchange.com/questions/242050

failwith() { { echo -n "error: "; printf "%s\n" "$@"; } 1>&2; exit 1; }

rpath() { # mimics a sane `realpath` for insane OSs that lack one
  local relbase="" relto=""
  while [[ x"${1-}" = x-* ]]; do case "$1" in
    ( "--relative-base="* ) relbase="$(rpath ${1#*=})" ;;
    ( "--relative-to="* )   relto="$(rpath ${1#*=})" ;;
    ( * ) failwith "unrecognized option '$1'"
  esac; shift; done
  if [[ "$#" -eq 0 ]]; then failwith "missing operand"; fi
  if [[ -n "$relto" && -n "$relbase" && "${relto#"$relbase/"}" = "$relto" ]]; then
    # relto is not a subdir of relbase => ignore both
    relto="" relbase=""
  elif [[ -z "$relto" && -n "$relbase" ]]; then
    # relbase is set but relto isn't => set relto from relbase to simplify
    relto="$relbase"
  fi
  local p d f n=0 up common PWD0="$PWD"
  for p in "$@"; do
    cd "$PWD0"
    while (( n++ < 50 )); do
      d="$(dirname "$p")"
      if [[ ! -e "$d" ]]; then failwith "$p: No such file or directory"; fi
      if [[ ! -d "$d" ]]; then failwith "$p: Not a directory"; fi
      cd -P "$d"
      f="$(basename "$p")"
      if [[ -h "$f" ]]; then p="$(readlink "$f")"; continue; fi
      # done getting the realpath
      local r="$PWD/$f"
      if [[ -n "$relto" && ( -z "$relbase" || "${r#"$relbase/"}" != "$r" ) ]]; then
        common="$relto" up=""
        while [[ "${r#"$common/"}" = "$r" ]]; do
          common="${common%/*}" up="..${up:+"/$up"}"
        done
        if [[ "$common" != "/" ]]; then
          r="${up:+"$up"/}${r#"$common/"}"
        fi
      fi
      cd "$PWD0"; echo "$r"; continue 2
    done
    cd "$PWD0"; failwith "$1: Too many levels of symbolic links"
  done
}

###############################################################################

t="/tmp/$$"
errors=0
cleanup() {
  rm -rf "$t"
  if ((errors)); then echo "$errors failures"
  else echo "All OK"; fi
}
try() {
  label="$1"; shift
  echo -n "$label {$*}"
  r="$((rpath "$@") 2>&1 || :)"
  real="$(realpath "$@" 2>&1 || :)"
  if [[ "$real" = "realpath: "* ]]; then real="error: ${real#*: }"; real="${real%%$'\n'*}"; fi
  if [[ "$r" = "$real" ]]; then echo " OK"
  else
    (( errors++ )) || :
    echo " FAIL:"
    echo "  expected: $real"
    echo "  got:      $r"
  fi
}
trap cleanup EXIT

mkdir -p "$t/foo/bar/baz"
mkdir -p "$t/foobar"
touch "$t/target"
touch "$t/foo/target"
touch "$t/foo/bar/target"
touch "$t/foobar/target"
ln -s "../link" "$t/foo/bar/baz/link"
ln -s "../link" "$t/foo/bar/link"
ln -s "../target" "$t/foo/link"
ln -s "circular2" "$t/foo/circular1"
ln -s "circular1" "$t/foo/circular2"
ln -s "../foo/bar" "$t/foobar/dirlink"

try "no args"
try "bogus flag" --bogus
try "chained symlinks"      "$t/foo/bar/baz/link"
try "circular symlinks"     "$t/foo/circular1"
try "symlink and .. combo"  "$t/foobar/dirlink/../target"
try "non-existing path"     "$t/nonesuch/foo"
try "file as directory"     "$t/target/foo"
cd "$t/foo"
try "relative path #1"      "bar/target"
try "relative path #2"      "../target"
try "relative path #3"      "target"
try "relative path #4"      "$t/./foo/../foobar/target"
try "relative paths"        "bar/target" "../target" "target" "$t/./foo/../foobar/target"
cd "$OLDPWD"
try "rel-base inside"       --relative-base="$t/foo" "$t/foo/bar/target"
try "rel-base inside/"      --relative-base="$t/foo/" "$t/foo/bar/target"
try "rel-base outside"      --relative-base="$t/foo/bar" "$t/foo/target"
try "rel-base name prefix"  --relative-base="$t/foo" "$t/foobar/target"
try "rel-base extra //-s"   --relative-base="$t//foo//" "$t/foo/target"
try "rel-base extra ///-s"  --relative-base="$t///foo///" "$t/foo/target"
cd "$t/foo/bar"
try "rel-base relative"     --relative-base="../../foo" "$t/foo/bar/target"
cd "$OLDPWD"
try "multiple rel-base"     --relative-base="$t/foo" \
                            "$t/foo/target" "$t/target" "$t/foo/bar/target"
try "rel-base divergent"    --relative-base="$t" "/dev/null"
try "rel-to inside"         --relative-to="$t/foo" "$t/foo/bar/target"
try "rel-to outside"        --relative-to="$t/foo/bar" "$t/target"
try "rel-to name prefix"    --relative-to="$t/foo" "$t/foobar/target"
try "rel-to extra /-s"      --relative-to="$t//foo//" "$t/foo/target"
cd "$t/foo"
try "rel-to relative"       --relative-to="../foobar" "$t/foo/target"
cd "$OLDPWD"
try "multiple rel-to"       --relative-to="$t/foo" \
                            "$t/foo/target" "$t/target" "$t/foo/bar/target"
try "combined inside both"  --relative-base="$t" --relative-to="$t/foo" "$t/foo/bar/target"
try "combined outside one"  --relative-base="$t" --relative-to="$t/foo/bar" "$t/target"
try "combined outside both" --relative-base="$t/foo" --relative-to="$t/foo/bar" "$t/target"
try "multiple combined"     --relative-base="$t/foo" --relative-to="$t/foo/bar" \
                            "$t/foo/target" "$t/target" "$t/foo/bar/target"
# -base should be a parent path of -to. If not, the arguments are ignored
try "combined errorcase"    --relative-base="$t/foo/bar" --relative-to="$t/foo" "$t/foo/bar/target"
\$\endgroup\$

Not the answer you're looking for? Browse other questions tagged or ask your own question.