lib.fileset.fromSource: init

This commit is contained in:
Silvan Mosberger 2023-10-18 00:16:50 +02:00
parent f82a1686e6
commit 73493584a7
3 changed files with 337 additions and 0 deletions

@ -3,8 +3,10 @@ let
inherit (import ./internal.nix { inherit lib; })
_coerce
_singleton
_coerceMany
_toSourceFilter
_fromSourceFilter
_unionMany
_printFileset
_intersection
@ -187,6 +189,75 @@ If a directory does not recursively contain any file, it is omitted from the sto
filter = sourceFilter;
};
/*
Create a file set with the same files as a `lib.sources`-based value.
This does not import any of the files into the store.
This can be used to gradually migrate from `lib.sources`-based filtering to `lib.fileset`.
A file set can be turned back into a source using [`toSource`](#function-library-lib.fileset.toSource).
:::{.note}
File sets cannot represent empty directories.
Turning the result of this function back into a source using `toSource` will therefore not preserve empty directories.
:::
Type:
fromSource :: SourceLike -> FileSet
Example:
# There's no cleanSource-like function for file sets yet,
# but we can just convert cleanSource to a file set and use it that way
toSource {
root = ./.;
fileset = fromSource (lib.sources.cleanSource ./.);
}
# Keeping a previous sourceByRegex (which could be migrated to `lib.fileset.unions`),
# but removing a subdirectory using file set functions
difference
(fromSource (lib.sources.sourceByRegex ./. [
"^README\\.md$"
# This regex includes everything in ./doc
"^doc(/.*)?$"
]))
./doc/generated
# Use cleanSource, but limit it to only include ./Makefile and files under ./src
intersection
(fromSource (lib.sources.cleanSource ./.))
(unions [
./Makefile
./src
]);
*/
fromSource = source:
  let
    # `_isLibCleanSourceWith`, `origSrc` and `filter` are internal attributes of
    # lib.sources values. Depending on them is tolerated here because both
    # libraries live in the same code base and this function bridges the two.
    hasFilter = source ? _isLibCleanSourceWith;
    # The value the source was originally created from:
    # the `origSrc` of a lib.sources value, otherwise the argument itself
    origin = if hasFilter then source.origSrc else source;
  in
  if isPath origin then
    if pathExists origin then
      if hasFilter then
        _fromSourceFilter origin source.filter
      else
        # Without a filter every subpath is included,
        # so the expensive filter conversion can be skipped entirely
        _singleton origin
    else
      throw ''
        lib.fileset.fromSource: The source origin (${toString origin}) of the argument does not exist.''
  # Everything below is a rejection: only sources created from paths are supported
  else if isStringLike origin then
    throw ''
      lib.fileset.fromSource: The source origin of the argument is a string-like value ("${toString origin}"), but it should be a path instead.
          Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.''
  else
    throw ''
      lib.fileset.fromSource: The source origin of the argument is of type ${typeOf origin}, but it should be a path instead.'';
/*
The file set containing all files that are in either of two given file sets.
This is the same as [`unions`](#function-library-lib.fileset.unions),

@ -461,6 +461,59 @@ rec {
else
nonEmpty;
# Convert a builtins.filterSource-style filter function, applied to a root path,
# into a file set holding exactly the files the filter lets through.
# Because the resulting tree is built with `mapAttrs`, the filter is only invoked
# for the entries that actually get evaluated.
# Type: Path -> (String -> String -> Bool) -> fileset
_fromSourceFilter = root: sourceFilter:
  let
    # Build the file set tree for one directory.
    #
    # Two representations of the same location are threaded through the recursion:
    # - `dir`, the path value, which is what `readDir` is called on
    #   (reading a path string would be unspecified with multiple filesystem roots)
    # - `dirString`, the path string, which is what the filter must be called with
    #
    # Recursing with only the path value would force a path-to-string conversion
    # for every single entry, which is a fairly expensive operation.
    treeForDir = dir: dirString:
      mapAttrs
        (name: type:
          let
            entry = dir + "/${name}";
            entryString = dirString + "/${name}";
          in
          if sourceFilter entryString type then
            # Included directories are descended into, anything else keeps its type
            if type == "directory" then
              treeForDir entry entryString
            else
              type
          else
            # Rejected by the filter: not part of the file set
            null
        )
        (readDir dir);

    rootType = pathType root;

    # The string builtins.path would hand to the filter for the root.
    # `toString` is avoided because it is not very well defined (see ../path/README.md);
    # instead `lib.path.splitRoot` takes the path apart and only the subpath components
    # are kept, since builtins.path never exposes the filesystem root to the filter.
    rootString = "/${concatStringsSep "/" (components (splitRoot root).subpath)}";
  in
  if rootType != "directory" then
    # builtins.path always includes a directly given file without consulting the filter.
    # The base path is lifted to the parent directory to satisfy the base path invariant.
    _create (dirOf root) {
      ${baseNameOf root} = rootType;
    }
  else
    # Like builtins.path, the filter is never called on the root directory itself
    _create root (treeForDir root rootString);
# Transforms the filesetTree of a file set to a shorter base path, e.g.
# _shortenTreeBase [ "foo" ] (_create /foo/bar null)
# => { bar = null; }

@ -1,5 +1,7 @@
#!/usr/bin/env bash
# shellcheck disable=SC2016
# shellcheck disable=SC2317
# shellcheck disable=SC2192
# Tests lib.fileset
# Run:
@ -839,6 +841,217 @@ touch 0 "${filesToCreate[@]}"
expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace"
rm -rf -- *
## lib.fileset.fromSource
# Check error messages
# A bare null argument is rejected, with its type named in the message
expectFailure 'fromSource null' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
# A source whose origin is a string gets the dedicated string-like message plus a hint
expectFailure 'fromSource (lib.cleanSource "")' 'lib.fileset.fromSource: The source origin of the argument is a string-like value \(""\), but it should be a path instead.
\s*Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.'
# A source whose origin is null gets the same message as a bare null
expectFailure 'fromSource (lib.cleanSource null)' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
# fromSource on a path works and is the same as coercing that path
mkdir a
touch a/b c
expectEqual 'trace (fromSource ./.) null' 'trace ./. null'
rm -rf -- *
# Check that converting to a file set doesn't read the included files
# NOTE(review): withFileMonitor is defined outside this section; presumably it fails
# if the given path (a/b) is accessed while `run` executes -- confirm
mkdir a
touch a/b
run() {
# The source has no filter, so the trace is expected to report the whole directory
expectEqual "trace (fromSource (lib.cleanSourceWith { src = ./a; })) null" "builtins.trace \"$work/a (all files in directory)\" null"
rm a/b
}
withFileMonitor run a/b
rm -rf -- *
# Check that converting to a file set doesn't read entries for directories that are filtered out
mkdir -p a/b
touch a/b/c
run() {
# The filter rejects everything, so the resulting file set is expected to be empty
expectEqual "trace (fromSource (lib.cleanSourceWith {
src = ./a;
filter = pathString: type: false;
})) null" "builtins.trace \"(empty)\" null"
rm a/b/c
rmdir a/b
}
withFileMonitor run a/b
rm -rf -- *
# The filter is not needed on empty directories
# (the working directory is empty here because of the preceding `rm -rf -- *`;
# the filter is an `abort`, so evaluating it at all would fail the test)
expectEqual 'trace (fromSource (lib.cleanSourceWith {
src = ./.;
filter = abort "filter should not be needed";
})) null' 'trace _emptyWithoutBase null'
# Single files also work
# (the source is created from the file ./a directly and must equal coercing ./a)
touch a b
expectEqual 'trace (fromSource (cleanSourceWith { src = ./a; })) null' 'trace ./a null'
rm -rf -- *
# For a tree assigning each subpath true/false,
# check whether a source filter with those results includes the same files
# as a file set created using fromSource. Usage:
#
#   tree=(
#     [a]=1  # ./a is a file and the filter should return true for it
#     [b/]=0 # ./b is a directory and the filter should return false for it
#   )
#   checkSource
#
# Reads the global associative array `tree` and the global `tmp` directory.
# Calls `die` with a description of the first mismatch found.
# Empties the current directory before returning.
checkSource() {
    # Keep the helper variables from leaking into the rest of the script
    local first p sourceExpr filesetExpr sourceStorePath filesetStorePath subpath

    createTree

    # Serialise the tree as JSON (there's only minimal savings with jq,
    # and we don't need to handle escapes)
    {
        echo "{"
        # Non-empty only until the first entry has been written,
        # so that every later entry is preceded by a comma
        first=1
        for p in "${!tree[@]}"; do
            if [[ -z "$first" ]]; then
                echo ","
            else
                first=
            fi
            echo "\"$p\":"
            case "${tree[$p]}" in
                1)
                    echo "true"
                    ;;
                0)
                    echo "false"
                    ;;
                *)
                    die "Unsupported tree value: ${tree[$p]}"
                    ;;
            esac
        done
        echo "}"
    } > "$tmp/tree.json"

    # An expression to create a source value with a filter matching the tree
    sourceExpr='
      let
        tree = importJSON '"$tmp"'/tree.json;
      in
      cleanSourceWith {
        src = ./.;
        filter =
          pathString: type:
          let
            stripped = removePrefix (toString ./. + "/") pathString;
            key = stripped + optionalString (type == "directory") "/";
          in
          tree.${key} or
            (throw "tree key ${key} missing");
      }
    '

    # The same source, converted to a file set and back into a source
    filesetExpr='
      toSource {
        root = ./.;
        fileset = fromSource ('"$sourceExpr"');
      }
    '

    # Turn both into store paths
    sourceStorePath=$(expectStorePath "$sourceExpr")
    filesetStorePath=$(expectStorePath "$filesetExpr")

    # Loop through each path in the tree
    while IFS= read -r -d $'\0' subpath; do
        if [[ ! -e "$sourceStorePath"/"$subpath" ]]; then
            # If it's not in the source store path, it's also not in the file set store path
            if [[ -e "$filesetStorePath"/"$subpath" ]]; then
                die "The store path $sourceStorePath created by $sourceExpr doesn't contain $subpath, but the corresponding store path $filesetStorePath created via fromSource does contain $subpath"
            fi
        elif [[ -z "$(find "$sourceStorePath"/"$subpath" -type f)" ]]; then
            # If it's an empty directory in the source store path, it shouldn't be in the file set store path
            if [[ -e "$filesetStorePath"/"$subpath" ]]; then
                die "The store path $sourceStorePath created by $sourceExpr contains the path $subpath without any files, but the corresponding store path $filesetStorePath created via fromSource didn't omit it"
            fi
        else
            # If it's non-empty directory or a file, it should be in the file set store path
            if [[ ! -e "$filesetStorePath"/"$subpath" ]]; then
                die "The store path $sourceStorePath created by $sourceExpr contains the non-empty path $subpath, but the corresponding store path $filesetStorePath created via fromSource doesn't include it"
            fi
        fi
    done < <(find . -mindepth 1 -print0)

    rm -rf -- *
}
# Check whether the filter is evaluated correctly
# Keys ending in "/" denote directories, all others files (see the checkSource usage comment);
# the values are filled in by the loop below
tree=(
[a]=
[b/]=
[b/c]=
[b/d]=
[e/]=
[e/e/]=
)
# We fill out the above tree values with all possible combinations of 0 and 1
# Then check whether a filter based on those return values gets turned into the corresponding file set
# `i` enumerates every bit pattern with one bit per tree entry
for i in $(seq 0 $((2 ** ${#tree[@]} - 1 ))); do
for p in "${!tree[@]}"; do
# Assign the lowest bit of i to this entry, then drop that bit
tree[$p]=$(( i % 2 ))
# (( ... )) has a non-zero exit status when the result is 0, hence the `|| true`
# (presumably needed because the script runs with errexit -- confirm)
(( i /= 2 )) || true
done
checkSource
done
# The filter is called with the same arguments in the same order
# Both expressions use a filter that emits a trace line per call.
# The expected side forces `.outPath` of the plain source with builtins.seq so that its filter runs,
# then emits the trace line that `trace` prints for a file set including everything.
mkdir a e
# Note that `e` already exists as a directory from the mkdir above
touch a/b a/c d e
expectEqual '
trace (fromSource (cleanSourceWith {
src = ./.;
filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
})) null
' '
builtins.seq (cleanSourceWith {
src = ./.;
filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
}).outPath
builtins.trace "'"$work"' (all files in directory)"
null
'
rm -rf -- *
# Test that if a directory is not included, the filter isn't called on its contents
# The filter rejects ./a and aborts on any path it does not list explicitly,
# so a call with ./a/c would fail the evaluation
mkdir a b
touch a/c b/d
expectEqual 'trace (fromSource (cleanSourceWith {
src = ./.;
filter = pathString: type:
if pathString == toString ./a then
false
else if pathString == toString ./b then
true
else if pathString == toString ./b/d then
true
else
abort "This filter should not be called with path ${pathString}";
})) null' 'trace (_create ./. { b = "directory"; }) null'
rm -rf -- *
# The filter is called lazily:
# If a later operation (say, an intersection) removes a part of the tree, the filter won't run on it
# Here the intersection with ./a discards ./d, and the filter aborts if it is ever called on ./d or ./d/e
mkdir a d
touch a/{b,c} d/e
expectEqual 'trace (intersection ./a (fromSource (lib.cleanSourceWith {
src = ./.;
filter = pathString: type:
if pathString == toString ./a || pathString == toString ./a/b then
true
else if pathString == toString ./a/c then
false
else
abort "filter should not be called on ${pathString}";
}))) null' 'trace ./a/b null'
rm -rf -- *
# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets
echo >&2 tests ok