From 824d82fa0f6fdd1b2213020283d8bd7aa77738b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?=
Date: Mon, 6 Jun 2016 17:38:51 +0200
Subject: [PATCH] nixos/geoip-updater: new service
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GeoIP databases from MaxMind have no stable URLs and change every
month (or so). Our current method of packaging these databases in Nix and
playing catch-up with ever-changing file hashes is a bad idea. For
instance, it makes it impossible to realize old NixOS configurations.

This patch adds a NixOS service that periodically updates the GeoIP
databases in /var/lib/geoip-databases. Moving NixOS modules over can be
done in later patches.

I tried adding an MD5 check, but not all databases have them, so I
skipped it. We are downloading over HTTPS though, it should be good.

I also tried adding zip support, but the first zip file I extracted had a
different filename inside than the archive name, which breaks an
assumption in this service, so I skipped that too.

Changes v9 -> v10:
- Pass "--max-time" to curl to set upper bound on downloads (ensures no
  indefinite hanging if there's a problem with networking).
  Timeout for network connectivity check: 60s.
  Timeout for geoip database (each): 15m.

Changes v8 -> v9:
- Mention the random timer delay in the documentation for the 'interval'
  option.

Changes v7 -> v8:
- Add "RemainAfterExit=true" for the setup service, so it won't be
  restarted needlessly. (Thanks @danbst!)

Changes v6 -> v7:
- Add --skip-existing flag to geoip-updater, which skips updating
  existing database files. Pass that flag when we run the service on
  boot (and on any NixOS configuration change).
  (IMHO, this is somewhat a workaround for systemd persistent timers not
  being triggered immediately when a timer has never expired before.
But it does have the nice side effect of ensuring that the installed
  databases always correspond to the configured ones, since the service
  is now always run after configuration changes.)

Changes v5 -> v6:
- Update database files atomically (per DB)
- If a database is removed from the configuration, it'll be removed from
  /var/lib/geoip-databases too (on next run).
- Add NixOS module assertion so that if the user inputs a non-.gz or
  non-.xz file there will be a build time error instead of a runtime one.
- Run updater as user "nobody" instead of "root".
- Rename NixOS service from "geoip-databases" to "geoip-updater".
- Drop RemainAfterExit, or else the timer won't trigger the unit.
- Bring back "curl --fail", or else we won't catch and log curl failures.

Changes v4 -> v5:
- Add "GeoLite2-City.mmdb.gz" to default database list.

Changes v3 -> v4:
- Remove unneeded geoip-updater-setup.service after adding
  'wantedBy = [ "multi-user.target" ]' directly to geoip-updater.service
- Drop unneeded "Service" name from service descriptions.

Changes v2 -> v3:
- Network may be down when starting from a cold boot, so try a few times.
  Possibly, if using systemd-networkd, it'll pass on the first try. But
  with default DHCP on NixOS, the service is started before hostnames can
  be resolved and thus we need a few extra seconds.
- Add error handling and mark service as failed if fatal error.
- Add proper syslog log levels.
- Add RandomizedDelaySec=3600 to the timer to not put high load on the
  MaxMind servers. Suggested by @Mic92.
- Set RemainAfterExit on geoip-updater.service instead of
  geoip-updater-setup.service. (The latter is only a proxy that pulls in
  the former service).

Changes v1 -> v2:
From Данило Глинський (Danylo Hlynskyi):
nixos/geoip-databases: add `databases` option and fix initial setup

There were two great issues when using this service:
- When you just enable the service, databases aren't downloaded, they are
  downloaded when the timer triggers.
Fixed this with automatic download on first system activation.
- When there is no internet, updater outputs nothing to logs, which is IMO
  misbehavior. Fixed this with removing `--fail` option, better be explicit
  here.
---
 nixos/modules/module-list.nix                 |   1 +
 nixos/modules/services/misc/geoip-updater.nix | 336 ++++++++++++++++++
 2 files changed, 337 insertions(+)
 create mode 100644 nixos/modules/services/misc/geoip-updater.nix

diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 7c9c9ac2a1e9..9bf6a400b098 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -259,6 +259,7 @@
   ./services/misc/felix.nix
   ./services/misc/folding-at-home.nix
   ./services/misc/gammu-smsd.nix
+  ./services/misc/geoip-updater.nix
   #./services/misc/gitit.nix
   ./services/misc/gitlab.nix
   ./services/misc/gitolite.nix
diff --git a/nixos/modules/services/misc/geoip-updater.nix b/nixos/modules/services/misc/geoip-updater.nix
new file mode 100644
index 000000000000..021ee02782d2
--- /dev/null
+++ b/nixos/modules/services/misc/geoip-updater.nix
@@ -0,0 +1,336 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.services.geoip-updater;
+
+  # Base URL that each entry of cfg.databases is appended to.
+  dbBaseUrl = "https://geolite.maxmind.com/download/geoip/database";
+
+  # Value of the timer's RandomizedDelaySec= setting, in seconds.
+  randomizedTimerDelaySec = "3600";
+
+  # Use writeScriptBin instead of writeScript, so that argv[0] (logged to the
+  # journal) doesn't include the long nix store path hash. (Prefixing the
+  # ExecStart= command with '@' doesn't work because we start a shell (new
+  # process) that creates a new argv[0].)
+  geoip-updater = pkgs.writeScriptBin "geoip-updater" ''
+    #!${pkgs.stdenv.shell}
+    # Set to 1 by --skip-existing; installed databases are then left alone.
+    skipExisting=0
+    # Logging helpers. The "<N>" prefix is the syslog priority read by the
+    # journal: 7 = debug, 6 = info, 3 = error.
+    debug()
+    {
+        echo "<7>$@"
+    }
+    info()
+    {
+        echo "<6>$@"
+    }
+    error()
+    {
+        echo "<3>$@"
+    }
+    # Log an error and abort the whole run with exit status 1.
+    die()
+    {
+        error "$@"
+        exit 1
+    }
+    # Probe the download server up to 6 times, sleeping 10 seconds after each
+    # failed try. Returns 0 as soon as one probe succeeds, 1 otherwise.
+    waitNetworkOnline()
+    {
+        ret=1
+        for i in $(seq 6); do
+            curl_out=$("${pkgs.curl.bin}/bin/curl" \
+                --silent --fail --show-error --max-time 60 "${dbBaseUrl}" 2>&1)
+            if [ $? -eq 0 ]; then
+                debug "Server is reachable (try $i)"
+                ret=0
+                break
+            else
+                debug "Server is unreachable (try $i): $curl_out"
+                sleep 10
+            fi
+        done
+        return $ret
+    }
+    # Path helpers. Arg 1 is a database URL; downloads are staged as hidden
+    # (dot-prefixed) files inside the database directory.
+    #   dbFnameTmp:             staged file, still compressed
+    #   dbFnameTmpDecompressed: staged file, .gz/.xz suffix stripped
+    #   dbFname:                final installed file, .gz/.xz suffix stripped
+    dbFnameTmp()
+    {
+        dburl=$1
+        echo "${cfg.databaseDir}/.$(basename "$dburl")"
+    }
+    dbFnameTmpDecompressed()
+    {
+        dburl=$1
+        echo "${cfg.databaseDir}/.$(basename "$dburl")" | sed 's/\.\(gz\|xz\)$//'
+    }
+    dbFname()
+    {
+        dburl=$1
+        echo "${cfg.databaseDir}/$(basename "$dburl")" | sed 's/\.\(gz\|xz\)$//'
+    }
+    # Download the database at URL (arg 1) to its staging path.
+    # Returns 1 (after logging curl's output) if the download fails.
+    downloadDb()
+    {
+        dburl=$1
+        curl_out=$("${pkgs.curl.bin}/bin/curl" \
+            --silent --fail --show-error --max-time 900 -L -o "$(dbFnameTmp "$dburl")" "$dburl" 2>&1)
+        if [ $? -ne 0 ]; then
+            error "Failed to download $dburl: $curl_out"
+            return 1
+        fi
+    }
+    # Decompress the staged download for URL (arg 1) in place.
+    # Returns 1 (after logging the tool's output) on failure or if the file is
+    # neither .gz nor .xz.
+    decompressDb()
+    {
+        fn=$(dbFnameTmp "$1")
+        ret=0
+        case "$fn" in
+            *.gz)
+                cmd_out=$("${pkgs.gzip}/bin/gzip" --decompress --force "$fn" 2>&1)
+                ;;
+            *.xz)
+                cmd_out=$("${pkgs.xz.bin}/bin/xz" --decompress --force "$fn" 2>&1)
+                ;;
+            *)
+                cmd_out=$(echo "File \"$fn\" is neither a .gz nor .xz file")
+                false
+                ;;
+        esac
+        if [ $? -ne 0 ]; then
+            error "$cmd_out"
+            ret=1
+        fi
+        # Propagate the failure; without this the function always returned 0.
+        return $ret
+    }
+    # Move the decompressed staging file for URL (arg 1) to its final name.
+    atomicRename()
+    {
+        dburl=$1
+        mv "$(dbFnameTmpDecompressed "$dburl")" "$(dbFname "$dburl")"
+    }
+    # Returns 1 if the file had to be removed but rm failed, 0 otherwise.
+    removeIfNotInConfig()
+    {
+        # Arg 1 is the full path of an installed DB.
+        # If the corresponding database is not specified in the NixOS config we
+        # remove it.
+        db=$1
+        for cdb in ${lib.concatStringsSep " " cfg.databases}; do
+            confDb=$(echo "$cdb" | sed 's/\.\(gz\|xz\)$//')
+            if [ "$(basename "$db")" = "$(basename "$confDb")" ]; then
+                return 0
+            fi
+        done
+        rm "$db"
+        if [ $? -eq 0 ]; then
+            debug "Removed $(basename "$db") (not listed in services.geoip-updater.databases)"
+        else
+            error "Failed to remove $db"
+            return 1
+        fi
+    }
+    # Remove every installed *.dat, *.mmdb or *.csv file that is not listed in
+    # the configuration. Returns 1 if any removal failed, 0 otherwise.
+    removeUnspecifiedDbs()
+    {
+        ret=0
+        for f in "${cfg.databaseDir}/"*; do
+            test -f "$f" || continue
+            case "$f" in
+                *.dat|*.mmdb|*.csv)
+                    removeIfNotInConfig "$f" || ret=1
+                    ;;
+                *)
+                    debug "Not removing \"$f\" (unknown file extension)"
+                    ;;
+            esac
+        done
+        return $ret
+    }
+    # Download, decompress and install one database (arg 1: URL). Returns 1 as
+    # soon as any step fails. With --skip-existing an already installed
+    # database is left untouched.
+    downloadAndInstall()
+    {
+        dburl=$1
+        if [ "$skipExisting" -eq 1 -a -f "$(dbFname "$dburl")" ]; then
+            debug "Skipping existing file: $(dbFname "$dburl")"
+            return 0
+        fi
+        downloadDb "$dburl" || return 1
+        decompressDb "$dburl" || return 1
+        atomicRename "$dburl" || return 1
+        info "Updated $(basename "$(dbFname "$dburl")")"
+    }
+    # Main: parse arguments, wait for the server, update every database, prune.
+    for arg in "$@"; do
+        case "$arg" in
+            --skip-existing)
+                skipExisting=1
+                info "Option --skip-existing is set: not updating existing databases"
+                ;;
+            *)
+                error "Unknown argument: $arg";;
+        esac
+    done
+    waitNetworkOnline || die "Network is down (${dbBaseUrl} is unreachable)"
+    test -d "${cfg.databaseDir}" || die "Database directory (${cfg.databaseDir}) doesn't exist"
+    debug "Starting update of GeoIP databases in ${cfg.databaseDir}"
+    all_ret=0
+    for db in ${lib.concatStringsSep " \\\n " cfg.databases}; do
+        downloadAndInstall "${dbBaseUrl}/$db" || all_ret=1
+    done
+    removeUnspecifiedDbs || all_ret=1
+    if [ $all_ret -eq 0 ]; then
+        info "Completed GeoIP database update in ${cfg.databaseDir}"
+    else
+        error "Completed GeoIP database update in ${cfg.databaseDir}, with error(s)"
+    fi
+    # Hack to work around systemd journal race:
+    # https://github.com/systemd/systemd/issues/2913
+    sleep 2
+    exit $all_ret
+  '';
+
+in
+
+{
+  options = {
+    services.geoip-updater = {
+      enable = mkOption {
+        default = false;
+        type = types.bool;
+        description = ''
+          Whether to enable periodic downloading of GeoIP databases from
+          maxmind.com. You might want to enable this if you, for instance, use
+          ntopng or Wireshark.
+        '';
+      };
+
+      interval = mkOption {
+        type = types.str;
+        default = "weekly";
+        description = ''
+          Update the GeoIP databases at this time / interval.
+          The format is described in
+          systemd.time(7).
+          To prevent load spikes on maxmind.com, the timer interval is
+          randomized by an additional delay of ${randomizedTimerDelaySec}
+          seconds. Setting a shorter interval than this is not recommended.
+        '';
+      };
+
+      databaseDir = mkOption {
+        type = types.path;
+        default = "/var/lib/geoip-databases";
+        description = ''
+          Directory that will contain GeoIP databases.
+        '';
+      };
+
+      databases = mkOption {
+        type = types.listOf types.str;
+        default = [
+          "GeoLiteCountry/GeoIP.dat.gz"
+          "GeoIPv6.dat.gz"
+          "GeoLiteCity.dat.xz"
+          "GeoLiteCityv6-beta/GeoLiteCityv6.dat.gz"
+          "asnum/GeoIPASNum.dat.gz"
+          "asnum/GeoIPASNumv6.dat.gz"
+          "GeoLite2-Country.mmdb.gz"
+          "GeoLite2-City.mmdb.gz"
+        ];
+        description = ''
+          Which GeoIP databases to update. The full URL is ${dbBaseUrl}/ +
+          the_database.
+        '';
+      };
+
+    };
+
+  };
+
+  config = mkIf cfg.enable {
+
+    # Only .gz and .xz are accepted, since the updater script cannot decompress
+    # anything else. The dot is written "\\." because Nix reduces "\." to a
+    # plain ".", which as a regex would match any character.
+    assertions = [
+      { assertion = (builtins.filter
+          (x: builtins.match ".*\\.(gz|xz)$" x == null) cfg.databases) == [];
+        message = ''
+          services.geoip-updater.databases supports only .gz and .xz databases.
+
+          Current value:
+          ${toString cfg.databases}
+
+          Offending element(s):
+          ${toString (builtins.filter (x: builtins.match ".*\\.(gz|xz)$" x == null) cfg.databases)};
+        '';
+      }
+    ];
+
+    # Fires geoip-updater.service at cfg.interval plus a random delay.
+    systemd.timers.geoip-updater =
+      { description = "GeoIP Updater Timer";
+        partOf = [ "geoip-updater.service" ];
+        wantedBy = [ "timers.target" ];
+        timerConfig.OnCalendar = cfg.interval;
+        timerConfig.Persistent = "true";
+        timerConfig.RandomizedDelaySec = randomizedTimerDelaySec;
+      };
+
+    # Full update of all configured databases (activated by the timer).
+    systemd.services.geoip-updater = {
+      description = "GeoIP Updater";
+      after = [ "network-online.target" "nss-lookup.target" ];
+      wants = [ "network-online.target" ];
+      preStart = ''
+        mkdir -p "${cfg.databaseDir}"
+        chmod 755 "${cfg.databaseDir}"
+        chown nobody:root "${cfg.databaseDir}"
+      '';
+      serviceConfig = {
+        ExecStart = "${geoip-updater}/bin/geoip-updater";
+        User = "nobody";
+        PermissionsStartOnly = true;
+      };
+    };
+
+    # Boot-time run that only fetches missing databases (--skip-existing).
+    systemd.services.geoip-updater-setup = {
+      description = "GeoIP Updater Setup";
+      after = [ "network-online.target" "nss-lookup.target" ];
+      wants = [ "network-online.target" ];
+      wantedBy = [ "multi-user.target" ];
+      conflicts = [ "geoip-updater.service" ];
+      preStart = ''
+        mkdir -p "${cfg.databaseDir}"
+        chmod 755 "${cfg.databaseDir}"
+        chown nobody:root "${cfg.databaseDir}"
+      '';
+      serviceConfig = {
+        ExecStart = "${geoip-updater}/bin/geoip-updater --skip-existing";
+        User = "nobody";
+        PermissionsStartOnly = true;
+        # So it won't be (needlessly) restarted:
+        RemainAfterExit = true;
+      };
+    };
+
+  };
+}