From ac403b83fb1a8ee53f7889a2fc1987f196b40e63 Mon Sep 17 00:00:00 2001
From: illustris
Date: Sat, 7 May 2022 18:00:04 +0530
Subject: [PATCH] nixos/hadoop: add HBase submodule

Add a services.hadoop.hbase submodule that defines hbase-master and
hbase-regionserver units, generates hbase-site.xml alongside the other
Hadoop site files, and adds an HBase CLI option to the gateway role.
The existing services.hbase module is renamed to services.hbase-standalone.

---
 .../modules/services/cluster/hadoop/conf.nix  |   1 +
 .../services/cluster/hadoop/default.nix       |   2 +-
 .../modules/services/cluster/hadoop/hbase.nix | 200 ++++++++++++++++++
 nixos/modules/services/databases/hbase.nix    |  21 +-
 nixos/tests/hadoop/default.nix                |   1 +
 nixos/tests/hadoop/hbase.nix                  |  84 ++++++++
 nixos/tests/hbase.nix                         |   4 +-
 7 files changed, 297 insertions(+), 16 deletions(-)
 create mode 100644 nixos/modules/services/cluster/hadoop/hbase.nix
 create mode 100644 nixos/tests/hadoop/hbase.nix

diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix
index e3c26a0d5505..874e3227b74c 100644
--- a/nixos/modules/services/cluster/hadoop/conf.nix
+++ b/nixos/modules/services/cluster/hadoop/conf.nix
@@ -33,6 +33,7 @@ pkgs.runCommand "hadoop-conf" {} (with cfg; ''
   mkdir -p $out/
   cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
   cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
+  cp ${siteXml "hbase-site.xml" (hbaseSiteDefault // hbaseSite // hbaseSiteInternal)}/* $out/
   cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
   cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
   cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
index a4fdea81037c..a3a28eb7184a 100644
--- a/nixos/modules/services/cluster/hadoop/default.nix
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -5,7 +5,7 @@ let
 in
 with lib;
 {
-  imports = [ ./yarn.nix ./hdfs.nix ];
+  imports = [ ./yarn.nix ./hdfs.nix ./hbase.nix ];
 
   options.services.hadoop = {
     coreSite = mkOption {
diff --git a/nixos/modules/services/cluster/hadoop/hbase.nix b/nixos/modules/services/cluster/hadoop/hbase.nix
new file mode 100644
index 000000000000..050bcd03749d
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/hbase.nix
@@ -0,0 +1,200 @@
+{ config, lib, pkgs, ...}:
+
+with lib;
+let
+  cfg = config.services.hadoop;
+  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+  mkIfNotNull = x: mkIf (x != null) x;
+in
+{
+  options.services.hadoop = {
+
+    gatewayRole.enableHbaseCli = mkOption {
+      description = "Whether to enable HBase CLI tools";
+      default = false;
+      type = types.bool;
+    };
+
+    hbaseSiteDefault = mkOption {
+      default = {
+        "hbase.regionserver.ipc.address" = "0.0.0.0";
+        "hbase.master.ipc.address" = "0.0.0.0";
+        "hbase.master.info.bindAddress" = "0.0.0.0";
+        "hbase.regionserver.info.bindAddress" = "0.0.0.0";
+
+        "hbase.cluster.distributed" = "true";
+      };
+      type = types.attrsOf types.anything;
+      description = ''
+        Default options for hbase-site.xml
+      '';
+    };
+    hbaseSite = mkOption {
+      default = {};
+      type = with types; attrsOf anything;
+      example = literalExpression ''
+      '';
+      description = ''
+        Additional options and overrides for hbase-site.xml
+
+      '';
+    };
+    hbaseSiteInternal = mkOption {
+      default = {};
+      type = with types; attrsOf anything;
+      internal = true;
+      description = ''
+        Internal option to add configs to hbase-site.xml based on module options
+      '';
+    };
+
+    hbase = {
+
+      package = mkOption {
+        type = types.package;
+        default = pkgs.hbase;
+        defaultText = literalExpression "pkgs.hbase";
+        description = "HBase package";
+      };
+
+      rootdir = mkOption {
+        description = ''
+          This option will set "hbase.rootdir" in hbase-site.xml and determine
+          the directory shared by region servers and into which HBase persists.
+          The URL should be 'fully-qualified' to include the filesystem scheme.
+          If a core-site.xml is provided, the FS scheme defaults to the value
+          of "fs.defaultFS".
+
+          Filesystems other than HDFS (like S3, QFS, Swift) are also supported.
+        '';
+        type = types.str;
+        example = "hdfs://nameservice1/hbase";
+        default = "/hbase";
+      };
+      zookeeperQuorum = mkOption {
+        description = ''
+          This option will set "hbase.zookeeper.quorum" in hbase-site.xml.
+          Comma separated list of servers in the ZooKeeper ensemble.
+        '';
+        type = with types; nullOr commas;
+        example = "zk1.internal,zk2.internal,zk3.internal";
+        default = null;
+      };
+      master = {
+        enable = mkEnableOption "HBase Master";
+        initHDFS = mkEnableOption "initialization of the hbase directory on HDFS";
+
+        openFirewall = mkOption {
+          type = types.bool;
+          default = false;
+          description = ''
+            Open firewall ports for HBase master.
+          '';
+        };
+      };
+      regionServer = {
+        enable = mkEnableOption "HBase RegionServer";
+
+        overrideHosts = mkOption {
+          type = types.bool;
+          default = true;
+          description = ''
+            Remove /etc/hosts entries for "127.0.0.2" and "::1" defined in nixos/modules/config/networking.nix
+            Regionservers must be able to resolve their hostnames to their IP addresses, through PTR records
+            or /etc/hosts entries.
+
+          '';
+        };
+
+        openFirewall = mkOption {
+          type = types.bool;
+          default = false;
+          description = ''
+            Open firewall ports for HBase RegionServer.
+          '';
+        };
+      };
+    };
+  };
+
+  config = mkMerge [
+    (mkIf cfg.hbase.master.enable {
+      services.hadoop.gatewayRole = {
+        enable = true;
+        enableHbaseCli = mkDefault true;
+      };
+
+      systemd.services.hbase-master = {
+        description = "HBase master";
+        wantedBy = [ "multi-user.target" ];
+
+        preStart = mkIf cfg.hbase.master.initHDFS ''
+          HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfsadmin -safemode wait
+          HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfs -mkdir -p ${cfg.hbase.rootdir}
+          HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfs -chown hbase ${cfg.hbase.rootdir}
+        '';
+
+        serviceConfig = {
+          User = "hbase";
+          SyslogIdentifier = "hbase-master";
+          ExecStart = "${cfg.hbase.package}/bin/hbase --config ${hadoopConf} " +
+            "master start";
+          Restart = "always";
+        };
+      };
+
+      services.hadoop.hbaseSiteInternal."hbase.rootdir" = cfg.hbase.rootdir;
+
+      networking.firewall.allowedTCPPorts = (mkIf cfg.hbase.master.openFirewall [
+        16000 16010
+      ]);
+
+    })
+
+    (mkIf cfg.hbase.regionServer.enable {
+      services.hadoop.gatewayRole = {
+        enable = true;
+        enableHbaseCli = mkDefault true;
+      };
+
+      systemd.services.hbase-regionserver = {
+        description = "HBase RegionServer";
+        wantedBy = [ "multi-user.target" ];
+        serviceConfig = {
+          User = "hbase";
+          SyslogIdentifier = "hbase-regionserver";
+          ExecStart = "${cfg.hbase.package}/bin/hbase --config ${hadoopConf} " +
+            "regionserver start";
+          Restart = "always";
+        };
+      };
+
+      services.hadoop.hbaseSiteInternal."hbase.rootdir" = cfg.hbase.rootdir;
+
+      networking = {
+        firewall.allowedTCPPorts = (mkIf cfg.hbase.regionServer.openFirewall [
+          16020 16030
+        ]);
+        hosts = mkIf cfg.hbase.regionServer.overrideHosts {
+          "127.0.0.2" = mkForce [ ];
+          "::1" = mkForce [ ];
+        };
+      };
+    })
+
+    (mkIf cfg.gatewayRole.enable {
+
+      environment.systemPackages = mkIf cfg.gatewayRole.enableHbaseCli [ cfg.hbase.package ];
+
+      services.hadoop.hbaseSiteInternal = with cfg.hbase; {
+        "hbase.zookeeper.quorum" = mkIfNotNull zookeeperQuorum;
+      };
+
+      users.users.hbase = {
+        description = "Hadoop HBase user";
+        group = "hadoop";
+        isSystemUser = true;
+      };
+    })
+  ];
+}
diff --git a/nixos/modules/services/databases/hbase.nix b/nixos/modules/services/databases/hbase.nix
index 5cc5d97e7984..7c40703282a6 100644
--- a/nixos/modules/services/databases/hbase.nix
+++ b/nixos/modules/services/databases/hbase.nix
@@ -3,8 +3,8 @@
 with lib;
 
 let
-  cfg = config.services.hbase;
-  opt = options.services.hbase;
+  cfg = config.services.hbase-standalone;
+  opt = options.services.hbase-standalone;
 
   buildProperty = configAttr:
     (builtins.concatStringsSep "\n"
@@ -35,16 +35,12 @@ in {
   ###### interface
 
   options = {
+    services.hbase-standalone = {
 
-    services.hbase = {
-
-      enable = mkOption {
-        type = types.bool;
-        default = false;
-        description = lib.mdDoc ''
-          Whether to run HBase.
-        '';
-      };
+      enable = mkEnableOption ''
+        HBase master in standalone mode with embedded regionserver and zookeeper.
+        Do not use this configuration for production nor for evaluating HBase performance.
+      '';
 
       package = mkOption {
         type = types.package;
@@ -108,12 +104,11 @@ in {
       };
 
     };
-
   };
 
   ###### implementation
 
-  config = mkIf config.services.hbase.enable {
+  config = mkIf cfg.enable {
 
     systemd.tmpfiles.rules = [
       "d '${cfg.dataDir}' - ${cfg.user} ${cfg.group} - -"
diff --git a/nixos/tests/hadoop/default.nix b/nixos/tests/hadoop/default.nix
index d2a97cbeffb8..479690adc064 100644
--- a/nixos/tests/hadoop/default.nix
+++ b/nixos/tests/hadoop/default.nix
@@ -4,4 +4,5 @@
   all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop.nix { inherit package; };
   hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hdfs.nix { inherit package; };
   yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./yarn.nix { inherit package; };
+  hbase = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hbase.nix { inherit package; };
 }
diff --git a/nixos/tests/hadoop/hbase.nix b/nixos/tests/hadoop/hbase.nix
new file mode 100644
index 000000000000..8ce4567bff3b
--- /dev/null
+++ b/nixos/tests/hadoop/hbase.nix
@@ -0,0 +1,84 @@
+# Test a minimal hbase cluster
+{ pkgs, ... }:
+import ../make-test-python.nix ({ hadoop ? pkgs.hadoop, hbase ? pkgs.hbase, ... }:
+with pkgs.lib;
+{
+  name = "hadoop-hbase";
+
+  nodes = let
+    coreSite = {
+      "fs.defaultFS" = "hdfs://namenode:8020";
+    };
+    defOpts = {
+      enable = true;
+      openFirewall = true;
+    };
+    zookeeperQuorum = "zookeeper";
+  in {
+    zookeeper = { ... }: {
+      services.zookeeper.enable = true;
+      networking.firewall.allowedTCPPorts = [ 2181 ];
+    };
+    namenode = { ... }: {
+      services.hadoop = {
+        hdfs = {
+          namenode = defOpts // { formatOnInit = true; };
+        };
+        inherit coreSite;
+      };
+    };
+    datanode = { ... }: {
+      virtualisation.diskSize = 8192;
+      services.hadoop = {
+        hdfs.datanode = defOpts;
+        inherit coreSite;
+      };
+    };
+
+    master = { ... }:{
+      services.hadoop = {
+        inherit coreSite;
+        hbase = {
+          inherit zookeeperQuorum;
+          master = defOpts // { initHDFS = true; };
+        };
+      };
+    };
+    regionserver = { ... }:{
+      services.hadoop = {
+        inherit coreSite;
+        hbase = {
+          inherit zookeeperQuorum;
+          regionServer = defOpts;
+        };
+      };
+    };
+  };
+
+  testScript = ''
+    start_all()
+
+    # wait for HDFS cluster
+    namenode.wait_for_unit("hdfs-namenode")
+    namenode.wait_for_unit("network.target")
+    namenode.wait_for_open_port(8020)
+    namenode.wait_for_open_port(9870)
+    datanode.wait_for_unit("hdfs-datanode")
+    datanode.wait_for_unit("network.target")
+    datanode.wait_for_open_port(9864)
+    datanode.wait_for_open_port(9866)
+    datanode.wait_for_open_port(9867)
+
+    # wait for ZK
+    zookeeper.wait_for_unit("zookeeper")
+    zookeeper.wait_for_open_port(2181)
+
+    # wait for HBase to start up
+    master.wait_for_unit("hbase-master")
+    regionserver.wait_for_unit("hbase-regionserver")
+
+    assert "1 active master, 0 backup masters, 1 servers" in master.succeed("echo status | HADOOP_USER_NAME=hbase hbase shell -n")
+    regionserver.wait_until_succeeds("echo \"create 't1','f1'\" | HADOOP_USER_NAME=hbase hbase shell -n")
+    assert "NAME => 'f1'" in regionserver.succeed("echo \"describe 't1'\" | HADOOP_USER_NAME=hbase hbase shell -n")
+  '';
+})
diff --git a/nixos/tests/hbase.nix b/nixos/tests/hbase.nix
index a449d24dd6fd..7d8e32f81603 100644
--- a/nixos/tests/hbase.nix
+++ b/nixos/tests/hbase.nix
@@ -1,6 +1,6 @@
 import ./make-test-python.nix ({ pkgs, lib, package ? pkgs.hbase, ... }:
 {
-  name = "hbase";
+  name = "hbase-standalone";
 
   meta = with lib.maintainers; {
     maintainers = [ illustris ];
@@ -8,7 +8,7 @@ import ./make-test-python.nix ({ pkgs, lib, package ? pkgs.hbase, ... }:
 
   nodes = {
     hbase = { pkgs, ... }: {
-      services.hbase = {
+      services.hbase-standalone = {
         enable = true;
         inherit package;
         # Needed for standalone mode in hbase 2+