Remove feature space for now

2023-05-18 16:07:47 -07:00 · 2023-05-18 16:07:47 -07:00 · 807b9f0b3b
commit 807b9f0b3b
parent 022c095075
2 changed files with 0 additions and 1135 deletions
--- a/keras_core/utils/feature_space.py
+++ b/keras_core/utils/feature_space.py
@ -1,757 +0,0 @@
-from tensorflow import data as tf_data
-
-from keras_core import layers
-from keras_core import operations as ops
-from keras_core.api_export import keras_core_export
-from keras_core.layers.layer import Layer
-from keras_core.saving import saving_lib
-from keras_core.saving import serialization_lib
-from keras_core.utils.naming import auto_name
-
-
-class Cross:
-    def __init__(self, feature_names, crossing_dim, output_mode="one_hot"):
-        if output_mode not in {"int", "one_hot"}:
-            raise ValueError(
-                "Invalid value for argument `output_mode`. "
-                "Expected one of {'int', 'one_hot'}. "
-                f"Received: output_mode={output_mode}"
-            )
-        self.feature_names = tuple(feature_names)
-        self.crossing_dim = crossing_dim
-        self.output_mode = output_mode
-
-    @property
-    def name(self):
-        return "_X_".join(self.feature_names)
-
-    def get_config(self):
-        return {
-            "feature_names": self.feature_names,
-            "crossing_dim": self.crossing_dim,
-            "output_mode": self.output_mode,
-        }
-
-    @classmethod
-    def from_config(cls, config):
-        return cls(**config)
-
-
-class Feature:
-    def __init__(self, dtype, preprocessor, output_mode):
-        if output_mode not in {"int", "one_hot", "float"}:
-            raise ValueError(
-                "Invalid value for argument `output_mode`. "
-                "Expected one of {'int', 'one_hot', 'float'}. "
-                f"Received: output_mode={output_mode}"
-            )
-        self.dtype = dtype
-        if isinstance(preprocessor, dict):
-            preprocessor = serialization_lib.deserialize_keras_object(
-                preprocessor
-            )
-        self.preprocessor = preprocessor
-        self.output_mode = output_mode
-
-    def get_config(self):
-        return {
-            "dtype": self.dtype,
-            "preprocessor": serialization_lib.serialize_keras_object(
-                self.preprocessor
-            ),
-            "output_mode": self.output_mode,
-        }
-
-    @classmethod
-    def from_config(cls, config):
-        return cls(**config)
-
-
-@keras_core_export("keras_core.utils.FeatureSpace")
-class FeatureSpace(Layer):
-    """One-stop utility for preprocessing and encoding structured data.
-
-    Arguments:
-        feature_names: Dict mapping the names of your features to their
-            type specification, e.g. `{"my_feature": "integer_categorical"}`
-            or `{"my_feature": FeatureSpace.integer_categorical()}`.
-            For a complete list of all supported types, see
-            "Available feature types" paragraph below.
-        output_mode: One of `"concat"` or `"dict"`. In concat mode, all
-            features get concatenated together into a single vector.
-            In dict mode, the FeatureSpace returns a dict of individually
-            encoded features (with the same keys as the input dict keys).
-        crosses: List of features to be crossed together, e.g.
-            `crosses=[("feature_1", "feature_2")]`. The features will be
-            "crossed" by hashing their combined value into
-            a fixed-length vector.
-        crossing_dim: Default vector size for hashing crossed features.
-            Defaults to `32`.
-        hashing_dim: Default vector size for hashing features of type
-            `"integer_hashed"` and `"string_hashed"`. Defaults to `32`.
-        num_discretization_bins: Default number of bins to be used for
-            discretizing features of type `"float_discretized"`.
-            Defaults to `32`.
-
-    **Available feature types:**
-
-    Note that all features can be referred to by their string name,
-    e.g. `"integer_categorical"`. When using the string name, the default
-    argument values are used.
-
-    ```python
-    # Plain float values.
-    FeatureSpace.float(name=None)
-
-    # Float values to be preprocessed via featurewise standardization
-    # (i.e. via a `keras.layers.Normalization` layer).
-    FeatureSpace.float_normalized(name=None)
-
-    # Float values to be preprocessed via linear rescaling
-    # (i.e. via a `keras.layers.Rescaling` layer).
-    FeatureSpace.float_rescaled(scale=1., offset=0., name=None)
-
-    # Float values to be discretized. By default, the discrete
-    # representation will then be one-hot encoded.
-    FeatureSpace.float_discretized(
-        num_bins, bin_boundaries=None, output_mode="one_hot", name=None)
-
-    # Integer values to be indexed. By default, the discrete
-    # representation will then be one-hot encoded.
-    FeatureSpace.integer_categorical(
-        max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None)
-
-    # String values to be indexed. By default, the discrete
-    # representation will then be one-hot encoded.
-    FeatureSpace.string_categorical(
-        max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None)
-
-    # Integer values to be hashed into a fixed number of bins.
-    # By default, the discrete representation will then be one-hot encoded.
-    FeatureSpace.integer_hashed(num_bins, output_mode="one_hot", name=None)
-
-    # String values to be hashed into a fixed number of bins.
-    # By default, the discrete representation will then be one-hot encoded.
-    FeatureSpace.string_hashed(num_bins, output_mode="one_hot", name=None)
-    ```
-
-    Examples:
-
-    **Basic usage with a dict of input data:**
-
-    ```python
-    raw_data = {
-        "float_values": [0.0, 0.1, 0.2, 0.3],
-        "string_values": ["zero", "one", "two", "three"],
-        "int_values": [0, 1, 2, 3],
-    }
-    dataset = tf.data.Dataset.from_tensor_slices(raw_data)
-
-    feature_space = FeatureSpace(
-        features={
-            "float_values": "float_normalized",
-            "string_values": "string_categorical",
-            "int_values": "integer_categorical",
-        },
-        crosses=[("string_values", "int_values")],
-        output_mode="concat",
-    )
-    # Before you start using the FeatureSpace,
-    # you must `adapt()` it on some data.
-    feature_space.adapt(dataset)
-
-    # You can call the FeatureSpace on a dict of data (batched or unbatched).
-    output_vector = feature_space(raw_data)
-    ```
-
-    **Basic usage with `tf.data`:**
-
-    ```python
-    # Unlabeled data
-    preprocessed_ds = unlabeled_dataset.map(feature_space)
-
-    # Labeled data
-    preprocessed_ds = labeled_dataset.map(lambda x, y: (feature_space(x), y))
-    ```
-
-    **Basic usage with the Keras Functional API:**
-
-    ```python
-    # Retrieve a dict Keras Input objects
-    inputs = feature_space.get_inputs()
-    # Retrieve the corresponding encoded Keras tensors
-    encoded_features = feature_space.get_encoded_features()
-    # Build a Functional model
-    outputs = keras.layers.Dense(1, activation="sigmoid")(encoded_features)
-    model = keras.Model(inputs, outputs)
-    ```
-
-    **Customizing each feature or feature cross:**
-
-    ```python
-    feature_space = FeatureSpace(
-        features={
-            "float_values": FeatureSpace.float_normalized(),
-            "string_values": FeatureSpace.string_categorical(max_tokens=10),
-            "int_values": FeatureSpace.integer_categorical(max_tokens=10),
-        },
-        crosses=[
-            FeatureSpace.cross(("string_values", "int_values"), crossing_dim=32)
-        ],
-        output_mode="concat",
-    )
-    ```
-
-    **Returning a dict of integer-encoded features:**
-
-    ```python
-    feature_space = FeatureSpace(
-        features={
-            "string_values": FeatureSpace.string_categorical(output_mode="int"),
-            "int_values": FeatureSpace.integer_categorical(output_mode="int"),
-        },
-        crosses=[
-            FeatureSpace.cross(
-                feature_names=("string_values", "int_values"),
-                crossing_dim=32,
-                output_mode="int",
-            )
-        ],
-        output_mode="dict",
-    )
-    ```
-
-    **Specifying your own Keras preprocessing layer:**
-
-    ```python
-    # Let's say that one of the features is a short text paragraph that
-    # we want to encode as a vector (one vector per paragraph) via TF-IDF.
-    data = {
-        "text": ["1st string", "2nd string", "3rd string"],
-    }
-
-    # There's a Keras layer for this: TextVectorization.
-    custom_layer = layers.TextVectorization(output_mode="tf_idf")
-
-    # We can use FeatureSpace.feature to create a custom feature
-    # that will use our preprocessing layer.
-    feature_space = FeatureSpace(
-        features={
-            "text": FeatureSpace.feature(
-                preprocessor=custom_layer, dtype="string", output_mode="float"
-            ),
-        },
-        output_mode="concat",
-    )
-    feature_space.adapt(tf.data.Dataset.from_tensor_slices(data))
-    output_vector = feature_space(data)
-    ```
-
-    **Retrieving the underlying Keras preprocessing layers:**
-
-    ```python
-    # The preprocessing layer of each feature is available in `.preprocessors`.
-    preprocessing_layer = feature_space.preprocessors["feature1"]
-
-    # The crossing layer of each feature cross is available in `.crossers`.
-    # It's an instance of keras.layers.HashedCrossing.
-    crossing_layer = feature_space.crossers["feature1_X_feature2"]
-    ```
-
-    **Saving and reloading a FeatureSpace:**
-
-    ```python
-    feature_space.save("myfeaturespace.keras")
-    reloaded_feature_space = keras.models.load_model("myfeaturespace.keras")
-    ```
-    """
-
-    @classmethod
-    def cross(cls, feature_names, crossing_dim, output_mode="one_hot"):
-        return Cross(feature_names, crossing_dim, output_mode=output_mode)
-
-    @classmethod
-    def feature(cls, dtype, preprocessor, output_mode):
-        return Feature(dtype, preprocessor, output_mode)
-
-    @classmethod
-    def float(cls, name=None):
-        from keras.layers.core import identity
-
-        name = name or auto_name("float")
-        preprocessor = identity.Identity(
-            dtype="float32", name=f"{name}_preprocessor"
-        )
-        return Feature(
-            dtype="float32", preprocessor=preprocessor, output_mode="float"
-        )
-
-    @classmethod
-    def float_rescaled(cls, scale=1.0, offset=0.0, name=None):
-        name = name or auto_name("float_rescaled")
-        preprocessor = layers.Rescaling(
-            scale=scale, offset=offset, name=f"{name}_preprocessor"
-        )
-        return Feature(
-            dtype="float32", preprocessor=preprocessor, output_mode="float"
-        )
-
-    @classmethod
-    def float_normalized(cls, name=None):
-        name = name or auto_name("float_normalized")
-        preprocessor = layers.Normalization(
-            axis=-1, name=f"{name}_preprocessor"
-        )
-        return Feature(
-            dtype="float32", preprocessor=preprocessor, output_mode="float"
-        )
-
-    @classmethod
-    def float_discretized(
-        cls, num_bins, bin_boundaries=None, output_mode="one_hot", name=None
-    ):
-        name = name or auto_name("float_discretized")
-        preprocessor = layers.Discretization(
-            num_bins=num_bins,
-            bin_boundaries=bin_boundaries,
-            name=f"{name}_preprocessor",
-        )
-        return Feature(
-            dtype="float32", preprocessor=preprocessor, output_mode=output_mode
-        )
-
-    @classmethod
-    def integer_categorical(
-        cls,
-        max_tokens=None,
-        num_oov_indices=1,
-        output_mode="one_hot",
-        name=None,
-    ):
-        name = name or auto_name("integer_categorical")
-        preprocessor = layers.IntegerLookup(
-            name=f"{name}_preprocessor",
-            max_tokens=max_tokens,
-            num_oov_indices=num_oov_indices,
-        )
-        return Feature(
-            dtype="int64", preprocessor=preprocessor, output_mode=output_mode
-        )
-
-    @classmethod
-    def string_categorical(
-        cls,
-        max_tokens=None,
-        num_oov_indices=1,
-        output_mode="one_hot",
-        name=None,
-    ):
-        name = name or auto_name("string_categorical")
-        preprocessor = layers.StringLookup(
-            name=f"{name}_preprocessor",
-            max_tokens=max_tokens,
-            num_oov_indices=num_oov_indices,
-        )
-        return Feature(
-            dtype="string", preprocessor=preprocessor, output_mode=output_mode
-        )
-
-    @classmethod
-    def string_hashed(cls, num_bins, output_mode="one_hot", name=None):
-        name = name or auto_name("string_hashed")
-        preprocessor = layers.Hashing(
-            name=f"{name}_preprocessor", num_bins=num_bins
-        )
-        return Feature(
-            dtype="string", preprocessor=preprocessor, output_mode=output_mode
-        )
-
-    @classmethod
-    def integer_hashed(cls, num_bins, output_mode="one_hot", name=None):
-        name = name or auto_name("integer_hashed")
-        preprocessor = layers.Hashing(
-            name=f"{name}_preprocessor", num_bins=num_bins
-        )
-        return Feature(
-            dtype="int64", preprocessor=preprocessor, output_mode=output_mode
-        )
-
-    def __init__(
-        self,
-        features,
-        output_mode="concat",
-        crosses=None,
-        crossing_dim=32,
-        hashing_dim=32,
-        num_discretization_bins=32,
-        name=None,
-    ):
-        super().__init__(name=name)
-        if not features:
-            raise ValueError("The `features` argument cannot be None or empty.")
-        self.crossing_dim = crossing_dim
-        self.hashing_dim = hashing_dim
-        self.num_discretization_bins = num_discretization_bins
-        self.features = {
-            name: self._standardize_feature(name, value)
-            for name, value in features.items()
-        }
-        self.crosses = []
-        if crosses:
-            feature_set = set(features.keys())
-            for cross in crosses:
-                if isinstance(cross, dict):
-                    cross = serialization_lib.deserialize_keras_object(cross)
-                if isinstance(cross, Cross):
-                    self.crosses.append(cross)
-                else:
-                    if not crossing_dim:
-                        raise ValueError(
-                            "When specifying `crosses`, the argument "
-                            "`crossing_dim` "
-                            "(dimensionality of the crossing space) "
-                            "should be specified as well."
-                        )
-                    for key in cross:
-                        if key not in feature_set:
-                            raise ValueError(
-                                "All features referenced "
-                                "in the `crosses` argument "
-                                "should be present in the `features` dict. "
-                                f"Received unknown features: {cross}"
-                            )
-                    self.crosses.append(Cross(cross, crossing_dim=crossing_dim))
-        self.crosses_by_name = {cross.name: cross for cross in self.crosses}
-
-        if output_mode not in {"dict", "concat"}:
-            raise ValueError(
-                "Invalid value for argument `output_mode`. "
-                "Expected one of {'dict', 'concat'}. "
-                f"Received: output_mode={output_mode}"
-            )
-        self.output_mode = output_mode
-
-        self.inputs = {
-            name: self._feature_to_input(name, value)
-            for name, value in self.features.items()
-        }
-        self.preprocessors = {
-            name: value.preprocessor for name, value in self.features.items()
-        }
-        self.encoded_features = None
-        self.crossers = {
-            cross.name: self._cross_to_crosser(cross) for cross in self.crosses
-        }
-        self.one_hot_encoders = {}
-        self.built = False
-        self._is_adapted = False
-        self.concat = None
-        self._preprocessed_features_names = None
-        self._crossed_features_names = None
-
-    def _feature_to_input(self, name, feature):
-        return layers.Input(shape=(1,), dtype=feature.dtype, name=name)
-
-    def _standardize_feature(self, name, feature):
-        if isinstance(feature, Feature):
-            return feature
-
-        if isinstance(feature, dict):
-            return serialization_lib.deserialize_keras_object(feature)
-
-        if feature == "float":
-            return self.float(name=name)
-        elif feature == "float_normalized":
-            return self.float_normalized(name=name)
-        elif feature == "float_rescaled":
-            return self.float_rescaled(name=name)
-        elif feature == "float_discretized":
-            return self.float_discretized(
-                name=name, num_bins=self.num_discretization_bins
-            )
-        elif feature == "integer_categorical":
-            return self.integer_categorical(name=name)
-        elif feature == "string_categorical":
-            return self.string_categorical(name=name)
-        elif feature == "integer_hashed":
-            return self.integer_hashed(self.hashing_dim, name=name)
-        elif feature == "string_hashed":
-            return self.string_hashed(self.hashing_dim, name=name)
-        else:
-            raise ValueError(f"Invalid feature type: {feature}")
-
-    def _cross_to_crosser(self, cross):
-        return layers.HashedCrossing(cross.crossing_dim, name=cross.name)
-
-    def _list_adaptable_preprocessors(self):
-        adaptable_preprocessors = []
-        for name in self.features.keys():
-            preprocessor = self.preprocessors[name]
-            # Special case: a Normalization layer with preset mean/variance.
-            # Not adaptable.
-            if isinstance(preprocessor, layers.Normalization):
-                if preprocessor.input_mean is not None:
-                    continue
-            if hasattr(preprocessor, "adapt"):
-                adaptable_preprocessors.append(name)
-        return adaptable_preprocessors
-
-    def adapt(self, dataset):
-        if not isinstance(dataset, tf_data.Dataset):
-            raise ValueError(
-                "`adapt()` can only be called on a tf.data.Dataset. "
-                f"Received instead: {dataset} (of type {type(dataset)})"
-            )
-
-        for name in self._list_adaptable_preprocessors():
-            # Call adapt() on each individual adaptable layer.
-
-            # TODO: consider rewriting this to instead iterate on the
-            # dataset once, split each batch into individual features,
-            # and call the layer's `_adapt_function` on each batch
-            # to simulate the behavior of adapt() in a more performant fashion.
-
-            feature_dataset = dataset.map(lambda x: x[name])
-            preprocessor = self.preprocessors[name]
-            # TODO: consider adding an adapt progress bar.
-            # Sample 1 element to check the rank
-            for x in feature_dataset.take(1):
-                pass
-            if x.shape.rank == 0:
-                # The dataset yields unbatched scalars; batch it.
-                feature_dataset = feature_dataset.batch(32)
-            if x.shape.rank in {0, 1}:
-                # If the rank is 1, add a dimension
-                # so we can reduce on axis=-1.
-                # Note: if rank was previously 0, it is now 1.
-                feature_dataset = feature_dataset.map(
-                    lambda x: ops.expand_dims(x, -1)
-                )
-            preprocessor.adapt(feature_dataset)
-        self._is_adapted = True
-        self.get_encoded_features()  # Finish building the layer
-        self.built = True
-
-    def get_inputs(self):
-        self._check_if_built()
-        return self.inputs
-
-    def get_encoded_features(self):
-        self._check_if_adapted()
-
-        if self.encoded_features is None:
-            preprocessed_features = self._preprocess_features(self.inputs)
-            crossed_features = self._cross_features(preprocessed_features)
-            merged_features = self._merge_features(
-                preprocessed_features, crossed_features
-            )
-            self.encoded_features = merged_features
-        return self.encoded_features
-
-    def _preprocess_features(self, features):
-        return {
-            name: self.preprocessors[name](features[name])
-            for name in features.keys()
-        }
-
-    def _cross_features(self, features):
-        all_outputs = {}
-        for cross in self.crosses:
-            inputs = [features[name] for name in cross.feature_names]
-            outputs = self.crossers[cross.name](inputs)
-            all_outputs[cross.name] = outputs
-        return all_outputs
-
-    def _merge_features(self, preprocessed_features, crossed_features):
-        if not self._preprocessed_features_names:
-            self._preprocessed_features_names = sorted(
-                preprocessed_features.keys()
-            )
-            self._crossed_features_names = sorted(crossed_features.keys())
-
-        all_names = (
-            self._preprocessed_features_names + self._crossed_features_names
-        )
-        all_features = [
-            preprocessed_features[name]
-            for name in self._preprocessed_features_names
-        ] + [crossed_features[name] for name in self._crossed_features_names]
-
-        if self.output_mode == "dict":
-            output_dict = {}
-        else:
-            features_to_concat = []
-
-        if self.built:
-            # Fast mode.
-            for name, feature in zip(all_names, all_features):
-                encoder = self.one_hot_encoders.get(name, None)
-                if encoder:
-                    feature = encoder(feature)
-                if self.output_mode == "dict":
-                    output_dict[name] = feature
-                else:
-                    features_to_concat.append(feature)
-            if self.output_mode == "dict":
-                return output_dict
-            else:
-                return self.concat(features_to_concat)
-
-        # If the object isn't built,
-        # we create the encoder and concat layers below
-        all_specs = [
-            self.features[name] for name in self._preprocessed_features_names
-        ] + [
-            self.crosses_by_name[name] for name in self._crossed_features_names
-        ]
-        for name, feature, spec in zip(all_names, all_features, all_specs):
-            dtype = feature.dtype
-
-            if spec.output_mode == "one_hot":
-                preprocessor = self.preprocessors.get(
-                    name
-                ) or self.crossers.get(name)
-                cardinality = None
-                if not feature.dtype.startswith("int"):
-                    raise ValueError(
-                        f"Feature '{name}' has `output_mode='one_hot'`. "
-                        "Thus its preprocessor should return an int64 dtype. "
-                        f"Instead it returns a {dtype} dtype."
-                    )
-
-                if isinstance(
-                    preprocessor, (layers.IntegerLookup, layers.StringLookup)
-                ):
-                    cardinality = preprocessor.vocabulary_size()
-                elif isinstance(preprocessor, layers.CategoryEncoding):
-                    cardinality = preprocessor.num_tokens
-                elif isinstance(preprocessor, layers.Discretization):
-                    cardinality = preprocessor.num_bins
-                elif isinstance(
-                    preprocessor, (layers.HashedCrossing, layers.Hashing)
-                ):
-                    cardinality = preprocessor.num_bins
-                else:
-                    raise ValueError(
-                        f"Feature '{name}' has `output_mode='one_hot'`. "
-                        "However it isn't a standard feature and the "
-                        "dimensionality of its output space is not known, "
-                        "thus it cannot be one-hot encoded. "
-                        "Try using `output_mode='int'`."
-                    )
-                if cardinality is not None:
-                    encoder = layers.CategoryEncoding(
-                        num_tokens=cardinality, output_mode="multi_hot"
-                    )
-                    self.one_hot_encoders[name] = encoder
-                    feature = encoder(feature)
-
-            if self.output_mode == "concat":
-                dtype = feature.dtype
-                if dtype.startswith("int") or dtype == "string":
-                    raise ValueError(
-                        f"Cannot concatenate features because feature '{name}' "
-                        f"has not been encoded (it has dtype {dtype}). "
-                        "Consider using `output_mode='dict'`."
-                    )
-                features_to_concat.append(feature)
-            else:
-                output_dict[name] = feature
-
-        if self.output_mode == "concat":
-            self.concat = layers.Concatenate(axis=-1)
-            return self.concat(features_to_concat)
-        else:
-            return output_dict
-
-    def _check_if_adapted(self):
-        if not self._is_adapted:
-            if not self._list_adaptable_preprocessors():
-                self._is_adapted = True
-            else:
-                raise ValueError(
-                    "You need to call `.adapt(dataset)` on the FeatureSpace "
-                    "before you can start using it."
-                )
-
-    def _check_if_built(self):
-        if not self.built:
-            self._check_if_adapted()
-            # Finishes building
-            self.get_encoded_features()
-            self.built = True
-
-    def __call__(self, data):
-        self._check_if_built()
-        if not isinstance(data, dict):
-            raise ValueError(
-                "A FeatureSpace can only be called with a dict. "
-                f"Received: data={data} (of type {type(data)}"
-            )
-
-        data = {
-            key: ops.convert_to_tensor(value) for key, value in data.items()
-        }
-        rebatched = False
-        for name, x in data.items():
-            if x.shape.rank == 0:
-                data[name] = ops.reshape(x, (1, 1))
-                rebatched = True
-            elif x.shape.rank == 1:
-                data[name] = ops.expand_dims(x, -1)
-
-        preprocessed_data = self._preprocess_features(data)
-        crossed_data = self._cross_features(preprocessed_data)
-        merged_data = self._merge_features(preprocessed_data, crossed_data)
-        if rebatched:
-            if self.output_mode == "concat":
-                assert merged_data.shape[0] == 1
-                return ops.squeeze(merged_data, axis=0)
-            else:
-                for name, x in merged_data.items():
-                    if x.shape.rank == 2 and x.shape[0] == 1:
-                        merged_data[name] = ops.squeeze(x, axis=0)
-        return merged_data
-
-    def get_config(self):
-        return {
-            "features": serialization_lib.serialize_keras_object(self.features),
-            "output_mode": self.output_mode,
-            "crosses": serialization_lib.serialize_keras_object(self.crosses),
-            "crossing_dim": self.crossing_dim,
-            "hashing_dim": self.hashing_dim,
-            "num_discretization_bins": self.num_discretization_bins,
-        }
-
-    @classmethod
-    def from_config(cls, config):
-        return cls(**config)
-
-    def get_build_config(self):
-        return {
-            name: feature.preprocessor.get_build_config()
-            for name, feature in self.features.items()
-        }
-
-    def build_from_config(self, config):
-        for name in config.keys():
-            self.features[name].preprocessor.build_from_config(config[name])
-        self._is_adapted = True
-
-    def save(self, filepath):
-        """Save the `FeatureSpace` instance to a `.keras` file.
-
-        You can reload it via `keras.models.load_model()`:
-
-        ```python
-        feature_space.save("myfeaturespace.keras")
-        reloaded_feature_space = keras.models.load_model("myfeaturespace.keras")
-        ```
-        """
-        saving_lib.save_model(self, filepath)
-
-    def save_own_variables(self, store):
-        return
-
-    def load_own_variables(self, store):
-        return
--- a/keras_core/utils/feature_space_test.py
+++ b/keras_core/utils/feature_space_test.py
@ -1,378 +0,0 @@
-# from keras_core import testing
-# from keras_core.utils import feature_space
-# from keras_core import operations as ops
-# from tensorflow import nest
-# from tensorflow import data as tf_data
-# from keras_core import layers
-# from keras_core import models
-# import os
-
-
-# class FeatureSpaceTest(testing.TestCase):
-#     def _get_train_data_dict(
-#         self, as_dataset=False, as_tf_tensors=False, as_labeled_dataset=False
-#     ):
-#         data = {
-#             "float_1": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
-#             "float_2": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
-#             "float_3": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
-#             "string_1": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
-#             "string_2": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
-#             "int_1": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
-#             "int_2": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
-#             "int_3": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
-#         }
-#         if as_dataset:
-#             return tf_data.Dataset.from_tensor_slices(data)
-#         elif as_tf_tensors:
-#             return nest.map_structure(ops.convert_to_tensor, data)
-#         elif as_labeled_dataset:
-#             labels = [0, 1, 0, 1, 0, 0, 1, 0, 1, 1]
-#             return tf_data.Dataset.from_tensor_slices((data, labels))
-#         return data
-
-#     def test_basic_usage(self):
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "float_1": "float",
-#                 "float_2": "float_normalized",
-#                 "float_3": "float_discretized",
-#                 "string_1": "string_categorical",
-#                 "string_2": "string_hashed",
-#                 "int_1": "integer_categorical",
-#                 "int_2": "integer_hashed",
-#                 "int_3": "integer_categorical",
-#             },
-#             crosses=[("float_3", "string_1"), ("string_2", "int_2")],
-#             output_mode="concat",
-#         )
-#         # Test unbatched adapt
-#         fs.adapt(self._get_train_data_dict(as_dataset=True))
-#         # Test batched adapt
-#         fs.adapt(self._get_train_data_dict(as_dataset=True).batch(4))
-
-#         # Test unbatched call on raw data
-#         data = {
-#             key: value[0] for key, value in self._get_train_data_dict().items()
-#         }
-#         out = fs(data)
-#         self.assertEqual(out.shape, [195])
-
-#         # Test unbatched call on TF tensors
-#         data = self._get_train_data_dict(as_tf_tensors=True)
-#         data = {key: value[0] for key, value in data.items()}
-#         out = fs(data)
-#         self.assertEqual(out.shape, [195])
-
-#         # Test batched call on raw data
-#         out = fs(self._get_train_data_dict())
-#         self.assertEqual(out.shape, [10, 195])
-
-#         # Test batched call on TF tensors
-#         out = fs(self._get_train_data_dict(as_tf_tensors=True))
-#         self.assertEqual(out.shape, [10, 195])
-
-#     def test_output_mode_dict(self):
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "float_1": "float",
-#                 "float_2": "float_normalized",
-#                 "float_3": "float_discretized",
-#                 "string_1": "string_categorical",
-#                 "string_2": "string_hashed",
-#                 "int_1": "integer_categorical",
-#                 "int_2": "integer_hashed",
-#                 "int_3": "integer_categorical",
-#             },
-#             crosses=[("float_3", "string_1"), ("string_2", "int_2")],
-#             output_mode="dict",
-#         )
-#         fs.adapt(self._get_train_data_dict(as_dataset=True))
-
-#         # Test unbatched call on raw data
-#         data = {
-#             key: value[0] for key, value in self._get_train_data_dict().items()
-#         }
-#         out = fs(data)
-#         self.assertIsInstance(out, dict)
-#         self.assertLen(out, 10)
-#         self.assertEqual(out["string_1"].shape, [11])
-#         self.assertEqual(out["int_2"].shape, [32])
-#         self.assertEqual(out["string_2_X_int_2"].shape, [32])
-
-#         # Test batched call on raw data
-#         out = fs(self._get_train_data_dict())
-#         self.assertIsInstance(out, dict)
-#         self.assertLen(out, 10)
-#         self.assertEqual(out["string_1"].shape, [10, 11])
-#         self.assertEqual(out["int_2"].shape, [10, 32])
-#         self.assertEqual(out["string_2_X_int_2"].shape, [10, 32])
-
-#         # Test batched call on TF tensors
-#         out = fs(self._get_train_data_dict(as_tf_tensors=True))
-#         self.assertIsInstance(out, dict)
-#         self.assertLen(out, 10)
-#         self.assertEqual(out["string_1"].shape, [10, 11])
-#         self.assertEqual(out["int_2"].shape, [10, 32])
-#         self.assertEqual(out["string_2_X_int_2"].shape, [10, 32])
-
-#     def test_output_mode_dict_of_ints(self):
-#         cls = feature_space.FeatureSpace
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "float_1": "float",
-#                 "float_2": "float_normalized",
-#                 "float_3": "float_discretized",
-#                 "string_1": cls.string_categorical(output_mode="int"),
-#                 "string_2": cls.string_hashed(num_bins=32, output_mode="int"),
-#                 "int_1": cls.integer_categorical(output_mode="int"),
-#                 "int_2": cls.integer_hashed(num_bins=32, output_mode="int"),
-#                 "int_3": cls.integer_categorical(output_mode="int"),
-#             },
-#             crosses=[
-#                 cls.cross(
-#                     ("float_3", "string_1"), output_mode="int", crossing_dim=32
-#                 ),
-#                 cls.cross(
-#                     ("string_2", "int_2"), output_mode="int", crossing_dim=32
-#                 ),
-#             ],
-#             output_mode="dict",
-#         )
-#         fs.adapt(self._get_train_data_dict(as_dataset=True))
-#         data = {
-#             key: value[0] for key, value in self._get_train_data_dict().items()
-#         }
-#         out = fs(data)
-#         self.assertIsInstance(out, dict)
-#         self.assertLen(out, 10)
-#         self.assertEqual(out["string_1"].shape, [1])
-#         self.assertEqual(out["string_1"].dtype.name, "int64")
-#         self.assertEqual(out["int_2"].shape, [1])
-#         self.assertEqual(out["int_2"].dtype.name, "int64")
-#         self.assertEqual(out["string_2_X_int_2"].shape, [1])
-#         self.assertEqual(out["string_2_X_int_2"].dtype.name, "int64")
-
-#     def test_functional_api_sync_processing(self):
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "float_1": "float",
-#                 "float_2": "float_normalized",
-#                 "float_3": "float_discretized",
-#                 "string_1": "string_categorical",
-#                 "string_2": "string_hashed",
-#                 "int_1": "integer_categorical",
-#                 "int_2": "integer_hashed",
-#                 "int_3": "integer_categorical",
-#             },
-#             crosses=[("float_3", "string_1"), ("string_2", "int_2")],
-#             output_mode="concat",
-#         )
-#         fs.adapt(self._get_train_data_dict(as_dataset=True))
-#         inputs = fs.get_inputs()
-#         features = fs.get_encoded_features()
-#         outputs = layers.Dense(1)(features)
-#         model = models.Model(inputs=inputs, outputs=outputs)
-#         model.compile("adam", "mse")
-#         ds = self._get_train_data_dict(as_labeled_dataset=True)
-#         model.fit(ds.batch(4))
-#         model.evaluate(ds.batch(4))
-#         ds = self._get_train_data_dict(as_dataset=True)
-#         model.predict(ds.batch(4))
-
-#     def test_tf_data_async_processing(self):
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "float_1": "float",
-#                 "float_2": "float_normalized",
-#                 "float_3": "float_discretized",
-#                 "string_1": "string_categorical",
-#                 "string_2": "string_hashed",
-#                 "int_1": "integer_categorical",
-#                 "int_2": "integer_hashed",
-#                 "int_3": "integer_categorical",
-#             },
-#             crosses=[("float_3", "string_1"), ("string_2", "int_2")],
-#             output_mode="concat",
-#         )
-#         fs.adapt(self._get_train_data_dict(as_dataset=True))
-#         features = fs.get_encoded_features()
-#         outputs = layers.Dense(1)(features)
-#         model = models.Model(inputs=features, outputs=outputs)
-#         model.compile("adam", "mse")
-#         ds = self._get_train_data_dict(as_labeled_dataset=True)
-#         # Try map before batch
-#         ds = ds.map(lambda x, y: (fs(x), y))
-#         model.fit(ds.batch(4))
-#         # Try map after batch
-#         ds = self._get_train_data_dict(as_labeled_dataset=True)
-#         ds = ds.batch(4)
-#         ds = ds.map(lambda x, y: (fs(x), y))
-#         model.evaluate(ds)
-#         ds = self._get_train_data_dict(as_dataset=True)
-#         ds = ds.map(fs)
-#         model.predict(ds.batch(4))
-
-#     def test_advanced_usage(self):
-#         cls = feature_space.FeatureSpace
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "float_1": cls.float(),
-#                 "float_2": cls.float_normalized(),
-#                 "float_3": cls.float_discretized(num_bins=3),
-#                 "string_1": cls.string_categorical(max_tokens=5),
-#                 "string_2": cls.string_hashed(num_bins=32),
-#                 "int_1": cls.integer_categorical(
-#                     max_tokens=5, num_oov_indices=2
-#                 ),
-#                 "int_2": cls.integer_hashed(num_bins=32),
-#                 "int_3": cls.integer_categorical(max_tokens=5),
-#             },
-#             crosses=[
-#                 cls.cross(("float_3", "string_1"), crossing_dim=32),
-#                 cls.cross(("string_2", "int_2"), crossing_dim=32),
-#             ],
-#             output_mode="concat",
-#         )
-#         fs.adapt(self._get_train_data_dict(as_dataset=True))
-#         data = {
-#             key: value[0] for key, value in self._get_train_data_dict().items()
-#         }
-#         out = fs(data)
-#         self.assertEqual(out.shape, [148])
-
-#     def test_manual_kpl(self):
-#         data = {
-#             "text": ["1st string", "2nd string", "3rd string"],
-#         }
-#         cls = feature_space.FeatureSpace
-
-#         # Test with a tf-idf TextVectorization layer
-#         tv = layers.TextVectorization(output_mode="tf_idf")
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "text": cls.feature(
-#                     preprocessor=tv, dtype="string", output_mode="float"
-#                 ),
-#             },
-#             output_mode="concat",
-#         )
-#         fs.adapt(tf_data.Dataset.from_tensor_slices(data))
-#         out = fs(data)
-#         self.assertEqual(out.shape, [3, 5])
-
-#     def test_no_adapt(self):
-#         data = {
-#             "int_1": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
-#         }
-#         fs = feature_space.FeatureSpace(
-#             {
-#                 "int_1": "integer_hashed",
-#             },
-#             output_mode="concat",
-#         )
-#         out = fs(data)
-#         self.assertEqual(out.shape, [10, 32])
-
-#     def test_saving(self):
-#         cls = feature_space.FeatureSpace
-#         fs = feature_space.FeatureSpace(
-#             features={
-#                 "float_1": cls.float(),
-#                 "float_2": cls.float_normalized(),
-#                 "float_3": cls.float_discretized(num_bins=3),
-#                 "string_1": cls.string_categorical(max_tokens=5),
-#                 "string_2": cls.string_hashed(num_bins=32),
-#                 "int_1": cls.integer_categorical(
-#                     max_tokens=5, num_oov_indices=2
-#                 ),
-#                 "int_2": cls.integer_hashed(num_bins=32),
-#                 "int_3": cls.integer_categorical(max_tokens=5),
-#             },
-#             crosses=[
-#                 cls.cross(("float_3", "string_1"), crossing_dim=32),
-#                 cls.cross(("string_2", "int_2"), crossing_dim=32),
-#             ],
-#             output_mode="concat",
-#         )
-#         fs.adapt(self._get_train_data_dict(as_dataset=True))
-#         data = {
-#             key: value[0] for key, value in self._get_train_data_dict().items()
-#         }
-#         ref_out = fs(data)
-
-#         temp_filepath = os.path.join(self.get_temp_dir(), "fs.keras")
-#         fs.save(temp_filepath)
-#         fs = models.models.load_model(temp_filepath)
-
-#         # Save again immediately after loading to test idempotency
-#         temp_filepath = os.path.join(self.get_temp_dir(), "fs2.keras")
-#         fs.save(temp_filepath)
-
-#         # Test correctness of the first saved FS
-#         out = fs(data)
-#         self.assertAllClose(out, ref_out)
-
-#         inputs = fs.get_inputs()
-#         outputs = fs.get_encoded_features()
-#         model = models.Model(inputs=inputs, outputs=outputs)
-#         ds = self._get_train_data_dict(as_dataset=True)
-#         out = model.predict(ds.batch(4))
-#         self.assertAllClose(out[0], ref_out)
-
-#         # Test correctness of the re-saved FS
-#         fs = models.models.load_model(temp_filepath)
-#         out = fs(data)
-#         self.assertAllClose(out, ref_out)
-
-#     def test_errors(self):
-#         # Test no features
-#         with self.assertRaisesRegex(ValueError, "cannot be None or empty"):
-#             feature_space.FeatureSpace(features={})
-#         # Test no crossing dim
-#         with self.assertRaisesRegex(ValueError, "`crossing_dim`"):
-#             feature_space.FeatureSpace(
-#                 features={
-#                     "f1": "integer_categorical",
-#                     "f2": "integer_categorical",
-#                 },
-#                 crosses=[("f1", "f2")],
-#                 crossing_dim=None,
-#             )
-#         # Test wrong cross feature name
-#         with self.assertRaisesRegex(ValueError, "should be present in "):
-#             feature_space.FeatureSpace(
-#                 features={
-#                     "f1": "integer_categorical",
-#                     "f2": "integer_categorical",
-#                 },
-#                 crosses=[("f1", "unknown")],
-#                 crossing_dim=32,
-#             )
-#         # Test wrong output mode
-#         with self.assertRaisesRegex(ValueError, "for argument `output_mode`"):
-#             feature_space.FeatureSpace(
-#                 features={
-#                     "f1": "integer_categorical",
-#                     "f2": "integer_categorical",
-#                 },
-#                 output_mode="unknown",
-#             )
-#         # Test call before adapt
-#         with self.assertRaisesRegex(ValueError, "You need to call `.adapt"):
-#             fs = feature_space.FeatureSpace(
-#                 features={
-#                     "f1": "integer_categorical",
-#                     "f2": "integer_categorical",
-#                 }
-#             )
-#             fs({"f1": [0], "f2": [0]})
-#         # Test get_encoded_features before adapt
-#         with self.assertRaisesRegex(ValueError, "You need to call `.adapt"):
-#             fs = feature_space.FeatureSpace(
-#                 features={
-#                     "f1": "integer_categorical",
-#                     "f2": "integer_categorical",
-#                 }
-#             )
-#             fs.get_encoded_features()