diff --git a/keras_core/layers/core/dense.py b/keras_core/layers/core/dense.py
index 520f52708..fbf9b590c 100644
--- a/keras_core/layers/core/dense.py
+++ b/keras_core/layers/core/dense.py
@@ -3,10 +3,59 @@ from keras_core import constraints
 from keras_core import initializers
 from keras_core import operations as ops
 from keras_core import regularizers
+from keras_core.api_export import keras_core_export
+from keras_core.layers.input_spec import InputSpec
 from keras_core.layers.layer import Layer
 
 
+@keras_core_export("keras_core.layers.Dense")
 class Dense(Layer):
+    """Just your regular densely-connected NN layer.
+
+    `Dense` implements the operation:
+    `output = activation(dot(input, kernel) + bias)`
+    where `activation` is the element-wise activation function
+    passed as the `activation` argument, `kernel` is a weights matrix
+    created by the layer, and `bias` is a bias vector created by the layer
+    (only applicable if `use_bias` is `True`).
+
+    Note: If the input to the layer has a rank greater than 2, `Dense`
+    computes the dot product between the `inputs` and the `kernel` along the
+    last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
+    For example, if input has dimensions `(batch_size, d0, d1)`, then we create
+    a `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2
+    of the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are
+    `batch_size * d0` such sub-tensors). The output in this case will have
+    shape `(batch_size, d0, units)`.
+
+    Args:
+        units: Positive integer, dimensionality of the output space.
+        activation: Activation function to use.
+            If you don't specify anything, no activation is applied
+            (i.e. "linear" activation: `a(x) = x`).
+        use_bias: Boolean, whether the layer uses a bias vector.
+        kernel_initializer: Initializer for the `kernel` weights matrix.
+        bias_initializer: Initializer for the bias vector.
+        kernel_regularizer: Regularizer function applied to
+            the `kernel` weights matrix.
+        bias_regularizer: Regularizer function applied to the bias vector.
+        activity_regularizer: Regularizer function applied to
+            the output of the layer (its "activation").
+        kernel_constraint: Constraint function applied to
+            the `kernel` weights matrix.
+        bias_constraint: Constraint function applied to the bias vector.
+
+    Input shape:
+        N-D tensor with shape: `(batch_size, ..., input_dim)`.
+        The most common situation would be
+        a 2D input with shape `(batch_size, input_dim)`.
+
+    Output shape:
+        N-D tensor with shape: `(batch_size, ..., units)`.
+        For instance, for a 2D input with shape `(batch_size, input_dim)`,
+        the output would have shape `(batch_size, units)`.
+    """
+
     def __init__(
         self,
         units,
@@ -32,9 +81,7 @@ class Dense(Layer):
         self.kernel_constraint = constraints.get(kernel_constraint)
         self.bias_constraint = constraints.get(bias_constraint)
 
-        if activity_regularizer:
-            # TODO
-            raise ValueError("activity_regularizer not yet supported.")
+        self.input_spec = InputSpec(min_ndim=2)
 
     def build(self, input_shape):
         input_dim = input_shape[-1]
@@ -49,6 +96,7 @@ class Dense(Layer):
                 initializer=self.bias_initializer,
                 regularizer=self.bias_regularizer,
             )
+        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
         self.built = True
 
     def call(self, inputs):
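Not part of the patch: a minimal sketch of the rank > 2 contract the new docstring describes, assuming eager execution on the default backend and that the layer accepts NumPy inputs. The kernel contracts only the last input axis, so leading dimensions pass through.

```python
import numpy as np
from keras_core import layers

# (batch_size, d0, d1) input: the kernel has shape (d1, units), so the
# contraction runs over axis -1 and the output is (batch_size, d0, units).
layer = layers.Dense(units=4)
y = layer(np.ones((8, 5, 16)))
print(y.shape)  # (8, 5, 4)
```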
+ """ + def __init__( self, units, @@ -32,9 +81,7 @@ class Dense(Layer): self.kernel_constraint = constraints.get(kernel_constraint) self.bias_constraint = constraints.get(bias_constraint) - if activity_regularizer: - # TODO - raise ValueError("activity_regularizer not yet supported.") + self.input_spec = InputSpec(min_ndim=2) def build(self, input_shape): input_dim = input_shape[-1] @@ -49,6 +96,7 @@ class Dense(Layer): initializer=self.bias_initializer, regularizer=self.bias_regularizer, ) + self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) self.built = True def call(self, inputs): diff --git a/keras_core/layers/core/dense_test.py b/keras_core/layers/core/dense_test.py index 2e59c8d77..3155b57b4 100644 --- a/keras_core/layers/core/dense_test.py +++ b/keras_core/layers/core/dense_test.py @@ -2,6 +2,7 @@ import numpy as np from keras_core import layers from keras_core import testing +from keras_core.backend import keras_tensor class DenseTest(testing.TestCase): @@ -55,3 +56,9 @@ class DenseTest(testing.TestCase): [[-1.0, 2.0]], ) self.assertAllClose(layer(inputs), [[10.0, 0.0]]) + + def test_dense_errors(self): + with self.assertRaisesRegex(ValueError, "incompatible with the layer"): + layer = layers.Dense(units=2, activation="relu") + layer(keras_tensor.KerasTensor((1, 2))) + layer(keras_tensor.KerasTensor((1, 3))) diff --git a/keras_core/layers/layer.py b/keras_core/layers/layer.py index b6b3306b1..4649424ef 100644 --- a/keras_core/layers/layer.py +++ b/keras_core/layers/layer.py @@ -13,6 +13,7 @@ And some more magic: - add_loss - metric tracking - RNG seed tracking +- activity regularization """ import collections import inspect @@ -24,6 +25,7 @@ from tensorflow import nest from keras_core import backend from keras_core import initializers +from keras_core import regularizers from keras_core import utils from keras_core.api_export import keras_core_export from keras_core.backend import KerasTensor @@ -38,8 +40,11 @@ from keras_core.utils.tracking import Tracker @keras_core_export(["keras_core.Layer", "keras_core.layers.Layer"]) class Layer(Operation): - def __init__(self, trainable=True, dtype=None, name=None): + def __init__( + self, activity_regularizer=None, trainable=True, dtype=None, name=None + ): super().__init__(name=name) + self.activity_regularizer = regularizers.get(activity_regularizer) self._trainable = trainable if dtype is None: dtype = backend.floatx() @@ -316,9 +321,16 @@ class Layer(Operation): kwargs["training"] = training # TODO: Populate mask argument(s) + + # Call the layer. with backend.name_scope(self.name): outputs = super().__call__(*args, **kwargs) + # Record activity regularizer loss. + if self.activity_regularizer is not None: + self.add_loss(self.activity_regularizer(outputs)) + # TODO: Set masks on outputs + # self._set_mask_metadata(inputs, outputs, previous_mask) # Destroy call context if we created it self._maybe_reset_call_context() @@ -566,6 +578,29 @@ class Layer(Operation): deque.extendleft(layer._layers) return layers + def _set_mask_metadata(self, inputs, outputs, previous_mask): + # Many `Layer`s don't need to call `compute_mask`. + # This method is optimized to do as little work as needed for the common + # case. 
diff --git a/keras_core/layers/layer.py b/keras_core/layers/layer.py
index b6b3306b1..4649424ef 100644
--- a/keras_core/layers/layer.py
+++ b/keras_core/layers/layer.py
@@ -13,6 +13,7 @@ And some more magic:
 - add_loss
 - metric tracking
 - RNG seed tracking
+- activity regularization
 """
 import collections
 import inspect
@@ -24,6 +25,7 @@ from tensorflow import nest
 
 from keras_core import backend
 from keras_core import initializers
+from keras_core import regularizers
 from keras_core import utils
 from keras_core.api_export import keras_core_export
 from keras_core.backend import KerasTensor
@@ -38,8 +40,11 @@ from keras_core.utils.tracking import Tracker
 
 @keras_core_export(["keras_core.Layer", "keras_core.layers.Layer"])
 class Layer(Operation):
-    def __init__(self, trainable=True, dtype=None, name=None):
+    def __init__(
+        self, activity_regularizer=None, trainable=True, dtype=None, name=None
+    ):
         super().__init__(name=name)
+        self.activity_regularizer = regularizers.get(activity_regularizer)
         self._trainable = trainable
         if dtype is None:
             dtype = backend.floatx()
@@ -316,9 +321,16 @@ class Layer(Operation):
             kwargs["training"] = training
 
         # TODO: Populate mask argument(s)
+
+        # Call the layer.
         with backend.name_scope(self.name):
             outputs = super().__call__(*args, **kwargs)
 
+        # Record activity regularizer loss.
+        if self.activity_regularizer is not None:
+            self.add_loss(self.activity_regularizer(outputs))
+        # TODO: Set masks on outputs
+        # self._set_mask_metadata(inputs, outputs, previous_mask)
         # Destroy call context if we created it
         self._maybe_reset_call_context()
 
@@ -566,6 +578,29 @@ class Layer(Operation):
         deque.extendleft(layer._layers)
         return layers
 
+    def _set_mask_metadata(self, inputs, outputs, previous_mask):
+        # Many `Layer`s don't need to call `compute_mask`.
+        # This method is optimized to do as little work as needed for the common
+        # case.
+        if not self._supports_masking:
+            return
+
+        flat_outputs = nest.flatten(outputs)
+
+        mask_already_computed = all(
+            getattr(x, "_keras_mask", None) is not None for x in flat_outputs
+        )
+        if mask_already_computed:
+            return
+
+        output_masks = self.compute_mask(inputs, previous_mask)
+        if output_masks is None:
+            return
+
+        flat_masks = nest.flatten(output_masks)
+        for tensor, mask in zip(flat_outputs, flat_masks):
+            tensor._keras_mask = mask
+
 
 def get_arguments_dict(fn, *args, **kwargs):
     """Return a dict mapping argument names to their values."""
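Finally, a sketch of the `activity_regularizer` plumbing end to end (not part of the patch), assuming the built-in `regularizers.L2` and that the `add_loss` tracking mentioned in the module docstring surfaces recorded losses via `layer.losses`:

```python
import numpy as np
from keras_core import layers, regularizers

# The penalty is computed on the layer's output inside Layer.__call__ and
# recorded with add_loss, so it shows up alongside weight regularizer losses.
layer = layers.Dense(units=3, activity_regularizer=regularizers.L2(0.01))
layer(np.ones((2, 4)))
print(layer.losses)  # [scalar tensor: 0.01 * sum(outputs**2)]
```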