From 2523dc7eebcd70232cadfc58b0f0f38cc3651be5 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 26 Jun 2023 09:00:33 -0700 Subject: [PATCH] Add functional and sequential API guides --- guides/functional_api.py | 879 +++++++++++++++++++++++++++++++++++++ guides/sequential_model.py | 370 ++++++++++++++++ 2 files changed, 1249 insertions(+) create mode 100644 guides/functional_api.py create mode 100644 guides/sequential_model.py diff --git a/guides/functional_api.py b/guides/functional_api.py new file mode 100644 index 000000000..e5750e32e --- /dev/null +++ b/guides/functional_api.py @@ -0,0 +1,879 @@ +""" +Title: The Functional API +Author: [fchollet](https://twitter.com/fchollet) +Date created: 2019/03/01 +Last modified: 2020/04/12 +Description: Complete guide to the functional API. +Accelerator: GPU +""" +""" +## Setup +""" + +import numpy as np +import keras_core as keras +from keras_core import layers +from keras_core import operations as ops + +""" +## Introduction + +The Keras *functional API* is a way to create models that are more flexible +than the `keras.Sequential` API. The functional API can handle models +with non-linear topology, shared layers, and even multiple inputs or outputs. + +The main idea is that a deep learning model is usually +a directed acyclic graph (DAG) of layers. +So the functional API is a way to build *graphs of layers*. + +Consider the following model: + +
+``` +(input: 784-dimensional vectors) + ↧ +[Dense (64 units, relu activation)] + ↧ +[Dense (64 units, relu activation)] + ↧ +[Dense (10 units, softmax activation)] + ↧ +(output: logits of a probability distribution over 10 classes) +``` +
+ +This is a basic graph with three layers. +To build this model using the functional API, start by creating an input node: +""" + +inputs = keras.Input(shape=(784,)) + +""" +The shape of the data is set as a 784-dimensional vector. +The batch size is always omitted since only the shape of each sample is specified. + +If, for example, you have an image input with a shape of `(32, 32, 3)`, +you would use: +""" + +# Just for demonstration purposes. +img_inputs = keras.Input(shape=(32, 32, 3)) + +""" +The `inputs` that is returned contains information about the shape and `dtype` +of the input data that you feed to your model. +Here's the shape: +""" + +inputs.shape + +""" +Here's the dtype: +""" + +inputs.dtype + +""" +You create a new node in the graph of layers by calling a layer on this `inputs` +object: +""" + +dense = layers.Dense(64, activation="relu") +x = dense(inputs) + +""" +The "layer call" action is like drawing an arrow from "inputs" to this layer +you created. +You're "passing" the inputs to the `dense` layer, and you get `x` as the output. + +Let's add a few more layers to the graph of layers: +""" + +x = layers.Dense(64, activation="relu")(x) +outputs = layers.Dense(10)(x) + +""" +At this point, you can create a `Model` by specifying its inputs and outputs +in the graph of layers: +""" + +model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model") + +""" +Let's check out what the model summary looks like: +""" + +model.summary() + +""" +You can also plot the model as a graph: +""" + +keras.utils.plot_model(model, "my_first_model.png") + +""" +And, optionally, display the input and output shapes of each layer +in the plotted graph: +""" + +keras.utils.plot_model( + model, "my_first_model_with_shape_info.png", show_shapes=True +) + +""" +This figure and the code are almost identical. In the code version, +the connection arrows are replaced by the call operation. + +A "graph of layers" is an intuitive mental image for a deep learning model, +and the functional API is a way to create models that closely mirrors this. +""" + +""" +## Training, evaluation, and inference + +Training, evaluation, and inference work exactly in the same way for models +built using the functional API as for `Sequential` models. + +The `Model` class offers a built-in training loop (the `fit()` method) +and a built-in evaluation loop (the `evaluate()` method). Note +that you can easily [customize these loops](/guides/customizing_what_happens_in_fit/) +to implement training routines beyond supervised learning +(e.g. [GANs](https://keras.io/examples/generative/dcgan_overriding_train_step/)). + +Here, load the MNIST image data, reshape it into vectors, +fit the model on the data (while monitoring performance on a validation split), +then evaluate the model on the test data: +""" + +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + +x_train = x_train.reshape(60000, 784).astype("float32") / 255 +x_test = x_test.reshape(10000, 784).astype("float32") / 255 + +model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=keras.optimizers.RMSprop(), + metrics=["accuracy"], +) + +history = model.fit( + x_train, y_train, batch_size=64, epochs=2, validation_split=0.2 +) + +test_scores = model.evaluate(x_test, y_test, verbose=2) +print("Test loss:", test_scores[0]) +print("Test accuracy:", test_scores[1]) + +""" +For further reading, see the [training and evaluation](/guides/training_with_built_in_methods/) guide. 
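
As for inference, generating predictions on new samples works the same way, via
`predict()`. A minimal sketch, reusing the `x_test` array prepared above:

```python
predictions = model.predict(x_test[:3])
print(predictions.shape)  # (3, 10) -- one row of logits per test sample
```

Since the model ends in a plain `Dense(10)` layer, these are raw logits; apply a
softmax if you need class probabilities.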
+""" + +""" +## Save and serialize + +Saving the model and serialization work the same way for models built using +the functional API as they do for `Sequential` models. The standard way +to save a functional model is to call `model.save()` +to save the entire model as a single file. You can later recreate the same model +from this file, even if the code that built the model is no longer available. + +This saved file includes the: +- model architecture +- model weight values (that were learned during training) +- model training config, if any (as passed to `compile()`) +- optimizer and its state, if any (to restart training where you left off) +""" + +model.save("my_model.keras") +del model +# Recreate the exact same model purely from the file: +model = keras.models.load_model("my_model.keras") + +""" +For details, read the model [serialization & saving]( + /guides/serialization_and_saving/) guide. +""" + +""" +## Use the same graph of layers to define multiple models + +In the functional API, models are created by specifying their inputs +and outputs in a graph of layers. That means that a single +graph of layers can be used to generate multiple models. + +In the example below, you use the same stack of layers to instantiate two models: +an `encoder` model that turns image inputs into 16-dimensional vectors, +and an end-to-end `autoencoder` model for training. +""" + +encoder_input = keras.Input(shape=(28, 28, 1), name="img") +x = layers.Conv2D(16, 3, activation="relu")(encoder_input) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.MaxPooling2D(3)(x) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.Conv2D(16, 3, activation="relu")(x) +encoder_output = layers.GlobalMaxPooling2D()(x) + +encoder = keras.Model(encoder_input, encoder_output, name="encoder") +encoder.summary() + +x = layers.Reshape((4, 4, 1))(encoder_output) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +x = layers.Conv2DTranspose(32, 3, activation="relu")(x) +x = layers.UpSampling2D(3)(x) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x) + +autoencoder = keras.Model(encoder_input, decoder_output, name="autoencoder") +autoencoder.summary() + +""" +Here, the decoding architecture is strictly symmetrical +to the encoding architecture, so the output shape is the same as +the input shape `(28, 28, 1)`. + +The reverse of a `Conv2D` layer is a `Conv2DTranspose` layer, +and the reverse of a `MaxPooling2D` layer is an `UpSampling2D` layer. +""" + +""" +## All models are callable, just like layers + +You can treat any model as if it were a layer by invoking it on an `Input` or +on the output of another layer. By calling a model you aren't just reusing +the architecture of the model, you're also reusing its weights. 
+ +To see this in action, here's a different take on the autoencoder example that +creates an encoder model, a decoder model, and chains them in two calls +to obtain the autoencoder model: +""" + +encoder_input = keras.Input(shape=(28, 28, 1), name="original_img") +x = layers.Conv2D(16, 3, activation="relu")(encoder_input) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.MaxPooling2D(3)(x) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.Conv2D(16, 3, activation="relu")(x) +encoder_output = layers.GlobalMaxPooling2D()(x) + +encoder = keras.Model(encoder_input, encoder_output, name="encoder") +encoder.summary() + +decoder_input = keras.Input(shape=(16,), name="encoded_img") +x = layers.Reshape((4, 4, 1))(decoder_input) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +x = layers.Conv2DTranspose(32, 3, activation="relu")(x) +x = layers.UpSampling2D(3)(x) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x) + +decoder = keras.Model(decoder_input, decoder_output, name="decoder") +decoder.summary() + +autoencoder_input = keras.Input(shape=(28, 28, 1), name="img") +encoded_img = encoder(autoencoder_input) +decoded_img = decoder(encoded_img) +autoencoder = keras.Model(autoencoder_input, decoded_img, name="autoencoder") +autoencoder.summary() + +""" +As you can see, the model can be nested: a model can contain sub-models +(since a model is just like a layer). +A common use case for model nesting is *ensembling*. +For example, here's how to ensemble a set of models into a single model +that averages their predictions: +""" + + +def get_model(): + inputs = keras.Input(shape=(128,)) + outputs = layers.Dense(1)(inputs) + return keras.Model(inputs, outputs) + + +model1 = get_model() +model2 = get_model() +model3 = get_model() + +inputs = keras.Input(shape=(128,)) +y1 = model1(inputs) +y2 = model2(inputs) +y3 = model3(inputs) +outputs = layers.average([y1, y2, y3]) +ensemble_model = keras.Model(inputs=inputs, outputs=outputs) + +""" +## Manipulate complex graph topologies + +### Models with multiple inputs and outputs + +The functional API makes it easy to manipulate multiple inputs and outputs. +This cannot be handled with the `Sequential` API. + +For example, if you're building a system for ranking customer issue tickets by +priority and routing them to the correct department, +then the model will have three inputs: + +- the title of the ticket (text input), +- the text body of the ticket (text input), and +- any tags added by the user (categorical input) + +This model will have two outputs: + +- the priority score between 0 and 1 (scalar sigmoid output), and +- the department that should handle the ticket (softmax output +over the set of departments). 
+ +You can build this model in a few lines with the functional API: +""" + +num_tags = 12 # Number of unique issue tags +num_words = 10000 # Size of vocabulary obtained when preprocessing text data +num_departments = 4 # Number of departments for predictions + +title_input = keras.Input( + shape=(None,), name="title" +) # Variable-length sequence of ints +body_input = keras.Input( + shape=(None,), name="body" +) # Variable-length sequence of ints +tags_input = keras.Input( + shape=(num_tags,), name="tags" +) # Binary vectors of size `num_tags` + +# Embed each word in the title into a 64-dimensional vector +title_features = layers.Embedding(num_words, 64)(title_input) +# Embed each word in the text into a 64-dimensional vector +body_features = layers.Embedding(num_words, 64)(body_input) + +# Reduce sequence of embedded words in the title into a single 128-dimensional vector +title_features = layers.LSTM(128)(title_features) +# Reduce sequence of embedded words in the body into a single 32-dimensional vector +body_features = layers.LSTM(32)(body_features) + +# Merge all available features into a single large vector via concatenation +x = layers.concatenate([title_features, body_features, tags_input]) + +# Stick a logistic regression for priority prediction on top of the features +priority_pred = layers.Dense(1, name="priority")(x) +# Stick a department classifier on top of the features +department_pred = layers.Dense(num_departments, name="department")(x) + +# Instantiate an end-to-end model predicting both priority and department +model = keras.Model( + inputs=[title_input, body_input, tags_input], + outputs={"priority": priority_pred, "department": department_pred}, +) + +""" +Now plot the model: +""" + +keras.utils.plot_model( + model, "multi_input_and_output_model.png", show_shapes=True +) + +""" +When compiling this model, you can assign different losses to each output. +You can even assign different weights to each loss -- to modulate +their contribution to the total training loss. 
+""" + +model.compile( + optimizer=keras.optimizers.RMSprop(1e-3), + loss=[ + keras.losses.BinaryCrossentropy(from_logits=True), + keras.losses.CategoricalCrossentropy(from_logits=True), + ], + loss_weights=[1.0, 0.2], +) + +""" +Since the output layers have different names, you could also specify +the losses and loss weights with the corresponding layer names: +""" + +model.compile( + optimizer=keras.optimizers.RMSprop(1e-3), + loss={ + "priority": keras.losses.BinaryCrossentropy(from_logits=True), + "department": keras.losses.CategoricalCrossentropy(from_logits=True), + }, + loss_weights={"priority": 1.0, "department": 0.2}, +) + +""" +Train the model by passing lists of NumPy arrays of inputs and targets: +""" + +# Dummy input data +title_data = np.random.randint(num_words, size=(1280, 10)) +body_data = np.random.randint(num_words, size=(1280, 100)) +tags_data = np.random.randint(2, size=(1280, num_tags)).astype("float32") + +# Dummy target data +priority_targets = np.random.random(size=(1280, 1)) +dept_targets = np.random.randint(2, size=(1280, num_departments)) + +model.fit( + {"title": title_data, "body": body_data, "tags": tags_data}, + {"priority": priority_targets, "department": dept_targets}, + epochs=2, + batch_size=32, +) + +""" +When calling fit with a `Dataset` object, it should yield either a +tuple of lists like `([title_data, body_data, tags_data], [priority_targets, dept_targets])` +or a tuple of dictionaries like +`({'title': title_data, 'body': body_data, 'tags': tags_data}, {'priority': priority_targets, 'department': dept_targets})`. + +For more detailed explanation, refer to the [training and evaluation](/guides/training_with_built_in_methods/) guide. +""" + +""" +### A toy ResNet model + +In addition to models with multiple inputs and outputs, +the functional API makes it easy to manipulate non-linear connectivity +topologies -- these are models with layers that are not connected sequentially, +which the `Sequential` API cannot handle. + +A common use case for this is residual connections. 
Let's build a toy ResNet model for CIFAR10 to demonstrate this:
"""

inputs = keras.Input(shape=(32, 32, 3), name="img")
x = layers.Conv2D(32, 3, activation="relu")(inputs)
x = layers.Conv2D(64, 3, activation="relu")(x)
block_1_output = layers.MaxPooling2D(3)(x)

x = layers.Conv2D(64, 3, activation="relu", padding="same")(block_1_output)
x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
block_2_output = layers.add([x, block_1_output])

x = layers.Conv2D(64, 3, activation="relu", padding="same")(block_2_output)
x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
block_3_output = layers.add([x, block_2_output])

x = layers.Conv2D(64, 3, activation="relu")(block_3_output)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(10)(x)

model = keras.Model(inputs, outputs, name="toy_resnet")
model.summary()

"""
Plot the model:
"""

keras.utils.plot_model(model, "mini_resnet.png", show_shapes=True)

"""
Now train the model:
"""

(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

model.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["acc"],
)
# We restrict the data to the first 1000 samples so as to limit execution time
# on Colab. Try to train on the entire dataset until convergence!
model.fit(
    x_train[:1000],
    y_train[:1000],
    batch_size=64,
    epochs=1,
    validation_split=0.2,
)

"""
## Shared layers

Another good use for the functional API is models that use *shared layers*.
Shared layers are layer instances that are reused multiple times in the same model --
they learn features that correspond to multiple paths in the graph of layers.

Shared layers are often used to encode inputs from similar spaces
(say, two different pieces of text that feature similar vocabulary).
They enable sharing of information across these different inputs,
and they make it possible to train such a model on less data.
If a given word is seen in one of the inputs,
that will benefit the processing of all inputs that pass through the shared layer.

To share a layer in the functional API, call the same layer instance multiple times.
For instance, here's an `Embedding` layer shared across two different text inputs:
"""

# Embedding for 1000 unique words mapped to 128-dimensional vectors
shared_embedding = layers.Embedding(1000, 128)

# Variable-length sequence of integers
text_input_a = keras.Input(shape=(None,), dtype="int32")

# Variable-length sequence of integers
text_input_b = keras.Input(shape=(None,), dtype="int32")

# Reuse the same layer to encode both inputs
encoded_input_a = shared_embedding(text_input_a)
encoded_input_b = shared_embedding(text_input_b)

"""
## Extract and reuse nodes in the graph of layers

Because the graph of layers you are manipulating is a static data structure,
it can be accessed and inspected. And this is how you are able to plot
functional models as images.

This also means that you can access the activations of intermediate layers
("nodes" in the graph) and reuse them elsewhere --
which is very useful for something like feature extraction.

Let's look at an example.
This is a VGG19 model with weights pretrained on ImageNet: +""" + +vgg19 = keras.applications.VGG19() + +""" +And these are the intermediate activations of the model, +obtained by querying the graph data structure: +""" + +features_list = [layer.output for layer in vgg19.layers] + +""" +Use these features to create a new feature-extraction model that returns +the values of the intermediate layer activations: +""" + +feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list) + +img = np.random.random((1, 224, 224, 3)).astype("float32") +extracted_features = feat_extraction_model(img) + +""" +This comes in handy for tasks like +[neural style transfer](https://keras.io/examples/generative/neural_style_transfer/), +among other things. +""" + +""" +## Extend the API using custom layers + +`keras` includes a wide range of built-in layers, for example: + +- Convolutional layers: `Conv1D`, `Conv2D`, `Conv3D`, `Conv2DTranspose` +- Pooling layers: `MaxPooling1D`, `MaxPooling2D`, `MaxPooling3D`, `AveragePooling1D` +- RNN layers: `GRU`, `LSTM`, `ConvLSTM2D` +- `BatchNormalization`, `Dropout`, `Embedding`, etc. + +But if you don't find what you need, it's easy to extend the API by creating +your own layers. All layers subclass the `Layer` class and implement: + +- `call` method, that specifies the computation done by the layer. +- `build` method, that creates the weights of the layer (this is just a style +convention since you can create weights in `__init__`, as well). + +To learn more about creating layers from scratch, read +[custom layers and models](/guides/making_new_layers_and_models_via_subclassing) guide. + +The following is a basic implementation of `keras.layers.Dense`: +""" + + +class CustomDense(layers.Layer): + def __init__(self, units=32): + super().__init__() + self.units = units + + def build(self, input_shape): + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), initializer="random_normal", trainable=True + ) + + def call(self, inputs): + return ops.matmul(inputs, self.w) + self.b + + +inputs = keras.Input((4,)) +outputs = CustomDense(10)(inputs) + +model = keras.Model(inputs, outputs) + +""" +For serialization support in your custom layer, define a `get_config()` +method that returns the constructor arguments of the layer instance: +""" + + +class CustomDense(layers.Layer): + def __init__(self, units=32): + super().__init__() + self.units = units + + def build(self, input_shape): + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), initializer="random_normal", trainable=True + ) + + def call(self, inputs): + return ops.matmul(inputs, self.w) + self.b + + def get_config(self): + return {"units": self.units} + + +inputs = keras.Input((4,)) +outputs = CustomDense(10)(inputs) + +model = keras.Model(inputs, outputs) +config = model.get_config() + +new_model = keras.Model.from_config( + config, custom_objects={"CustomDense": CustomDense} +) + +""" +Optionally, implement the class method `from_config(cls, config)` which is used +when recreating a layer instance given its config dictionary. 
+The default implementation of `from_config` is: + +```python +def from_config(cls, config): + return cls(**config) +``` +""" + +""" +## When to use the functional API + +Should you use the Keras functional API to create a new model, +or just subclass the `Model` class directly? In general, the functional API +is higher-level, easier and safer, and has a number of +features that subclassed models do not support. + +However, model subclassing provides greater flexibility when building models +that are not easily expressible as directed acyclic graphs of layers. +For example, you could not implement a Tree-RNN with the functional API +and would have to subclass `Model` directly. + +For an in-depth look at the differences between the functional API and +model subclassing, read +[What are Symbolic and Imperative APIs in TensorFlow 2.0?](https://blog.tensorflow.org/2019/01/what-are-symbolic-and-imperative-apis.html). + +### Functional API strengths: + +The following properties are also true for Sequential models +(which are also data structures), but are not true for subclassed models +(which are Python bytecode, not data structures). + +#### Less verbose + +There is no `super().__init__(...)`, no `def call(self, ...):`, etc. + +Compare: + +```python +inputs = keras.Input(shape=(32,)) +x = layers.Dense(64, activation='relu')(inputs) +outputs = layers.Dense(10)(x) +mlp = keras.Model(inputs, outputs) +``` + +With the subclassed version: + +```python +class MLP(keras.Model): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.dense_1 = layers.Dense(64, activation='relu') + self.dense_2 = layers.Dense(10) + + def call(self, inputs): + x = self.dense_1(inputs) + return self.dense_2(x) + +# Instantiate the model. +mlp = MLP() +# Necessary to create the model's state. +# The model doesn't have a state until it's called at least once. +_ = mlp(ops.zeros((1, 32))) +``` + +#### Model validation while defining its connectivity graph + +In the functional API, the input specification (shape and dtype) is created +in advance (using `Input`). Every time you call a layer, +the layer checks that the specification passed to it matches its assumptions, +and it will raise a helpful error message if not. + +This guarantees that any model you can build with the functional API will run. +All debugging -- other than convergence-related debugging -- +happens statically during the model construction and not at execution time. +This is similar to type checking in a compiler. + +#### A functional model is plottable and inspectable + +You can plot the model as a graph, and you can easily access intermediate nodes +in this graph. For example, to extract and reuse the activations of intermediate +layers (as seen in a previous example): + +```python +features_list = [layer.output for layer in vgg19.layers] +feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list) +``` + +#### A functional model can be serialized or cloned + +Because a functional model is a data structure rather than a piece of code, +it is safely serializable and can be saved as a single file +that allows you to recreate the exact same model +without having access to any of the original code. +See the [serialization & saving guide](/guides/serialization_and_saving/). + +To serialize a subclassed model, it is necessary for the implementer +to specify a `get_config()` +and `from_config()` method at the model level. 
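
As a rough sketch of what that entails (the class and argument names below are
illustrative, not part of the guide):

```python
class TwoLayerMLP(keras.Model):
    # Hypothetical subclassed model made serializable via get_config/from_config.
    def __init__(self, hidden_units=64, num_classes=10, **kwargs):
        super().__init__(**kwargs)
        self.hidden_units = hidden_units
        self.num_classes = num_classes
        self.dense_1 = layers.Dense(hidden_units, activation="relu")
        self.dense_2 = layers.Dense(num_classes)

    def call(self, inputs):
        return self.dense_2(self.dense_1(inputs))

    def get_config(self):
        # Return the constructor arguments needed to re-create this instance.
        return {"hidden_units": self.hidden_units, "num_classes": self.num_classes}

    @classmethod
    def from_config(cls, config):
        return cls(**config)
```

With these two methods defined, the model can be re-created from its config,
following the same pattern as the `CustomDense` example earlier in this guide.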
+ + +### Functional API weakness: + +#### It does not support dynamic architectures + +The functional API treats models as DAGs of layers. +This is true for most deep learning architectures, but not all -- for example, +recursive networks or Tree RNNs do not follow this assumption and cannot +be implemented in the functional API. +""" + +""" +## Mix-and-match API styles + +Choosing between the functional API or Model subclassing isn't a +binary decision that restricts you into one category of models. +All models in the `keras` API can interact with each other, whether they're +`Sequential` models, functional models, or subclassed models that are written +from scratch. + +You can always use a functional model or `Sequential` model +as part of a subclassed model or layer: +""" + +units = 32 +timesteps = 10 +input_dim = 5 + +# Define a Functional model +inputs = keras.Input((None, units)) +x = layers.GlobalAveragePooling1D()(inputs) +outputs = layers.Dense(1)(x) +model = keras.Model(inputs, outputs) + + +class CustomRNN(layers.Layer): + def __init__(self): + super().__init__() + self.units = units + self.projection_1 = layers.Dense(units=units, activation="tanh") + self.projection_2 = layers.Dense(units=units, activation="tanh") + # Our previously-defined Functional model + self.classifier = model + + def call(self, inputs): + outputs = [] + state = ops.zeros(shape=(inputs.shape[0], self.units)) + for t in range(inputs.shape[1]): + x = inputs[:, t, :] + h = self.projection_1(x) + y = h + self.projection_2(state) + state = y + outputs.append(y) + features = ops.stack(outputs, axis=1) + print(features.shape) + return self.classifier(features) + + +rnn_model = CustomRNN() +_ = rnn_model(ops.zeros((1, timesteps, input_dim))) + +""" +You can use any subclassed layer or model in the functional API +as long as it implements a `call` method that follows one of the following patterns: + +- `call(self, inputs, **kwargs)` -- +Where `inputs` is a tensor or a nested structure of tensors (e.g. a list of tensors), +and where `**kwargs` are non-tensor arguments (non-inputs). +- `call(self, inputs, training=None, **kwargs)` -- +Where `training` is a boolean indicating whether the layer should behave +in training mode and inference mode. +- `call(self, inputs, mask=None, **kwargs)` -- +Where `mask` is a boolean mask tensor (useful for RNNs, for instance). +- `call(self, inputs, training=None, mask=None, **kwargs)` -- +Of course, you can have both masking and training-specific behavior at the same time. + +Additionally, if you implement the `get_config` method on your custom Layer or model, +the functional models you create will still be serializable and cloneable. 
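
For instance, assuming your Keras version provides `keras.models.clone_model()`,
cloning such a functional model is a one-liner:

```python
cloned = keras.models.clone_model(model)  # Same architecture, freshly initialized weights
```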
+ +Here's a quick example of a custom RNN, written from scratch, +being used in a functional model: +""" + +units = 32 +timesteps = 10 +input_dim = 5 +batch_size = 16 + + +class CustomRNN(layers.Layer): + def __init__(self): + super().__init__() + self.units = units + self.projection_1 = layers.Dense(units=units, activation="tanh") + self.projection_2 = layers.Dense(units=units, activation="tanh") + self.classifier = layers.Dense(1) + + def call(self, inputs): + outputs = [] + state = ops.zeros(shape=(inputs.shape[0], self.units)) + for t in range(inputs.shape[1]): + x = inputs[:, t, :] + h = self.projection_1(x) + y = h + self.projection_2(state) + state = y + outputs.append(y) + features = ops.stack(outputs, axis=1) + return self.classifier(features) + + +# Note that you specify a static batch size for the inputs with the `batch_shape` +# arg, because the inner computation of `CustomRNN` requires a static batch size +# (when you create the `state` zeros tensor). +inputs = keras.Input(batch_shape=(batch_size, timesteps, input_dim)) +x = layers.Conv1D(32, 3)(inputs) +outputs = CustomRNN()(x) + +model = keras.Model(inputs, outputs) + +rnn_model = CustomRNN() +_ = rnn_model(ops.zeros((1, 10, 5))) diff --git a/guides/sequential_model.py b/guides/sequential_model.py new file mode 100644 index 000000000..d1cb5c874 --- /dev/null +++ b/guides/sequential_model.py @@ -0,0 +1,370 @@ +""" +Title: The Sequential model +Author: [fchollet](https://twitter.com/fchollet) +Date created: 2020/04/12 +Last modified: 2020/04/12 +Description: Complete guide to the Sequential model. +Accelerator: GPU +""" +""" +## Setup + +""" + +import keras_core as keras +from keras_core import layers +from keras_core import operations as ops + +""" +## When to use a Sequential model + +A `Sequential` model is appropriate for **a plain stack of layers** +where each layer has **exactly one input tensor and one output tensor**. + +Schematically, the following `Sequential` model: +""" + +# Define Sequential model with 3 layers +model = keras.Sequential( + [ + layers.Dense(2, activation="relu", name="layer1"), + layers.Dense(3, activation="relu", name="layer2"), + layers.Dense(4, name="layer3"), + ] +) +# Call model on a test input +x = ops.ones((3, 3)) +y = model(x) + +""" +is equivalent to this function: +""" + +# Create 3 layers +layer1 = layers.Dense(2, activation="relu", name="layer1") +layer2 = layers.Dense(3, activation="relu", name="layer2") +layer3 = layers.Dense(4, name="layer3") + +# Call layers on a test input +x = ops.ones((3, 3)) +y = layer3(layer2(layer1(x))) + +""" +A Sequential model is **not appropriate** when: + +- Your model has multiple inputs or multiple outputs +- Any of your layers has multiple inputs or multiple outputs +- You need to do layer sharing +- You want non-linear topology (e.g. 
a residual connection, a multi-branch +model) +""" + +""" +## Creating a Sequential model + +You can create a Sequential model by passing a list of layers to the Sequential +constructor: +""" + +model = keras.Sequential( + [ + layers.Dense(2, activation="relu"), + layers.Dense(3, activation="relu"), + layers.Dense(4), + ] +) + +""" +Its layers are accessible via the `layers` attribute: +""" + +model.layers + +""" +You can also create a Sequential model incrementally via the `add()` method: +""" + +model = keras.Sequential() +model.add(layers.Dense(2, activation="relu")) +model.add(layers.Dense(3, activation="relu")) +model.add(layers.Dense(4)) + +""" +Note that there's also a corresponding `pop()` method to remove layers: +a Sequential model behaves very much like a list of layers. +""" + +model.pop() +print(len(model.layers)) # 2 + +""" +Also note that the Sequential constructor accepts a `name` argument, just like +any layer or model in Keras. This is useful to annotate TensorBoard graphs +with semantically meaningful names. +""" + +model = keras.Sequential(name="my_sequential") +model.add(layers.Dense(2, activation="relu", name="layer1")) +model.add(layers.Dense(3, activation="relu", name="layer2")) +model.add(layers.Dense(4, name="layer3")) + +""" +## Specifying the input shape in advance + +Generally, all layers in Keras need to know the shape of their inputs +in order to be able to create their weights. So when you create a layer like +this, initially, it has no weights: +""" + +layer = layers.Dense(3) +layer.weights # Empty + +""" +It creates its weights the first time it is called on an input, since the shape +of the weights depends on the shape of the inputs: +""" + +# Call layer on a test input +x = ops.ones((1, 4)) +y = layer(x) +layer.weights # Now it has weights, of shape (4, 3) and (3,) + +""" +Naturally, this also applies to Sequential models. When you instantiate a +Sequential model without an input shape, it isn't "built": it has no weights +(and calling +`model.weights` results in an error stating just this). The weights are created +when the model first sees some input data: +""" + +model = keras.Sequential( + [ + layers.Dense(2, activation="relu"), + layers.Dense(3, activation="relu"), + layers.Dense(4), + ] +) # No weights at this stage! + +# At this point, you can't do this: +# model.weights + +# You also can't do this: +# model.summary() + +# Call the model on a test input +x = ops.ones((1, 4)) +y = model(x) +print("Number of weights after calling the model:", len(model.weights)) # 6 + +""" +Once a model is "built", you can call its `summary()` method to display its +contents: +""" + +model.summary() + +""" +However, it can be very useful when building a Sequential model incrementally +to be able to display the summary of the model so far, including the current +output shape. In this case, you should start your model by passing an `Input` +object to your model, so that it knows its input shape from the start: +""" + +model = keras.Sequential() +model.add(keras.Input(shape=(4,))) +model.add(layers.Dense(2, activation="relu")) + +model.summary() + +""" +Note that the `Input` object is not displayed as part of `model.layers`, since +it isn't a layer: +""" + +model.layers + +""" +Models built with a predefined input shape like this always have weights (even +before seeing any data) and always have a defined output shape. + +In general, it's a recommended best practice to always specify the input shape +of a Sequential model in advance if you know what it is. 
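
Note that you can also pass the `Input` object as the first entry of the list
given to the `Sequential` constructor, as done in the feature-extraction
examples later in this guide. A minimal sketch:

```python
model = keras.Sequential(
    [
        keras.Input(shape=(4,)),
        layers.Dense(2, activation="relu"),
    ]
)
model.summary()  # Already built, since the input shape is known up front
```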
+""" + +""" +## A common debugging workflow: `add()` + `summary()` + +When building a new Sequential architecture, it's useful to incrementally stack +layers with `add()` and frequently print model summaries. For instance, this +enables you to monitor how a stack of `Conv2D` and `MaxPooling2D` layers is +downsampling image feature maps: +""" + +model = keras.Sequential() +model.add(keras.Input(shape=(250, 250, 3))) # 250x250 RGB images +model.add(layers.Conv2D(32, 5, strides=2, activation="relu")) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.MaxPooling2D(3)) + +# Can you guess what the current output shape is at this point? Probably not. +# Let's just print it: +model.summary() + +# The answer was: (40, 40, 32), so we can keep downsampling... + +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.MaxPooling2D(3)) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.MaxPooling2D(2)) + +# And now? +model.summary() + +# Now that we have 4x4 feature maps, time to apply global max pooling. +model.add(layers.GlobalMaxPooling2D()) + +# Finally, we add a classification layer. +model.add(layers.Dense(10)) + +""" +Very practical, right? + + +""" + +""" +## What to do once you have a model + +Once your model architecture is ready, you will want to: + +- Train your model, evaluate it, and run inference. See our +[guide to training & evaluation with the built-in loops]( + /guides/training_with_built_in_methods/) +- Save your model to disk and restore it. See our +[guide to serialization & saving](/guides/serialization_and_saving/). +- Speed up model training by leveraging multiple GPUs. See our +[guide to multi-GPU and distributed training](https://keras.io/guides/distributed_training/). +""" + +""" +## Feature extraction with a Sequential model + +Once a Sequential model has been built, it behaves like a [Functional API +model](/guides/functional_api/). This means that every layer has an `input` +and `output` attribute. These attributes can be used to do neat things, like +quickly +creating a model that extracts the outputs of all intermediate layers in a +Sequential model: +""" + +initial_model = keras.Sequential( + [ + keras.Input(shape=(250, 250, 3)), + layers.Conv2D(32, 5, strides=2, activation="relu"), + layers.Conv2D(32, 3, activation="relu"), + layers.Conv2D(32, 3, activation="relu"), + ] +) +feature_extractor = keras.Model( + inputs=initial_model.inputs, + outputs=[layer.output for layer in initial_model.layers], +) + +# Call feature extractor on test input. +x = ops.ones((1, 250, 250, 3)) +features = feature_extractor(x) + +""" +Here's a similar example that only extract features from one layer: +""" + +initial_model = keras.Sequential( + [ + keras.Input(shape=(250, 250, 3)), + layers.Conv2D(32, 5, strides=2, activation="relu"), + layers.Conv2D(32, 3, activation="relu", name="my_intermediate_layer"), + layers.Conv2D(32, 3, activation="relu"), + ] +) +feature_extractor = keras.Model( + inputs=initial_model.inputs, + outputs=initial_model.get_layer(name="my_intermediate_layer").output, +) +# Call feature extractor on test input. +x = ops.ones((1, 250, 250, 3)) +features = feature_extractor(x) + +""" +## Transfer learning with a Sequential model + +Transfer learning consists of freezing the bottom layers in a model and only training +the top layers. 
If you aren't familiar with it, make sure to read our [guide
to transfer learning](/guides/transfer_learning/).

Here are two common transfer learning blueprints involving Sequential models.

First, let's say that you have a Sequential model, and you want to freeze all
layers except the last one. In this case, you would simply iterate over
`model.layers` and set `layer.trainable = False` on each layer, except the
last one. Like this:

```python
model = keras.Sequential([
    keras.Input(shape=(784,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10),
])

# Presumably you would want to first load pre-trained weights.
model.load_weights(...)

# Freeze all layers except the last one.
for layer in model.layers[:-1]:
    layer.trainable = False

# Recompile and train (this will only update the weights of the last layer).
model.compile(...)
model.fit(...)
```

Another common blueprint is to use a Sequential model to stack a pre-trained
model and some freshly initialized classification layers. Like this:

```python
# Load a convolutional base with pre-trained weights
base_model = keras.applications.Xception(
    weights='imagenet',
    include_top=False,
    pooling='avg')

# Freeze the base model
base_model.trainable = False

# Use a Sequential model to add a trainable classifier on top
model = keras.Sequential([
    base_model,
    layers.Dense(1000),
])

# Compile & train
model.compile(...)
model.fit(...)
```

If you do transfer learning, you will probably find yourself frequently using
these two patterns.
"""

"""
That's about all you need to know about Sequential models!

To find out more about building models in Keras, see:

- [Guide to the Functional API](/guides/functional_api/)
- [Guide to making new Layers & Models via subclassing](
    /guides/making_new_layers_and_models_via_subclassing/)
"""