keras/keras_core/datasets/mnist.py

72 lines
2.4 KiB
Python

"""MNIST handwritten digits dataset."""
import numpy as np
from keras_core.api_export import keras_core_export
from keras_core.utils.file_utils import get_file
@keras_core_export("keras_core.datasets.mnist.load_data")
def load_data(path="mnist.npz"):
"""Loads the MNIST dataset.
This is a dataset of 60,000 28x28 grayscale images of the 10 digits,
along with a test set of 10,000 images.
More info can be found at the
[MNIST homepage](http://yann.lecun.com/exdb/mnist/).
Args:
path: path where to cache the dataset locally
(relative to `~/.keras/datasets`).
Returns:
Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`.
**`x_train`**: `uint8` NumPy array of grayscale image data with shapes
`(60000, 28, 28)`, containing the training data. Pixel values range
from 0 to 255.
**`y_train`**: `uint8` NumPy array of digit labels (integers in range 0-9)
with shape `(60000,)` for the training data.
**`x_test`**: `uint8` NumPy array of grayscale image data with shapes
`(10000, 28, 28)`, containing the test data. Pixel values range
from 0 to 255.
**`y_test`**: `uint8` NumPy array of digit labels (integers in range 0-9)
with shape `(10000,)` for the test data.
Example:
```python
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
assert x_train.shape == (60000, 28, 28)
assert x_test.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)
```
License:
Yann LeCun and Corinna Cortes hold the copyright of MNIST dataset,
which is a derivative work from original NIST datasets.
MNIST dataset is made available under the terms of the
[Creative Commons Attribution-Share Alike 3.0 license.](
https://creativecommons.org/licenses/by-sa/3.0/)
"""
origin_folder = (
"https://storage.googleapis.com/tensorflow/tf-keras-datasets/"
)
path = get_file(
fname=path,
origin=origin_folder + "mnist.npz",
file_hash=( # noqa: E501
"731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1"
),
)
with np.load(path, allow_pickle=True) as f:
x_train, y_train = f["x_train"], f["y_train"]
x_test, y_test = f["x_test"], f["y_test"]
return (x_train, y_train), (x_test, y_test)