TensorFlow peut lire des fichiers de données aux formats JSON et CSV.
Que ce soit en JSON ou en CSV, le principe est le même.
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
import pandas as pd
# Fetch the Titanic training CSV from the given URL. The value stored in
# titanic_file is what the later cells hand to pd.read_csv and
# make_csv_dataset as the file to read.
titanic_file = tf.keras.utils.get_file("train.csv", "https://storage.googleapis.com/tf-datasets/titanic/train.csv")
Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv
32768/30874 [===============================] - 0s 0us/step
Rappel sur from_tensor_slices
# Wrap a constant built from a flat list of three integers in a tf.Variable
# and print it.
var_x = tf.Variable(tf.constant([1, 2, 3]))
print(var_x)
<tf.Variable 'Variable:0' shape=(3,) dtype=int32, numpy=array([1, 2, 3], dtype=int32)>
# Print the shape of the variable (1-D, three elements).
print(var_x.shape)
(3,)
# from_tensor_slices accepts the variable directly: each entry along its
# first axis becomes one dataset element.
dataset = tf.data.Dataset.from_tensor_slices(var_x)
# Materialize the dataset as a Python list of NumPy values to inspect it.
list(dataset.as_numpy_iterator())
[1, 2, 3]
# A plain Python list works too: one dataset element per list item.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
list(dataset.as_numpy_iterator())
[1, 2, 3]
# A nested list is sliced along its first axis: each inner row becomes one
# dataset element.
dataset = tf.data.Dataset.from_tensor_slices([[1, 2], [3, 4]])
list(dataset.as_numpy_iterator())
[array([1, 2], dtype=int32), array([3, 4], dtype=int32)]
# A tuple of lists is sliced in parallel: dataset element i is the tuple made
# of the i-th item of every list.
dataset = tf.data.Dataset.from_tensor_slices(([1, 2], [3, 4], [5, 6]))
list(dataset.as_numpy_iterator())
[(1, 3, 5), (2, 4, 6)]
# A tuple of four lists: each dataset element is a 4-tuple holding the i-th
# item of every list.
dataset = tf.data.Dataset.from_tensor_slices(([1, 2], [3, 4], [5, 6], [7, 8]))
list(dataset.as_numpy_iterator())
[(1, 3, 5, 7), (2, 4, 6, 8)]
# A dict of lists yields dict elements: same keys, with the i-th value of
# each list under its key.
dataset = tf.data.Dataset.from_tensor_slices({"a": [1, 2], "b": [3, 4]})
list(dataset.as_numpy_iterator())
[{'a': 1, 'b': 3}, {'a': 2, 'b': 4}]
# Three rows of two values each, held in a tf.Variable.
features = tf.Variable([[1, 2], [3, 4], [5, 6]]) # ==> 3x2 tensor
# NOTE: (features) is only a parenthesized expression, not a 1-tuple, so the
# dataset elements are the bare rows rather than 1-tuples of rows.
dataset = tf.data.Dataset.from_tensor_slices((features))
list(dataset.as_numpy_iterator())
[array([1, 2], dtype=int32),
array([3, 4], dtype=int32),
array([5, 6], dtype=int32)]
# Pair each feature row with its label by slicing a (features, labels) tuple.
features = tf.constant([[1, 2], [3, 4], [5, 6]]) # ==> 3x2 tensor
labels = tf.constant(['A', 'B', 'A']) # ==> 1-D tensor of shape (3,), one label per row
# Both tensors have 3 entries along the first axis, so element i of the
# dataset is the pair (features[i], labels[i]).
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
list(dataset.as_numpy_iterator())
[(array([1, 2], dtype=int32), b'A'),
(array([3, 4], dtype=int32), b'B'),
(array([5, 6], dtype=int32), b'A')]
# Alternative construction: build one dataset per tensor, then pair them
# element-wise with Dataset.zip. The resulting elements are the same
# (row, label) pairs as when slicing the (features, labels) tuple directly.
features_dataset = tf.data.Dataset.from_tensor_slices(features)
labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
dataset = tf.data.Dataset.zip((features_dataset, labels_dataset))
list(dataset.as_numpy_iterator())
[(array([1, 2], dtype=int32), b'A'),
(array([3, 4], dtype=int32), b'B'),
(array([5, 6], dtype=int32), b'A')]
# On its own, features_dataset yields just the feature rows.
list(features_dataset.as_numpy_iterator())
[array([1, 2], dtype=int32),
array([3, 4], dtype=int32),
array([5, 6], dtype=int32)]
Le rappel étant terminé, chargeons maintenant le fichier CSV du Titanic avec pandas.
# Load the downloaded CSV into a pandas DataFrame. index_col=None means no
# CSV column is used as the row index, so rows get a default integer index.
df = pd.read_csv(titanic_file, index_col=None)
# Preview the first rows.
df.head()
survived sex age n_siblings_spouses parch fare class deck embark_town alone
0 0 male 22.0 1 0 7.2500 Third unknown Southampton n
1 1 female 38.0 1 0 71.2833 First C Cherbourg n
2 1 female 26.0 0 0 7.9250 Third unknown Southampton y
3 1 female 35.0 1 0 53.1000 First C Southampton n
4 0 male 28.0 0 0 8.4583 Third unknown Queenstown y
# Build a dataset with one element per DataFrame row: dict(df) maps each
# column name to its column, so every dataset element is a dict mapping
# column name -> that row's value.
titanic_slices = tf.data.Dataset.from_tensor_slices(dict(df))

# Show the first row only, one "column: value" line per feature.
for feature_batch in titanic_slices.take(1):
    for key, value in feature_batch.items():
        # {!r:20s} prints the quoted column name left-aligned in 20 columns.
        print(" {!r:20s}: {}".format(key, value))
'survived' : 0
'sex' : b'male'
'age' : 22.0
'n_siblings_spouses': 1
'parch' : 0
'fare' : 7.25
'class' : b'Third'
'deck' : b'unknown'
'embark_town' : b'Southampton'
'alone' : b'n'
# Read the CSV file directly with TensorFlow, in batches of 4 rows.
# label_name="survived" separates that column from the others, so each
# dataset element is a (features dict, label) pair.
titanic_batches = tf.data.experimental.make_csv_dataset(
titanic_file, batch_size=4,
label_name="survived")
# Display the dataset restricted to its first batch (shows shapes and dtypes).
titanic_batches.take(1)
<TakeDataset shapes: (OrderedDict([(sex, (4,)), (age, (4,)), (n_siblings_spouses, (4,)), (parch, (4,)), (fare, (4,)), (class, (4,)), (deck, (4,)), (embark_town, (4,)), (alone, (4,))]), (4,)), types: (OrderedDict([(sex, tf.string), (age, tf.float32), (n_siblings_spouses, tf.int32), (parch, tf.int32), (fare, tf.float32), (class, tf.string), (deck, tf.string), (embark_town, tf.string), (alone, tf.string)]), tf.int32)>
# Pull a single batch from titanic_batches and print it. Each element
# unpacks into a dict of feature columns and the matching label values.
for feature_batch, label_batch in titanic_batches.take(1):
    print("'survived': {}".format(label_batch))
    print("features:")
    # One line per feature column, holding the values for the whole batch.
    for key, value in feature_batch.items():
        # {!r:20s} prints the quoted column name left-aligned in 20 columns.
        print(" {!r:20s}: {}".format(key, value))
'survived': [1 0 1 1]
features:
'sex' : [b'male' b'male' b'female' b'male']
'age' : [28. 28. 52. 44.]
'n_siblings_spouses': [0 0 1 0]
'parch' : [0 0 1 0]
'fare' : [26.55 26.55 93.5 7.925]
'class' : [b'First' b'First' b'First' b'Third']
'deck' : [b'C' b'C' b'B' b'unknown']
'embark_town' : [b'Southampton' b'Southampton' b'Southampton' b'Southampton']
'alone' : [b'y' b'y' b'n' b'y']