TensorFlow peut lire des fichiers de données aux formats JSON et CSV.
Que ce soit du JSON ou du CSV, le principe est le même.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Download the Titanic training CSV; the returned value is used below as the
# path of the local file to read.
titanic_file = tf.keras.utils.get_file(
    "train.csv",
    "https://storage.googleapis.com/tf-datasets/titanic/train.csv",
)
Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv
32768/30874 [===============================] - 0s 0us/step
Rappel sur `tf.data.Dataset.from_tensor_slices`
# Wrap a constant 1-D integer tensor in a tf.Variable and display it.
var_x = tf.Variable(tf.constant([1, 2, 3]))
print(var_x)
<tf.Variable 'Variable:0' shape=(3,) dtype=int32, numpy=array([1, 2, 3], dtype=int32)>
# Show the shape of the variable: one dimension holding three elements.
print(var_x.shape)
(3,)
# Slicing a 1-D variable along its first axis gives one scalar per element.
dataset = tf.data.Dataset.from_tensor_slices(var_x)
list(dataset.as_numpy_iterator())
[1, 2, 3]
# A plain Python list is accepted too and produces the same elements.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
list(dataset.as_numpy_iterator())
[1, 2, 3]
# With a 2-D input, each row becomes one dataset element (a 1-D array).
dataset = tf.data.Dataset.from_tensor_slices([[1, 2], [3, 4]])
list(dataset.as_numpy_iterator())
[array([1, 2], dtype=int32), array([3, 4], dtype=int32)]
# A tuple of lists is sliced in lockstep: element i is the tuple made of the
# i-th entry of every component.
dataset = tf.data.Dataset.from_tensor_slices(([1, 2], [3, 4], [5, 6]))
list(dataset.as_numpy_iterator())
[(1, 3, 5), (2, 4, 6)]
# Same lockstep slicing with four components: each element is a 4-tuple.
dataset = tf.data.Dataset.from_tensor_slices(
    ([1, 2], [3, 4], [5, 6], [7, 8])
)
list(dataset.as_numpy_iterator())
[(1, 3, 5, 7), (2, 4, 6, 8)]
# A dict of lists yields dict elements: the keys are kept and each value is
# sliced in lockstep.
dataset = tf.data.Dataset.from_tensor_slices({"a": [1, 2], "b": [3, 4]})
list(dataset.as_numpy_iterator())
[{'a': 1, 'b': 3}, {'a': 2, 'b': 4}]
features = tf.Variable([[1, 2], [3, 4], [5, 6]])  # ==> 3x2 tensor
# Note: (features) is not a tuple, just the variable wrapped in parentheses,
# so the dataset elements are the rows themselves.
dataset = tf.data.Dataset.from_tensor_slices((features))
list(dataset.as_numpy_iterator())
[array([1, 2], dtype=int32),
array([3, 4], dtype=int32),
array([5, 6], dtype=int32)]
features = tf.constant([[1, 2], [3, 4], [5, 6]])  # ==> 3x2 tensor
labels = tf.constant(['A', 'B', 'A'])  # ==> 1-D tensor of 3 strings
# Passing a (features, labels) tuple pairs each feature row with its label.
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
list(dataset.as_numpy_iterator())
[(array([1, 2], dtype=int32), b'A'),
(array([3, 4], dtype=int32), b'B'),
(array([5, 6], dtype=int32), b'A')]
# Build one dataset per tensor, then pair them element by element with zip.
features_dataset = tf.data.Dataset.from_tensor_slices(features)
labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
dataset = tf.data.Dataset.zip((features_dataset, labels_dataset))
list(dataset.as_numpy_iterator())
[(array([1, 2], dtype=int32), b'A'),
(array([3, 4], dtype=int32), b'B'),
(array([5, 6], dtype=int32), b'A')]
# List the features-only dataset: one row array per element.
list(features_dataset.as_numpy_iterator())
[array([1, 2], dtype=int32),
array([3, 4], dtype=int32),
array([5, 6], dtype=int32)]
Le rappel étant terminé, passons à la lecture du fichier CSV du Titanic.
# Load the downloaded CSV into a DataFrame (no column is used as the index)
# and preview its first rows.
df = pd.read_csv(titanic_file, index_col=None)
df.head()
survived sex age n_siblings_spouses parch fare class deck embark_town alone
0 0 male 22.0 1 0 7.2500 Third unknown Southampton n
1 1 female 38.0 1 0 71.2833 First C Cherbourg n
2 1 female 26.0 0 0 7.9250 Third unknown Southampton y
3 1 female 35.0 1 0 53.1000 First C Southampton n
4 0 male 28.0 0 0 8.4583 Third unknown Queenstown y
# dict(df) maps each column name to its column, so every dataset element is a
# dict holding the values of one CSV row.
titanic_slices = tf.data.Dataset.from_tensor_slices(dict(df))

# Print the first row, one "column: value" pair per line.
for feature_batch in titanic_slices.take(1):
    for key, value in feature_batch.items():
        print(" {!r:20s}: {}".format(key, value))
'survived' : 0
'sex' : b'male'
'age' : 22.0
'n_siblings_spouses': 1
'parch' : 0
'fare' : 7.25
'class' : b'Third'
'deck' : b'unknown'
'embark_town' : b'Southampton'
'alone' : b'n'
# Read the CSV directly as a dataset of batches of 4 rows; the "survived"
# column is split off from the features and returned as the label.
titanic_batches = tf.data.experimental.make_csv_dataset(
    titanic_file,
    batch_size=4,
    label_name="survived",
)
# take(1) gives back a dataset object (a TakeDataset), not the batch contents;
# it has to be iterated to see the values.
titanic_batches.take(1)
<TakeDataset shapes: (OrderedDict([(sex, (4,)), (age, (4,)), (n_siblings_spouses, (4,)), (parch, (4,)), (fare, (4,)), (class, (4,)), (deck, (4,)), (embark_town, (4,)), (alone, (4,))]), (4,)), types: (OrderedDict([(sex, tf.string), (age, tf.float32), (n_siblings_spouses, tf.int32), (parch, tf.int32), (fare, tf.float32), (class, tf.string), (deck, tf.string), (embark_town, tf.string), (alone, tf.string)]), tf.int32)>
# Iterate over the first batch: each element is a (features dict, labels) pair.
for feature_batch, label_batch in titanic_batches.take(1):
    print("'survived': {}".format(label_batch))
    print("features:")
    # Each feature maps a column name to the 4 values of the batch.
    for key, value in feature_batch.items():
        print(" {!r:20s}: {}".format(key, value))
'survived': [1 0 1 1]
features:
'sex' : [b'male' b'male' b'female' b'male']
'age' : [28. 28. 52. 44.]
'n_siblings_spouses': [0 0 1 0]
'parch' : [0 0 1 0]
'fare' : [26.55 26.55 93.5 7.925]
'class' : [b'First' b'First' b'First' b'Third']
'deck' : [b'C' b'C' b'B' b'unknown']
'embark_town' : [b'Southampton' b'Southampton' b'Southampton' b'Southampton']
'alone' : [b'y' b'y' b'n' b'y']