Creative Commons License

Objectifs

Evaluation

Chaque exercice a un niveau de difficulté. Les exercices faciles et de difficulté moyenne vous aident à comprendre les fondamentaux. Il est recommandé de finir ces exercices avant de commencer les exercices difficiles. Le niveau de difficulté de chaque exercice est indiqué ainsi:

  1. ★: Facile
  2. ★★: Moyen
  3. ★★★: Difficile

Exercice 1

1. Données capteurs

Copiez le fichier valeurs.json dans votre répertoire afin de faire l'analyse.

import pandas as pd
import json

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame.
# pd.json_normalize replaces the pandas.io.json.json_normalize import, which
# was deprecated in pandas 1.0.
dataframe = pd.json_normalize(data["values"])
print(dataframe)

Affichez le type de données.

import pandas as pd
import json

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Show the dtype pandas inferred for each column.
print(dataframe.dtypes)

2. Changement de type de données

import pandas as pd
import json
import datetime

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# Passing the datetime.datetime class to astype() resolves to the generic
# object dtype and converts nothing, so parse the column explicitly.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])
print(dataframe.dtypes)

3. Visualisation de données

import pandas as pd
import json
import matplotlib.pyplot as plot
import datetime

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# astype() with the datetime.datetime class leaves the column as object
# dtype, so parse the timestamps explicitly to get a real time axis.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])

# Draw humidity against time directly through the pyplot interface.
plot.plot(dataframe["date"], dataframe["humidity"])
plot.show()

Modifiez le code ci-dessus et comparez la différence de performance.

import pandas as pd
import json
import matplotlib.pyplot as plot
import datetime

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# astype() with the datetime.datetime class leaves the column as object
# dtype, so parse the timestamps explicitly to get a real time axis.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])

# DataFrame.plot opens its own figure when no axes are supplied, so the
# former plot.figure() call only produced an additional empty window.
dataframe.plot(x='date', y='humidity')
plot.show()

Question 1: En continuant avec le fichier valeurs.json, codez en Python en utilisant pandas afin d'avoir les résultats suivants:

  • Tracer un graphique montrant la luminosité en fonction du temps
  • Tracer un graphique montrant la pression en fonction du temps
  • Tracer un graphique montrant l'indice UV en fonction du temps
  • Tracer un seul graphique montrant les valeurs UV, luminosité, humidité, pression et température en fonction du temps

Exercice 2 ★★

1. Graphique nuage de points

import pandas as pd
import json
import matplotlib.pyplot as plot
import datetime

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# astype() with the datetime.datetime class leaves the column as object
# dtype, so parse the timestamps explicitly.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])

# Scatter plot of UV against luminosity. DataFrame.plot opens its own figure,
# so no separate plot.figure() call is needed (it only added an empty window).
dataframe.plot(x='lum', y='UV', kind="scatter")
plot.show()

Un autre essai

import pandas as pd
import json
import matplotlib.pyplot as plot
import datetime

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# astype() with the datetime.datetime class leaves the column as object
# dtype, so parse the timestamps explicitly.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])

# Scatter plot of humidity against temperature. DataFrame.plot opens its own
# figure, so no separate plot.figure() call is needed.
dataframe.plot(x='temp', y='humidity', kind="scatter")
plot.show()

Quelles sont vos observations?

2. Partitionnement de données

import pandas as pd
import json
import matplotlib.pyplot as plot
import datetime
from sklearn.cluster import KMeans

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# astype() with the datetime.datetime class leaves the column as object
# dtype, so parse the timestamps explicitly.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])

# Fit two clusters on the (temp, humidity) pairs.
clusters = KMeans(n_clusters=2)
clusters.fit(dataframe.loc[:, ["temp", "humidity"]])
centroids = clusters.cluster_centers_

# Plot the readings, then overlay the fitted cluster centres as red crosses.
# Centroid columns follow the fit order: column 0 is temp, column 1 humidity.
dataframe.plot(x='temp', y='humidity', kind="scatter")
plot.scatter(centroids[:, 0], centroids[:, 1], marker='x', color='r')
plot.show()

Précisez chaque partition en utilisant différentes couleurs.

import pandas as pd
import json
import matplotlib.pyplot as plot
import datetime
from sklearn.cluster import KMeans

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# astype() with the datetime.datetime class leaves the column as object
# dtype, so parse the timestamps explicitly.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])

# Fit two clusters on the (temp, humidity) pairs.
clusters = KMeans(n_clusters=2)
clusters.fit(dataframe.loc[:, ["temp", "humidity"]])
centroids = clusters.cluster_centers_

# Map each sample's cluster label to a colour. color_list must hold at least
# n_clusters entries, otherwise the lookup raises IndexError.
color_list = ['b', 'g', 'y']
colors = [color_list[label] for label in clusters.labels_]

# Readings coloured by cluster, with the cluster centres as red crosses.
plot.scatter(dataframe["temp"], dataframe["humidity"], c=colors)
plot.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='r')
plot.show()

import pandas as pd
import json
import matplotlib.pyplot as plot
import datetime
from sklearn.cluster import KMeans

# Parse the JSON document; the context manager closes the file once it has
# been read instead of leaving the handle open.
with open('valeurs.json', encoding='utf-8') as source:
    data = json.load(source)

# Flatten the records stored under the "values" key into a DataFrame
# (pd.json_normalize supersedes the deprecated pandas.io.json import).
dataframe = pd.json_normalize(data["values"])

# Cast the numeric sensor columns to compact fixed-width types.
dataframe = dataframe.astype({
    "humidity": "float32",
    "lum": "int32",
    "pression": "int32",
    "temp": "float32",
    "UV": "int32",
})

# astype() with the datetime.datetime class leaves the column as object
# dtype, so parse the timestamps explicitly.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime can
# parse - confirm against valeurs.json.
dataframe["date"] = pd.to_datetime(dataframe["date"])

# Fit two clusters on the (temp, humidity) pairs.
clusters = KMeans(n_clusters=2)
clusters.fit(dataframe.loc[:, ["temp", "humidity"]])
centroids = clusters.cluster_centers_

# Map each sample's cluster label to a colour. color_list must hold at least
# n_clusters entries, otherwise the lookup raises IndexError.
color_list = ['b', 'g', 'y']
colors = [color_list[label] for label in clusters.labels_]

# Readings coloured by cluster, with the cluster centres as red crosses.
plot.scatter(dataframe["temp"], dataframe["humidity"], c=colors)
plot.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='r')
plot.show()

Question 2: Modifiez le code ci-dessus afin d'avoir 3 partitions, puis 4 partitions.

Exercice 3 ★★

1. Trouver des couleurs dominantes dans une image

Copiez le fichier flower.jpg dans votre répertoire afin de trouver les couleurs dominantes.

from PIL import Image
import numpy
import math
import matplotlib.pyplot as plot
from sklearn.cluster import KMeans

# Load the picture as one (R, G, B) row per pixel. The context manager closes
# the file, convert("RGB") guarantees exactly three channels, and converting
# the image to an array directly avoids building a Python list of tuples.
with Image.open("flower.jpg") as imgfile:
    numarray = numpy.asarray(imgfile.convert("RGB"), dtype=numpy.uint8).reshape(-1, 3)

fig, axes = plot.subplots(nrows=5, ncols=2, figsize=(20, 25))

# One subplot per cluster count (2 to 11); axes.flat walks the 5x2 grid row
# by row, left to right.
for axis, cluster_count in zip(axes.flat, range(2, 12)):
    clusters = KMeans(n_clusters=cluster_count)
    clusters.fit(numarray)

    # Number of pixels assigned to each cluster. minlength keeps one slot per
    # cluster even if a cluster received no pixel, so bars, counts and cluster
    # centres always line up.
    pixel_counts = numpy.bincount(clusters.labels_, minlength=cluster_count)
    barlist = axis.bar(numpy.arange(cluster_count), pixel_counts)

    # Paint each bar with the colour of its cluster centre, as #rrggbb.
    for bar, center in zip(barlist, clusters.cluster_centers_):
        bar.set_color('#%02x%02x%02x' % (math.ceil(center[0]),
                                         math.ceil(center[1]),
                                         math.ceil(center[2])))

plot.show()

2. Amélioration de performance.

from PIL import Image
import numpy
import math
import matplotlib.pyplot as plot
from sklearn.cluster import MiniBatchKMeans

# Load the picture as one (R, G, B) row per pixel. The context manager closes
# the file, convert("RGB") guarantees exactly three channels, and converting
# the image to an array directly avoids building a Python list of tuples.
with Image.open("flower.jpg") as imgfile:
    numarray = numpy.asarray(imgfile.convert("RGB"), dtype=numpy.uint8).reshape(-1, 3)

fig, axes = plot.subplots(nrows=5, ncols=2, figsize=(20, 25))

# One subplot per cluster count (2 to 11); axes.flat walks the 5x2 grid row
# by row, left to right.
for axis, cluster_count in zip(axes.flat, range(2, 12)):
    clusters = MiniBatchKMeans(n_clusters=cluster_count)
    clusters.fit(numarray)

    # Number of pixels assigned to each cluster. minlength keeps one slot per
    # cluster even if a cluster received no pixel, so bars, counts and cluster
    # centres always line up.
    pixel_counts = numpy.bincount(clusters.labels_, minlength=cluster_count)
    barlist = axis.bar(numpy.arange(cluster_count), pixel_counts)

    # Paint each bar with the colour of its cluster centre, as #rrggbb.
    for bar, center in zip(barlist, clusters.cluster_centers_):
        bar.set_color('#%02x%02x%02x' % (math.ceil(center[0]),
                                         math.ceil(center[1]),
                                         math.ceil(center[2])))

plot.show()

Question 3: Modifiez le code ci-dessus afin de trouver 15 couleurs dominantes en utilisant KMeans et MiniBatchKMeans. Comparez les performances.

Exercice 4 ★★★

1. Prévision de population

Copiez le fichier population.csv dans votre répertoire afin de faire l'analyse.

import numpy as np
import matplotlib.pyplot as plot
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Read the CSV into a structured array with integer "year" and "population"
# fields; skiprows=1 skips the first line of the file.
dataset = np.loadtxt(
    "population.csv",
    dtype={'names': ('year', 'population'), 'formats': ('i4', 'i')},
    skiprows=1,
    delimiter=",",
    encoding="UTF-8",
)

dataframe = pd.DataFrame(dataset)
plot.scatter(x=dataframe['year'], y=dataframe['population'])
# Display the figure explicitly, like the other plotting scripts in this
# document, so the snippet also works outside a notebook.
plot.show()

On va utiliser LinearRegression.

import numpy as np
import matplotlib.pyplot as plot
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Read the CSV into a structured array with integer "year" and "population"
# fields; skiprows=1 skips the first line of the file.
dataset = np.loadtxt(
    "population.csv",
    dtype={'names': ('year', 'population'), 'formats': ('i4', 'i')},
    skiprows=1,
    delimiter=",",
    encoding="UTF-8",
)

dataframe = pd.DataFrame(dataset)

# The first `train_rows` rows are used for fitting and every remaining row
# for prediction. The slices share one boundary so that no row is dropped
# between them.
train_rows = 40

#training data
x_train = dataframe['year'][:train_rows].values.reshape(-1, 1)
y_train = dataframe['population'][:train_rows].values.reshape(-1, 1)

pf = PolynomialFeatures(degree=1)
x_poly = pf.fit_transform(x_train)

#training
lr = LinearRegression()
lr.fit(x_poly, y_train)

#printing coefficients
print(lr.intercept_, lr.coef_)

#prediction
x_predict = dataframe['year'][train_rows:].values.reshape(-1, 1)
y_actual = dataframe['population'][train_rows:].values.reshape(-1, 1)
# Reuse the transformer fitted on the training data instead of re-fitting it
# on the prediction inputs.
y_predict = lr.predict(pf.transform(x_predict))

# Actual values as points, model prediction as a red line.
plot.scatter(x_predict, y_actual)
plot.plot(x_predict, y_predict, color='red', linewidth=2)
plot.show()

Modifiez le code ci-dessus en utilisant des PolynomialFeatures de différents degrés (2, 3, 4). Tracez les différents graphiques.

Exercice 5 ★★★

1. Reconnaissance de l'écriture manuscrite

from sklearn import datasets, metrics
from sklearn.linear_model import Perceptron
import numpy as np
import matplotlib.pyplot as plot

# Load the digits dataset and display its first image using the gray_r
# colormap.
digits = datasets.load_digits()
first_image = digits.images[0]
plot.imshow(first_image, cmap=plot.cm.gray_r)
plot.show()

from sklearn import datasets, metrics
from sklearn.linear_model import Perceptron
import numpy as np
import matplotlib.pyplot as plot

# Print the target value stored for the first sample of the digits dataset.
digits = datasets.load_digits()
first_target = digits.target[0]
print(first_target)

Affichage d'images:

from sklearn import datasets, metrics
from sklearn.linear_model import Perceptron
import numpy as np
import matplotlib.pyplot as plot

# Display the first ten images of the digits dataset, one figure at a time.
digits = datasets.load_digits()
for image in digits.images[:10]:
    plot.imshow(image, cmap=plot.cm.gray_r)
    plot.show()

Apprentissage en utilisant Perceptron

from sklearn import datasets, metrics
from sklearn.linear_model import Perceptron
import numpy as np
import matplotlib.pyplot as plot

digits = datasets.load_digits()

# Index separating the training half from the evaluation half. Both slices
# use this same boundary so that no sample is skipped between them.
split = digits.images.shape[0] // 2

# Flatten every training image into a single feature row per sample.
training_images = digits.images[:split]
training_images = training_images.reshape((training_images.shape[0], -1))
training_target = digits.target[:split]

classifier = Perceptron(max_iter=1000)
#training
classifier.fit(training_images, training_target)

#prediction on the second half of the dataset
predict_images = digits.images[split:]
actual_labels = digits.target[split:]
predicted_labels = classifier.predict(predict_images.reshape((predict_images.shape[0], -1)))

#prediction for one single image, reshaped to a one-row feature matrix
predict_image = digits.images[split + 2]
print("Predicted value: ", classifier.predict(predict_image.reshape(1, -1)))

plot.imshow(predict_image, cmap=plot.cm.gray_r)
plot.show()

from sklearn import datasets, metrics
from sklearn.linear_model import Perceptron
import numpy as np
import matplotlib.pyplot as plot

digits = datasets.load_digits()

# Index separating the training half from the evaluation half. Both slices
# use this same boundary so that no sample is skipped between them.
split = digits.images.shape[0] // 2

# Flatten every training image into a single feature row per sample.
training_images = digits.images[:split]
training_images = training_images.reshape((training_images.shape[0], -1))
training_target = digits.target[:split]

classifier = Perceptron(max_iter=1000)
#training
classifier.fit(training_images, training_target)

#prediction on the second half of the dataset
predict_images = digits.images[split:]
actual_labels = digits.target[split:]
predicted_labels = classifier.predict(predict_images.reshape((predict_images.shape[0], -1)))

#classification report comparing the true labels with the predicted ones
print(metrics.classification_report(actual_labels, predicted_labels))

Question 4: En continuant avec le fichier population.csv, codez en Python en utilisant Perceptron afin d'avoir les résultats suivants:

  • Prévoir la population entre 2020 et 2050. Quelles sont vos observations? Quelle est votre solution?