ENT306

Project

Logistic regression

# Install packages
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install scikit-learn
import numpy as np
import matplotlib.pyplot as plt
from time import time
import random
import sklearn

1. Device type

Choose a device type (Lights, Camera, Smart Speaker, Thermostat,…) and plot the data

#import kagglehub #if from kagglehub
#path = kagglehub.dataset_download("rabieelkharoua/predict-smart-home-device-efficiency-dataset")

import csv
filename = "smart_home_device_usage_data.csv"

# open file
with open(filename, mode='r') as file:
    reader = csv.reader(file)
    data = list(reader)  # convert to a list so that all rows stay accessible
n = len(data) - 1  # number of data rows (excluding the header)

DeviceType = '...'  # choose one, e.g. 'Lights', 'Camera', 'Smart Speaker', 'Thermostat'
count = sum(1 for row in data if len(row) > 1 and row[1] == DeviceType)
print(f"Occurrences of the chosen device type in column 2: {count}")

xi_list=np.zeros((count,3)) # three parameters 
yi_list=np.zeros(count) #labels

i = 0
for row in data[1:]:  # skip the header row
    if row[1] == DeviceType:
        yi_list[i] = float(row[7])     # label: 0 - Inefficient, 1 - Efficient
        xi_list[i, 0] = float(row[2])  # UsageHoursPerDay
        xi_list[i, 1] = float(row[3])  # EnergyConsumption
        xi_list[i, 2] = float(row[6])  # DeviceAgeMonths
        i += 1

assert i == count
n = i  # number of selected data points (= count)

# normalize data (features scaled into [0, 1]):
min_values = np.array([0, 0, 0])     # feature minima
max_values = np.array([25, 10, 60])  # feature maxima (hours/day, energy, age in months)
xi_list = (xi_list - min_values) / (max_values - min_values)
#print(np.shape(xi_list))
Occurrences of the chosen device type in column 2: 1039

We are given a family of $n$ points $x_i=(x^0_i,x^1_i,x^2_i)$ in $\mathbb{R}^3$, along with associated $\textit{labels}$ $(y_i)_{1 \leq i \leq n}$ in $\{0,1\}$.

In previous work, we aimed to establish a relationship between $x$ and $y$ in the form:

  • if $\sigma(\langle w, x\rangle) \geq 0.5$, then $y=1$,
  • else $y=0$,

where $w = (w^0, w^1, w^2)\in \mathbb{R}^3$ was the unknown of the problem and $\sigma$ was the sigmoid function $z \mapsto \tfrac{1}{1+e^{-z}}$. Then, for a new unlabeled data point $x$, we computed $\sigma(\langle w,x\rangle) \in [0,1]$, allowing us to classify it as either $y=0$ or $y=1$ while incorporating a measure of uncertainty in the prediction.

To simplify the model, we assumed that the data were centered at $0$ and set the bias term $b=0$: we only had to minimize the log-loss function with respect to the weights.

In general, a bias term $b$ is required, meaning that we introduce an additional parameter $b \in \mathbb{R}$ and seek a relationship of the form:

  • if $\sigma(\langle w, x\rangle + b) \geq 0.5$, then $y=1$,
  • else $y=0$,

since there is no reason to assume that the separating hyperplane should pass through the origin.

However, we can eliminate the bias term by increasing the dimensionality from $d$ to $d + 1$. In our case, this means increasing the dimensions of both $x$ and $w$ from $3$ to $4$: we redefine $$x=(x^0,x^1,x^2,1) \textrm{ and } w=(w^0,w^1,w^2,b).$$ This transformation allows us to maintain the same matrix notation as before in the optimization step.

The log-loss function without a penalization term is thus given by: \begin{equation*} f(w)=-\frac1n \sum_{i=1}^n \left( y_i \log (\sigma(\langle w,x_i \rangle))+ (1-y_i)\log (1-\sigma(\langle w,x_i \rangle)) \right). \end{equation*}
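Since $\sigma'(z)=\sigma(z)(1-\sigma(z))$, a short computation (useful for the gradient-descent step below) gives \begin{equation*} \nabla f(w)=\frac1n \sum_{i=1}^n \left(\sigma(\langle w,x_i \rangle)-y_i\right) x_i, \end{equation*} and an L2 penalty $\lambda \Vert w \Vert^2$ simply adds $2\lambda w$ to this gradient.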

Data plot

# data plot
from mpl_toolkits.mplot3d import Axes3D
xi_list_bias=np.zeros((n,4)) #increasing dimension from 3 to 4

for i in range(n):  # redefine each x as x = (x0, x1, x2, 1)
    xi_list_bias[i, 0:3]= ...
    xi_list_bias[i, 3]= ...
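    # e.g. (one possible completion, appending a constant 1 for the bias):
    # xi_list_bias[i, 0:3] = xi_list[i]
    # xi_list_bias[i, 3] = 1.0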

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

for i in range(n//2):  # only the first half of the points is plotted
    if yi_list[i] == 1: # if x_i is of class 1 the point is red : 1 - Efficient
        ax.scatter(xi_list[i, 0],  xi_list[i, 1],  xi_list[i, 2],  color="red", marker=".") 
       
    else: # else green #0 - Inefficient
        ax.scatter(xi_list[i, 0], xi_list[i, 1],  xi_list[i, 2],  color="green", marker=".")   

elevation_angle = 60
azimuthal_angle = 15
ax.view_init(elevation_angle, azimuthal_angle)


ax.set_xlabel("X axis, usage hours/day")
ax.set_ylabel("Y axis, energy consumption")
ax.set_zlabel("Z axis, device age (months)")


plt.title(u"Data representation.\n" + r"$x_i$ is a green point if $y_i = 0$, red point if $y_i = 1$."+u"\n"+ r"0 - Inefficient, 1 - Efficient")
#plt.axis("tight")
plt.show()

2. Log-loss function

Define the log-loss function with an L2 penalty term or with a barrier function.

...
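A minimal sketch of one possible completion, assuming an L2 penalty whose weight lam (here 1e-3) and the clipping constant are assumptions:

lam = 1e-3  # assumed L2 penalty weight

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def f(w):
    # L2-penalized log-loss on the bias-augmented data
    s = np.clip(sigmoid(xi_list_bias @ w), 1e-12, 1 - 1e-12)  # clip to avoid log(0)
    return -np.mean(yi_list * np.log(s) + (1 - yi_list) * np.log(1 - s)) + lam * np.dot(w, w)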

3. Gradient-descent algorithm

Write a gradient-descent algorithm with the Armijo or the Wolfe-Armijo rule.

# gradient

...
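# e.g. (one possible completion, consistent with the penalized f above):
# def gradf(w):
#     s = sigmoid(xi_list_bias @ w)
#     return xi_list_bias.T @ (s - yi_list) / n + 2 * lam * w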
w=np.random.rand(4) 
epsilon = 1e-8
print("This quantity must be of order 1e-8 : ", np.linalg.norm(gradf(w) - np.array([(f(w + epsilon*np.eye(4)[0]) - f(w))/(epsilon), (f(w + epsilon*np.eye(4)[1]) - f(w))/(epsilon),(f(w + epsilon*np.eye(4)[2]) - f(w))/(epsilon),(f(w + epsilon*np.eye(4)[3]) - f(w))/(epsilon)])))
This quantity must be of order 1e-8 :  3.603214732619758e-08
# Gradient descent

... 
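A possible sketch of the descent with the Armijo backtracking rule (the constants c1 = 1e-4, the halving factor, and the stopping rule are assumptions, not prescribed by the exercise):

def gradient_descent(f, gradf, x0, tol=1e-6, max_iter=5000, c1=1e-4):
    # gradient descent with the Armijo backtracking rule
    xlist = [x0]
    x = x0
    for _ in range(max_iter):
        g = gradf(x)
        if np.linalg.norm(g) < tol:  # stop when the gradient is small
            break
        t = 1.0
        # Armijo rule: shrink the step until f decreases sufficiently
        while f(x - t * g) > f(x) - c1 * t * np.dot(g, g):
            t *= 0.5
        x = x - t * g
        xlist.append(x)
    return xlist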
    

Best weights/bias

%%time
x0 = np.array([1.,1.,1.,1.])
...
xlist_approx = ...
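# e.g. (assuming the gradient_descent sketch above):
# xlist_approx = gradient_descent(f, gradf, x0)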
print(len(xlist_approx))
print(xlist_approx[-1])

Visualization: separating hyperplane

## P(y=1|x)
def opt_f(wstar,z):
    return ...
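    # e.g. (one possible completion, with wstar = (w0, w1, w2, b)):
    # return 1.0 / (1.0 + np.exp(-(np.dot(wstar[:3], z) + wstar[3])))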
## Display the separating hyperplane from wstar, bstar (optimal weights/bias)
dom_points =[0,1,0,1,0,1]
grid_size = 50

# 3D Grid
x = np.linspace(dom_points[0], dom_points[1], grid_size)
y = np.linspace(dom_points[2], dom_points[3], grid_size)
z = np.linspace(dom_points[4], dom_points[5], grid_size)
X, Y, Z = np.meshgrid(x, y, z)

## plot the separating hyperplane
X2, Y2 = np.meshgrid(x, y)
if xlist_approx[-1][2] != 0:
    Z2 = (-xlist_approx[-1][0] * X2 - xlist_approx[-1][1] * Y2 - xlist_approx[-1][3]) / xlist_approx[-1][2]
    mask = (Z2 >= 0) & (Z2 <= 1)
    Z2[~mask] = np.nan  

# Compute P(y=1|x)
P = np.zeros((grid_size, grid_size, grid_size))
for i in range(grid_size):
    for j in range(grid_size):
        for k in range(grid_size):
            P[i, j, k] = ...
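            # e.g. P[i, j, k] = opt_f(xlist_approx[-1], [X[i, j, k], Y[i, j, k], Z[i, j, k]])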
            
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')

ax.plot_surface(X2, Y2, Z2, color='cyan')  # separating hyperplane

scatter = ax.scatter(X.flatten(), Y.flatten(), Z.flatten(), c=P.flatten(), cmap='viridis', s=0.2,alpha=0.3) #change opacity with alpha
cbar = plt.colorbar(scatter, ax=ax, shrink=0.5)
cbar.set_label("Valeur de W")

for i in range(n):
    if yi_list[i] == 1:
        ax.scatter(xi_list[i, 0], xi_list[i, 1], xi_list[i, 2], color="red", label="y=1" if i == 0 else "")
    else:
        ax.scatter(xi_list[i, 0], xi_list[i, 1], xi_list[i, 2], color="green", label="y=0" if i == 0 else "")

elevation_angle = 70 #90
azimuthal_angle = 0#35
ax.view_init(elevation_angle, azimuthal_angle)

ax.set_xlabel("X axis, usage hours/day")
ax.set_ylabel("Y axis, energy consumption")
ax.set_zlabel("Z axis, device age (months)")

ax.set_title(r"Data separation, green points y=0, red points y=1")
ax.set_aspect("equal", adjustable="box")  # so that the axes share the same scale
plt.show()

4. Multi-layer perceptron

Compare the results you obtained with a multi-layer perceptron (no hidden layer, one hidden layer, and five hidden layers, respectively); a possible setup is sketched after the three blocks below.

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import log_loss
############
## No hidden layer
mlp_no_hidden = ... #for comparison, use activation="logistic"
...
log_loss_no_hidden = ...
print(f"Log-loss (no hidden layer) : {log_loss_no_hidden:.4f}")

weights_no_hidden = ...
bias_no_hidden = ...
print(weights_no_hidden.transpose()," ", bias_no_hidden)

## Compare to our model:
print(f"Log-loss (our model) : {...:.3f}")
print(xlist_approx[-1])

############
## One hidden layer, 3 neurons, activation=logistic 

mlp_one_hidden = ...
...
log_loss_one_hidden = ...
print(f"Log-loss (one layer) : {log_loss_one_hidden:.4f}")
weights_one_hidden = ...
bias_one_hidden = ...

############
## Five hidden layers, 9 neurons, activation=relu

mlp_five_hidden = ...
...
log_loss_five_hidden = ...
print(f"Log-loss (5 hidden layers) : {log_loss_five_hidden:.4f}")

5. Visualization

With one hidden layer and three neurons, compute P(y=1|x) in a function opt_f and visualize the results. You should recover the results from mlp_one_hidden.predict_proba.

## P(y=1|x)
def opt_f(wstar,bstar,z):
    W1 = wstar[:9].reshape(3, 3).transpose()  # Hidden layer weights (3 neurons, 3 inputs)
    W2 = wstar[9:].reshape(1, 3)  # Output weights (1 neuron, 3 hidden units)
    B1 = np.array([bstar[0],bstar[1],bstar[2]]).reshape(3,1)
    B2 = bstar[3]
    z = np.array(z).reshape(3, 1)  

    H = ... #hidden layer
    P = ... 
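    # e.g. (one possible completion, matching sklearn's logistic activations):
    # H = 1.0 / (1.0 + np.exp(-(W1 @ z + B1)))         # hidden activations, shape (3, 1)
    # P = float(1.0 / (1.0 + np.exp(-(W2 @ H + B2))))  # output probability, a float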
    
    return P  # careful: must return a float
    
## Visualization with opt_f

dom_points =[0,1,0,1,0,1]
#dom_points = [0, np.max(xi_list[:, 0]), 0, np.max(xi_list[:, 1]), 0, np.max(xi_list[:, 2])]  # alternative: data-driven bounds
grid_size = 50

# Generate the 3D grid
x = np.linspace(dom_points[0], dom_points[1], grid_size)
y = np.linspace(dom_points[2], dom_points[3], grid_size)
z = np.linspace(dom_points[4], dom_points[5], grid_size)
X, Y, Z = np.meshgrid(x, y, z)


P = np.zeros((grid_size, grid_size,grid_size))
w_opt=np.concatenate((weights_one_hidden[0].flatten(),weights_one_hidden[1].flatten()))
print(w_opt)
b_opt=np.concatenate((bias_one_hidden[0].flatten(),bias_one_hidden[1].flatten()))
print(b_opt)
for i in range(grid_size):
    for j in range(grid_size):
        for k in range(grid_size):
            P[i, j, k] =opt_f(...)
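            # e.g. P[i, j, k] = opt_f(w_opt, b_opt, [X[i, j, k], Y[i, j, k], Z[i, j, k]])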
            
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')

scatter = ax.scatter(X.flatten(), Y.flatten(), Z.flatten(), c=P.flatten(), cmap='viridis', s=0.2,alpha=1)

# Colorbar
cbar = plt.colorbar(scatter, ax=ax, shrink=0.5)
cbar.set_label("P(y=1|x)")
for i in range(n):
    if yi_list[i] == 1:
        ax.scatter(xi_list[i, 0], xi_list[i, 1], xi_list[i, 2], color="red", label="y=1" if i == 0 else "")
    else:
        ax.scatter(xi_list[i, 0], xi_list[i, 1], xi_list[i, 2], color="green", label="y=0" if i == 0 else "")

# Legends and labels

ax.set_xlabel("X axis, usage hours/day")
ax.set_ylabel("Y axis, energy consumption")
ax.set_zlabel("Z axis, device age (months)")


ax.set_title(r"Data separation, green points y=0, red points y=1")
ax.set_aspect("equal", adjustable="box")  # so that the axes share the same scale
plt.show()


## Data
## Visualization with mlp_one_hidden

dom_points =[0,1,0,1,0,1]
grid_size = 50

x = np.linspace(dom_points[0], dom_points[1], grid_size)
y = np.linspace(dom_points[2], dom_points[3], grid_size)
z = np.linspace(dom_points[4], dom_points[5], grid_size)
X, Y, Z = np.meshgrid(x, y, z)

P = np.zeros((grid_size, grid_size,grid_size))

for i in range(grid_size):
    for j in range(grid_size):
        for k in range(grid_size):
            P[i, j, k] = ... # with mlp_one_hidden
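            # e.g. (per-point call, simple but slow):
            # P[i, j, k] = mlp_one_hidden.predict_proba([[X[i, j, k], Y[i, j, k], Z[i, j, k]]])[0, 1]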
            
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')

scatter = ax.scatter(X.flatten(), Y.flatten(), Z.flatten(), c=P.flatten(), cmap='viridis', s=0.2,alpha=1)
cbar = plt.colorbar(scatter, ax=ax, shrink=0.5)
cbar.set_label("P(y=1|x)")
for i in range(n):
    if yi_list[i] == 1:
        ax.scatter(xi_list[i, 0], xi_list[i, 1], xi_list[i, 2], color="red", label="y=1" if i == 0 else "")
    else:
        ax.scatter(xi_list[i, 0], xi_list[i, 1], xi_list[i, 2], color="green", label="y=0" if i == 0 else "")

ax.set_xlabel("X axis, usage hours/day")
ax.set_ylabel("Y axis, energy consumption")
ax.set_zlabel("Z axis, device age (months)")


ax.set_title(r"Data separation, green points y=0, red points y=1")
ax.set_aspect("equal", adjustable = "box") 
plt.show()

Conclusion: in a PDF or in code comments

How do the parameters seem to influence classification?

Bonus: What would you recommend to avoid overfitting? How could we be more confident in our model?

Elise Grosjean, Assistant Professor