Source code for mechanoChemML.src.stepwiseRegression

"""
Zhenlin Wang 2019
"""

import numpy as np
from mechanoChemML.src import LeastR as LR 
#import LeastR as LR 


[docs]class stepwiseR(object): def __init__(self,F_criteria=[1],F_switch=[],basis_drop_strategy='aggressive',sigma_n=1.0e-20,anchor_index=[-1],alpha_lasso=0,alpha_ridge=0, ridge_cv=[-1],threshold_d=1.0e-14,n_jobs=1): self.F_criteria=F_criteria self.F_switch=F_switch self.sigma_n=sigma_n self.anchor_index=anchor_index self.alpha_lasso=alpha_lasso self.alpha_ridge=alpha_ridge self.ridge_cv=ridge_cv self.n_jobs=n_jobs self.threshold_d=threshold_d self.basis_drop_strategy=basis_drop_strategy self.last_F=0
[docs] def test(self): print('test_pass')
[docs] def stepwiseR_fit_aggressive(self, theta_matrix, X_matrix): _,n_base_orign=theta_matrix.shape self.anchor=np.zeros(n_base_orign) if self.anchor_index[0]!=-1: for key in self.anchor_index: self.anchor[key]=1 self.loss=np.zeros(n_base_orign) self.score=np.zeros(n_base_orign) self.F_index=np.zeros(n_base_orign) self.gamma_matrix=np.zeros((n_base_orign,n_base_orign)) alpha_sum=self.alpha_lasso+self.alpha_ridge+self.ridge_cv[0] threshold_d=self.threshold_d self.best_alpha=np.zeros(n_base_orign) # local_to_global_index local_to_global_index=np.arange(n_base_orign) F_threshold=self.F_criteria[0] ######### #first LS_regression ######### num_column=0 if(alpha_sum==-1): [gamma_vector,self.loss[0]]=LR.fit(theta_matrix,X_matrix) if(self.alpha_lasso>0): [gamma_vector,self.loss[0]]=LR.fit_lasso(theta_matrix,X_matrix, alpha=self.alpha_lasso) if(self.alpha_ridge>0): [gamma_vector,self.loss[0],self.score[0]]=LR.fit_ridge(theta_matrix,X_matrix, alpha=self.alpha_ridge) if(self.ridge_cv[0]>-0.1): [gamma_vector,self.loss[0],self.score[0],self.best_alpha[0]]=LR.fit_ridge_cv(theta_matrix,X_matrix, alpha=self.ridge_cv) self.gamma_matrix[local_to_global_index,num_column]=gamma_vector ######### #stepwise ######### num_column=num_column+1; num_canditate_basis=n_base_orign; frozen_index=[] while num_canditate_basis>1 : #get current F_criteria for i in range(len(self.F_switch)): if num_column>self.F_switch[i]: F_threshold=self.F_criteria[i+1] else: break find_flag=False # put anchor index into frozen_index for i in range(local_to_global_index.size) : if self.anchor[local_to_global_index[i]]==1 : frozen_index.append(i) # begin to do basis reduction for j in range(gamma_vector.size): # continue if j is in the frozen_index if j in frozen_index: continue # calculate the min of gamma_vector except the frozen_index gamma_vector_min=gamma_vector; gamma_vector_min=np.delete(gamma_vector_min, frozen_index) gamma_criteria=min(abs(gamma_vector_min) )+threshold_d; theta_matrix_try=theta_matrix; #tentative delete the basis if abs(gamma_vector[j])<gamma_criteria : frozen_index.append(j) find_flag=True # delete the corresponding column theta_matrix_try=np.delete(theta_matrix_try,j,1) if(alpha_sum==-1): [gamma_vector_try,loss_try]=LR.fit(theta_matrix_try,X_matrix) if(self.alpha_lasso>0): [gamma_vector_try,loss_try]=LR.fit_lasso(theta_matrix_try,X_matrix, alpha=self.alpha_lasso) if(self.alpha_ridge>0): [gamma_vector_try,loss_try,score_tem]=LR.fit_ridge(theta_matrix_try,X_matrix, alpha=self.alpha_ridge) if(self.ridge_cv[0]>-0.1): [gamma_vector_try,loss_try,score_tem,best_alpha_tem]=LR.fit_ridge_cv(theta_matrix_try,X_matrix, alpha=self.ridge_cv) F=(loss_try-self.loss[num_column-1])/self.loss[num_column-1]*(n_base_orign-local_to_global_index.size+1) if(F>self.last_F): self.last_F=F # do F_test if F<F_threshold or loss_try<self.sigma_n: theta_matrix=np.delete(theta_matrix,j,1) local_to_global_index=np.delete(local_to_global_index,j) self.F_index[num_column]=F if(len(self.ridge_cv)>2): self.best_alpha[num_column]=best_alpha_tem self.loss[num_column]=loss_try gamma_vector=gamma_vector_try self.gamma_matrix[local_to_global_index,num_column]=gamma_vector num_column=num_column+1 num_canditate_basis=num_canditate_basis-1 frozen_index=[] # break tentative deleting basis if find_flag==True: break #stop the algorithm if find_flag==0 or gamma_vector_min.size<1: break self.gamma_matrix=np.delete(self.gamma_matrix,np.arange(num_column,n_base_orign), axis=1) self.loss=np.delete(self.loss,np.arange(num_column,n_base_orign)) self.F_index=np.delete(self.F_index,np.arange(num_column,n_base_orign)) self.best_alpha=np.delete(self.best_alpha,np.arange(num_column,n_base_orign))
[docs] def stepwiseR_fit_most_insignificant(self, theta_matrix, X_matrix): _,n_base_orign=theta_matrix.shape self.anchor=np.zeros(n_base_orign) if self.anchor_index[0]!=-1: for key in self.anchor_index: self.anchor[key]=1 self.loss=np.zeros(n_base_orign) self.score=np.zeros(n_base_orign) self.F_index=np.zeros(n_base_orign) self.gamma_matrix=np.zeros((n_base_orign,n_base_orign)) alpha_sum=self.alpha_lasso+self.alpha_ridge+self.ridge_cv[0] threshold_d=self.threshold_d self.best_alpha=np.zeros(n_base_orign) # local_to_global_index local_to_global_index=np.arange(n_base_orign) F_threshold=self.F_criteria[0] ######### #first LS_regression ######### num_column=0 if(alpha_sum==-1): [gamma_vector,self.loss[0]]=LR.fit(theta_matrix,X_matrix) if(self.alpha_lasso>0): [gamma_vector,self.loss[0]]=LR.fit_lasso(theta_matrix,X_matrix, alpha=self.alpha_lasso) if(self.alpha_ridge>0): [gamma_vector,self.loss[0],self.score[0]]=LR.fit_ridge(theta_matrix,X_matrix, alpha=self.alpha_ridge) if(self.ridge_cv[0]>-0.1): [gamma_vector,self.loss[0],self.score[0],self.best_alpha[0]]=LR.fit_ridge_cv(theta_matrix,X_matrix, alpha=self.ridge_cv) self.gamma_matrix[local_to_global_index,num_column]=gamma_vector ######### #stepwise ######### num_column=num_column+1; num_canditate_basis=n_base_orign; frozen_index=[] while num_canditate_basis>1 : #get current F_criteria for i in range(len(self.F_switch)): if num_column>self.F_switch[i]: F_threshold=self.F_criteria[i+1] else: break find_flag=False # put anchor index into frozen_index for i in range(local_to_global_index.size) : if self.anchor[local_to_global_index[i]]==1 : frozen_index.append(i) # begin to do basis reduction loss_tem=np.ones(gamma_vector.size)*1.0e10 best_alpha_tem=np.zeros(gamma_vector.size) score_tem=np.zeros(gamma_vector.size) gamma_matrix_try=np.zeros((gamma_vector.size-1,gamma_vector.size)) for j in range(gamma_vector.size): # continue if j is in the frozen_index if j in frozen_index: continue theta_matrix_try=np.delete(theta_matrix,j,1) if(alpha_sum==-1): [gamma_matrix_try[:,j],loss_tem[j] ]=LR.fit(theta_matrix_try,X_matrix) if(self.alpha_lasso>0): [gamma_vector_try[:,j],loss_tem[j] ]=LR.fit_lasso(theta_matrix_try,X_matrix, alpha=self.alpha_lasso) if(self.alpha_ridge>0): [gamma_matrix_try[:,j],loss_tem[j] ,score_tem[j] ]=LR.fit_ridge(theta_matrix_try,X_matrix, alpha=self.alpha_ridge) if(self.ridge_cv[0]>-0.1): [gamma_matrix_try[:,j],loss_tem[j] ,score_tem[j],best_alpha_tem[j] ]=LR.fit_ridge_cv(theta_matrix_try,X_matrix, alpha=self.ridge_cv) drop_index=np.argmin(loss_tem) loss_try=loss_tem[drop_index] F=(loss_try-self.loss[num_column-1])/self.loss[num_column-1]*(n_base_orign-local_to_global_index.size+1) if(F>self.last_F): self.last_F=F # do F_test if F<F_threshold or loss_try<self.sigma_n: find_flag=True theta_matrix=np.delete(theta_matrix,drop_index,1) local_to_global_index=np.delete(local_to_global_index,drop_index) self.F_index[num_column]=F if(len(self.ridge_cv)>2): self.best_alpha[num_column]=best_alpha_tem[drop_index] self.loss[num_column]=loss_try gamma_vector=gamma_matrix_try[:,drop_index] self.gamma_matrix[local_to_global_index,num_column]=gamma_vector num_column=num_column+1 num_canditate_basis=num_canditate_basis-1 frozen_index=[] #stop the algorithm of no operator can be eliminated or only one operator left. if find_flag==False: break self.gamma_matrix=np.delete(self.gamma_matrix,np.arange(num_column,n_base_orign), axis=1) self.loss=np.delete(self.loss,np.arange(num_column,n_base_orign)) self.F_index=np.delete(self.F_index,np.arange(num_column,n_base_orign)) self.best_alpha=np.delete(self.best_alpha,np.arange(num_column,n_base_orign))
###################################################
[docs] def stepwiseR_fit(self, theta_matrix, X_matrix): if self.basis_drop_strategy=='aggressive': self.stepwiseR_fit_aggressive(theta_matrix, X_matrix) elif self.basis_drop_strategy=='most_insignificant': self.stepwiseR_fit_most_insignificant(theta_matrix, X_matrix) else: print('basis_drop_strategy is not well defined, using most_insignificant instead') self.stepwiseR_fit_most_insignificant(theta_matrix, X_matrix)