TiffanyVlaar
diff --git a/‎.DS_Store
8 KB b/‎.DS_Store
8 KB
diff --git a/‎OGconstraint_CIFAR10_resnet34.py
Lines changed: 93 additions & 0 deletions b/‎OGconstraint_CIFAR10_resnet34.py
Lines changed: 93 additions & 0 deletions
diff --git a/‎Optimizers/.DS_Store
6 KB b/‎Optimizers/.DS_Store
6 KB
diff --git a/‎Optimizers/OGconstraint_ud.py
Lines changed: 183 additions & 0 deletions b/‎Optimizers/OGconstraint_ud.py
Lines changed: 183 additions & 0 deletions
diff --git a/‎Optimizers/__init__.py
Lines changed: 2 additions & 0 deletions b/‎Optimizers/__init__.py
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,93 @@
+import torch
+import torch.nn as nn
+import numpy as np
+from models import *
+from Optimizers import OGconstraint_ud
+from Optimizers import initOGconstraint
+from datasets import CIFAR10data
+from train import train
+from test import test
+
+# Pin GPU 2 only when CUDA exists; calling set_device unconditionally raises
+# on CPU-only machines and makes the CPU fallback unreachable.
+if torch.cuda.is_available():
+    torch.cuda.set_device(2)
+    device = torch.device('cuda:2')
+else:
+    device = torch.device('cpu')
+print(f"Running on {device}.")
+torch.manual_seed(5) #optional
+
+#Hyperparameters
+h = 0.1          # step size reached at the end of warm-up
+T = 0            # only appears in the results filename below
+dt1 = h/3 #for warm-up
+cgamma = 0.9     # passed to the optimizer as cgamma
+WD = 0           # passed to the optimizer as weight_decay
+dgamma = 0       # passed to the optimizer as dgamma
+num_runs = 3
+num_epochs = 150
+batchsize = 128
+
+loader_train,loader_test = CIFAR10data.generatedata(batchsize=batchsize)
+
+# One list of per-epoch values per run, i.e. shape (num_runs, num_epochs).
+RES_train_loss_allruns = []
+RES_test_loss_allruns = []
+RES_test_acc_allruns = []
+RES_train_acc_allruns = []
+
+for run in range(num_runs):
+    print("run = ", run)
+    # Restart the warm-up schedule for every run. Without this reset dt1 keeps
+    # the value reached at the end of the previous run's warm-up, so later
+    # runs would start at h and warm up past it (5h/3, 7h/3, ...).
+    dt1 = h/3
+
+    net = ResNet34()
+    Constrainedlist, net = initOGconstraint.initOG(net)
+    net = net.to(device)
+
+    criterion = nn.CrossEntropyLoss()
+    optimizer = OGconstraint_ud.oCoLAud(net.parameters(),device,Constrainedlist=Constrainedlist,lr=dt1,cgamma=cgamma,dgamma=dgamma,weight_decay=WD)
+
+    RES_train_loss = []
+    RES_train_acc = []
+    RES_test_loss = []
+    RES_test_acc = []
+
+    for epoch in range(num_epochs):
+
+        net, optimizer, loss_train,acc_train = train(epoch,loader_train,net,optimizer,criterion,device)
+        loss_test,acc_test = test(loader_test,net,criterion,device)
+
+        RES_train_loss.append(loss_train)
+        RES_train_acc.append(acc_train)
+        RES_test_loss.append(loss_test)
+        RES_test_acc.append(acc_test)
+
+        #warmup: h/3 -> 2h/3 -> h over the first two epochs
+        if epoch < 2:
+            dt1 += (h/3)
+            optimizer.param_groups[0]['lr'] = dt1
+        #learning rate decay
+        elif epoch == 50:
+            optimizer.param_groups[0]['lr'] = 0.01
+        elif epoch == 100:
+            optimizer.param_groups[0]['lr'] = 0.001
+
+
+    RES_train_loss_allruns.append(RES_train_loss)
+    RES_train_acc_allruns.append(RES_train_acc)
+    RES_test_loss_allruns.append(RES_test_loss)
+    RES_test_acc_allruns.append(RES_test_acc)
+
+
+# Per-epoch statistics across runs (reduction over axis 0 = the run axis).
+# The nesting order reproduces the original line order of the report:
+# all four series for min, then max, then std, then mean.
+summary_stats = (('min', np.min), ('max', np.max), ('std', np.std), ('mean', np.mean))
+summary_series = (
+    ('Training loss', RES_train_loss_allruns),
+    ('Test loss', RES_test_loss_allruns),
+    ('Training accuracy', RES_train_acc_allruns),
+    ('Test accuracy', RES_test_acc_allruns),
+)
+
+with open(f'OGconstraint_Resnet34_CIFAR10_batchsize_{batchsize}_WD_{WD}_cgam_{cgamma}_h_{h}_T_{T}_{num_runs}runs_{num_epochs}epochs.txt', 'w+') as f:
+    for stat_name, stat_fn in summary_stats:
+        for label, values in summary_series:
+            f.write(f'{label} {stat_name}: {stat_fn(values,0)}\n')
+
+
@@ -0,0 +1,183 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.optim import Optimizer
+import numpy as np
+
+class oCoLAud(Optimizer):
+    """Momentum optimizer that keeps flagged weight matrices orthogonal.
+
+    Every parameter whose entry in ``Constrainedlist`` equals 1 is treated as
+    a matrix ``W`` of shape ``(shape[0], prod(shape[1:]))``.  After each
+    position update the weights are iterated back towards ``W^T W = I``
+    (rows >= cols) or ``W W^T = I`` (rows < cols), and the momentum receives
+    the correction ``-0.5 * W (M^T W + W^T M)`` (or its wide analogue) that
+    this class uses as its tangent-space projection.  All other parameters
+    get a plain momentum update.
+
+    ``stepMom()`` must be called once (after a backward pass) before the
+    first ``step()``: it is the only place that creates the
+    ``'momentum_buffer'`` state entry that ``step()`` reads.
+
+    Args:
+        params: iterable of parameters or parameter-group dicts.
+        device: stored as ``self.device``; tensors created by the optimizer
+            follow the device of the parameter they belong to.
+        Constrainedlist: sequence indexed by a parameter's position inside
+            its parameter group; the value 1 marks a constrained parameter.
+            NOTE(review): with more than one param group the index restarts
+            at 0 for each group -- confirm this matches the caller's layout.
+        lr: step size used for the position update.
+        cgamma: factor the momentum is multiplied by at the start of a step.
+        dgamma: scale of the standard-normal noise added to the momentum
+            (0 disables the noise).
+        weight_decay: coefficient of the ``weight_decay * p`` term added to
+            the gradient.
+    """
+
+    # Fixed number of iterations used to pull the weights back onto the constraint.
+    _CONSTRAINT_ITERS = 10
+    # Factor applied to the gradient when stepMom seeds the momentum.
+    _MOMENTUM_INIT_SCALE = -0.01
+
+    def __init__(self,params,device,Constrainedlist,lr=0.1,cgamma=0,dgamma=0,weight_decay=0):
+        self.device = device
+        self.Constrainedlist = Constrainedlist
+        defaults = dict(lr=lr,cgamma=cgamma,dgamma=dgamma,weight_decay=weight_decay)
+        super().__init__(params,defaults)
+
+    def __setstate__(self,state):
+        super().__setstate__(state)
+
+    @staticmethod
+    def _matrix_shape(p):
+        """Return (rows, cols) of ``p`` flattened to 2-D: rows = shape[0], cols = the rest."""
+        rows = p.shape[0]
+        return rows, p.numel() // rows
+
+    @staticmethod
+    def _tangent_correction(weight, momentum):
+        """Return the 2-D correction ``-0.5 * W (M^T W + W^T M)`` (tall) or its wide analogue."""
+        if weight.shape[0] >= weight.shape[1]:
+            sym = torch.matmul(momentum.t(), weight) + torch.matmul(weight.t(), momentum)
+            return -0.5 * torch.matmul(weight, sym)
+        weight_t = weight.t()
+        sym = torch.matmul(weight, momentum.t()) + torch.matmul(momentum, weight_t)
+        return -0.5 * torch.matmul(weight_t, sym).t()
+
+    @classmethod
+    def _project_momentum(cls, buf, weight):
+        """Add the tangent correction for the 2-D ``weight`` to ``buf`` in place."""
+        # The correction is fully computed before the in-place add, so reading
+        # ``buf`` through a reshaped view is safe.
+        correction = cls._tangent_correction(weight, buf.reshape(weight.shape))
+        buf.add_(correction.reshape(buf.shape))
+
+    @staticmethod
+    def _friction_and_noise(buf, cgamma, dgamma):
+        """Scale ``buf`` by ``cgamma`` and, if ``dgamma`` is non-zero, add Gaussian noise."""
+        buf.mul_(cgamma)
+        if dgamma != 0:
+            # randn_like follows buf's device/dtype, so this also works on CPU
+            # and on GPUs other than the current default one.
+            buf.add_(torch.randn_like(buf), alpha=dgamma)
+
+    @staticmethod
+    def _gradient(p, weight_decay):
+        """Return ``p.grad``, plus ``weight_decay * p`` when weight decay is enabled."""
+        d_p = p.grad
+        if weight_decay != 0:
+            d_p = d_p.add(p, alpha=weight_decay)
+        return d_p
+
+    @torch.no_grad()
+    def stepMom(self):
+        """Seed every momentum buffer with ``-0.01 * grad``.
+
+        For constrained parameters the seeded momentum is additionally
+        corrected with the tangent projection at the current weights.
+        Parameters without a gradient are skipped.
+        """
+        for group in self.param_groups:
+
+            for i,p in enumerate(group['params']):
+
+                if p.grad is None:
+                    continue
+
+                param_state = self.state[p]
+                buf = param_state['momentum_buffer'] = self._MOMENTUM_INIT_SCALE*torch.clone(p.grad).detach()
+
+                if self.Constrainedlist[i] == 1:
+                    rows, cols = self._matrix_shape(p)
+                    weight = torch.clone(p).detach().reshape((rows,cols))
+                    self._project_momentum(buf, weight)
+
+    @torch.no_grad()
+    def step(self, closure=None):
+        """Perform one optimization step.
+
+        Args:
+            closure: optional callable that re-evaluates the model and
+                returns the loss; it is run with gradients enabled.
+
+        Returns:
+            The value returned by ``closure``, or ``None`` when no closure
+            is given.
+
+        Raises:
+            KeyError: if ``stepMom()`` has not created the momentum buffer
+                for a parameter that has a gradient.
+        """
+        loss = None
+        if closure is not None:
+            with torch.enable_grad():
+                loss = closure()
+
+        for group in self.param_groups:
+            cgamma = group['cgamma']
+            dgamma = group['dgamma']
+            weight_decay = group['weight_decay']
+            lr = group['lr']
+
+            for i,p in enumerate(group['params']):
+
+                if p.grad is None:
+                    continue
+
+                param_state = self.state[p]
+                buf = param_state['momentum_buffer']
+
+                if self.Constrainedlist[i] != 1:
+                    # Unconstrained parameter: plain momentum update.
+                    self._friction_and_noise(buf, cgamma, dgamma)
+                    buf.add_(-self._gradient(p, weight_decay),alpha=1)
+                    p.data.add_(buf,alpha=lr)
+                    continue
+
+                shapep = p.shape
+                rows, cols = self._matrix_shape(p)
+                tall = rows >= cols
+
+                if 'OldWeight' not in param_state:
+                    # Snapshot kept only so the optimizer state has the same
+                    # keys as before; it is not read again.
+                    param_state['OldWeight'] = torch.clone(p).detach()
+                    # Identity of the Gram-matrix size, on the parameter's own
+                    # device/dtype so it always matches the products below.
+                    param_state['Id'] = torch.eye(min(rows,cols),device=p.device,dtype=p.dtype)
+                Id = param_state['Id']
+
+                # Weights at the start of the step, as a matrix.
+                old_weight = torch.clone(p).detach().reshape((rows,cols))
+                old_weight_t = old_weight.t()
+
+                # O -step
+                self._friction_and_noise(buf, cgamma, dgamma)
+                self._project_momentum(buf, old_weight)
+
+                # B-step
+                buf.add_(-self._gradient(p, weight_decay))
+                self._project_momentum(buf, old_weight)
+
+                # A-step
+                p.data.add_(buf,alpha=lr)
+                p.data = p.reshape((rows,cols))
+                first_step = torch.clone(p).detach()
+
+                # Iterate the weights back towards the constraint, always
+                # correcting along the start-of-step weights.
+                for _ in range(self._CONSTRAINT_ITERS):
+                    if tall:
+                        Lambda = torch.matmul(p.t(),p)-Id
+                        p.add_(-0.5*torch.matmul(old_weight,Lambda))
+                    else:
+                        Lambda = torch.matmul(p,p.t())-Id
+                        p.add_(-0.5*torch.matmul(old_weight_t,Lambda).t())
+
+                # Feed the position correction back into the momentum.
+                buf.add_(((p.data-first_step)/lr).reshape(*shapep))
+                p.data = p.reshape(*shapep)
+
+                # Final projection at the NEW weights.  Both factors come from
+                # the same fresh copy; previously the wide case mixed the new
+                # weights with a stale transpose of the old ones.
+                new_weight = torch.clone(p).detach().reshape((rows,cols))
+                self._project_momentum(buf, new_weight)
+
+        return loss
+
+
+
+
+
+
+
+        
+
+               
@@ -0,0 +1,2 @@
+from .circleconstraint_ud import *
+from .OGconstraint_ud import *
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from .circleconstraint_ud import *`
	`2`	`+from .OGconstraint_ud import *`