import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as distributions
import matplotlib.pyplot as plt
import numpy as np
import gym
from citylearn import GridLearn
from entropyUtilities import information_mutual, information_mutual_conditional

print(gym.__version__)
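For context, entropyUtilities is not a published package, so its internals are not shown in the original. A minimal sketch of the two estimators the reward below relies on, assuming discrete (here binary) sequences of equal length and plug-in, empirical-frequency entropies in nats, could look like this (all names and formulas here are assumptions, not the original implementation):

# Hypothetical stand-in for entropyUtilities (assumed plug-in estimators).
import numpy as np

def _entropy(*columns):
    # Plug-in joint entropy (nats) of the given equal-length discrete sequences.
    joint = np.stack(columns, axis=-1)
    _, counts = np.unique(joint, axis=0, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log(p))

def information_mutual(s, t):
    # I(S; T) = H(S) + H(T) - H(S, T)
    return _entropy(s) + _entropy(t) - _entropy(s, t)

def information_mutual_conditional(s, t, tau):
    # I(S; T | Tau) = H(S, Tau) + H(T, Tau) - H(S, T, Tau) - H(Tau)
    return (_entropy(s, tau) + _entropy(t, tau)
            - _entropy(s, t, tau) - _entropy(tau))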
class customEnvCityLearn(gym.Env):
    def __init__(self, max_timesteps, n_agents, weather_file, building_attributes):
        super().__init__()
        self.timestep = 0
        self.max_timesteps = max_timesteps
        self.n_agents = n_agents
        # Rolling buffers of binary actions used by the information-theoretic reward.
        self.past = np.random.randint(0, 2, size=1)
        self.present = np.random.randint(0, 2, size=1)
        self.future = np.random.randint(0, 2, size=1)
        # Time-of-use pricing: three 8-hour periods per day.
        self.tou_periods = [(0, 8), (8, 16), (16, 24)]
        self.tou_prices = [0.1, 0.2, 0.3]
        # Demand-response event window (in timesteps) and target reduction.
        self.dr_event_start = 500
        self.dr_event_end = 550
        self.dr_event_percent_reduction = 0.1
        self.grid = GridLearn(weather_file, building_attributes_file=building_attributes)
    def step(self, actions):
        # Extend the history buffers: past absorbs the present, present absorbs
        # the future, and the future absorbs the new actions.
        self.past = np.append(self.past, self.present, axis=0)
        self.present = np.append(self.present, self.future, axis=0)
        self.future = np.append(self.future, actions, axis=0)
        self.timestep += 1
        # Current time-of-use price (one of the three 8-hour periods per day).
        tou_period = self.timestep % 24 // 8
        tou_price = self.tou_prices[tou_period]
        # Apply the demand-response reduction only inside the event window.
        if self.dr_event_start <= self.timestep < self.dr_event_end:
            demand_reduction = self.dr_event_percent_reduction
        else:
            demand_reduction = 0
        actions_scaled = actions * self.grid.buildings['Electricity'].peak_power / 2
        self.grid.step(actions_scaled, tou_energy_prices=[tou_price] * 3,
                       demand_response=demand_reduction)
        # Excess information: conditional mutual information between past and
        # future given the present, minus their unconditional mutual information.
        tau = self.present
        s = self.past
        t = self.future
        I_tau_sx = information_mutual_conditional(s, t, tau)
        I_tau_sx_shared = information_mutual(s, t)
        I_tau_sx_excess = I_tau_sx - I_tau_sx_shared
        # Action entropy; zero-probability actions are excluded to avoid log(0).
        action_counts = np.bincount(actions, minlength=2)
        action_probabilities = action_counts / len(actions)
        nonzero = action_probabilities[action_probabilities > 0]
        diversity_bonus = -np.sum(nonzero * np.log(nonzero))
        # Penalize excess information; the entropy term rewards diverse actions.
        reward = -I_tau_sx_excess + diversity_bonus
        done = (self.timestep >= self.max_timesteps)
        obs = self.grid.get_state()[0]['Electricity']['consumption'].flatten()
        return obs, reward, done, {}
    def reset(self):
        self.timestep = 0
        self.past = np.random.randint(0, 2, size=1)
        self.present = np.random.randint(0, 2, size=1)
        self.future = np.random.randint(0, 2, size=1)
        self.grid.reset()
        # Return the initial observation, matching what step() returns.
        return self.grid.get_state()[0]['Electricity']['consumption'].flatten()
def main(max_timesteps, n_agents, weather_file, building_attributes_file):
    env = customEnvCityLearn(max_timesteps=max_timesteps, n_agents=n_agents,
                             weather_file=weather_file,
                             building_attributes=building_attributes_file)
    obs = env.reset()
    done = False
    cumulative_reward = 0
    # Roll out a random binary policy for one full episode.
    while not done:
        action = np.random.randint(0, 2, size=1)
        obs, reward, done, info = env.step(action)
        cumulative_reward += reward
    return cumulative_reward
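A minimal entry point for running the script could look like the following; the file paths and episode length are illustrative placeholders, not values from the original:

if __name__ == "__main__":
    # Hypothetical inputs for illustration; substitute real CityLearn data files.
    total = main(max_timesteps=1000,
                 n_agents=1,
                 weather_file="weather.csv",
                 building_attributes_file="building_attributes.json")
    print(f"Cumulative reward: {total}")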