
Integrated CityLearn Reward Function with Temporal Dependency  #64

Open

Description

@kennethZhangML
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.distributions as distributions

import matplotlib.pyplot as plt 
import numpy as np 

import gym
from citylearn import GridLearn
print(gym.__version__)

from entropyUtilities import *  # provides information_mutual, information_mutual_conditional

class customEnvCityLearn(gym.Env):
    def __init__(self, max_timesteps, n_agents, weather_file, building_attributes):
        super().__init__()

        self.timestep = 0
        self.max_timesteps = max_timesteps
        self.n_agents = n_agents

        # Binary action histories used for the temporal-dependency reward term
        self.past = np.random.randint(0, 2, size = 1)
        self.present = np.random.randint(0, 2, size = 1)
        self.future = np.random.randint(0, 2, size = 1)

        # Time-of-use (TOU) pricing: three 8-hour periods per day
        self.tou_periods = [(0, 8), (8, 16), (16, 24)]
        self.tou_prices = [0.1, 0.2, 0.3]

        # Demand-response (DR) event window and target reduction
        self.dr_event_start = 500
        self.dr_event_end = 550
        self.dr_event_percent_reduction = 0.1

        self.grid = GridLearn(weather_file, building_attributes_file = building_attributes)

    def step(self, actions):
        # Shift the temporal windows: past absorbs present, present absorbs
        # future, and future absorbs the newly taken actions
        self.past = np.append(self.past, self.present, axis = 0)
        self.present = np.append(self.present, self.future, axis = 0)
        self.future = np.append(self.future, actions, axis = 0)
        self.timestep += 1

        # Look up the current TOU price from the hour of day
        tou_period = self.timestep % 24 // 8
        tou_price = self.tou_prices[tou_period]

        # Apply the demand-response reduction while the DR event is active
        if self.dr_event_start <= self.timestep < self.dr_event_end:
            demand_reduction = self.dr_event_percent_reduction
        else:
            demand_reduction = 0

        # Scale binary actions to building power and advance the grid simulation
        actions_scaled = actions * self.grid.buildings['Electricity'].peak_power / 2
        self.grid.step(actions_scaled, tou_energy_prices = [tou_price] * 3,
                       demand_response = demand_reduction)
        
        # Temporal-dependency term: conditional mutual information I(past; future | present)
        # minus the unconditional mutual information I(past; future)
        tau = self.present
        s = self.past
        t = self.future
        I_tau_sx = information_mutual_conditional(s, t, tau)
        I_tau_sx_shared = information_mutual(s, t)
        I_tau_sx_excess = I_tau_sx - I_tau_sx_shared

        # Entropy of the empirical action distribution; it is added to the reward,
        # so it acts as a diversity bonus. Zero-probability actions are masked to avoid log(0).
        action_counts = np.bincount(actions, minlength = 2)
        action_probabilities = action_counts / len(actions)
        nonzero = action_probabilities > 0
        diversity_penalty = -np.sum(action_probabilities[nonzero] * np.log(action_probabilities[nonzero]))
        reward = -I_tau_sx_excess + diversity_penalty

        done = (self.timestep >= self.max_timesteps)
        obs = self.grid.get_state()[0]['Electricity']['consumption'].flatten()
        return obs, reward, done, {}

    def reset(self):
        self.timestep = 0
        self.past = np.random.randint(0, 2, size = 1)
        self.present = np.random.randint(0, 2, size = 1)
        self.future = np.random.randint(0, 2, size = 1)
        self.grid.reset()
        # Return the initial observation so gym-style rollouts can start from reset()
        return self.grid.get_state()[0]['Electricity']['consumption'].flatten()

def main(max_timesteps, n_agents, weather_file, building_attributes_file):
    env = customEnvCityLearn(max_timesteps = max_timesteps, n_agents = n_agents,
            weather_file = weather_file,
            building_attributes = building_attributes_file)

    obs = env.reset()
    done = False
    cumulative_reward = 0

    # Roll out a random binary policy until the episode ends
    while not done:
        action = np.random.randint(0, 2, size = 1)
        obs, reward, done, info = env.step(action)
        cumulative_reward += reward
    return cumulative_reward
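
Since entropyUtilities is not included in the snippet, the two information-theoretic helpers it supplies are the main obstacle to running this locally. Below is a minimal sketch of plug-in (empirical-frequency) estimators with the same names and argument order as used above; the names also match pyitlib's discrete_random_variable module, which may be what the file wraps, but the real entropyUtilities implementation could differ, so treat this only as a stand-in.

import numpy as np

def _joint_entropy(*variables):
    # Joint Shannon entropy (in nats) of discrete 1-D arrays, estimated from the
    # empirical joint distribution. Because the env's past/present/future arrays
    # grow at different rates, align on the most recent overlapping samples.
    n = min(len(np.asarray(v).ravel()) for v in variables)
    stacked = np.stack([np.asarray(v).ravel()[-n:] for v in variables], axis = 1)
    _, counts = np.unique(stacked, axis = 0, return_counts = True)
    probs = counts / counts.sum()
    return -np.sum(probs * np.log(probs))

def information_mutual(s, t):
    # I(S; T) = H(S) + H(T) - H(S, T)
    return _joint_entropy(s) + _joint_entropy(t) - _joint_entropy(s, t)

def information_mutual_conditional(s, t, tau):
    # I(S; T | TAU) = H(S, TAU) + H(T, TAU) - H(S, T, TAU) - H(TAU)
    return (_joint_entropy(s, tau) + _joint_entropy(t, tau)
            - _joint_entropy(s, t, tau) - _joint_entropy(tau))

With these stand-ins in place, a rollout could be launched with something like main(1000, 1, 'weather.csv', 'building_attributes.json'), where the two paths are placeholders for whichever CityLearn/GridLearn weather and building-attributes files are being used.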
