
Integrated CityLearn Reward Function with Temporal Dependency  #64

Open

Description

@kennethZhangML
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.distributions as distributions

import matplotlib.pyplot as plt 
import numpy as np 

import gym
from citylearn import GridLearn
print(gym.__version__)

from entropyUtilities import *  # provides information_mutual, information_mutual_conditional

class customEnvCityLearn(gym.Env):
    def __init__(self, max_timesteps, n_agents, weather_file, building_attributes):
        super().__init__()

        self.timestep = 0
        self.max_timesteps = max_timesteps
        self.n_agents = n_agents

        # Binary action histories used for the temporal-dependency reward term
        self.past = np.random.randint(0, 2, size = 1)
        self.present = np.random.randint(0, 2, size = 1)
        self.future = np.random.randint(0, 2, size = 1)

        # Time-of-use (TOU) pricing: three 8-hour periods per day
        self.tou_periods = [(0, 8), (8, 16), (16, 24)]
        self.tou_prices = [0.1, 0.2, 0.3]

        # Demand-response (DR) event window and target reduction
        self.dr_event_start = 500
        self.dr_event_end = 550
        self.dr_event_percent_reduction = 0.1

        self.grid = GridLearn(weather_file, building_attributes_file = building_attributes)

    def step(self, actions):
        # Shift the temporal windows: past absorbs present, present absorbs
        # future, and future absorbs the newly taken actions
        self.past = np.append(self.past, self.present, axis = 0)
        self.present = np.append(self.present, self.future, axis = 0)
        self.future = np.append(self.future, actions, axis = 0)
        self.timestep += 1

        # Look up the current TOU price from the hour of day
        tou_period = self.timestep % 24 // 8
        tou_price = self.tou_prices[tou_period]

        # Apply the demand-response reduction while the DR event is active
        if self.dr_event_start <= self.timestep < self.dr_event_end:
            demand_reduction = self.dr_event_percent_reduction
        else:
            demand_reduction = 0

        # Scale binary actions to building power and advance the grid simulation
        actions_scaled = actions * self.grid.buildings['Electricity'].peak_power / 2
        self.grid.step(actions_scaled, tou_energy_prices = [tou_price] * 3,
                       demand_response = demand_reduction)
        
        # Temporal-dependency term: conditional mutual information I(past; future | present)
        # minus the unconditional mutual information I(past; future)
        tau = self.present
        s = self.past
        t = self.future
        I_tau_sx = information_mutual_conditional(s, t, tau)
        I_tau_sx_shared = information_mutual(s, t)
        I_tau_sx_excess = I_tau_sx - I_tau_sx_shared

        # Entropy of the empirical action distribution; it is added to the reward,
        # so it acts as a diversity bonus. Zero-probability actions are masked to avoid log(0).
        action_counts = np.bincount(actions, minlength = 2)
        action_probabilities = action_counts / len(actions)
        nonzero = action_probabilities > 0
        diversity_penalty = -np.sum(action_probabilities[nonzero] * np.log(action_probabilities[nonzero]))
        reward = -I_tau_sx_excess + diversity_penalty

        done = (self.timestep >= self.max_timesteps)
        obs = self.grid.get_state()[0]['Electricity']['consumption'].flatten()
        return obs, reward, done, {}

    def reset(self):
        self.timestep = 0
        self.past = np.random.randint(0, 2, size = 1)
        self.present = np.random.randint(0, 2, size = 1)
        self.future = np.random.randint(0, 2, size = 1)
        self.grid.reset()
        # Return the initial observation so gym-style rollouts can start from reset()
        return self.grid.get_state()[0]['Electricity']['consumption'].flatten()

def main(max_timesteps, n_agents, weather_file, building_attributes_file):
    env = customEnvCityLearn(max_timesteps = max_timesteps, n_agents = n_agents,
            weather_file = weather_file,
            building_attributes = building_attributes_file)

    obs = env.reset()
    done = False
    cumulative_reward = 0

    # Roll out a random binary policy until the episode ends
    while not done:
        action = np.random.randint(0, 2, size = 1)
        obs, reward, done, info = env.step(action)
        cumulative_reward += reward
    return cumulative_reward
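
Since entropyUtilities is not included in the snippet, the two information-theoretic helpers it supplies are the main obstacle to running this locally. Below is a minimal sketch of plug-in (empirical-frequency) estimators with the same names and argument order as used above; the names also match pyitlib's discrete_random_variable module, which may be what the file wraps, but the real entropyUtilities implementation could differ, so treat this only as a stand-in.

import numpy as np

def _joint_entropy(*variables):
    # Joint Shannon entropy (in nats) of discrete 1-D arrays, estimated from the
    # empirical joint distribution. Because the env's past/present/future arrays
    # grow at different rates, align on the most recent overlapping samples.
    n = min(len(np.asarray(v).ravel()) for v in variables)
    stacked = np.stack([np.asarray(v).ravel()[-n:] for v in variables], axis = 1)
    _, counts = np.unique(stacked, axis = 0, return_counts = True)
    probs = counts / counts.sum()
    return -np.sum(probs * np.log(probs))

def information_mutual(s, t):
    # I(S; T) = H(S) + H(T) - H(S, T)
    return _joint_entropy(s) + _joint_entropy(t) - _joint_entropy(s, t)

def information_mutual_conditional(s, t, tau):
    # I(S; T | TAU) = H(S, TAU) + H(T, TAU) - H(S, T, TAU) - H(TAU)
    return (_joint_entropy(s, tau) + _joint_entropy(t, tau)
            - _joint_entropy(s, t, tau) - _joint_entropy(tau))

With these stand-ins in place, a rollout could be launched with something like main(1000, 1, 'weather.csv', 'building_attributes.json'), where the two paths are placeholders for whichever CityLearn/GridLearn weather and building-attributes files are being used.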
