Multi-UAV-Mobile-Edge-Computing-Hybrid-Optimization/config.py at main · Project-Group-BTP/Multi-UAV-Mobile-Edge-Computing-Hybrid-Optimization · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np

# Training Parameters
MODEL: str = "attention_matd3"  # options: 'maddpg', 'matd3', 'mappo', 'masac', 'attention_<model>', 'random'
SEED: int = 42  # random seed for reproducibility
np.random.seed(SEED)  # set numpy random seed
STEPS_PER_EPISODE: int = 1000  # total T
LOG_FREQ: int = 1  # episodes
IMG_FREQ: int = 1000  # steps
TEST_LOG_FREQ: int = 1  # episodes (for testing)
TEST_IMG_FREQ: int = 100  # steps (for testing)

# Simulation Parameters
MBS_POS: np.ndarray = np.array([350.0, 350.0, 30.0], dtype=np.float32)  # (X_mbs, Y_mbs, Z_mbs) in meters
NUM_UAVS: int = 5  # U
NUM_UES: int = 100  # M
AREA_WIDTH: int = 700  # X_max in meters
AREA_HEIGHT: int = 700  # Y_max in meters
TIME_SLOT_DURATION: float = 1.0  # tau in seconds
UE_MAX_DIST: float = 15.0  # d_max^UE in meters
UE_MAX_WAIT_TIME: int = 10  # in time slots

# UAV Parameters
UAV_ALTITUDE: int = 100  # H in meters
UAV_SPEED: float = 15.0  # v^UAV in m/s
UAV_STORAGE_CAPACITY: np.ndarray = np.random.choice(np.arange(40 * 10**6, 80 * 10**6, 10**6), size=NUM_UAVS).astype(np.int64)  # S_u in bytes
UAV_COMPUTING_CAPACITY: np.ndarray = np.random.choice(np.arange(5 * 10**9, 20 * 10**9, 10**9), size=NUM_UAVS).astype(np.int64)  # F_u in cycles/sec
UAV_SENSING_RANGE: float = 300.0  # R^sense in meters
UAV_COVERAGE_RADIUS: float = 100.0  # R in meters
MIN_UAV_SEPARATION: float = 200.0  # d_min in meters
assert np.all(UAV_STORAGE_CAPACITY > 0)
assert np.all(UAV_COMPUTING_CAPACITY > 0)
assert UAV_COVERAGE_RADIUS * 2 <= MIN_UAV_SEPARATION
assert UAV_SENSING_RANGE >= MIN_UAV_SEPARATION

# Collisions and Penalties
COLLISION_AVOIDANCE_ITERATIONS: int = 20  # number of iterations to resolve collisions
COLLISION_PENALTY: float = 10.0  # penalty per collision
BOUNDARY_PENALTY: float = 10.0  # penalty for going out of bounds
NON_SERVED_LATENCY_PENALTY: float = 20.0  # penalty in latency for non-served requests
# IMPORTANT : Reconfigurable, should try for various values including : NUM_UAVS - 1 and NUM_UES
MAX_UAV_NEIGHBORS: int = NUM_UAVS - 1
MAX_ASSOCIATED_UES: int = min(30, NUM_UES // NUM_UAVS + 10)
assert MAX_UAV_NEIGHBORS >= 1 and MAX_UAV_NEIGHBORS <= NUM_UAVS - 1
assert MAX_ASSOCIATED_UES >= 1 and MAX_ASSOCIATED_UES <= NUM_UES

POWER_MOVE: float = 100.0  # P_move in Watts
POWER_HOVER: float = 80.0  # P_hover in Watts

# Request Parameters
NUM_SERVICES: int = 25  # S
NUM_CONTENTS: int = 50  # K
NUM_FILES: int = NUM_SERVICES + NUM_CONTENTS  # S + K
CPU_CYCLES_PER_BYTE: np.ndarray = np.random.randint(2000, 4000, size=NUM_SERVICES)  # omega_s_m
FILE_SIZES: np.ndarray = np.random.randint(10**6, 5 * 10**6, size=NUM_FILES).astype(np.int64)  # in bytes
MIN_INPUT_SIZE: int = 1 * 10**6  # in bytes
MAX_INPUT_SIZE: int = 5 * 10**6  # in bytes
ZIPF_BETA: float = 0.8  # beta^Zipf
K_CPU: float = 1e-27  # CPU capacitance coefficient

# Caching Parameters
T_CACHE_UPDATE_INTERVAL: int = 50  # T_cache
GDSF_SMOOTHING_FACTOR: float = 0.75  # beta^gdsf

# Probabilistic Caching Parameters
AVG_FILE_SIZE: float = float(np.mean(FILE_SIZES))
PROB_GAMMA: float = 0.5  # gamma

# Communication Parameters
G_CONSTS_PRODUCT: float = 2.2846 * 1.42 * 1e-4  # G_0 * g_0
TRANSMIT_POWER: float = 0.5  # P^comm in Watts
AWGN: float = 1e-13  # sigma^2
BANDWIDTH_INTER: int = 20 * 10**6  # B^inter in Hz
BANDWIDTH_EDGE: int = 40 * 10**6  # B^edge in Hz
BANDWIDTH_BACKHAUL: int = 10 * 10**6  # B^backhaul in Hz

# WPT Parameters
UE_BATTERY_CAPACITY: float = 100.0  # B_max in Joules
UE_CRITICAL_THRESHOLD: float = 0.3 * UE_BATTERY_CAPACITY  # B_low in Joules
WPT_TRANSMIT_POWER: float = 500.0 * 1e6  # P^WPT in Watts (UAV WPT transmit power for energy harvesting)
WPT_EFFICIENCY: float = 0.6  # eta (energy harvesting efficiency, 60%)
UE_STATIC_POWER: float = 0.05  # Idle power consumption in Watts

# Model Parameters
# Reward formula: reward = ALPHA_3*log(fairness) - ALPHA_1*log(latency) - ALPHA_2*log(energy) - ALPHA_4*log(1+offline_rate)
# Then scaled by REWARD_SCALING_FACTOR.
ALPHA_1 = 1.0  # weightage for latency (negative term, higher = stronger penalty for latency)
ALPHA_2 = 0.4  # weightage for energy (negative term, lower priority than latency)
ALPHA_3 = 2.0  # weightage for fairness (positive term, encourage equal service)
ALPHA_4 = 50.0  # weightage for offline rate (negative term, penalizes UEs running out of battery)
REWARD_SCALING_FACTOR: float = 0.01  # scaling factor for rewards (prevents exploding values)

SELF_OBS_DIM: int = 2 + NUM_FILES  # pos (2) + cache (NUM_FILES)
UE_OBS_DIM: int = 2 + 3 + 1  # pos (2) + request_tuple (3) + battery level (1)
NEIGHBOR_OBS_DIM: int = 2  # pos (2)
OBS_DIM_SINGLE: int = SELF_OBS_DIM + (MAX_UAV_NEIGHBORS * NEIGHBOR_OBS_DIM) + (MAX_ASSOCIATED_UES * UE_OBS_DIM)
ACTION_DIM: int = 2  # angle, distance from [-1, 1]
MLP_HIDDEN_DIM: int = 128

ACTOR_LR: float = 9e-4
CRITIC_LR: float = 8e-4
DISCOUNT_FACTOR: float = 0.96  # gamma
UPDATE_FACTOR: float = 0.012  # tau
MAX_GRAD_NORM: float = 0.5  # maximum norm for gradient clipping to prevent exploding gradients
LOG_STD_MAX: float = 2  # maximum log standard deviation for stochastic policies
LOG_STD_MIN: float = -20  # minimum log standard deviation for stochastic policies
EPSILON: float = 1e-9  # small value to prevent division by zero

# Off-policy algorithm hyperparameters
REPLAY_BUFFER_SIZE: int = 10**6  # B
REPLAY_BATCH_SIZE: int = 128  # minibatch size
INITIAL_RANDOM_STEPS: int = 5000  # steps of random actions for exploration
LEARN_FREQ: int = 10  # steps to learn after

# Gaussian Noise Parameters (for MADDPG and MATD3)
INITIAL_NOISE_SCALE: float = 0.2
MIN_NOISE_SCALE: float = 0.01
NOISE_DECAY_RATE: float = 0.995

# MATD3 Specific Hyperparameters
POLICY_UPDATE_FREQ: int = 2  # delayed policy update frequency
TARGET_POLICY_NOISE: float = 0.25  # standard deviation of target policy smoothing noise.
NOISE_CLIP: float = 0.5  # range to clip target policy smoothing noise

# MAPPO Specific Hyperparameters
PPO_ROLLOUT_LENGTH: int = STEPS_PER_EPISODE  # number of steps to collect per rollout before updating
PPO_GAE_LAMBDA: float = 0.95  # lambda parameter for GAE
PPO_EPOCHS: int = 10  # number of epochs to run on the collected rollout data
PPO_BATCH_SIZE: int = 200  # size of mini-batches to use during the update step
PPO_CLIP_EPS: float = 0.2  # clipping parameter (epsilon) for the PPO surrogate objective
PPO_ENTROPY_COEF: float = 0.01  # coefficient for the entropy bonus to encourage exploration

# MASAC Specific Hyperparameters
ALPHA_LR: float = 3e-4  # learning rate for the entropy temperature alpha

# Attention Hyperparameters
ATTN_HIDDEN_DIM: int = 64  # Embedding size for internal attention representations
ATTN_NUM_HEADS: int = 8  # Number of attention heads
assert ATTN_HIDDEN_DIM % ATTN_NUM_HEADS == 0, f"ATTN_HIDDEN_DIM ({ATTN_HIDDEN_DIM}) must be divisible by ATTN_NUM_HEADS ({ATTN_NUM_HEADS})"