Skip to content

Commit 4ea1b23

Browse files
authored
Add files via upload
1 parent 37e5a92 commit 4ea1b23

File tree

2 files changed

+427
-0
lines changed

2 files changed

+427
-0
lines changed

src/effectsize.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import pandas
2+
from functions import list_filter, compute_continuous, compute_categorical
3+
4+
#%%
5+
6+
def compute(data,
            group,
            continuous=[],
            categorical=[],
            skewed=[],
            weights=None,
            decimals=2,
            intervals=None):
    """
    Computes standardized differences (effect sizes) for all specified variables

    Every requested variable that exists in the dataframe is handed to
    compute_continuous (if it is listed in `continuous`) or to
    compute_categorical (otherwise); the values they return are stacked into
    one row per variable.

    Parameters:
        data (dataframe): Pandas DataFrame containing observations (rows) and variables (columns)
        group (str): Variable defining the two groups being compared (e.g. exposed and unexposed)
        continuous (list): List of string items which are names of the continuous variables for which the SD should be computed
        categorical (list): List of string items which are names of the categorical variables for which the SD should be computed
        skewed (list): List of string items which are names of the continuous variables which have a skewed distribution (ranked SD computed); names not also listed in `continuous` are ignored
        weights (None or str): Variable defining weights for each observation (otherwise assumed to be equally weighted)
        decimals (int): Number of decimal places which should be computed
        intervals (None or float): Whether CIs should be computed and with what coverage e.g. for 95% CI, intervals = 0.95

    Returns:
        Returns a Pandas DataFrame with one row per computed variable and a
        column 'ES' (plus a '<coverage>% CI' column if intervals is specified).
        If none of the requested variables are present in the dataframe, an
        empty DataFrame carrying those column labels is returned.

    Raises:
        AssertionError: If an argument has the wrong type or intervals lies outside (0,1)
    """

    # Asserting input types
    # NOTE: the list defaults in the signature are shared between calls, which
    # is safe only because this function never mutates them.
    # NOTE(review): asserts are skipped under `python -O`; they are kept so
    # that callers catching AssertionError keep working.

    assert isinstance(data, pandas.DataFrame), "Data must be specified as a Pandas DataFrame"
    assert isinstance(group, str), "Group variable must be specified as a string"
    assert all(isinstance(names, list) for names in (continuous, categorical, skewed)), "Variable names must be specified inside lists"
    assert weights is None or isinstance(weights, str), "If weight variable is present, it must be specified as a string"
    # bool is a subclass of int but is not a meaningful number of decimals
    assert isinstance(decimals, int) and not isinstance(decimals, bool), "Number of decimal places must be specified as an integer"
    assert intervals is None or 0 < intervals < 1, "CIs must be specified as None or in range (0,1) e.g. for 95% CI, intervals = 0.95"

    # Get combined list of variables and sort them into the order in which they appear in the dataframe

    specified_variables = continuous + categorical
    all_variables = list(data)

    for variable in specified_variables:
        assert isinstance(variable, str), "The variable names inside lists must all be specified as strings"
        if variable not in all_variables:
            print("The following variable was not computed as it could not be found in dataframe columns:", variable)

    # presumably keeps the entries of list1 that also occur in list2, in
    # list1 order - verify against functions.list_filter
    ordered_variables = list_filter(list1=all_variables, list2=specified_variables)

    # Column labels depend only on whether CIs were requested

    if intervals is None:
        column_labels = ['ES']
    else:
        ci_label = round(intervals * 100, ndigits=2)
        column_labels = ['ES', str(ci_label) + '% CI']

    # Nothing to compute: return an empty, correctly labelled frame instead of
    # failing with a length mismatch when the labels are applied below

    if len(ordered_variables) == 0:
        return pandas.DataFrame(columns=column_labels)

    # Computing the standardized difference

    results = []

    for variable in ordered_variables:

        if variable in continuous:
            stdiff = compute_continuous(data=data,
                                        group=group,
                                        variable=variable,
                                        skewed=variable in skewed,
                                        weights=weights,
                                        decimals=decimals,
                                        intervals=intervals)
        else:
            stdiff = compute_categorical(data=data,
                                         group=group,
                                         variable=variable,
                                         weights=weights,
                                         decimals=decimals,
                                         intervals=intervals)

        results.append(stdiff)

    results = pandas.DataFrame(data=results)

    # Labels are assigned directly because set_axis(..., inplace=True) is not
    # accepted by pandas >= 2.0.
    # NOTE(review): the nested list is kept exactly as before; pandas appears
    # to build a single-level MultiIndex from it - confirm before flattening.
    results.index = [ordered_variables]
    results.columns = column_labels

    return results

0 commit comments

Comments
 (0)