-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCyclistic 2022 Project.sql
More file actions
112 lines (100 loc) · 4.08 KB
/
Cyclistic 2022 Project.sql
File metadata and controls
112 lines (100 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
--Creating Table
CREATE TABLE IF NOT EXISTS public.bike
(
ride_id character varying COLLATE pg_catalog."default" NOT NULL,
rideable_type character varying COLLATE pg_catalog."default",
started_at timestamp without time zone,
ended_at timestamp without time zone,
start_station_name character varying COLLATE pg_catalog."default",
start_station_id character varying COLLATE pg_catalog."default",
end_station_name character varying COLLATE pg_catalog."default",
end_station_id character varying COLLATE pg_catalog."default",
start_lat numeric,
start_lng numeric,
end_lat numeric,
end_lng numeric,
member_casual character varying COLLATE pg_catalog."default",
CONSTRAINT ride_pkey PRIMARY KEY (ride_id)
)
--Importing datasets
COPY bike
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202201-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202202-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202203-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202204-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202205-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202206-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202207-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202208-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202209-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202210-divvy-tripdata.csv'
--FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202211-divvy-tripdata.csv'
FROM 'F:\Project_DA_Capstone_Project\Cyclistic_2022_Datasets\202212-divvy-tripdata.csv'
DELIMITER ','
CSV Header;
--Cleaning Data
SELECT *
FROM bike
WHERE
(start_lat != end_lat AND start_lng != end_lng)
AND
(ended_at - started_at) > '00:00:00'
AND
start_station_name IS NOT NULL
AND
end_station_name IS NOT NULL;
--Analyze the data
-- Total Number of Trips per month
SELECT TO_CHAR(started_at, 'fmMonth') AS month, member_casual, COUNT(*) AS total_trip
FROM bike_cleaned
--WHERE member_casual = 'casual'
--WHERE member_casual = 'member'
GROUP BY month, member_casual
ORDER BY total_trip DESC
-- Total Number of Trips per Day
SELECT to_char(started_at, 'Day') AS week_day, COUNT(*) AS total_trip
FROM bike_cleaned
--WHERE member_casual = 'casual'
--WHERE member_casual = 'member'
GROUP BY week_day
ORDER BY total_trip DESC
-- Total Trips per Bicycle Type
SELECT rideable_type, member_casual, COUNT(*) AS total_rides
FROM bike_cleaned
GROUP BY rideable_type, member_casual
ORDER BY rideable_type DESC
-- Average Trip Duration per Day
SELECT member_casual, to_char(started_at, 'Day') AS week_day, AVG(ended_at - started_at) AS duration
FROM bike_cleaned
WHERE start_station_name IS NOT NULL AND end_station_name IS NOT NULL
--AND member_casual = 'casual'
--AND member_casual = 'member'
GROUP BY week_day, member_casual
--Average Duration per month
SELECT member_casual, to_char(started_at, 'fmMonth') AS month, AVG(ended_at - started_at) AS avg_duration
FROM bike_cleaned
WHERE start_station_name IS NOT NULL AND end_station_name IS NOT NULL
--AND member_casual = 'casual'
--AND member_casual = 'member'
GROUP BY month, member_casual
-- Average Trip Duration in 1 Year (2022)
SELECT member_casual, AVG(ended_at - started_at) AS duration
FROM bike_cleaned
WHERE start_station_name IS NOT NULL AND end_station_name IS NOT NULL
GROUP BY member_casual
-- Number of Riders per Hour
SELECT EXTRACT(hour FROM started_at) AS started_hour, member_casual, COUNT(*)
FROM bike_cleaned
--WHERE member_casual = 'casual'
--WHERE member_casual = 'member'
GROUP BY started_hour, member_casual
ORDER BY COUNT(*) DESC
-- Top 10 Busiest Stations
SELECT start_station_name, member_casual, COUNT(*) AS total_trip
FROM bike
WHERE start_station_name IS NOT NULL
--AND member_casual = 'casual'
--AND member_casual = 'member'
GROUP BY start_station_name, member_casual
ORDER BY total_trip DESC
LIMIT 10