review databases... we use SQL.... SQL is a declarative language... 4th generation language. you tell the computer *what* to do, not *how* to do it. DDL: structure create table / drop table DML: data manipulation (mostly retrieval...) crud: insert/select/update/delete select ... from ... where... function: select concat(substr(fname,1,1),substr(lname,1,1)) initials from wahtever -- initials is an alias (and column name in output). aggregate (or group by) functions: sum/avg/stddev/min/max, etc. select count(*) from customer; ---count customers select count(*) from customer where state='NY'; --count cumbers in NY select state,count(*) from customer gruop by state; --count customers by state. --count customers by state in NY, NJ, CT. select state,count(*) from customer where state in ('NY','NJ','CT') gruop by state; ---same as above, except we only want states with > 10000 customers. select state,count(*) from customer where state in ('NY','NJ','CT') --this condition is applied on each record. gruop by state having count(*) > 10000; ---this condition is applied on a group ----------------------------------------------------------------- CTEs; essentially named sub-queires.... e.g.: with tri_state_cust as ( select * from customer where state in ('NY','NJ','CT') ), cnts as ( select state,count(*) cnt from tri_state_cust group by state ), results as ( select * from cnts where cnt > 10000 ) select * from results; ---------------------------------------------------------------- case statements.... select a.*, case when state in ('NY','NJ','CT') then 'Y' else 'N' end as tri_state_cust_fl from customer a --------------------------------------------------------------- --what percentage of customers are tri-state customers? select 100.0* sum( case when state in ('NY','NJ','CT') then 1.0 else 0.0 end) / sum(1.0) from customer; -------------------------------------------------------------- -- product(productid,description,) customer(customerid,username,fname,lname,street1,street2,city,state,zip,dob) purchase(purchaseid,purchasetimestamp,customerid,productid,quantity,price) create table custbrackers as with custage as ( select a.*, extract(year from age(dob)) age from customer ) select a.*, case when age < 18 then 'A1' when age < 21 then 'A2' when age < 22 then 'A3' when age < 30 then 'A4' when age < 40 then 'A5' when age < 55 then 'A6' when age < 65 then 'A7' else 'A8' end cust_bracket from custage; ----determine our most profitable customer bracket. with stats as ( select a.cust_bracket, sum( quantity * price ) tot from custbrackers a inner join purchase b on a.customerid=b.customerid group by a.cust_bracket ), mx as ( select max(tot) tot from states ) select a.* from stats a inner join mx b on a.tot=b.tot ---this potentially will return >1 record (if both have the same max). --bracket name (A4, etc.) will tell us which bracket it is. -------------------------------------------------------------- aggregate sum... but no aggregate product... probabilities... or compounded interest.... exp ( sum ( log ( thing ) ) ) = product of thing. -------------------------------------------------------------- JOINs... A inner join B ---both A and B must have records with matching keys. A left outer join B --- all records from A, and matching records from B A full outer join B --- all records from A and B, and matching records are paired up. A cross join B --every record in A matches every record in B. [everything to everything]. ---cross join are almost never used... but once in a blue moon, they're nice to have. ------------------------------------------------------------- HOW TO LOAD DATA INTO DATABASES. psql -U fall2021 -W -h localhost -d fall2021 create table cts ( TDATE date, SYMBOL varchar(30), OPEN decimal(18,8), HIGH decimal(18,8), LOW decimal(18,8), CLOSE decimal(18,8), VOLUME bigint ); create table dividends ( TDATE date, SYMBOL varchar(30), AMOUNT decimal(18,8) ); create table splits ( TDATE date, SYMBOL varchar(30), POST int, PRE int ); gzip -dc cts.dump.csv.gz | psql -U fall2021 -W -h localhost -d fall2021 -c "COPY cts from STDIN DELIMITER ',' NULL ''" gzip -dc dividends.dump.csv.gz | psql -U fall2021 -W -h localhost -d fall2021 -c "COPY dividends from STDIN DELIMITER ',' NULL ''" gzip -dc splits.dump.csv.gz | psql -U fall2021 -W -h localhost -d fall2021 -c "COPY splits from STDIN DELIMITER ',' NULL ''"