product(productid,description,) customer(customerid,username,fname,lname,street1,street2,city,state,zip) purchase(purchaseid,purchasetimestamp,customerid,productid,quantity,price) -- Of customers who purchased productid=42, what percentage also purchased productid=24? with blah as ( select customerid, max(case when productid=42 then 1 else 0 end) ever_purchased_prod42, max(case when productid=24 then 1 else 0 end) ever_purchased_prod24 from purchase group by customerid ) select 100.0*sum(case when ever_purchased_prod24=1 then 1.0 else 0.0 end) / sum(1.0) from blah where ever_purchased_prod42=1 with blah as ( select customerid, max(case when productid=24 then 1 else 0 end) ever_purchased_prod24 from purchase group by customerid having max(case when productid=42 then 1 else 0 end)>0 ) select 100.0*sum(case when ever_purchased_prod24=1 then 1.0 else 0.0 end) / sum(1.0) from blah doorlog(eventid,doorid,tim,username,event) --If doorid=7 is for floor 42, what's the daily occupancy of floor 42 for entire 2017 (give a number for every day in 2017) --What is the daily average (and standard deveation) occupancy of floor 42 for 2017? (single number) ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- SQL; DDL/DML DDL: create table... DML: select (insert/select/update/delete). CTAS: create table as...: -- use results of select to create a table. create table blah as select .... from ... where ....; select <-- columns .... select a.*, extract(year from age(dob)) age --- select individual records. from customer a where dob > cast( '1999-01-01' as date) --condition is applied on each record. ------- aggregate functions --- operate on a "group". by default, entire dataset is a "group". select count(*) from customer; ---counts customers select state,count(*), min(dob) mindob, max(dob) maxdob, avg( extract(year from age(dob)) avg_age, stddev( extract(year from age(dob)) sd_age from customer group by state; ---count customers by state ======================== ...---record per state. NY, 2426234, '1909-01-23', '2021-01-01', 22.44, 4.243 ... -- apply conditions per group... e.g.: select state,count(*), min(dob) mindob, max(dob) maxdob, avg( extract(year from age(dob)) avg_age, stddev( extract(year from age(dob)) sd_age from customer group by state having count(*) > 10000 --only pull states with over 10000 customers. ; -------------------------------------------------------------------------- JOINS inner joins <--- used most often A inner join B... records that has join key in BOTH tables are returned. select * from A inner join B on A.id = B.id select * from A inner join B using(id) ---some databases do not support this. select * from A natural inner join B; --performs a join on all columns with same name. ----(if both A and B have column named "id", then this is equilvalent to ---- a.id = b.id... ----PROBLEMS.... ---- for example, table A(custid, name, username, email, as_of date) -----------------------B(addressid, custid, street, city, state, zip) ---------natural inner join works... on custid. ------------what happens when a developer notices an opportunity to stick ------------an email into B table, to support multiple emails per customer. -----------------------B(addressid, custid, street, city, state, zip, email) ---- natural join now works on: custid, email left outer join <--- used in some situations A left outer join B... records that have a join key in both A and B a returned AND... all non-matching records from "left" table (A). full outer join <--- very rarely needed A full outer join B... records that have a join key in both A and B a returned AND... all non-matching records from "left" table (A). AND... all non-matching records from "right" table (B). cross join <--- almost never needed A cross join B ... matches every record of A to every record of B. (returns A.count * B.count records). ...the worst kind of join you can do is:... A cross join B where a.id=b.id -----avoid using this syntax: TERRIBLE SYNTAX select * froim A, B where a.id=b.id --------------------------------------------------------------------- --------------------------------------------------------------------- drop table ctsibm; create table ctsibm as select * from cts where symbol='IBM' ; select tdate,symbol,close from ctsibm limit 2; create table ibm_daily_prcnt_1 as with prevclose as ( select tdate,symbol,close, lag(close) over (partition by symbol order by tdate) pclose from ctsibm ) select a.*, 100.0*(close - pclose)/pclose as prcnt from prevclose a ; create table daily_prcnt_1 as with prevclose as ( select tdate,symbol,close, lag(close) over (partition by symbol order by tdate) pclose from cts ) select a.*, 100.0*(close - pclose)/(case when pclose=0 then null else pclose end) as prcnt from prevclose a where pclose!=0 ---another way to avoid div by zero. ; ...NOTE, this is not HW5... for hw5, you also need to take dividends and splits into account. order by tdate,symbol,close limit 20;