product(productid,description,)
customer(customerid,username,fname,lname,street1,street2,city,state,zip)
purchase(purchaseid,purchasetimestamp,customerid,productid,quantity,price)

-- Of customers who purchased productid=42, what percentage also purchased productid=24?

with blah as (
 select customerid, 
   max(case when productid=42 then 1 else 0 end) ever_purchased_prod42,
   max(case when productid=24 then 1 else 0 end) ever_purchased_prod24
 from purchase
 group by customerid
)
select 100.0*sum(case when ever_purchased_prod24=1 then 1.0 else 0.0 end) / sum(1.0)
from blah
where ever_purchased_prod42=1


with blah as (
 select customerid, 
   max(case when productid=24 then 1 else 0 end) ever_purchased_prod24
 from purchase
 group by customerid
 having max(case when productid=42 then 1 else 0 end)>0
)
select 100.0*sum(case when ever_purchased_prod24=1 then 1.0 else 0.0 end) / sum(1.0)
from blah


doorlog(eventid,doorid,tim,username,event)

--If doorid=7 is for floor 42, what's the daily occupancy of floor 42 for entire 2017 (give a number for every day in 2017)

--What is the daily average (and standard deveation) occupancy of floor 42 for 2017? (single number)

-------------------------------------------------------------------------------


-------------------------------------------------------------------------------

SQL; DDL/DML

DDL: create table...

DML: select (insert/select/update/delete).

CTAS: create table as...: 

    -- use results of select to create a table.
    create table blah as 
      select ....
       from ...
       where ....;

select <-- columns .... 

select a.*,
    extract(year from age(dob)) age  --- select individual records.
from customer a
where dob > cast( '1999-01-01' as date)   --condition is applied on each record.


-------
aggregate functions --- operate on a "group". by default, entire dataset is a "group".


select count(*) 
from customer;  ---counts customers

select state,count(*), min(dob) mindob, max(dob) maxdob,
   avg( extract(year from age(dob)) avg_age,
   stddev( extract(year from age(dob)) sd_age
from customer
group by state;   ---count customers by state
========================
...---record per state.
NY, 2426234, '1909-01-23', '2021-01-01', 22.44, 4.243 
...


-- apply conditions per group... e.g.:

select state,count(*), min(dob) mindob, max(dob) maxdob,
   avg( extract(year from age(dob)) avg_age,
   stddev( extract(year from age(dob)) sd_age
from customer
group by state
having count(*) > 10000 --only pull states with over 10000 customers.
;

--------------------------------------------------------------------------
JOINS


inner joins   <--- used most often

   A inner join B... records that has join key in BOTH tables are returned.


   select *
   from A inner join B 
       on A.id = B.id

   select *
   from A inner join B 
       using(id)        ---some databases do not support this.

   select * 
   from A natural inner join B;  --performs a join on all columns with same name.
    ----(if both A and B have column named "id", then this is equilvalent to 
    ---- a.id = b.id...
    ----PROBLEMS.... 
    ---- for example, table A(custid, name, username, email, as_of date)
    -----------------------B(addressid, custid, street, city, state, zip)
    ---------natural inner join works... on custid.
    ------------what happens when a developer notices an opportunity to stick 
    ------------an email into B table, to support multiple emails per customer.
    -----------------------B(addressid, custid, street, city, state, zip, email)
    ---- natural join now works on: custid, email


left outer join   <--- used in some situations

  A left outer join B... records that have a join key in both A and B a returned
                         AND... all non-matching records from "left" table (A).

full outer join   <--- very rarely needed

  A full outer join B... records that have a join key in both A and B a returned
                         AND... all non-matching records from "left" table (A).
                         AND... all non-matching records from "right" table (B).

cross join      <--- almost never needed

  A cross join B  ...   matches every record of A to every record of B. 
                        (returns A.count * B.count records).

  ...the worst kind of join you can do is:...
  A cross join B
  where a.id=b.id    

-----avoid using this syntax:  TERRIBLE SYNTAX
  select * 
  froim A, B
  where a.id=b.id


---------------------------------------------------------------------
---------------------------------------------------------------------

drop table ctsibm;
create table ctsibm as 
  select * 
  from cts 
  where symbol='IBM'
;

select tdate,symbol,close from ctsibm limit 2;

create table ibm_daily_prcnt_1 as 
 with prevclose as (
  select tdate,symbol,close,
   lag(close) over (partition by symbol order by tdate) pclose
  from ctsibm 
 )
 select a.*, 
   100.0*(close - pclose)/pclose as prcnt
 from prevclose a
;


create table daily_prcnt_1 as 
 with prevclose as (
  select tdate,symbol,close,
   lag(close) over (partition by symbol order by tdate) pclose
  from cts 
 )
 select a.*, 
   100.0*(close - pclose)/(case when pclose=0 then null else pclose end) as prcnt
 from prevclose a
 where pclose!=0   ---another way to avoid div by zero.
;

...NOTE, this is not HW5...  

for hw5, you also need to take dividends and splits into account.


 order by tdate,symbol,close
limit 20;