drop table employee; --statement 1 create table employee ( --statement 2 empid bigint, fname varchar(40), lname varchar(40), email varchar(40), dob date, depid int, --- department id salary decimal(18,8), managerid bigint --- manager id, who is also an employee. ); create table department ( depid int, name varchar(30) ); insert into employee (empid,fname,lname) values (1, 'John','Doe'); --s 3 select * from employee; --s4 update employee set dob='1999-01-01' where empid=1; --s5 delete employee where empid=1; --s6 ---of these, 99% of the complexity comes from "select" statement. --calculate stuff... select age(dob), concat( substr(fname,1,1),substr(lname,1,1)) as initials from employee where empid=12; -- filter stuff select * from employee where dob >=cast('2000-01-01' as date); --- count stuff select count(*) from employee; select export(month from dob),count(*) -- get count of birthdays per month from employee group by 1 ---some databases require this to be: export(month from dob) order by 2 desc --give me highest count first ; ---count ratios... as in... millenials to everyone select 100.0*sum(case when dob>='2000-01-01' then 1.0 else 0.0 end)/sum(1.0) prcnt from employee; ---count stddev/avg;; to find outliers. ---find employees with unusually high salaries (>4sd ) with stats as ( --using CTEs! select avg(salary) a, stddev(salary) sd from employee ) select * from employee cross join stats where salary > a + 4*st --- can also do this with windowing functions: with stats as ( select a.*, avg(salary) over() av, stddev(salary) over() sd from employee a ) select * from stats where salary > av + 4*st ---find employees with unusually high salaries (>4sd ) ---within their department with stats as ( --using CTEs! select depid, avg(salary) av, stddev(salary) sd from employee group by depid --department id ) select * from employee a inner join stats b on a.depid=b.depdi where salary > av + 4*st --- can also do this with windowing functions: with stats as ( select a.*, avg(salary) over(partition by depid) av, stddev(salary) over(partition by depid) sd from employee a ) select * from stats where salary > av + 4*st ----next topic: JOIN --- grab employee name, and department name. select a.fname,a.lname, b.name from employee a inner join department b on a.depid=b.depid --you could also do (but avoid in production) select fname,lname,name from employee natural inner join department --in fact, in production, always specify full list of columns you're using --- never do: select * ... --- select * might break if someone adds columns, removes columns, etc. --- also, it is very diffuclt to profile database code to determine what --- columns are used --- change requests: determine impact. ----- change datatype from int to bigint, or from bigint into varchar, etc. ---one way is to create a script that parses SQL, creates a mapping of ---what columns are used by which project, if they're used in where clause, --- in joins, calculations, etc., --- or pass-through (just in select), etc. --- whenever a change request to "column" arrives, you can instantly know --- which projects might need attention. there are several join types.... inner join <-- bulk of joins left outer join <-- very useful full outer join <-- very few practical uses cross join <-- don't use often right outer join <-- don't use, not supported by some datases; change to left join (by swapping left/right tables). natural inner join <-- dangerous to use in production environment natural left outer join natural right outer join --don't use "where" for joins... only use "where" for filtering. e.g. don't do this: select * from employee a, department b where a.depid=b.depid ---ambiguos way of doing left-join using this notation; "left outer join" syntax is cleaner ---eliminate dups: select distinct fname,lname from employee; select fname,lname from employee group by fname,lname; ---there are subqueries... select * from employee where dob >= ALL (select max(dob) from employee) ---there are better ways of doing these, windowing and CTEs, etc. ---avoid sub-queries. ---set operations: select fname,lname from employee union all select fname,lname from customer ---union, removes dups, union-all keeps dups. ---union all is faster select fname,lname from employee minus --- some database use "except" select fname,lname from customer -- some databases also have "intersect" ---except for union all, most of these will implicitly sort the data (may be slow). --set membership select * from employee where depid in (1,2,3,5,8) select * from employee where depid not in(6,7) ------------- lets take a 10 min break, when we get back: windowing functions. --- HW5: for days with splits AND dividends: apply splits first. --- dividends are per share, so first determine number of shares (e.g. splits first). --hw5hint: drop table daily_prcnt; create table daily_prcnt as with lags as ( select a.tdate,a.symbol,a.close, lag(a.close) over (partition by a.symbol order by a.tdate) pclose, coalesce(CAST(b.pre AS FLOAT), 1) as pre, coalesce(CAST(b.post AS FLOAT), 1) as post, coalesce(CAST(c.amount AS FLOAT), 0) as dividend from cts a left outer join splits b on a.tdate=b.tdate and a.symbol=b.symbol left outer join dividends c on a.tdate=c.tdate and a.symbol=c.symbol ), HW5 as (select a.*, 100.0 *((close * post) / (pclose * pre - dividend) - 1) as prcnt from lags a where pclose>0 and (pclose * pre - dividend)>0 ) select a.tdate, a.symbol, prcnt from hw5 a; --hw6 hints: too much data, so... create a monthly_prcnt table. create table monthly_prcnt as select to_char(tdate,'YYYYMM') month, exp( sum( log( ... )) ) prcnt from daily_prcnt group by 1; ------------------------------------------------------- windowing functions... sum/avg/stddev/min/max row_number(), rank() dense_rank() lead lag last_value first_value --for every employee, find the next youngest employee in employee table... ---try do to this without windowing functions. select ... lag(empid) over (order by dob) ... ---recursive queries... find everyone who reports to bob. with recursive emp(empid) as ( select a.empid, 1 lvl -- find everyone who reports directly to bob. from employee a inner join employee b on a.managerid=b.employeeid where b.fname='bob' union all select a.empid, b.lvl+1 as lvl --find everyone who reports to above (and recursively). from employee a inner join emp b on a.managerid = b.empid ) select * from emp; ----- hw9 hint: export ts=`date +%Y%m%d%H%M%S` export db=fall2021 for t in dividends splits; do echo "select * from $t"|sqlrunner - |gzip -c | gpg -r jj@cocomelon.com -e -a > $t.$ts.csv.gz.enc done