...database class... stuff we did before: SQL: language to talk to databases DDL and DML DDL: create table... (you really only need to know 2 statements). drop table book; create table book ( bookid bigint, title varchar(50), author varchar(20), isbn varchar(30) ); --not that important: alter table. DML: CRUD: create-retrieve-update-delete. insert insert into book(bookid,title,author,isbn) values (1,'HHGG','DNA','42'); insert into book(bookid,title,author,isbn) values (2,'SLATFATF','DNA','43'); --not that important; you shouldn't regularly use this. -- because it's slow, -- and inserts 1 record at a time, and flushes to WAL (write ahead log). select select * from book; --get all books select * from book where bookid=1; --get bookid 1 select * from book where title='HHGG'; --get book titled HHGG select author,isbn from book where title='HHGG'; --get author and isbn of HHGG --there are recursive queries. -- this is the most important statement in all of SQL. -- this is the one to study and practice. update -- update title of 1 book (e.g. where bookid=1). update book set title='Hitchickers Guide to the Galaxy' where bookid=1; update book set title='HHGG'; --without where, this will update ALL books. -- not that important, because... you lose the previous value. -- suppose after changing title to 'Hitchickers Guide to the Galaxy', -- someone was searching for title HHGG... would they find it? delete delete from book where bookid=1; --remove record with bookid=1 delete from book; --without where, this will delete ALL books. --not that important.... because you lose the record. ========================= SUMMARY important statements: create table drop table select (by far the most important). ---------------------------------------------------------- last week we did functions, aggregates, case statements... --functions: apply to each record. select a.*, substr(title,1,1) first_letter_of_title from book a; create table student ( sid bigint, fname varchar(40), lname varchar(40), dob date, street1 varchar(40), street2 varchar(40), city varchar(40), state varchar(2), zip varchar(10) ); select a.*, extract(year from age(dob)) age_in_years from student a --aggregates select count(*) from student; --count students. select count(*) from student where dob > '2001-01-01'; --count students who were born after ...date. --count students by zip code. select zip, count(*) cnt from student group by zip --count students by their initials... select concat(substr(fname,1,1),substr(lname,1,1)) initials, count(*) from student group by concat(substr(fname,1,1),substr(lname,1,1)); --or in some modern databases: select concat(substr(fname,1,1),substr(lname,1,1)) initials, count(*) from student group by 1; ---use CTEs: common table expressions with initials as ( select a.*,concat(substr(fname,1,1),substr(lname,1,1)) initial from student a ) select initial,count(*) from initials group by initial --- case statements .. (relates to HW). --create a 'north-east' identifier on each student record. select a.*, case when state in ('CT','DE','ME','MD','MA','NH','MJ','NY','PA','RI','VT') then 'NORTH-EAST' when state in ('AL','AR','FL','GA','KY','LA','MS','NC','SC','TN','VA','WV') then 'SOUTH-EAST' else 'OTHER' end region from student --- to save this student with region.... create table student2 as select a.*, case when state in ('CT','DE','ME','MD','MA','NH','MJ','NY','PA','RI','VT') then 'NORTH-EAST' when state in ('AL','AR','FL','GA','KY','LA','MS','NC','SC','TN','VA','WV') then 'SOUTH-EAST' else 'OTHER' end region from student; ---count students by region.... with r as ( select a.*, case when state in ('CT','DE','ME','MD','MA','NH','MJ','NY','PA','RI','VT') then 'NORTH-EAST' when state in ('AL','AR','FL','GA','KY','LA','MS','NC','SC','TN','VA','WV') then 'SOUTH-EAST' else 'OTHER' end region from student ) select region,count(*) from r group by region; -------operations on tables.... JOIN ---the most important operation create table book ( bookid bigint, title varchar(50), author varchar(20), isbn varchar(30) ); ---the book object, tells us that: bookid is related to title. bookid is related to author. bookid is related to isbn. title is related to author. title is related to isbn. author is related to isbn. -- for example... knowing title, we can find author. select author from book where title='whatever'; --- knowing author we can find title. select title from book where author='DNA'; --some relationships are ``special''... to primary key. --using primary key, we can get to ANY column. --using primary key, we can get to any single record. select title from book where bookid=1241245; ---will return 1 record ---(if bookid is primary key). book(bookid,title,author,isbn) --everything is inter-related. topic(isbn,topic) --topic of the book; isbn is related to topic. ---we relate these using joins... book inner join topic using(isbn) ... select * --output will have: bookid,title,author,isbn,topic from book a inner join topic b using (isbn) ---this is the same as: select * --output will have: bookid,title,author,isbn,topic from book a inner join topic b on a.isbn=b.isbn --would also work, but don't use this select * from book natural inner join topic; --natural implies a.isbn=b.isbn... all same-named-columns will be matched. ----never use this notation: select * from book a, topic b where a.isbn=b.isbn ---this is horrible (do not use). ---reason to avoid: this combines join condition with where condition. ---count distinct authors by topic... ---note: book does not have topic, and topic does not have author. with book_topic as ( select a.author, b.topic from book a inner join topic b on a.isbn=b.isbn ) select topic,count(distinct author) cnt from book_topic group by topic ---also (simpler) select b.topic, count(distinct a.author) cnt from book a inner join topic b on a.isbn=b.isbn group by b.topic --count books by topic: the topic object has isbn ---- (counts entries by topic). select topic,count(*) cnt from topic group by topic ---are there topics for non-existent books --- (a record in topic, but no matching record in book). inner join... returns matches when BOTH tables have matching records. left outer join... returns all records from ``left'' table, AND matches to the right table. right outer join... return all records from ``right'' table, and matches to the left table. e.g.: select * from TBL1 a left outer join TBL2 b on a.key=b.key --- notice that TBL1 is on the "left" --- TBL2 is on the "right"... ---so above will return all records for TBL1, and --- only matching records for TBL2. ---also: full outer join... returns all records (from left AND right), with matches. ---returns all matched records, then ---all the left-table records that didn't match, then ---all the right-table records that didn't match. ---are there topics for non-existent books --- (a record in topic, but no matching record in book). select a.* --get only topic information. from topic a left outer join book b on a.isbn=b.isbn where b.isbn is null --there's no matching book ---notice that b.isbn is in both join-condition AND where condition. --avoid subqeury "not in"...reason: optimizer is stupid. ---(longer reason: the optimizer turns "in" and "not in" into joins...but, ---that often removes a lot of other optimizations you could do manually). ---same as: (but it's cleaner with variables). select topic.* --get only topic information. from topic left outer join book on topic.isbn=book.isbn where book.isbn is null --there's no matching book ---limit command: --return the first 100 books (lowest bookid). select * from book order by bookid limit 100; you have 2 products, both have been purchased exactly 10000 times. limit 1... will return... which one? ------------------------------------------------ multiple tables. product(productid,description,) customer(customerid,username,fname,lname,street1,street2,city,state,zip) purchase(purchaseid,purchasetimestamp,customerid,productid,quantity,price) ...we want to relate customers to products...using a purchase. select * --all columns from all tables from customer a inner join purchase b on a.customerid=b.customerid inner join product c on b.productid=c.productid -- retrieve product description of all customers named 'Bob' select c.description from customer a inner join purchase b on a.customerid=b.customerid inner join product c on b.productid=c.productid where a.fname='Bob'; ----- SUMMARY ---------------------------------- joins: inner join left outer join ---do not use: right outer join, use left outer join instead. ---reason: some databases do not support right outer join. full outer join ---not a common join type to use. cross join ---matches everything to everything; ---like an inner join without a condition. ---cross joins are always "bad" performance wise ---table with 1000 records cross joined with 10000 records, --- will result in 10000000 output records. ------------------------------------------------------------ -- not common sql; subqueries with conditions find the oldest student select * from student where dob = (select min(dob) min_dob from student) ---find the state with maximum students. ---2-step thing, count students by state, ---then pick the state with the highest count. select state,count(*) cnt from student group by state having count(*) >= ALL ( select count(*) cnt from student group by state ) --there are better ways of doing this (we'll take a look at those later in semester). --we could do the above in multiple steps... using CTEs: with state_cnts as ( select state,count(*) cnt from student group by state ), max_cnt as ( select max(cnt) cnt from state_cnts ) select a.* from state_cnts a inner join max_cnt a on a.cnt=b.cnt; ---slightly uglier (with cross join). with state_cnts as ( select state,count(*) cnt from student group by state ), max_cnt as ( select max(cnt) mx from state_cnts ) select a.* from state_cnts cross join max_cnt where cnt=mx ----create tables... when creating a large table... many columns may be "null" by design.