-
Fill Rate
-- SQL to pull SRS fill rate data by week SELECT
b.county_order, b.county, a.site_no,
CASE WHEN h d.vendor_name is null THEN a.vendor_id ELSE d.vendor_name END AS vendor_name,
a.po_date,
CAST(a.site_no as varchar)+'-'+format(a.po_no, '0000000') as po_nbr, c.dept_no, c.dept_name, c.class_name,
CAST a.sku_no as varchar) +' - '+ c.item_name +' - '+ case WHEN d.vendor_name is null THEN a.vendor_id ELSE d.vendor_name END iS tem_vendor,
CASE WHEN d.vendor_name is null THEN a.vendor_id ELSE d.vendor_name end +' - '+ b.county AS vendor_region,
CAST (a.sku_no as varchar) +' - '+ c.item_name AS item_desc,
SUM(a.order_qty) AS order_qty,
SUM(CASE WHEN a.received_qty is null and a.invoice_qty is null THEN a.order_qty WHEN a.received_qty is null and a.invoice_qty IS NOT NULL THEN a.invoice_qty ELSE a.received_qty end) AS actual_received_qty,
SUM(CASE WHEN a.received_qty is null and a.invoice_qty is null THEN a.order_qty WHEN a.received_qty is null and a.invoice_qty IS NOT NULL THEN a.invoice_qty else a.received_qty end * CASE WHEN a.invoice_cost > 0 and a.min_uom = 'CS' THEN a.invoice_cost / a.qty_multiplier WHEN .invoice_cost > 0 and a.min_uom = 'EA' THEN a.invoice_cost ELSE a.unit_cost end) AS ext_receive_cost,
SUM(a.order_qty * case WHEN a.invoice_cost > 0 and a.min_uom = 'CS' THEN a.invoice_cost / a.qty_multiplier WHEN a.invoice_cost > 0 and a.min_uom = 'EA' then a.invoice_cost ELSE a.unit_cost END) AS ext_order_cost,
datediff(ss, a.po_date, max(a.po_closed_date)) / 86400.0 as po_open_duration
FROM
dashboard.imua.v_abc_orders_receive a
JOIN dashboard.dbo.mv_site b on a.site_no = b.site_no
JOIN dashboard.dbo.mv_item c on a.sku_no = c.sku_no
JOIN dashboard.dbo.item f on a.sku_no = f.sku_no
LEFT OUTE JOIN dashboard.dbo.vendor d on a.vendor_id = d.vendor_id
WHERE
--dateadd(day, 1 - datepart(weekday, a.po_closed_date), CAST(a.po_closed_date as date)) = '2023-10-29' -- Update with start of the week date
a.vendor_id in ('ABC-DC')
AN a.order_type = 'REG'
AND a.cancel_reason is null
AND YEAR(a.po_date) = '2023'
GROUP BY
b.county_order, a.po_date, b.county, a.site_no,
CASE WHEN d.vendor_name is null THEN a.vendor_id ELSE d.vendor_name END, a.po_date,
CAST(a.site_no as varchar)+'-'+format(a.po_no, '0000000'), c.dept_no, c.dept_name, c.class_name,
CAST(a.sku_no as varchar) +' - '+ c.item_name +' - '+ case WHEN d.vendor_name IS NULL THEN a.vendor_id ELSE d.vendor_name END,
CASE WHEN d.vendor_name is null THEN a.vendor_id else d.vendor_name END +' - '+ b.county
CAST(a.sku_no as varchar) +' - '+ c.item_name
-
Check for EDI
USE DASHBOARD; --Uses DASHBOARD for whole query
WITH a AS (
(SELECT site_no, vendor_id, invoice_no, invoice_date, cast(site_no as varchar) +'-'+ format(po_no, '0000000') AS po_no, SUM(invoice_cost * invoice_qty) AS merch_cost, max(extract_date) AS extract_date, count(distinct invoice_upc) AS item_count
FROM dashboard.imua.v_abc_orders_receive_stage
GROUP BY site_no, vendor_id, invoice_no, invoice_date, CAST(site_no as varchar) +'-'+ format(po_no, '0000000')) ) --CTE to reduce load time
SELECT
d.vendor_name, a.vendor_id, a.invoice_date, a.invoice_no, a.po_no, a.site_no, e.county, a.merch_cost,
CASE WHEN f.ac_amt IS NULL THEN 0 ELSE f.ac_amt END as disc_amt,
CASE WHEN c.ac_amt IS NULL THEN 0 ELSE c.ac_amt END as tax_amt,
SUM(a.merch_cost + (CASE WHEN f.ac_amt IS NULL THEN 0 ELSE f.ac_amt END) + (CASE WHEN c.ac_amt IS NULL THEN 0 ELSE c.ac_amt END)) AS invoice_total,
a.extract_date from a
LEFT OUTER JOIN dashboard.imua.v_abc_orders_receive_ac c on a.site_no = c.site_no and a.vendor_id = c.vendor_id and a.invoice_no = c.invoice_no and c.ac_code = '$TAX$'
LEFT OUTER JOIN dashboard.dbo.vendor d on a.vendor_id = d.vendor_id
LEFT OUTER JOIN dashboard.dbo.mv_site e on a.site_no = e.site_no
LEFT OUTER JOIN dashboard.imua.v_abc_orders_receive_ac f on a.site_no = f.site_no and a.vendor_id = f.vendor_id and a.invoice_no = f.invoice_no and f.ac_code = '$DISC$'
LEFT OUTER JOIN dashboard.imua.v_abc_orders_receive_ac g on a.site_no = g.site_no and a.vendor_id = g.vendor_id and a.invoice_no = g.invoice_no and g.ac_code = '$C110$'
where
--d.vendor_name = 'HAWAII FOODSERVICE ALLIANCE' and invoice_date = '2024-05-17' a.invoice_no like '160026'
GROUP BY d.vendor_name, a.vendor_id, a.invoice_date, a.invoice_no, a.po_no, a.site_no, e.county, a.merch_cost, f.ac_amt, c.ac_amt, a.extract_date
ORDER BY a.invoice_date, a.site_no, d.vendor_name, a.invoice_no
-
Bins - Upper and Lower
-- Bins created in Step 2
WITH bins AS (
SELECT generate_series(2200, 3050, 50) AS lower,
generate_series(2250, 3100, 50) AS upper),
-- Subset stackoverflow to just tag dropbox (Step 1)
dropbox AS (
SELECT question_count
FROM stackoverflow
WHERE tag='dropbox')
-- Select columns for result
-- What column are you counting to summarize?
SELECT lower, upper, count(question_count)
FROM bins -- Created above
-- Join to dropbox (created above),
-- keeping all rows from the bins table in the join
LEFT JOIN dropbox
-- Compare question_count to lower and upper
ON question_count >= lower
AND question_count < upper
-- Group by lower and upper to count values in each bin
GROUP BY lower, upper
-- Order by lower to put bins in order
ORDER BY lower;
-
Correlated Subqueries
SELECT
m.date,
(SELECT team_long_name
FROM team AS t
WHERE t.team_api_id = m.hometeam_id) AS hometeam,
-- Connect the team to the match table
(SELECT team_long_name
FROM team AS t
WHERE t.team_api_id = m.awayteam_id) AS awayteam,
-- Select home and away goals
home_goal,
away_goal
FROM match AS m;
-
Crosstab
-- Create the correct extension to enable CROSSTAB
CREATE EXTENSION IF NOT EXISTS tablefunc;
SELECT * FROM CROSSTAB($$
SELECT
Gender, Year, Country
FROM Summer_Medals
WHERE
Year IN (2008, 2012)
AND Medal = 'Gold'
AND Event = 'Pole Vault'
ORDER By Gender ASC, Year ASC;
-- Fill in the correct column names for the pivoted table
$$) AS ct (Gender VARCHAR,
"2008" VARCHAR,
"2012" VARCHAR)
ORDER BY Gender ASC;
-
CTE - Case & Rank
-- Set up the home team CTE
WITH home AS (
SELECT m.id, t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
WHEN m.home_goal < m.away_goal THEN 'MU Loss'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.hometeam_id = t.team_api_id),
-- Set up the away team CTE
away AS (
SELECT m.id, t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Loss'
WHEN m.home_goal < m.away_goal THEN 'MU Win'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.awayteam_id = t.team_api_id)
-- Select columns and and rank the matches by goal difference
SELECT DISTINCT
m.date,
home.team_long_name AS home_team,
away.team_long_name AS away_team,
m.home_goal, m.away_goal,
RANK() OVER(ORDER BY ABS(home_goal - away_goal) DESC) as match_rank
-- Join the CTEs onto the match table
FROM match AS m
LEFT JOIN home ON m.id = home.id
LEFT JOIN away ON m.id = away.id
WHERE m.season = '2014/2015'
AND ((home.team_long_name = 'Manchester United' AND home.outcome = 'MU Loss')
OR (away.team_long_name = 'Manchester United' AND away.outcome = 'MU Loss'));
-
CTE & Lag
WITH Athletics_Gold AS (
SELECT DISTINCT
Gender, Year, Event, Country
FROM Summer_Medals
WHERE
Year >= 2000 AND
Discipline = 'Athletics' AND
Event IN ('100M', '10000M') AND
Medal = 'Gold')
SELECT
Gender, Year, Event,
Country AS Champion,
-- Fetch the previous year's champion by gender and event
LAG(country) OVER (PARTITION BY gender, event
ORDER BY Year ASC) AS Last_Champion
FROM Athletics_Gold
ORDER BY Event ASC, Gender ASC, Year ASC;
-
Date Trunc & Count
-- Compute monthly counts of requests created
WITH created AS (
SELECT date_trunc('month', date_created) AS month,
count(*) AS created_count
FROM evanston311
WHERE category='Rodents- Rats'
GROUP BY month),
-- Compute monthly counts of requests completed
completed AS (
SELECT date_trunc('month', date_completed) AS month,
count(*) AS completed_count
FROM evanston311
WHERE category='Rodents- Rats'
GROUP BY month)
-- Join monthly created and completed counts
SELECT created.month,
created_count,
completed_count
FROM created
INNER JOIN completed
ON created.month = completed.month
ORDER BY created.month;
-
Date Trunc, Interval, & Age
SELECT
c.first_name || ' ' || c.last_name AS customer_name,
f.title,
r.rental_date,
-- Extract the day of week date part from the rental_date
EXTRACT(dow FROM r.rental_date) AS dayofweek,
AGE(r.return_date, r.rental_date) AS rental_days,
-- Use DATE_TRUNC to get days from the AGE function
CASE WHEN DATE_TRUNC('day', AGE(r.return_date, r.rental_date)) >
-- Calculate number of d
f.rental_duration * INTERVAL'1' day
THEN TRUE
ELSE FALSE END AS past_due
FROM
film AS f
INNER JOIN inventory AS i
ON f.film_id = i.film_id
INNER JOIN rental AS r
ON i.inventory_id = r.inventory_id
INNER JOIN customer AS c
ON c.customer_id = r.customer_id
WHERE
-- Use an INTERVAL for the upper bound of the rental_date
r.rental_date BETWEEN CAST('2005-05-01' AS DATE)
AND CAST('2005-05-01' AS DATE) + INTERVAL '90 day';
-
Extract Date & DOW
-- Select name of the day of the week the request was created
SELECT to_char(date_created, 'day') AS day,
-- Select avg time between request creation and completion
avg(date_completed - date_created) AS duration
FROM evanston311
-- Group by the name of the day of the week and
-- integer value of day of week the request was created
GROUP BY day, EXTRACT(DOW FROM date_created)
-- Order by integer value of the day of the week
-- the request was created
ORDER BY EXTRACT(DOW FROM date_created);
-
Lower & Upper Date
-- Bins from Step 1
WITH bins AS (
SELECT generate_series('2016-01-01',
'2018-01-01',
'6 months'::interval) AS lower,
generate_series('2016-07-01',
'2018-07-01',
'6 months'::interval) AS upper),
-- Daily counts from Step 2
daily_counts AS (
SELECT day, count(date_created) AS count
FROM (SELECT generate_series('2016-01-01',
'2018-06-30',
'1 day'::interval)::date AS day) AS daily_series
LEFT JOIN evanston311
ON day = date_created::date
GROUP BY day)
-- Select bin bounds
SELECT lower,
upper,
-- Compute median of count for each bin
percentile_disc(.5) WITHIN GROUP (ORDER BY count) AS median
-- Join bins and daily_counts
FROM bins
LEFT JOIN daily_counts
-- Where the day is between the bin bounds
ON day >= lower
AND day < upper
-- Group by bin bounds
GROUP BY lower, upper
ORDER BY lower;
-
Multiple Subqueries
SELECT
m.date,
-- Get the home and away team names
hometeam,
awayteam,
m.home_goal,
m.away_goal
FROM match AS m
-- Join the home subquery to the match table
LEFT JOIN (
SELECT match.id, team.team_long_name AS hometeam
FROM match
LEFT JOIN team
ON match.hometeam_id = team.team_api_id) AS home
ON home.id = m.id
-- Join the away subquery to the match table
LEFT JOIN (
SELECT match.id, team.team_long_name AS awayteam
FROM match
LEFT JOIN team
-- Get the away team ID in the subquery
ON match.awayteam_id = team.team_api_id) AS away
ON away.id = m.id;
-
Pivoting & Ranking
CREATE EXTENSION IF NOT EXISTS tablefunc;
SELECT * FROM CROSSTAB($$
WITH Country_Awards AS (
SELECT
Country,
Year,
COUNT(*) AS Awards
FROM Summer_Medals
WHERE
Country IN ('FRA', 'GBR', 'GER')
AND Year IN (2004, 2008, 2012)
AND Medal = 'Gold'
GROUP BY Country, Year)
SELECT
Country,
Year,
RANK() OVER
(PARTITION BY Year
ORDER BY Awards DESC) :: INTEGER AS rank
FROM Country_Awards
ORDER BY Country ASC, Year ASC;
-- Fill in the correct column names for the pivoted table
$$) AS ct (Country VARCHAR,
"2004" INTEGER,
"2008" INTEGER,
"2012" INTEGER)
Order by Country ASC;
-
Querying the Schema
-- Select the column name, data type and udt name columns
SELECT column_name, data_type, udt_name
FROM INFORMATION_SCHEMA.COLUMNS
-- Filter by the rating column in the film table
WHERE table_name ='film' AND column_name='rating';
-
Roll Up
-- Count the total number of customers, the number of customers for each country, and the number of female and male customers for each country
SELECT country,
gender,
COUNT(*)
FROM customers
GROUP BY ROLLUP (country,gender)
ORDER BY country, gender; -- Order the result by country and gender
-
Sliding Windows - Left
SELECT
date,
home_goal,
away_goal,
-- Create a running total and running average of home goals
SUM(home_goal) OVER(ORDER BY date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total,
AVG(home_goal) OVER(ORDER BY date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_avg
FROM match
WHERE
hometeam_id = 9908
AND season = '2011/2012';
--Opposite date direction
SELECT
-- Select the date, home goal, and away goals
date,
home_goal,
away_goal,
-- Create a running total and running average of home goals
SUM(home_goal) OVER(ORDER BY date DESC
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS running_total,
AVG(home_goal) OVER(ORDER BY date DESC
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS running_avg
FROM match
WHERE
awayteam_id = 9908
AND season = '2011/2012';
-
Split Part
-- Fill in the command below with the name of the temp table
DROP TABLE IF EXISTS recode;
-- Create and name the temporary table
CREATE TEMP TABLE recode AS
-- Write the select query to generate the table
-- with distinct values of category and standardized values
SELECT DISTINCT category,
rtrim(split_part(category, '-', 1)) AS standardized
-- What table are you selecting the above values from?
FROM evanston311;
-- Look at a few values before the next step
SELECT DISTINCT standardized
FROM recode
WHERE standardized LIKE 'Trash%Cart'
OR standardized LIKE 'Snow%Removal%';
-
Creating Temp Table
-- Code from previous step
DROP TABLE IF EXISTS recode;
CREATE TEMP TABLE recode AS
SELECT DISTINCT category,
rtrim(split_part(category, '-', 1)) AS standardized
FROM evanston311;
UPDATE recode SET standardized='Trash Cart'
WHERE standardized LIKE 'Trash%Cart';
UPDATE recode SET standardized='Snow Removal'
WHERE standardized LIKE 'Snow%Removal%';
UPDATE recode SET standardized='UNUSED'
WHERE standardized IN ('THIS REQUEST IS INACTIVE...Trash Cart',
'(DO NOT USE) Water Bill',
'DO NOT USE Trash', 'NO LONGER IN USE');
-- Select the recoded categories and the count of each
SELECT standardized, count(*)
-- From the original table and table with recoded values
FROM evanston311
LEFT JOIN recode
-- What column do they have in common?
ON evanston311.category=recode.category
-- What do you need to group by to count?
GROUP BY standardized
-- Display the most common val values first
ORDER BY count DESC;
-
Substrings
SELECT
-- Extract the characters to the left of the '@'
LEFT(email, POSITION('@' IN email)-1) AS username,
-- Extract the characters to the right of the '@'
SUBSTRING(email FROM POSITION('@' IN email)+1 FOR LENGTH(email)) AS domain
FROM customer;
-
Substring, Position, & Padding
SELECT
-- Extract the characters to the left of the '@'
LEFT(email, POSITION('@' IN email)-1) AS username,
-- Extract the characters to the right of the '@'
SUBSTRING(email FROM POSITION('@' IN email)+1 FOR LENGTH(email)) AS domain
FROM customer;
-- Concatenate the first_name and last_name w/ right and left pad
SELECT
RPAD(first_name, LENGTH(first_name)+1)
|| LPAD(last_name, LENGTH(last_name)+2, ' <')
|| RPAD(email, LENGTH(email)+1, '>') AS full_email
FROM customer;
--Only RPAD
SELECT
RPAD(first_name, LENGTH(first_name)+1)
|| RPAD(last_name, LENGTH(last_name)+2, ' <')
|| RPAD(email, LENGTH(email)+1, '>') AS full_email
FROM customer;
-
Concat & Trim
Des-- Concatenate the uppercase category name and film title
SELECT
CONCAT(UPPER(c.name), ': ', title) AS film_category,
-- Truncate the description remove trailing whitespace
TRIM(LEFT(description, 50)) AS film_desc
FROM
film AS f
INNER JOIN film_category AS fc
ON f.film_id = fc.film_id
INNER JOIN category AS c
ON fc.category_id = c.category_id;
-
TS Vector & TS Query
-- Select the title and description
SELECT title, description
FROM film
-- Convert the title to a tsvector and match it against the tsquery
WHERE to_tsvector(title) @@ to_tsquery('elf');
-
CUBE
SELECT a.nationality,
a.gender,
AVG(r.rating) AS avg_rating,
COUNT(r.rating) AS n_rating,
COUNT(*) AS n_rentals,
COUNT(DISTINCT a.actor_id) AS n_actors
FROM renting AS r
LEFT JOIN actsin AS ai
ON ai.movie_id = r.movie_id
LEFT JOIN actors AS a
ON ai.actor_id = a.actor_id
WHERE r.movie_id IN (
SELECT movie_id
FROM renting
GROUP BY movie_id
HAVING COUNT(rating) >= 4)
AND r.date_renting >= '2018-04-01'
GROUP BY CUBE (a.nationality, a.gender); -- Provide results for all aggregation levels represented in a pivot table
-
Average Population of each Continent
SELECT country.continent, FLOOR(AVG(city.population)
FROM country, city
WHERE country.code = city.countrycode
GROUP BY country.continent
-
Top Earners
SELECT months * salary AS earnings, count(*)
FROM employee
GROUP BY earnings
ORDER BY earnings DESC LIMIT 1;
-
African Cities
SELECT city.name
FROM city
INNER JOIN country ON city.countrycode = country.code
WHERE country.continent = ‘Africa’