Part 1
Below is the R script with the SQL queries for the initial analysis and summary statistics.
library(tidyverse)
library(DBI)
library(odbc)
con <- dbConnect(odbc::odbc(),
Driver = "SQL Server",
Server = "DESKTOP-P6DUC2Q\\SQLEXPRESS",
Database = "AdventureWorks2016",
Trusted_Connection = "TRUE")
product.sales <- dbGetQuery(con, "Select soh.OrderDate, p.Name As ProductName, sod.OrderQty as unitVolume, sod.LineTotal as Revenue
From Sales.SalesOrderHeader as soh
Join Sales.SalesOrderDetail as sod on sod.SalesOrderID = soh.SalesOrderID
Join Production.Product as p on p.ProductID = sod.ProductID
Where Year(soh.orderDate) = 2013")
products.by.volume <- aggregate(UnitVolume~ProductName, data = product.sales, FUN = sum)
product.sales <- dbGetQuery(con, "SELECT p.Name As ProductName,
SUM(sod.OrderQty) as UnitVolume,
SUM(sod.LineTotal) as Revenue
FROM
Sales.SalesOrderHeader as soh
JOIN
Sales.SalesOrderDetail as sod
ON
sod.SalesOrderID = soh.SalesOrderID
JOIN
Production.Product as p
ON
p.ProductID = sod.ProductID
WHERE YEAR(OrderDate) = 2013
GROUP BY p.Name
Order By Revenue DESC;")
view(product.sales)
products.by.volume <- aggregate(UnitVolume~ProductName, data=product.sales, FUN=sum)
view(products.by.volume)
products.by.volume.final <- products.by.volume[order(-products.by.volume$UnitVolume),][1:10,]
view(products.by.volume.final)
products.by.revenue <- aggregate(Revenue~ProductName, data=product.sales, FUN=sum)