BSky Format Output

Open Dataset: METD1_2024_Final_report_Arizona_new_cleaned_Sheet1

```{r}
BSkyloadDataset(fullpathfilename='D:/MPA Semester 1/Analytic methods/Final project/presentation/dataset/METD1_2024 - Final report Arizona new cleaned.xlsx', filetype='XLSX', worksheetName="Sheet1", replace_ds=TRUE, load.missing=FALSE, csvHeader=TRUE, character.to.factor=TRUE, isBasketData=FALSE, trimSPSStrailing=FALSE, sepChar=',', deciChar='.', encoding="NULL", datasetName='METD1_2024_Final_report_Arizona_new_cleaned_Sheet1')
BSkyLoadRefresh("METD1_2024_Final_report_Arizona_new_cleaned_Sheet1")
```

Successfully opened using:
[1] "readxl::read_excel(path='D:/MPA Semester 1/Analytic methods/Final project/presentation/dataset/METD1_2024 - Final report Arizona new cleaned.xlsx',sheet='Sheet1', col_names=TRUE)"

Table, Basic

```{r}

library(arsenal)
library(lubridate)
library(dplyr)

# defining custom statistic functions
trimmean_func <- function(x, weights=rep(1,length(x)),...) {
  meanest <- DescTools::MeanCI(x, trim=5/100, na.rm=TRUE, conf.level=0.95)
  as.tbstat(meanest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct1_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.01, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct5_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.05, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct10_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.1, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct25_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.25, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct33_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=1/3, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct50_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.5, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct66_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=2/3, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct75_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.75, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct90_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.9, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct95_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.95, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

pct99_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=.99, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

custquant_func <- function(x, weights=rep(1,length(x)),...) {
  quantest <- DescTools::QuantileCI(x, prob=20/100, na.rm=TRUE, conf.level=0.95)
  as.tbstat(quantest, sep=" ", parens=c("(", ")"), sep2=", ")
}

cv_func <- function(x, weights=rep(1,length(x)),...) {
  sd(x, na.rm=TRUE)/mean(x, na.rm=TRUE)
}

skewness_func <- function(x, weights=rep(1,length(x)),...) {
	DescTools::Skew(x, na.rm=TRUE)
}

kurtosis_func <- function(x, weights=rep(1,length(x)),...) {
	DescTools::Kurt(x, na.rm=TRUE)
}

# changing all date variables to Date class for tableby
METD1_2024_Final_report_Arizona_new_cleaned_Sheet1.1 <- METD1_2024_Final_report_Arizona_new_cleaned_Sheet1 %>%
	mutate_if(is.timepoint,as.Date)

tab1 <- tableby( ~ sex+race+uhrswork+inctot, data=METD1_2024_Final_report_Arizona_new_cleaned_Sheet1.1, test=FALSE, total=FALSE,  
	numeric.stats=c('N','mean','min','max','sd'),
	cat.stats=c('Nmiss','countpct'),
	ordered.stats=c('Nmiss','countpct'),
	date.stats=c('Nmiss','median','range'),
	numeric.test="anova", cat.test="chisq", ordered.test="trend", 
	chisq.correct=FALSE, digits=3, digits.pct=1, digits.p=3, simulate.p.value=FALSE, B=2000, 
	conf.level=0.95, numeric.simplify=FALSE, cat.simplify=FALSE, ordered.simplify=FALSE, 
	date.simplify=FALSE)

tab1.final <- as.data.frame(summary(tab1, text=TRUE,   pfootnote=FALSE,
	stats.labels=list(trimmean_func="5% Trimmed Mean (CI)", pct1_func="1st %-ile (CI)", pct5_func="5th %-ile (CI)", 
	pct10_func="10th %-ile (CI)", pct25_func="25th %-ile (CI)", pct33_func="1st Tertile (CI)", pct50_func="Median (CI)", pct66_func="2nd Tertile (CI)", 
	pct75_func="75th %-ile (CI)", pct90_func="90th %-ile (CI)", pct95_func="95th %-ile (CI)", pct99_func="99th %-ile (CI)", custquant_func="20 %-ile (CI)", 
	cv_func="CV", skewness_func="Skewness", kurtosis_func="Kurtosis")))

BSkyFormat(tab1.final, decimalDigitsRounding=-1, singleTableOutputHeader="Variable Summaries ")
```

Attaching package: 'arsenal'
The following object is masked from 'package:lubridate':
    is.Date

Variable Summaries
	Overall (N=10876)
sex
- female	5082 (46.7%)
- male	5794 (53.3%)
race
- american indian or alaska native	236 (2.2%)
- black/african american	536 (4.9%)
- chinese	85 (0.8%)
- japanese	24 (0.2%)
- multiracial	2074 (19.1%)
- other asian or pacific islander	376 (3.5%)
- other race, nec	1269 (11.7%)
- white	6276 (57.7%)
uhrswork
- N	10876
- Mean	39.072
- Min	1.000
- Max	99.000
- SD	11.643
inctot
- N	10876
- Mean	58272.646
- Min	4.000
- Max	883000.000
- SD	71376.753

Bar Chart

```{r}

	## [Bar Chart (with means)]
	require(ggplot2);
	require(ggthemes);
	require(Rmisc);
	temp <- METD1_2024_Final_report_Arizona_new_cleaned_Sheet1             
temp <-Rmisc::summarySE( METD1_2024_Final_report_Arizona_new_cleaned_Sheet1, measurevar = "inctot", groupvars = c("race..abb","sex"),  na.rm = TRUE, .drop = TRUE)
	pd <- position_dodge(0.9)
	temp  %>% ggplot( aes(x=race..abb,y=inctot,fill=sex)) +
	geom_bar( position="dodge",alpha=1,stat="identity") +                  labs(x="race..abb",y="inctot",fill="sex",title= "Bar Chart (with means) for X axis: race..abb,Y axis: inctot, Fill: sex") +
	xlab("Race") +
	ylab("Personal Income") +
	ggtitle("The impact of gender and race on personal income") + 
	theme_grey() + 
	theme(text=element_text(family="sans",
	face="plain",
	color="#000000",  size=12,
	hjust=0.5, vjust=0.5),
	plot.title=element_text(hjust=.5, size=18))
```

Loading required package: ggthemes
Loading required package: Rmisc
Loading required package: lattice
Loading required package: plyr
------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
------------------------------------------------------------------------------
Attaching package: 'plyr'
The following object is masked from 'package:purrr':
    compact
The following objects are masked from 'package:dplyr':
    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize

Linear Regression

```{r}

require(magrittr)
require(equatiomatic)
require(textutils)
#LinearRegModel1 = METD1_2024_Final_report_Arizona_new_cleaned_Sheet1 %>%
                        #  lm(formula = inctot~0+sex+race+uhrswork, data=.,
                        # na.action=na.exclude)   
   
LinearRegModel1 = lm(formula = inctot~sex+race+uhrswork, data=METD1_2024_Final_report_Arizona_new_cleaned_Sheet1,
     na.action=na.exclude)

#Display theoretical model equation and coefficients

#Display theoretical model
LinearRegModel1 %>%
    equatiomatic::extract_eq(raw_tex = FALSE,
        wrap = TRUE, intercept = "alpha", ital_vars = FALSE) %>%
        BSkyFormat()       

#Display coefficients
LinearRegModel1 %>%
    equatiomatic::extract_eq(use_coefs = TRUE,
    wrap = TRUE,  ital_vars = FALSE, coef_digits = BSkyGetDecimalDigitSetting()) %>%
       BSkyFormat()

LinearRegModel1 %>%
    BSkyFormat()      

#Displaying the Anova table
AnovaRes = LinearRegModel1 %>%
            anova() %T>%
			  BSkyFormat(singleTableOutputHeader = "Anova table")

#Displaying sum of squares table
df = as.data.frame(AnovaRes)
totalrows = nrow(df)
regSumOfSquares = sum(df[1:totalrows - 1, 2])
residualSumOfSquares = df[totalrows, 2]
totalSumOfSquares = regSumOfSquares + residualSumOfSquares

matrix(c(regSumOfSquares, residualSumOfSquares, 
        totalSumOfSquares), nrow = 3, ncol = 1, dimnames = list(c("Sum of squares of regression", 
        "Sum of squares of residuals", "Total sum of squares"), 
        c("Values"))) %>%
            BSkyFormat(singleTableOutputHeader = "Sum of squares table")
 
# Model Plotting

#Adding attributes to support scoring
#We don't add dependent and independent variables as this is handled by our functions
attr(.GlobalEnv$LinearRegModel1,"classDepVar")= class(METD1_2024_Final_report_Arizona_new_cleaned_Sheet1[, c("inctot")])
attr(.GlobalEnv$LinearRegModel1,"depVarSample")= sample(METD1_2024_Final_report_Arizona_new_cleaned_Sheet1[, c("inctot")], size = 2, replace = TRUE)
#remove(LinearRegModel1)
```

\begin{matrix} inctot & = α + β_{1} ({sex}_{male}) + β_{2} ({race}_{b l a c k / a f r i c a n a m e r i c a n}) + β_{3} ({race}_{chinese}) + β_{4} ({race}_{japanese}) + β_{5} ({race}_{multiracial}) + β_{6} ({race}_{o t h e r a s i a n o r p a c i f i c i s l a n d e r}) + β_{7} ({race}_{o t h e r, n e c}) + β_{8} ({race}_{white}) + β_{9} (uhrswork) + ϵ \end{matrix}

$\begin{aligned} \operatorname{inctot} &= \alpha + \beta_{1}(\operatorname{sex}_{\operatorname{male}}) + \beta_{2}(\operatorname{race}_{\operatorname{black/african\ american}}) + \beta_{3}(\operatorname{race}_{\operatorname{chinese}})\ + \\ &\quad \beta_{4}(\operatorname{race}_{\operatorname{japanese}}) + \beta_{5}(\operatorname{race}_{\operatorname{multiracial}}) + \beta_{6}(\operatorname{race}_{\operatorname{other\ asian\ or\ pacific\ islander}}) + \beta_{7}(\operatorname{race}_{\operatorname{other\ ,\ nec}})\ + \\ &\quad \beta_{8}(\operatorname{race}_{\operatorname{white}}) + \beta_{9}(\operatorname{uhrswork}) + \epsilon \end{aligned}$

\begin{matrix} ˆ i n c t o t & = - 29410.8853 + 12560.6976 ({sex}_{male}) + 2118.4014 ({race}_{b l a c k / a f r i c a n a m e r i c a n}) + 26874.5039 ({race}_{chinese}) + 2129.5573 ({race}_{japanese}) - 1104.4504 ({race}_{multiracial}) + 24763.2397 ({race}_{o t h e r a s i a n o r p a c i f i c i s l a n d e r}) - 4497.6828 ({race}_{o t h e r, n e c}) + 23684.099 ({race}_{white}) + 1711.8633 (uhrswork) \end{matrix}

$\begin{aligned} \operatorname{\widehat{inctot}} &= -29410.8853 + 12560.6976(\operatorname{sex}_{\operatorname{male}}) + 2118.4014(\operatorname{race}_{\operatorname{black/african\ american}}) + 26874.5039(\operatorname{race}_{\operatorname{chinese}})\ + \\ &\quad 2129.5573(\operatorname{race}_{\operatorname{japanese}}) - 1104.4504(\operatorname{race}_{\operatorname{multiracial}}) + 24763.2397(\operatorname{race}_{\operatorname{other\ asian\ or\ pacific\ islander}}) - 4497.6828(\operatorname{race}_{\operatorname{other\ ,\ nec}})\ + \\ &\quad 23684.099(\operatorname{race}_{\operatorname{white}}) + 1711.8633(\operatorname{uhrswork}) \end{aligned}$

Model: lm(formula = inctot ~ sex + race + uhrswork, data = METD1_2024_Final_report_Arizona_new_cleaned_Sheet1, na.action = na.exclude)
LM Summary
Residual Std. Error	df	R-squared	Adjusted R-squared	F-statistic	numdf	dendf	p-value
66865.9300	10866	0.1231	0.1224	169.5289	9	10866	< .001***
Note:
Signif. codes: 0 ' * ' 0.001 ' ' 0.01 ' * ' 0.05 '.' 0.1 ' ' 1

Residuals
Min	1Q	Median	3Q	Max
-143583.0000	-30308.4400	-12126.6600	10133.6200	790572.9000

Coefficients
	Estimate	Std. Error	t value	Pr(>\|t\|)	2.5 %	97.5 %
(Intercept)	-29410.8900	4856.4350	-6.0561	< .001***	-38930.3800	-19891.3900
sexmale	12560.7000	1297.2020	9.6829	< .001***	10017.9500	15103.4500
raceblack/african american	2118.4010	5223.7590	0.4055	0.6851	-8121.1190	12357.9200
racechinese	26874.5000	8460.0350	3.1766	0.0015 **	10291.2900	43457.7100
racejapanese	2129.5570	14326.2200	0.1486	0.8818	-25952.4400	30211.5500
racemultiracial	-1104.4500	4594.4690	-0.2404	0.8100	-10110.4500	7901.5460
raceother asian or pacific islander	24763.2400	5553.2030	4.4593	< .001***	13877.9500	35648.5300
raceother race, nec	-4497.6830	4740.7420	-0.9487	0.3428	-13790.4000	4795.0370
racewhite	23684.1000	4434.0500	5.3414	< .001***	14992.5500	32375.6500
uhrswork	1711.8630	55.5671	30.8071	< .001***	1602.9420	1820.7850
Note:
Signif. codes: 0 ' * ' 0.001 ' ' 0.01 ' * ' 0.05 '.' 0.1 ' ' 1

Anova table
	Df	Sum Sq	Mean Sq	F value	Pr(>F)
sex	1	815345666760.0000	815345666760.0000	182.3610	< .001***
race	7	1763028594059.0000	251861227723.0000	56.3315	< .001***
uhrswork	1	4243381182725.0000	4243381182725.0000	949.0787	< .001***
Residuals	10866	48582463786556.0000	4471053174.0000	NA	NA
Note:
Signif. codes: 0 ' * ' 0.001 ' ' 0.01 ' * ' 0.05 '.' 0.1 ' ' 1

Sum of squares table
	Values
Sum of squares of regression	6821755443544.0000
Sum of squares of residuals	48582463786556.0000
Total sum of squares	55404219230100.0000