---
title: "007_AVTanalysis_eR_v3_3_20"
output:
  html_document: default
  pdf_document: default
---
```{r setup, include=TRUE}
knitr::opts_chunk$set(echo = TRUE)
```


```{r Load Libraries & Data}

#load Data viewer tab-separated sample report; set . and NA as missing value marker
#replace argument after "read.table" with your txt file (or xls if generated from Dataviewer)                                    
dat007AVT <- read.table("C:\\Users\\katie\\OneDrive\\Desktop\\10_17_19_AVT_SentenceDur_Extended.xls", header = T, sep = "\t", na.strings = c(".", "NA"))
# dat007AVT <- read.table("Z:\\LCAN_Internal\\LCAN_007_LangPrediction_QRP\\Analyses\\ET_Data\\Data\\10_17_19_AVT_SentenceDur_Extended.xls", header = T, sep = "\t", na.strings = c(".", "NA"))



#load required packages

library("Matrix")
library("plyr")
library("tidyr")
library("lme4")
library("lmerTest")
library("ggplot2")
library("eyetrackingR")
library("dplyr")
```

##Create and clean required columns
```{r Subject & IA column relabeling}
#rename column "RECORDING_SESSION_LABEL" to "Subject"
names(dat007AVT)[names(dat007AVT) == "RECORDING_SESSION_LABEL"] <- "Subject"

#rename action_related_IA to be consistently labeled IA #3
dat007AVT$LEFT_INTEREST_AREA_ID[dat007AVT$LEFT_INTEREST_AREA_LABEL %in% c("DIS_ACTIONRELATED_IA")] <- 3
dat007AVT$RIGHT_INTEREST_AREA_ID[dat007AVT$RIGHT_INTEREST_AREA_LABEL %in% c("DIS_ACTIONRELATED_IA")] <- 3

```

##MARK SAMPLES WHERE TRACKLOSS OCCURS, creating new TrackLoss column
Trackloss defined as missing values for right gaze x & y and left gaze x & y. Used is.na function bc previously set to read "." as missing data marker. If . still appears in data, use Juweiriya's version of this code
```{r Trackloss}
dat007AVT %>% mutate(TrackLoss = ifelse(is.na(dat007AVT$RIGHT_GAZE_X) & is.na(dat007AVT$RIGHT_GAZE_Y) 
                                        & is.na(dat007AVT$LEFT_GAZE_X) & is.na(dat007AVT$RIGHT_GAZE_Y), TRUE, FALSE)) -> dat007AVT
```

##MAKE AOI COLUMNS
```{r}
#check what IAS appear
unique(dat007AVT$RIGHT_INTEREST_AREA_LABEL)

#Creates columns with T/F values for each IA for each sample (TRUE if that IA is being looked at & not missing)
dat007AVT %>% mutate(Outside = ifelse((is.na(RIGHT_INTEREST_AREA_LABEL) & is.na(LEFT_INTEREST_AREA_LABEL)), TRUE, FALSE)) %>% 
  
mutate(Target = ifelse((RIGHT_INTEREST_AREA_LABEL =="TARGET_IA" & !is.na(RIGHT_INTEREST_AREA_LABEL)) | (LEFT_INTEREST_AREA_LABEL =="TARGET_IA" & !is.na(LEFT_INTEREST_AREA_LABEL)), TRUE, FALSE)) %>% 
  
mutate(ActionDis = ifelse((RIGHT_INTEREST_AREA_LABEL == "DIS_ACTIONRELATED_IA" & !is.na(RIGHT_INTEREST_AREA_LABEL)) |
                                   (LEFT_INTEREST_AREA_LABEL == "DIS_ACTIONRELATED_IA" & !is.na(LEFT_INTEREST_AREA_LABEL)), 
                                    TRUE, FALSE)) %>% 
mutate(AgentDis = ifelse((RIGHT_INTEREST_AREA_LABEL == "DIS_AGENTRELATED_IA" & !is.na(RIGHT_INTEREST_AREA_LABEL)) |
                                   (LEFT_INTEREST_AREA_LABEL == "DIS_AGENTRELATED_IA" & !is.na(LEFT_INTEREST_AREA_LABEL)), 
                                   TRUE, FALSE)) %>% 
mutate(Unrelated = ifelse((RIGHT_INTEREST_AREA_LABEL == "DIS_UNRELATED_IA" & !is.na(RIGHT_INTEREST_AREA_LABEL)) |
                                   (LEFT_INTEREST_AREA_LABEL == "DIS_UNRELATED_IA" & !is.na(LEFT_INTEREST_AREA_LABEL)),
                                   TRUE, FALSE)) -> dat007AVT

#additionally recodes looks to cross IA or question block IA as outside
dat007AVT$Outside[(dat007AVT$RIGHT_INTEREST_AREA_LABEL == "CROSS"  & !is.na(dat007AVT$RIGHT_INTEREST_AREA_LABEL)) |
(dat007AVT$LEFT_INTEREST_AREA_LABEL == "CROSS" & !is.na(dat007AVT$LEFT_INTEREST_AREA_LABEL)) ] <- TRUE

dat007AVT$Outside[(dat007AVT$RIGHT_INTEREST_AREA_LABEL == "RECTANGLE_INTERESTAREA"  & !is.na(dat007AVT$RIGHT_INTEREST_AREA_LABEL)) |
               (dat007AVT$LEFT_INTEREST_AREA_LABEL == "RECTANGLE_INTERESTAREA" & !is.na(dat007AVT$LEFT_INTEREST_AREA_LABEL)) ] <- TRUE

#Create and fill out group column
dat007AVT %>% mutate(Group = if_else(Subject == "007_06" | Subject =="007_08"| 
                                 Subject =="007_10"| Subject =="007_13"| 
                                 Subject =="007_19"| Subject =="007_29"| 
                                 Subject =="007_34"| Subject =="007_35"| 
                                 Subject =="007_36"| Subject =="007_37"|
                                 Subject =="007_41"| Subject =="007_43"| 
                                 Subject =="007_44"| Subject =="007_45"| 
                                 Subject =="007_49"| Subject =="007_51"| 
                                 Subject =="007_52"| Subject =="007_54"| 
                                 Subject =="007_55"| Subject =="007_56"| 
                                 Subject =="007_60"| Subject =="007_61"|
                                 Subject =="007_63"| Subject =="007_69"|
                                 Subject =="007_71", "CONTROL", "PD"
                                 )
                     )-> dat007AVT



```

## Select rows of interest
These parameters select AVT non-practice trials only & no question trials.
Excludes 007_32 and 007_37 due to trackloss + odd task performance/ignoring visual stimuli. 01 out because pilot subject only.
```{r}

#filter in desired trial types
dat007AVT <- dat007AVT[ which(dat007AVT$blockgroup==1),]
dat007AVT <- dat007AVT[ which(dat007AVT$attnquestion==0),]
#filter out undesired subjects
dat007AVT <- dat007AVT[ which(dat007AVT$Subject != "007_32"),]
dat007AVT <- dat007AVT[ which(dat007AVT$Subject != "007_37"),]
dat007AVT <- dat007AVT[ which(dat007AVT$Subject != "007_01"),]
```

## Check number of subjects and trials per subject
```{r}
# subjects
length(unique(dat007AVT$Subject))
# trials
ddply(dat007AVT, c("Subject"), summarize, N=length(unique(TRIAL_LABEL)))

#uncomment this section to view individual trials present in data
#ddply(dat007AVT, c("Subject"), summarize, (unique(TRIAL_LABEL)))
```

## Make Eyetracking R Data
```{r}
dat007AVT <- make_eyetrackingr_data(dat007AVT,
                               participant_column = "Subject",
                               trial_column = "TRIAL_INDEX",
                               time_column = "TIMESTAMP",
                               trackloss_column = "TrackLoss",
                               aoi_columns = c('Target','AgentDis', 'ActionDis', 'Unrelated'),
                               treat_non_aoi_looks_as_missing = TRUE,
                               item_columns = 'file'
                               )
#creates a "unique trial" column and "early subjects" to be used for removing problematic trials from early subjects
dat007AVT %>% mutate(UniqueTrial = paste(dat007AVT$Subject, dat007AVT$trial_name, sep = '_')) %>%  
              mutate(EarlyS = ifelse(Subject == "007_02" | Subject== "007_03" | 
                                       Subject == "007_04" | Subject == "007_05" | 
                                       Subject == "007_06", "Early", "Normal")) -> dat007AVT
```


##Set the exported IP to timestamp of 0.
Begins right at the presentation of article sound, therefore includes initial ~200ms where looks are not driven by the played sound.

```{r}
sentence_window_plus <- subset_by_window(dat007AVT, rezero = TRUE, remove = TRUE,
                                    window_start_col = "IP_START_TIME", 
                                    window_end_col = "IP_END_TIME")

```

ONLY NEED TO DO THIS IF ANALYZING W/O ANTICIPATED TRIALS: Create time sequence for cleaning pre-anticipatory looks (includes initial 200ms)
```{r}
#create time sequence
sent_time_seq_plus <-make_time_sequence_data(sentence_window_plus, time_bin_size = 50,
                                   predictor_columns = c("Group", "trial_name", "UniqueTrial"),
                                   aois = c("Target", "ActionDis", "AgentDis", "Unrelated")
                                   )

```

Can skip this to see results from all trials: Identify/remove trials where Ss primarily looked to target in 50ms preceeding sentence onset
Do this before removing trials for trackloss. trials to exclude/model: rows where time bin = 3 (149-200ms, based on full sentence window)  &  AOI = target  &  Prop > .49999
```{r}
#labels just time bin 3 for pre-anticipated trials in original data frame
sent_time_seq_plus %>% mutate(pre_anticipated = ifelse(sent_time_seq_plus$AOI=="Target" & sent_time_seq_plus$TimeBin == 3 & sent_time_seq_plus$Prop > .49999, TRUE, FALSE)) -> sent_time_seq_plus


#create a dataframe containing just time bin 3 for pre-anticipated trials in original data frame
trials_reference_AVT <- sent_time_seq_plus[ which(sent_time_seq_plus$TimeBin == 3),]
#label which of those rows were pre-anticipated (proportion looks to target >.49999)
trials_reference_AVT %>% mutate(pre_anticipated = ifelse(trials_reference_AVT$AOI=="Target" & trials_reference_AVT$TimeBin == 3 & trials_reference_AVT$Prop > .49999, TRUE, FALSE)) -> trials_reference_AVT


# Then you use that subset of the data as a reference, and alter the original data by adding a new column that, for every participant-trial id, looks up that id in the lookup data and assigns the value in "drop" to the new column.
sentence_window_plus %>%
mutate(to_remove = trials_reference_AVT$pre_anticipated[
    match(unlist(sentence_window_plus$UniqueTrial),
          trials_reference_AVT$UniqueTrial)]) -> sentence_window_plus

#drop "to_remove" items
filter(sentence_window_plus, sentence_window_plus$to_remove!=TRUE) -> sentence_window_plus_no_ant
filter(sentence_window_plus, sentence_window_plus$to_remove==TRUE) -> removed_samples

#pre_anticipated_trials <- sent_time_seq_plus[which(sent_time_seq_plus$AOI=="Target" & sent_time_seq_plus$TimeBin == 3 & sent_time_seq_plus$Prop > .49999),]
```
Finds (and removes) 639 trials (out of 2304?). 27.7% of trials began with looks primarily to target.
results in 'sentence_window_plus_clean'


Next: OPTIONAL, get list of pre-anticipated trials for subset of subjects (used to find non-anticipated trials to use as the bases for interpolating trials later on)
```{r See pre-anticipated trials for early subjects}
pre_anticipated_trials_earlySs <- pre_anticipated_trials[ which(pre_anticipated_trials$Subject == "007_02" | pre_anticipated_trials$Subject =="007_03"| pre_anticipated_trials$Subject =="007_04"| pre_anticipated_trials$Subject =="007_05"| pre_anticipated_trials$Subject =="007_06"),]
write.csv(pre_anticipated_trials_earlySs, file = "earlySs_anticipated_trials.csv")

```



## Zoom in to sentence window minus initial 200. 
Rezeros as well, so all times are now saccade-time-adjusted. No need for further adjustments.
```{r}
#NOTE if removing anticipated trials, subset "sentence_window_plus_no_ant" dataframe instead
sentence_window <- subset_by_window(sentence_window_plus, 
                                window_start_time = 200, 
                                window_end_time = 2150,
                                rezero = TRUE, remove = TRUE)
```

## Assess and Clean Trackloss
```{r}
# analyze amount of trackloss by subjects and trials
(trackloss <- trackloss_analysis(data = sentence_window))

#remove trials with trackloss greater than threshold (here, 25%)
sentence_window_clean <- clean_by_trackloss(data = sentence_window, trial_prop_thresh = .25)

#re-assess trackloss following cleaning
trackloss_clean <- trackloss_analysis(data = sentence_window_clean)

#prints mean trackloss by participants following cleaning of trials >threshold loss
(trackloss_clean_subjects <- unique(trackloss_clean[, c('Subject','TracklossForParticipant')]))

# get mean%  samples contributed per trial, with SD
mean(1 - trackloss_clean_subjects$TracklossForParticipant)
sd(1- trackloss_clean_subjects$TracklossForParticipant)

# See number of (non-lossy) trials contributed by subject in the NumTrials column
(final_summary <- describe_data(sentence_window_clean, 'Target', 'Subject'))

#see mean/SD of trials remaining after cleaning
mean(final_summary$NumTrials)
sd(final_summary$NumTrials)
```
With threshold set to 25%, pre-anticipated trials already out,  and 32 & 37 excluded:
25% threshold results in removal of ?? trials & mean 33.0625 trials remain, SD 3.37.  mean 96.648% retention of saccades (sd 2.3%). Participant w/lowest number of trials remaining would still have ??.
10.6% would be max amount of track loss in any participant's remaining trials (007_50).


###Save this file as base set of trials for subsequent window analyses
```{r Save eyetrackingR data cleaned for target-initial trials and trackloss}
#saveRDS(sentence_window_clean, file = "007_AVT_data_clean.rds")
saveRDS(sentence_window_clean, file = "007_AVT_data_clean_with_anticipated.rds")
```

```{r load eyetracking R data cleaned for target-initial trials and trackloss}
#sentence_window_clean <- readRDS("007_AVT_data_clean.rds", refhook = NULL)
sentence_window_clean <- readRDS("007_AVT_data_clean_with_anticipated.rds", refhook = NULL)
```

## Make and Plot Time Sequence
###Full sentence window
```{r}
#create time sequence
sent_time_seq <-make_time_sequence_data(sentence_window_clean, time_bin_size = 50,
                                   predictor_columns = c("Group", "trial_name", "list", "EarlyS")
                                   )
```

OPTIONAL:

Assess mean proportion of looks to target over course of whole trial, split by group and item.
Resulting means vary ~25-75%; SDs are ~.50, so 3SD replacement rule doesn't apply.
Decision: rather than excluding "outlier" items, will model the variability as random intercepts.
```{r}

sent_time_seq %>% group_by(Group, trial_name) %>%
                       summarise_all(mean, na.rm = TRUE) -> summary_mean

sent_time_seq %>% group_by(Group) %>%
                         summarise_all(sd, na.rm = TRUE) -> summary_sd

```


##Interpolate values for set 9 trials for subjects 2-6.
007_02: list 9
007_03: list 10
007_04: list 10
007_05: list 1
007_06: list 2

```{r Create vectors for interpolated trials (early Ss set 9 trials)}

# store vectors for: original trial info; props for each bin in surrounding 2 trials; averaged new trial.

sent_time_seq <- as.data.frame(sent_time_seq)

#SUBJECT 007_05
#interpolate trial 9_3
`i_05_3_2` <- subset(sent_time_seq, Subject == "007_05" & trial_name == "ExpA_Set3_Item2", 
                    select = c("Prop"))
`i_05_7_3` <- subset(sent_time_seq, Subject == "007_05" & trial_name == "ExpA_Set7_Item3", 
                    select = c("Prop"))
sent_time_seq[which(sent_time_seq$Subject == "007_05" & sent_time_seq$trial_name == "ExpA_Set9_Item3"), "Prop"] <- (`i_05_3_2` + `i_05_7_3`)/2


#interpolate trial 9_2
"i_05_7_2" <- subset(sent_time_seq, Subject == "007_05" & trial_name == "ExpA_Set7_Item2", 
                    select = c("Prop"))
"i_05_12_1" <- subset(sent_time_seq, Subject == "007_05" & trial_name == "ExpA_Set12_Item1", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_05" & sent_time_seq$trial_name == "ExpA_Set9_Item2"), "Prop"] <- ((i_05_7_2 + i_05_12_1) / 2)

#interpolate trial 9_4
"i_05_11_4" <- subset(sent_time_seq, Subject == "007_05" & trial_name == "ExpA_Set11_Item4", 
                    select = c("Prop"))
"i_05_12_2" <- subset(sent_time_seq, Subject == "007_05" & trial_name == "ExpA_Set12_Item2", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_05" & sent_time_seq$trial_name == "ExpA_Set9_Item4"), "Prop"] <- ((i_05_11_4 + i_05_12_2) / 2)

#SUBJECT 007_02
#interpolate trial 9_4
"i_02_10_2" <- subset(sent_time_seq, Subject == "007_02" & trial_name == "ExpA_Set10_Item2", 
                    select = c("Prop"))
"i_02_3_2" <- subset(sent_time_seq, Subject == "007_02" & trial_name == "ExpA_Set3_Item2", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_02" & sent_time_seq$trial_name == "ExpA_Set9_Item4"), "Prop"] <- ((i_02_10_2 + i_02_3_2) / 2)

#interpolate trial 9_1
"i_02_3_2" <- subset(sent_time_seq, Subject == "007_02" & trial_name == "ExpA_Set3_Item2", 
                    select = c("Prop"))
"i_02_5_3" <- subset(sent_time_seq, Subject == "007_02" & trial_name == "ExpA_Set5_Item3", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_02" & sent_time_seq$trial_name == "ExpA_Set9_Item1"), "Prop"] <- ((i_02_3_2 + i_02_5_3) / 2)

#interpolate trial 9_2
"i_02_3_4" <- subset(sent_time_seq, Subject == "007_02" & trial_name == "ExpA_Set3_Item4", 
                    select = c("Prop"))
"i_02_5_1" <- subset(sent_time_seq, Subject == "007_02" & trial_name == "ExpA_Set5_Item1", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_02" & sent_time_seq$trial_name == "ExpA_Set9_Item2"), "Prop"] <- ((i_02_3_4 + i_02_5_1) / 2)

#SUBJECT 007_06
#interpolate trial 9_4
"i_06_2_1" <- subset(sent_time_seq, Subject == "007_06" & trial_name == "ExpA_Set2_Item1", 
                    select = c("Prop"))
"i_06_7_2" <- subset(sent_time_seq, Subject == "007_06" & trial_name == "ExpA_Set7_Item2", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_06" & sent_time_seq$trial_name == "ExpA_Set9_Item4"), "Prop"] <- ((i_06_2_1 + i_06_7_2) / 2)

#interpolate trial 9_2
"i_06_7_4" <- subset(sent_time_seq, Subject == "007_06" & trial_name == "ExpA_Set7_Item4", 
                    select = c("Prop"))
"i_06_4_2" <- subset(sent_time_seq, Subject == "007_06" & trial_name == "ExpA_Set4_Item2", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_06" & sent_time_seq$trial_name == "ExpA_Set9_Item2"), "Prop"] <- ((i_06_7_4 + i_06_4_2) / 2)

#SUBJECT 007_03
#interpolate trial 9_4
"i_03_11_2" <- subset(sent_time_seq, Subject == "007_03" & trial_name == "ExpA_Set11_Item2", 
                    select = c("Prop"))
"i_03_12_1" <- subset(sent_time_seq, Subject == "007_03" & trial_name == "ExpA_Set12_Item1", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_03" & sent_time_seq$trial_name == "ExpA_Set9_Item4"), "Prop"] <- ((i_03_11_2 + i_03_12_1) / 2)

#interpolate trial 9_2
"i_03_6_2" <- subset(sent_time_seq, Subject == "007_03" & trial_name == "ExpA_Set6_Item2", 
                    select = c("Prop"))
"i_03_5_1" <- subset(sent_time_seq, Subject == "007_03" & trial_name == "ExpA_Set5_Item1", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_03" & sent_time_seq$trial_name == "ExpA_Set9_Item2"), "Prop"] <- ((i_03_6_2 + i_03_5_1) / 2)

#interpolate trial 9_3
"i_03_7_4" <- subset(sent_time_seq, Subject == "007_03" & trial_name == "ExpA_Set7_Item4", 
                    select = c("Prop"))
"i_03_4_2" <- subset(sent_time_seq, Subject == "007_03" & trial_name == "ExpA_Set4_Item2", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_03" & sent_time_seq$trial_name == "ExpA_Set9_Item3"), "Prop"] <- ((i_03_7_4 + i_03_4_2) / 2)

#SUBJECT 007_04
#interpolate trial 9_4
"i_04_11_2" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set11_Item2", 
                    select = c("Prop"))
"i_04_7_3" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set7_Item3", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_04" & sent_time_seq$trial_name == "ExpA_Set9_Item4"), "Prop"] <- ((i_04_11_2 + i_04_7_3) / 2)

#interpolate trial 9_1
"i_04_10_4" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set10_Item4", 
                    select = c("Prop"))
"i_04_1_2" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set1_Item2", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_04" & sent_time_seq$trial_name == "ExpA_Set9_Item1"), "Prop"] <- ((i_04_10_4 + i_04_1_2) / 2)

#interpolate trial 9_2
"i_04_6_2" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set6_Item2", 
                    select = c("Prop"))
"i_04_5_1" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set5_Item1", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_04" & sent_time_seq$trial_name == "ExpA_Set9_Item2"), "Prop"] <- ((i_04_6_2 + i_04_5_1) / 2)

#interpolate trial 9_3
"i_04_1_3" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set1_Item3", 
                    select = c("Prop"))
"i_04_4_2" <- subset(sent_time_seq, Subject == "007_04" & trial_name == "ExpA_Set4_Item2", 
                    select = c("Prop"))
sent_time_seq[ which(sent_time_seq$Subject == "007_04" & sent_time_seq$trial_name == "ExpA_Set9_Item3"), "Prop"] <- ((i_04_1_3 + i_04_4_2) / 2)
```

```{r Save interpolated data file}
#save the cleaned file to run in verb/object window analysis scripts. Or plot/analyse overall sentence below.
#saveRDS(sent_time_seq, file = "007_AVT_data_clean_interpolated.rds")
saveRDS(sent_time_seq, file = "007_AVT_data_interpolated_with_anticipated.rds")

```


```{r Load previously cleaned & interpolated data}
#pick one of these two, depending on whether analyzing with 'all' trials in:
#sent_time_seq <- readRDS("007_AVT_data_clean_interpolated.rds", refhook = NULL)
sent_time_seq <- readRDS("007_AVT_data_interpolated_with_anticipated.rds", refhook = NULL)


#load required packages
library(Matrix)
library(plyr)
library(tidyr)
library(lme4)
library(lmerTest)
library(ggplot2)
library(eyetrackingR)
library(dplyr)
library(scales)
#library(optiRum)
```


```{r final prep for modeling}

#create high vs low motion verb column
sent_time_seq %>% 
  mutate(mt_content = ifelse(file == "boxer_hits_punchingbag" | file == "driver_hits_brakes" |
                               file == "cat_catches_mouse" | file == "	child_catches_football" |
                               file == "dolphin_jumps_waves" | file == "horse_jumps_fence" |
                               file == "lifeguard_saves_swimmer" | file == "detective_hunts_fugitive" |
                               file == "lion_hunts_zebra" | file == "lion_crosses_jungle" |
                               file == "detective_crosses_courtroom" | file == "mechanic_organizes_tools" |
                               file == "waitress_organizes_dishes", "Hi", "Lo")) %>% 

#create column for natural quadratic time polynomial, if using
  mutate(Time2 = Time^2) %>% 
#create column of binomial proportion values, if using
  mutate(BiProp = ifelse(Prop > .5, 1, 0)) -> sent_time_seq
#note renamed time seq2 to plain 'seq' above, to avoid changing code throughout when analyzing with anticipated trials in
  
#set factors
sent_time_seq$mt_content <- as.factor(sent_time_seq$mt_content)
sent_time_seq$Group <- as.factor(sent_time_seq$Group)


#center factors. mt: low -.5, hi .5   group: control -.5, pd .5
contrasts(sent_time_seq$mt_content) <- c(.5, -.5)
contrasts(sent_time_seq$Group) <- c(-.5, .5)

#create dataframe that excludes set 9 trials for early participants due to changed fugitive image.
sent_time_seq_9rm <- sent_time_seq[!((sent_time_seq$Subject=="007_02" | sent_time_seq$Subject=="007_03" | sent_time_seq$Subject=="007_04" | sent_time_seq$Subject=="007_05" | sent_time_seq$Subject=="007_06") & (sent_time_seq$trial_name=="ExpA_Set9_Item1" | sent_time_seq$trial_name=="ExpA_Set9_Item2" | sent_time_seq$trial_name=="ExpA_Set9_Item3" | sent_time_seq$trial_name=="ExpA_Set9_Item4")),]

#subset the data into target AOIs only
`target_sent_time_seq` <- subset(sent_time_seq, AOI == "Target", 
                    select = c("Prop", "Subject", "Group", "ot1", "ot2", "trial_name", "Time", "Time2", "BiProp","EarlyS"))
#second data frame for target AOIs only and problem trials (set 9) excluded
`target_sent_time_seq_9rm` <- subset(sent_time_seq_9rm, AOI == "Target", 
                    select = c("Prop", "Subject", "Group", "ot1", "ot2", "trial_name", "Time", "Time2", "BiProp","EarlyS"))

#change order of levels in AOI factor in order to change plot default
levels(as.factor(sent_time_seq$AOI))
sent_time_seq$AOI <- factor(sent_time_seq$AOI, levels = c("Target", "AgentDis", "ActionDis", "Unrelated"))

#create dataframe with average proportions from each subject, splits AOIs into different columns
sent_time_seq %>%
  filter(!is.na(Prop)) %>%
  group_by(Subject, AOI, Time, Group) %>% 
  summarise(MeanProp = mean(Prop), y=sum(Prop), N=length(Prop), Time=min(Time)) ->sent_time_seq_avg

#add new column that converts probabilities to logits
sent_time_seq_avg$Logit <- qlogis(sent_time_seq_avg$MeanProp)
```


## Visualize time results
```{r plot predictive sentences}

sent_time_seq$Group <- revalue(x = sent_time_seq$Group,
                                       c("PD" = "Parkinson's disease", "CONTROL" = "Control"))
sent_time_seq$mt_content <- revalue(x = sent_time_seq$mt_content,
                                       c("Lo" = "Low motion", "Hi" = "High motion"))
sent_time_seq$mt_content <- factor(sent_time_seq$mt_content, levels = c("Low motion", "High motion"))

#tiff("AVT_plot_10_1_21.tiff", unit="in", width=12, height=7, res=300)

print(
  ggplot(sent_time_seq, aes(x = Time, y = BiProp, col= AOI)) +
  stat_summary(fun.data = mean_se, geom = "errorbar", aes(color=paste("mean_se", AOI))) +
  stat_summary(geom="line", size = 2) + 
  theme_minimal() +
  #theme(legend.text=element_text(size=rel(1.2))) +
  theme(legend.position="none", legend.box = "horizontal") +
  labs(AOI = "Interest Area") +
  facet_grid(. ~ Group) +
  geom_vline(xintercept=82) +
  geom_vline(xintercept=658) +
  geom_vline(xintercept=1158, linetype="dotted") +
  geom_vline(xintercept=1246) +
  annotate(geom="text", x=375, y=.75, label="The [agent]",
                color="turquoise3", size=6) +
  annotate(geom="text", x=960, y=.75, label="[verb]s the",
              color="red2", size=6) +
  annotate(geom="text", x=1450, y=.75, label="[target]",
              color="darkorchid2", size=6) +
  scale_color_manual(labels = c("Target", "AgentDistractor", "ActionDistractor", "Unrelated"),     
                     values=c('red2', 'turquoise3','gray68','gray68',  'gray68', 'gray68', 'darkorchid2','chartreuse3')) +
  labs(title="Looks to target vs. distractor images in predictive sentences",
        x ="Time (ms)", y = "Proportion of Trials") +
  theme(strip.text.x = element_text(size = 18, colour = "black", angle = 0)) +
  theme(text = element_text(size=18)) +
  expand_limits(x = 2500)) +
  annotate(geom="text", x=2100, y=.64, label="Target",
  color="darkorchid2", size=5, angle=0) +
  annotate(geom="text", x=2265, y=.18, label="AgentDistractor",
  color="turquoise3", size=5, angle=0) +
  annotate(geom="text", x=2260, y=.11, label="VerbDistractor",
  color="red2", size=5) +
  annotate(geom="text", x=2170, y=.07, label="Unrelated",
  color="chartreuse3", size=5)
    
#dev.off()

```

```{r motion analysis plot}

sent_time_seq_9rm$Group <- revalue(x = sent_time_seq_9rm$Group,
                                       c("PD" = "Parkinson's disease", "CONTROL" = "Control"))
sent_time_seq_9rm$mt_content <- revalue(x = sent_time_seq_9rm$mt_content,
                                       c("Lo" = "Low motion", "Hi" = "High motion"))
sent_time_seq_9rm$mt_content <- factor(sent_time_seq_9rm$mt_content, levels = c("Low motion", "High motion"))

tiff("AVT_plot_motion_10_1_21.tiff", unit="in", width=12, height=7.5, res=300)

print(
  ggplot(sent_time_seq_9rm, aes(x = Time, y = BiProp, col= AOI)) +
  #geom_smooth() +
  stat_summary(fun.data = mean_se, geom = "errorbar", aes(color=paste("mean_se", AOI))) +
  stat_summary(geom="line", size = 2) + 
  theme_minimal() +
  #theme(legend.text=element_text(size=rel(1.2))) +
  theme(legend.position="none", legend.box = "horizontal") +
  labs(AOI = "Interest Area") +
  facet_grid(mt_content ~ Group) +
  geom_vline(xintercept=82) +
  geom_vline(xintercept=658) +
  geom_vline(xintercept=1158, linetype="dotted") +
  geom_vline(xintercept=1246) +
  annotate(geom="text", x=375, y=.75, label="The [agent]",
                color="turquoise3", size=6) +
  annotate(geom="text", x=960, y=.75, label="[verb]s the",
              color="red2", size=6) +
  annotate(geom="text", x=1450, y=.75, label="[target]",
              color="darkorchid2", size=6) +
  scale_color_manual(labels = c("Target", "AgentDistractor", "ActionDistractor", "Unrelated"),     
                     values=c('red2', 'turquoise3','gray68','gray68',  'gray68', 'gray68', 'darkorchid2','chartreuse3')) +
  labs(title="Looks to target vs. distractor images in predictive sentences",
        x ="Time (ms)", y = "Proportion of Trials") +
  theme(strip.text.x = element_text(size = 18, colour = "black", angle = 0)) +
  theme(text = element_text(size=18)) +
  expand_limits(x = 2500)) +
  annotate(geom="text", x=2100, y=.64, label="Target",
  color="darkorchid2", size=5, angle=0) +
  annotate(geom="text", x=2265, y=.18, label="AgentDistractor",
  color="turquoise3", size=5, angle=0) +
  annotate(geom="text", x=2260, y=.11, label="VerbDistractor",
  color="red2", size=5) +
  annotate(geom="text", x=2170, y=.07, label="Unrelated",
  color="chartreuse3", size=5)
    
dev.off()

```


```{r load model fits from later scripts}
agent.target.binom.model.alltrials <- readRDS("./agent.target.binom.model.alltrials.rds")

verb.target.binom.model.alltrials <- readRDS("./verb.target.binom.model.alltrials.rds")

obj.target.binom.model.alltrials <- readRDS("./obj.target.binom.model.alltrials.rds")

#add model fits from above models to target_sent_time_seq data frame
target_sent_time_seq$target.value.lm <- agent.target.binom.model.alltrials$fitted.values


```

```{r plot on logit scale}

tiff("AVT_plot_logit_scale_12_2.tiff", unit="in", width=13, height=7.5, res=300)

print(
  ggplot(sent_time_seq_avg, aes(x = Time, y = Logit, col= AOI)) +
  stat_summary(fun.data = mean_se, geom = "errorbar", aes(color=paste("mean_se", AOI))) +
  stat_summary(geom="line", size = 2) + 
  theme_minimal() +
  #theme(legend.text=element_text(size=rel(1.2))) +
  theme(legend.position="none", legend.box = "horizontal") +
  labs(AOI = "Interest Area") +
  facet_grid(. ~ Group) +
  geom_vline(xintercept=82) +
  geom_vline(xintercept=658) +
  geom_vline(xintercept=1158, linetype="dotted") +
  geom_vline(xintercept=1246) +
  annotate(geom="text", x=375, y=1.25, label="The [agent]",
                color="turquoise3", size=6) +
  annotate(geom="text", x=960, y=1.25, label="[verb]s the",
              color="red2", size=6) +
  annotate(geom="text", x=1450, y=1.25, label="[target]",
              color="darkorchid2", size=6) +
  scale_color_manual(labels = c("Target", "AgentDistractor", "ActionDistractor", "Unrelated"),
                     values=c('red2', 'turquoise3','gray68','gray68',  'gray68', 'gray68', 'darkorchid2','chartreuse3')) +
  labs(title="Looks to target vs. distractor images in predictive sentences",
        x ="Time (ms)", y = "Log odds of fixations on each AOI") +
  theme(strip.text.x = element_text(size = 18, colour = "black", angle = 0)) +
  theme(text = element_text(size=18)) +
  annotate(geom="text", x=2060, y=.65, label="Target",
  color="darkorchid2", size=5, angle=0) +
  annotate(geom="text", x=2200, y=-1.7, label="AgentDistractor", color="turquoise3", size=5, angle=0) +
  annotate(geom="text", x=2200, y=-2.2, label="VerbDistractor", color="red2", size=5) +
  annotate(geom="text", x=2120, y=-2.5, label="Unrelated", color="chartreuse3", size=5)) +
  expand_limits(x = 2500)
dev.off()

```