A system designed for detecting data drift in streaming datasets, offering a suite of statistical methods to track variations in data behavior.
# Install the package from the `main` branch of its GitHub repository
# (uses the `remotes` package), then attach it.
remotes::install_github("ugurdar/datadriftR@main")
library(datadriftR)
# Generate a sample binary data stream of 1000 elements whose distribution
# changes halfway through: the first 500 draws favour 0, the last 500 favour 1.
set.seed(123) # Setting a seed for reproducibility
data_part1 <- sample(c(0, 1), size = 500, replace = TRUE, prob = c(0.7, 0.3))

# Introduce a change in data distribution
data_part2 <- sample(c(0, 1), size = 500, replace = TRUE, prob = c(0.3, 0.7))

# Combine the two parts
data_stream <- c(data_part1, data_part2)

# Initialize the DDM object
ddm <- DDM$new()

# Iterate through the data stream, feeding one element at a time to the
# detector and checking its flags after each element.
for (i in seq_along(data_stream)) {
  ddm$add_element(data_stream[i])

  if (ddm$change_detected) {
    message(paste("Drift detected!", i))
  } else if (ddm$warning_detected) {
    # Warning reporting is intentionally left disabled; uncomment to enable.
    # message(paste("Warning detected at position:", i))
  }
}
#> Drift detected! 560
eddm <- EDDM$new()

# Feed the stream element by element to the EDDM detector and report every
# index at which it flags a change.
for (i in seq_along(data_stream)) {
  eddm$add_element(data_stream[i])

  if (eddm$change_detected) {
    message(paste("Drift detected!", i))
  } else if (eddm$warning_detected) {
    # Warning reporting is intentionally left disabled; uncomment to enable.
    # message(paste("Warning detected!",i))
  }
}
#> Drift detected! 403
#> Drift detected! 505
#> Drift detected! 800
hddm_a <- HDDM_A$new()

# Feed the stream element by element to the HDDM_A detector, printing both
# warning-zone and change notifications along with the triggering value.
for (i in seq_along(data_stream)) {
  hddm_a$add_element(data_stream[i])

  if (hddm_a$warning_detected) {
    cat(sprintf(
      "Warning zone has been detected in data: %s - at index: %d\n",
      data_stream[i], i
    ))
  }

  if (hddm_a$change_detected) {
    cat(sprintf(
      "Change has been detected in data: %s - at index: %d\n",
      data_stream[i], i
    ))
    hddm_a$reset() # Reset after detecting change
  }
}
#> Warning zone has been detected in data: 1 - at index: 511
#> Warning zone has been detected in data: 1 - at index: 512
#> Warning zone has been detected in data: 0 - at index: 513
#> Warning zone has been detected in data: 1 - at index: 514
#> Warning zone has been detected in data: 0 - at index: 515
#> Warning zone has been detected in data: 1 - at index: 516
#> Change has been detected in data: 1 - at index: 517
hddm_w_instance <- HDDM_W$new()

# Feed the stream element by element to the HDDM_W detector, printing the
# index of every warning-zone and change notification.
for (i in seq_along(data_stream)) {
  hddm_w_instance$add_element(data_stream[i])

  if (hddm_w_instance$warning_detected) {
    cat(sprintf("Warning zone detected at index: %d\n", i))
  }

  if (hddm_w_instance$change_detected) {
    cat(sprintf("Concept drift detected at index: %d\n", i))
  }
}
#> Warning zone detected at index: 507
#> Warning zone detected at index: 508
#> Warning zone detected at index: 509
#> Warning zone detected at index: 510
#> Concept drift detected at index: 511