ARLClustering - Testing Facebook Friends dataset

library(arlclustering)
#library(igraph)

Dataset description

The Facebook Friends social network dataset is a gml file, containing 362 nodes and 1988 edges.

Loading network dataset

# Time how long it takes to locate and load the bundled network dataset
t1 <- system.time({
  dataset_path <- system.file(
    "extdata", "Facebook_fiends.gml",
    package = "arlclustering"
  )
  # An empty path means the file could not be located in the package
  if (!nzchar(dataset_path)) {
    stop("Facebook_fiends.gml file not found")
  }

  g <- arlc_get_network_dataset(dataset_path, "Facebook Friends")
  # Fields of the loaded dataset that this walkthrough inspects
  g$graphLabel
  g$totalNodes
  g$totalEdges
  g$averageDegree
})

# Report the elapsed time of the loading step
message("Graph loading Processing Time: ", t1["elapsed"], " seconds\n")
#> Graph loading Processing Time: 0.0379999999999994 seconds

Generate Transactions

Next, we generate transactions from the graph, which yields a dataset of 314 rows in total.

# Time the conversion of the loaded graph into a transaction dataset
t2 <- system.time({
  transactions <- arlc_gen_transactions(g$graph)
  transactions
})

# Report the elapsed time of the transaction generation step
message(
  "Transaction dataset Processing Time: ",
  t2["elapsed"],
  " seconds\n"
)
#> Transaction dataset Processing Time: 0.0179999999999998 seconds

Get Apriori Thresholds

We obtain the apriori thresholds for the generated transactions. The following thresholds are used for the apriori execution:

- Minimum support: 0.04
- Minimum confidence: 0.5
- Lift: 22.42857
- Gross rules length: 74748
- Selection ratio: 238

# Time the search for the apriori thresholds over the candidate support
# values 0.04 and 0.05, with the confidence fixed at 0.5
t3 <- system.time({
  params <- arlc_get_apriori_thresholds(transactions,
                                        supportRange = seq(0.04, 0.05, by = 0.01),
                                        Conf = 0.5)
  # Fields of the result that are reused by the rule generation step
  params$minSupp
  params$minConf
  params$bestLift
  params$lenRules
  params$ratio
})

# Display the total processing time of the threshold search
# (the label previously read "Graph loading", copied from the loading step)
message("Apriori thresholds Processing Time: ", t3["elapsed"], " seconds\n")
#> Apriori thresholds Processing Time: 0.0979999999999999 seconds

Generate Gross Rules

We use the obtained parameters to generate gross rules, where we obtain 74748 rules.

# Time the generation of the gross rule set from the selected thresholds
t4 <- system.time({
  minLenRules <- 1
  maxLenRules <- params$lenRules
  # Fall back to five times the transaction count when the suggested
  # maximum is not finite or exceeds that bound
  if (!is.finite(maxLenRules) || maxLenRules > 5 * length(transactions)) {
    maxLenRules <- 5 * length(transactions)
  }

  grossRules <- arlc_gen_gross_rules(
    transactions,
    minSupp = params$minSupp,
    minConf = params$minConf,
    minLenRules = minLenRules + 1,
    maxLenRules = maxLenRules
  )
  grossRules$TotalRulesWithLengthFilter
})
#> Apriori
#> 
#> Parameter specification:
#>  confidence minval smax arem  aval originalSupport maxtime support minlen
#>         0.5    0.1    1 none FALSE            TRUE       5    0.04      2
#>  maxlen target  ext
#>    1570  rules TRUE
#> 
#> Algorithmic control:
#>  filter tree heap memopt load sort verbose
#>     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
#> 
#> Absolute minimum support count: 12 
#> 
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[342 item(s), 314 transaction(s)] done [0.00s].
#> sorting and recoding items ... [122 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 4 5 6 7 8 done [0.01s].
#> writing ... [74748 rule(s)] done [0.01s].
#> creating S4 object  ... done [0.02s].
# Display the total processing time of the gross rules generation step
message("Gross rules generation Time: ", t4["elapsed"], " seconds\n")
#> Gross rules generation Time: 0.088000000000001 seconds

Filter Significant and Non-Redundant Rules

We first filter out redundant rules from the generated gross rules. We then filter out non-significant rules from the remaining non-redundant rules, which leaves 10678 rules.

# Time the two filtering passes: non-redundant rules first, then the
# significant rules among them
t5 <- system.time({
  NonRedRules <- arlc_get_NonR_rules(grossRules$GrossRules)
  NonRSigRules <- arlc_get_significant_rules(
    transactions,
    NonRedRules$FiltredRules
  )
  NonRSigRules$TotFiltredRules
})
# Report the elapsed time of the rule filtering step
message(
  "\nClearing rules Processing Time: ",
  t5["elapsed"],
  " seconds\n"
)
#> 
#> Clearing rules Processing Time: 0.789999999999999 seconds

Clean and generate final Rules

We clean the final set of rules to prepare for clustering. Then, we generate clusters based on the cleaned rules. A total of 20 clusters is identified.

# Time the final rule cleaning and the cluster generation built on it
t6 <- system.time({
  cleanedRules <- arlc_clean_final_rules(NonRSigRules$FiltredRules)
  clusters <- arlc_generate_clusters(cleanedRules)
  clusters$TotClusters
})
# Report the elapsed time of the cleaning and clustering step
message(
  "Cleaning final rules Processing Time: ",
  t6["elapsed"],
  " seconds\n"
)
#> Cleaning final rules Processing Time: 0.729000000000001 seconds

# Report the overall elapsed time as the sum of the six timed steps.
# The message text fixes the "comsumed" typo and adds the missing spaces
# around the value.
message(
  "The total consumed time is: ",
  t1["elapsed"] + t2["elapsed"] + t3["elapsed"] +
    t4["elapsed"] + t5["elapsed"] + t6["elapsed"],
  " seconds\n"
)
#> The total consumed time is: 1.761 seconds

Plot Clusters

Finally, we visualize the identified clusters.

# Plot the graph with its label and the identified clusters
arlc_clusters_plot(g$graph, g$graphLabel, clusters$Clusters)
#> 
#> Total Identified Clusters: 20
#>  =========================
#>   Community 01:1 5 26 59 61 92 95 97 112 120 137 147 164 178 204 214 247 253 254 302 312 321 329 355
#>   Community 02:10 16 39 45 70 74 84 98 127 134 135 138 140 149 193 216 227 230 234 255 351 360
#>   Community 03:24 54 91 100 102 129 131 132 152 165 176 182 215 222 289 295 320 324 341 347
#>   Community 04:30 131 132 165 182 289
#>   Community 05:34 70 127 140 149 192 227 230 298
#>   Community 06:36 39 135 140 149 351 360
#>   Community 07:45 70 84 127 134 135 138 140 149 193 227 255 283 351 360
#>   Community 08:54 66 69 72 76 100 102 129 131 132 165 169 182 189 203 211 215 222 233 263 289 295 311 320 324 328 341 347
#>   Community 09:66 69 72 76 77 99 129 131 132 165 169 182 189 203 211 215 222 263 289 295 311 320 324 341 347
#>   Community 10:70 80 84 98 127 134 135 138 140 149 192 193 216 227 230 234 255 256 298 337 351 360
#>   Community 11:76 99 100 129 131 132 165 169 177 182 189 203 211 215 222 263 289 295 311 320 324 328 341 347
#>   Community 12:77 100 129 131 132 165 182 215 222 289 324 341 347
#>   Community 13:107 167 187 231 335
#>   Community 14:129 131 132 165 169 177 182 189 203 211 215 222 233 263 289 295 311 320 324 328 341 347
#>   Community 15:131 132 152 165 169 176 177 182 189 203 211 215 222 233 263 289 295 311 320 324 328 341 347
#>   Community 16:135 138 140 149 192 193 216 227 230 234 255 267 351 360
#>   Community 17:149 192 193 216 227 230 234 255 256 298 337 351 356 360
#>   Community 18:160 187 335
#>   Community 19:165 169 176 177 182 189 203 211 215 222 233 246 263 289 295 311 320 324 328 341 347
#>   Community 20:187 231 244 335
#>  =========================