diff --git a/basics.R b/basics.R
index 6a53b55..120ac56 100644
--- a/basics.R
+++ b/basics.R
@@ -11,13 +11,18 @@ ggplot(sites, aes(score)) +
   labs(title="Verteilung der Punktzahlen", x="Punktzahl", y="Anzahl Sites")
 ggsave("plots/punkte_verteilung.png")
+
 
 ## Verteilung der Punktzahlen (nur KV)
+summary(sites_kv$score)
+
 ggplot(sites_kv, aes(score)) +
   geom_histogram(bins = 15) +
   labs(title="Verteilung der Punktzahlen (nur KV)", x="Punktzahl", y="Anzahl Sites")
 ggsave("plots/punkte_verteilung_kv.png")
 
 ## Verteilung der Punktzahlen (nur OV)
+summary(sites_ov$score)
+
 ggplot(sites_ov, aes(score)) +
   geom_histogram(bins = 15) +
   labs(title="Verteilung der Punktzahlen (nur OV)", x="Punktzahl", y="Anzahl Sites")
@@ -66,6 +71,61 @@ ggplot(sites_top_cms, aes(generator)) +
        y="Anzahl Sites")
 ggsave("plots/feeds_nach_cms_b.png")
 
-# Feeds - Nur GCMS
-ggplot(sites_generator_gcms, aes(rating.FEEDS.value)) +
+
+## All criteria
+
+# Keep only the per-criterion rating columns of `sites`.
+sites_rating_criteria <- select(
+  sites,
+  rating.DNS_RESOLVABLE_IPV4.value,
+  rating.SITE_REACHABLE.value,
+  rating.CANONICAL_URL.value,
+  rating.WWW_OPTIONAL.value,
+  rating.HTTPS.value,
+  rating.RESPONSIVE.value,
+  rating.CONTACT_LINK.value,
+  rating.SOCIAL_MEDIA_LINKS.value,
+  rating.NO_NETWORK_ERRORS.value,
+  rating.NO_SCRIPT_ERRORS.value,
+  rating.NO_THIRD_PARTY_COOKIES.value,
+  rating.FAVICON.value,
+  rating.USE_SPECIFIC_FONTS.value,
+  rating.FEEDS.value
+)
+
+# Long format: `type_col` holds the source column name, `categories` its value.
+sites_rating_criteria_long <- tidyr::gather(
+  sites_rating_criteria,
+  key = type_col,
+  value = categories
+)
+
+# Frequency table per column. lapply() always returns a list, whereas
+# sapply() may simplify the result depending on the table lengths.
+lapply(sites_rating_criteria_long, table)
+
+# mtabulate() comes from qdapTools. Calling it through the namespace avoids
+# depending on library(qdapTools) having been run; the package must still be
+# installed.
+qdapTools::mtabulate(sites_rating_criteria_long)
+
+# Sum of each criterion's values relative to the number of sites, in percent.
+# nrow() gives a plain number; count(sites) would return a data frame.
+# NOTE(review): assumes these columns are logical or 0/1 -- confirm.
+sum(sites_rating_criteria$rating.FEEDS.value) / nrow(sites) * 100
+sum(sites_rating_criteria$rating.SOCIAL_MEDIA_LINKS.value) / nrow(sites) * 100
+sum(sites_rating_criteria$rating.RESPONSIVE.value) / nrow(sites) * 100
+sum(sites_rating_criteria$rating.NO_THIRD_PARTY_COOKIES.value) / nrow(sites) * 100
+
+
+# Bar chart of the value counts, one facet per criterion.
+ggplot(sites_rating_criteria_long, aes(x = categories, fill = categories)) +
+  geom_bar() +
+  coord_flip() +
+  facet_wrap(~ type_col, scales = "free_x")
+
+# NOTE(review): this line takes over the layers of the former
+# "Feeds - Nur GCMS" plot; everything after geom_bar() lies outside this hunk.
+# Confirm that the remaining layers still fit the new data.
+ggplot(sites_rating_criteria_long, aes(x = categories, fill = categories)) +
   geom_bar() +
diff --git a/common.R b/common.R
index 727399d..852a84c 100644
--- a/common.R
+++ b/common.R
@@ -2,6 +2,7 @@ library(jsonlite)
 library(dplyr)
 library(ggplot2)
 library(scales)
+library(tidyr)
 
 # Frische Daten gibt es per
 # curl https://green-spider.netzbegruenung.de/api/v1/spider-results/table/ > data/table.json
diff --git a/response_duration.R b/response_duration.R
index 0e3a1bc..4c7cc2f 100644
--- a/response_duration.R
+++ b/response_duration.R
@@ -20,7 +20,7 @@ ggplot(sites, aes(rating.HTTP_RESPONSE_DURATION.value)) +
   labs(title="Verteilung der Antwortzeiten", x="Antwortzeit in Millisekunden (Median in blau)", y="Häufigkeit")
 ggsave("plots/antwortzeiten_verteilung_kurve.png")
 
-## Verteilung der Punkte (logarithmische X-Achse)
+## Distribution of response times (logarithmic x-axis)
 ggplot(sites, aes(rating.HTTP_RESPONSE_DURATION.value)) +
   geom_histogram() +
   scale_x_log10() +