NA



Twitter data for everyone:
An introduction to rtweet

Michael W. Kearney
Assistant Professor
School of Journalism
Informatics Institute
University of Missouri

Get these slides

About me

Education and training

Research topics

 

 

Agenda for this talk

 

Twitter APIs

Twitter data

Package resources

## Vignette: authorizing API access
vignette("auth", package = "rtweet")

## Vignette: package overview
vignette("intro", package = "rtweet")

## Vignette: streaming API
vignette("stream", package = "rtweet")

## Index of every function and help topic in the package
help(package = "rtweet")

 

Getting started with rtweet

Installing and loading rtweet

## download the released version from CRAN
install.packages("rtweet")

## or download dev version from Github
## devtools is only needed for the Github install. requireNamespace()
## checks whether that one package can be loaded, instead of building the
## full installed.packages() matrix and searching every cell of it.
if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
}
devtools::install_github("mkearney/rtweet")

## load rtweet
library(rtweet)

Create Twitter App

Keys and access tokens

Copy keys

Create token

## name of the Twitter app
## (use whatever name you assigned to your created app)
appname <- "rtweet_token"

## api key copied from the app's keys page
## (example below is not a real key)
key <- "XYznzPFOFZR2a39FwWKN1Jp41"

## api secret copied from the app's keys page
## (example below is not a real key)
secret <- "CtkGEWmSevZqJuKl6HHrBxbCybxI1xGLqrD5ynPd9jG0SoHZbD"

## create the token from the app name, key, and secret, and store it
## in a variable named "twitter_token"
twitter_token <- create_token(
    app = appname,
    consumer_key = key,
    consumer_secret = secret)

Save token

## path of home directory ("~" expanded to a full path)
home_directory <- path.expand("~")

## combine with name for token: <home directory>/twitter_token.rds
file_name <- file.path(home_directory, "twitter_token.rds")

## save token ("twitter_token") to home directory as an .rds file
saveRDS(twitter_token, file = file_name)

Set environment variable

## On my mac, the .Renviron text looks like this:
##     TWITTER_PAT=/Users/mwk/twitter_token.rds

## assuming you followed the procedures to create "file_name" (and
##     "home_directory") in the previous code chunk, the code below
##     appends your environment variable to the .Renviron file in
##     your home directory.
## The entry is written with a trailing newline so the file keeps one
##     NAME=value pair per line; without it, anything appended to
##     .Renviron later would be glued onto the end of the token path.
cat(paste0("TWITTER_PAT=", file_name, "\n"),
    file = file.path(home_directory, ".Renviron"),
    append = TRUE)

 

rtweet functions

Get followers

## get most recent 75k followers
## (the printed result below is a tibble with a single user_id column)
flw1 <- get_followers("realdonaldtrump", n = 75000)
flw1
## # A tibble: 75,000 x 1
##               user_id
##                 <chr>
##  1 861962717929680896
##  2 873573516393803776
##  3 873572121561124864
##  4 873573581451657216
##  5 873562392952676352
##  6 873573260625158144
##  7 873573740864561152
##  8 873573140814802944
##  9 871801684510507008
## 10 873573061219504128

Get more followers

## set page value using next cursor extractor
## (taken from the first batch of followers, flw1)
page <- next_cursor(flw1)

page
## [1] "1569708984537163520"

## use page value to continue where left off:
## same account and n as the first request, plus the saved cursor
flw2 <- get_followers(
    "realdonaldtrump", n = 75000, page = page)

Manage rate limits

## show the first three rows of the rate-limit table for the token(s)
## returned by get_tokens(): query, limit, remaining calls, time to reset
rate_limit(get_tokens())[1:3, ]
##                    query limit remaining       reset
## 1             lists/list    15        15 14.975 mins
## 2      lists/memberships    75        75 14.975 mins
## 3 lists/subscribers/show    15        15 14.975 mins

Get friends

## get list of accounts followed BY a user
## (the printed result below is a tibble of user IDs)
fds <- get_friends("kearneymw")
fds
## # A tibble: 962 x 1
##              user_id
##                <chr>
## 1 843078227488915456
## 2 835148691593887744
## 3 795751822585688064
## 4 707260960491376640
## 5         1349865410
## 6 836165272780345344
## 7             428333
## 8          227812077
## 9 859179631664021504
## 10          337950234
## # ... with 952 more rows

## extract the cursor from the friends data
## NOTE(review): "0" presumably means there are no further pages -- confirm
next_cursor(fds)
## [1] "0"

Lookup users

## get users data for my friends, looked up by the user IDs in fds
usrs <- lookup_users(fds$user_id)

usrs
## # A tibble: 929 x 36
##               user_id               name     screen_name            location
##                 <chr>              <chr>           <chr>               <chr>
##  1 795751822585688064       Scott Harris       sm_kepler        New York, NY
##  2 707260960491376640   Mohammed Zakaria zakariaCatapult         Chicago, IL
##  3         1349865410     Marnie Ritchie   marnieritchie          Austin, TX
##  4 836165272780345344       Stuart Allen    steadystuart                <NA>
##  5             428333  CNN Breaking News          cnnbrk          Everywhere
##  6          227812077             rubaie         brubaie               kcmo
##  7          337950234            tewsie1         tewsie1         Waterloo IA
##  8           14135384     Seth Ellsworth      pinto4life          Austin, TX
##  9           57371972 amanda luppes ford         aluppes              Austin
## 10          224161598     Georgina Cosma         gcosma1 Nottingham, England
## # ... with 919 more rows, and 32 more variables: description <chr>,
## #   protected <lgl>, followers_count <int>, friends_count <int>,
## #   listed_count <int>, created_at <dttm>, favourites_count <int>,
## #   utc_offset <int>, time_zone <chr>, geo_enabled <lgl>, verified <lgl>,
## #   statuses_count <int>, lang <chr>, contributors_enabled <lgl> ...

Search tweets

## search for tweets matching "rstats", requesting 300 results
## (include_rts = FALSE presumably leaves retweets out -- confirm)
## NOTE(review): check what exact = TRUE does in the installed rtweet version
rt <- search_tweets(
    "rstats", n = 300, include_rts = FALSE, exact = TRUE)
## Searching for tweets...
## Finished collecting tweets!

rt
## # A tibble: 300 x 35
##        screen_name            user_id          created_at          status_id
##              <chr>              <chr>              <dttm>              <chr>
##  1            phnk           14460093 2017-06-10 16:16:14 873574535358029824
##  2      jletteboer           92526801 2017-06-10 16:12:47 873573666776391680
##  3 BigDataInsights           89191817 2017-06-10 16:12:42 873573649416163328
##  4 CRANberriesFeed          233585808 2017-06-10 16:02:18 873571030740201473
##  5   DeborahTannon 821976676842242050 2017-06-10 16:00:52 873570670097182721
##  6 AnalyticsVidhya         2311645130 2017-06-10 16:00:18 873570526085697537
##  7 EdwardEgrosFox4           15553592 2017-06-10 15:59:12 873570249186136065
##  8       Rbloggers          144592995 2017-06-10 15:57:17 873569768019767296
##  9     geoappsmith         3306901413 2017-06-10 15:56:53 873569666547032064
## 10      juliasilge           13074042 2017-06-10 15:56:15 873569506215346176
## # ... with 290 more rows, and 31 more variables: text <chr>,
## #   retweet_count <int>, favorite_count <int>, is_quote_status <lgl>,
## #   quote_status_id <chr>, is_retweet <lgl>, retweet_status_id <chr> ...

Plain tweets

## look at the raw text of the first five tweets
rt$text[1:5]
## [1] "Dear #rstats – If R is executable from Microsoft Server, does that mean that it's now technically feasible to run a DDoS from R?"
## [2] "Networkx #Datascience #Bigdata Tools is out! https://t.co/gASnKtH0bU Tweets via @CodeDocta @ijstokes @EwaldSchmidt #rstats"
## [3] "#RStats —Engaging the tidyverse Clean Slate Protocol : https://t.co/mrjCI8YYdH"

## strip formatting, URLs, mentions, hashtags, etc.
## (in the sample output below the text is also lower-cased and
##  punctuation is removed)
plain_tweets(rt$text[1:5], include_hashtags = FALSE)
## [1] "dear if r is executable from microsoft server does that mean that its now technically feasible to run a ddos from r"
## [2] "networkx tools is out tweets via"
## [3] "engaging the tidyverse clean slate protocol"

Plot

## time series plot of the tweets in rt, using by = "hours" as the interval
ts_plot(rt, by = "hours")

Users data

## extract users data from rt (tweets data)
## (the output below has 300 rows, the same number as rt)
users_data(rt)
## # A tibble: 300 x 36
##               user_id                 name     screen_name
##                 <chr>                <chr>           <chr>
##  1           14460093                  Fr.            phnk
##  2           92526801       John Letteboer      jletteboer
##  3           89191817          Marty Wells BigDataInsights
##  4          233585808 CRAN Package Updates CRANberriesFeed
##  5 821976676842242050       Deborah Tannon   DeborahTannon
##  6         2311645130     Analytics Vidhya AnalyticsVidhya
##  7           15553592         Edward Egros EdwardEgrosFox4
##  8          144592995           R-bloggers       Rbloggers
##  9         3306901413         Geo.Appsmith     geoappsmith
## 10           13074042          Julia Silge      juliasilge
## # ... with 290 more rows, and 33 more variables: location <chr> ...

Search by geocode

## load state data
## (state.center holds each state's approximate center:
##  x = longitude, y = latitude)
data("state")

## initialize one list slot per state, sized from the data itself
## rather than a hard-coded 50
state_tweets <- vector("list", length(state.center$x))

## loop through the states, searching within 50 miles of each
## state's center; the geocode is written as "latitude,longitude,radius"
for (i in seq_along(state_tweets)) {
    state_tweets[[i]] <- search_tweets(
        paste0("comey OR trump geocode:",
               state.center$y[i], ",",
               state.center$x[i], ",",
               "50mi")
    )
}

 

statecomey

Stream tweets

## comey stream
comes <- stream_tweets("comey,trump", timeout = 60 * 3)

## get plain text
twt <- plain_tweets(comes$text, include_hashtags = TRUE)

## sentiment analysis
sa <- syuzhet::get_nrc_sentiment(twt)
comes <- tibble::as_tibble(cbind(comes, sa))

Sentiment analysis


comey

 

User networks

 

 

rtweet not possible without:

 

That's it \o/

Thanks!