Lab 1: Basic R Commands

Author
Published

January 27, 2024

Lab Setup

Set your working directory and load any required libraries in this code chunk.

getwd()  # print the current working directory; relative file paths are resolved against it
[1] "/Users/xycao/Desktop/EAS538/EAS538-assignments/lab1"
# Load the rental price table; the path is relative to the working directory.
rent_csv <- "../data/lab1/price2.csv"
rent.US <- read.csv(rent_csv)

Practice Work

Put any practice code in this code chunk.

class(rent.US)  # class of the object returned by read.csv()
[1] "data.frame"
dim(rent.US)  # dimensions: number of rows, then number of columns
[1] 12918     8
head(rent.US)  # first six rows (the default for head())
          City        Metro       County State Population.Rank Jan.16 May.16
1     New York     New York       Queens    NY               1   2335   2339
2  Los Angeles  Los Angeles  Los Angeles    CA               2   2596   2662
3      Chicago      Chicago         Cook    IL               3   1668   1686
4      Houston      Houston       Harris    TX               4   1436   1446
5 Philadelphia Philadelphia Philadelphia    PA               5   1196   1211
6      Phoenix      Phoenix     Maricopa    AZ               6   1198   1236
  Sep.16
1   2324
2   2723
3   1675
4   1438
5   1220
6   1238
str(rent.US)  # compact structure: type and leading values of every column
'data.frame':   12918 obs. of  8 variables:
 $ City           : chr  "New York" "Los Angeles" "Chicago" "Houston" ...
 $ Metro          : chr  "New York" "Los Angeles" "Chicago" "Houston" ...
 $ County         : chr  "Queens" "Los Angeles" "Cook" "Harris" ...
 $ State          : chr  "NY" "CA" "IL" "TX" ...
 $ Population.Rank: int  1 2 3 4 5 6 7 8 9 10 ...
 $ Jan.16         : int  2335 2596 1668 1436 1196 1198 1204 1230 2360 1305 ...
 $ May.16         : int  2339 2662 1686 1446 1211 1236 1225 1245 2428 1347 ...
 $ Sep.16         : int  2324 2723 1675 1438 1220 1238 1228 1234 2442 1370 ...

Assignment

Exercise 1

Question 1.1

Pick any 2 columns one at a time from the rent.US data. Do this using both the $ and the [,] notation.

# `$` extracts one column by name and returns it as a plain vector.
res1 <- rent.US$City
res2 <- rent.US$Jan.16
head(res1)
[1] "New York"     "Los Angeles"  "Chicago"      "Houston"      "Philadelphia"
[6] "Phoenix"     
head(res2)  # preview the second selected column
[1] 2335 2596 1668 1436 1196 1198
# With [row, column] indexing, leaving the row slot empty keeps every row;
# a single column index returns that column as a vector.
res1 <- rent.US[, 1]
res2 <- rent.US[, 6]
head(res1)
[1] "New York"     "Los Angeles"  "Chicago"      "Houston"      "Philadelphia"
[6] "Phoenix"     
head(res2)  # preview the second selected column
[1] 2335 2596 1668 1436 1196 1198

Question 1.2

Pick any 2 rows one at a time from the rent.US data.

# Leaving the column slot empty keeps every column, so each result is a
# one-row data frame.
res1 <- rent.US[1, ]
res2 <- rent.US[2, ]
res1
      City    Metro County State Population.Rank Jan.16 May.16 Sep.16
1 New York New York Queens    NY               1   2335   2339   2324
res2  # auto-print the second selected row
         City       Metro      County State Population.Rank Jan.16 May.16
2 Los Angeles Los Angeles Los Angeles    CA               2   2596   2662
  Sep.16
2   2723

Question 1.3

Select any 3 columns all at once using the [,] notation.

# Column positions to keep: City, State and Jan.16.
keep_cols <- c(1, 4, 6)
res <- rent.US[, keep_cols]
head(res)
          City State Jan.16
1     New York    NY   2335
2  Los Angeles    CA   2596
3      Chicago    IL   1668
4      Houston    TX   1436
5 Philadelphia    PA   1196
6      Phoenix    AZ   1198

Question 1.4

Select any 3 rows all at once using the [,] notation.

# Rows 1 through 3, all columns.
res <- rent.US[1:3, ]
res
         City       Metro      County State Population.Rank Jan.16 May.16
1    New York    New York      Queens    NY               1   2335   2339
2 Los Angeles Los Angeles Los Angeles    CA               2   2596   2662
3     Chicago     Chicago        Cook    IL               3   1668   1686
  Sep.16
1   2324
2   2723
3   1675

Question 1.5

What happens if you try to select an individual row and an individual column at the same time? Try doing this using the [,] notation.

# Supplying both a row and a column index returns the single value stored
# at that position rather than a data frame.
rent.US[1, 1]
[1] "New York"

Question 1.6

Now try selecting 3 rows and 2 columns at once using the [,] notation.

# Rows 1-3 of columns 1 and 6 (City and Jan.16).
rent.US[1:3, c(1, 6)]
         City Jan.16
1    New York   2335
2 Los Angeles   2596
3     Chicago   1668

Exercise 2

Question 2.1

Sort your data using the order function from least to greatest rent from May 2016. Save this sorted data frame as a new object called rent_May_ord.

# order() returns the row permutation that sorts May.16 from least to
# greatest (its `decreasing` argument defaults to FALSE); indexing the rows
# by that permutation gives the sorted data frame.
rent_May_ord <- rent.US[order(rent.US$May.16), ]
head(rent_May_ord)
                    City      Metro   County State Population.Rank Jan.16
4083             Beecher      Flint  Genesee    MI            4084    516
7981  West End-Cobb Town   Anniston  Calhoun    AL            8194    579
224                Flint      Flint  Genesee    MI             224    545
614           Youngstown Youngstown Mahoning    OH             614    550
12048       Leavittsburg Youngstown Trumbull    OH           12261    547
1456              Warren Youngstown Trumbull    OH            1457    541
      May.16 Sep.16
4083     548    532
7981     552    552
224      558    546
614      572    547
12048    588    633
1456     592    614

Question 2.2

Now sort your data using the order function from greatest to least Population.Rank and save this as a new object called rent_pop_rev.

# Row permutation that sorts Population.Rank from greatest to least.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned,
# whereas TRUE is a reserved word.
i.order <- order(rent.US$Population.Rank, decreasing = TRUE)
rent_pop_rev <- rent.US[i.order, ]
head(rent_pop_rev)
                     City      Metro     County State Population.Rank Jan.16
12918     Lebanon Borough   New York  Hunterdon    NJ           13131   1959
12917              Angels             Calaveras    CA           13130   1469
12916              Urbana    Corning    Steuben    NY           13129   1574
12915 Town of Wrightstown  Green Bay      Brown    WI           13128   1028
12914   Highland Township Gettysburg      Adams    PA           13127   1433
12913       Plymptonville     DuBois Clearfield    PA           13126    939
      May.16 Sep.16
12918   1982   1929
12917   1528   1529
12916   1611   1619
12915   1133   1047
12914   1440   1395
12913    990    980

Exercise 3

Question 3.1

Use subset() and order() to identify the top 3 most expensive cities in Michigan (using September 2016 rent prices).

# Keep only the Michigan rows, then rank them by September 2016 rent from
# highest to lowest and show the top three.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned,
# whereas TRUE is a reserved word.
rent.MI <- subset(rent.US, State == "MI")
i.order <- order(rent.MI$Sep.16, decreasing = TRUE)
head(rent.MI[i.order, ], 3)
                     City   Metro  County State Population.Rank Jan.16 May.16
7884             Franklin Detroit Oakland    MI            8097   3725   3758
7065 Grosse Pointe Shores Detroit   Wayne    MI            7278   3474   3691
9166         Orchard Lake Detroit Oakland    MI            9379   2885   2973
     Sep.16
7884   3760
7065   3623
9166   2984

Question 3.2

Subset the data to include cities in CA, OR and WA that have rent less than 1500 on January 2016.

# Rows must satisfy both conditions: the state is one of the three listed
# and the January 2016 rent is below 1500.
west_coast <- c("CA", "OR", "WA")
res <- subset(rent.US, State %in% west_coast & Jan.16 < 1500)
head(res)
             City       Metro         County State Population.Rank Jan.16
34         Fresno      Fresno         Fresno    CA              34   1197
35     Sacramento  Sacramento     Sacramento    CA              35   1400
64    Bakersfield Bakersfield           Kern    CA              64   1349
71       Stockton    Stockton    San Joaquin    CA              71   1274
77        Modesto     Modesto     Stanislaus    CA              77   1242
90 San Bernardino   Riverside San Bernardino    CA              90   1330
   May.16 Sep.16
34   1214   1221
35   1459   1489
64   1342   1330
71   1322   1334
77   1293   1326
90   1369   1391

Question 3.3

Subset the data to include cities in PA or cities that have rent less than 1000 on September 2016.

# Rows qualify if either condition holds: the state is PA, or the
# September 2016 rent is below 1000.
res <- subset(rent.US, State == "PA" | Sep.16 < 1000)
head(res)
           City        Metro       County State Population.Rank Jan.16 May.16
5  Philadelphia Philadelphia Philadelphia    PA               5   1196   1211
17      Detroit      Detroit        Wayne    MI              17    750    752
19      Memphis      Memphis       Shelby    TN              19    848    849
44        Tulsa        Tulsa        Tulsa    OK              44    968    983
45    Cleveland    Cleveland     Cuyahoga    OH              45    833    842
52      Wichita      Wichita     Sedgwick    KS              52    924    927
   Sep.16
5    1220
17    746
19    846
44    983
45    834
52    897

Exercise 4

Question 4.1

Create a new vector that has cat,dog,cow,bird and name it animals. Next create a new vector that has meow,woof,moo,chirp and name it sounds. Join these two vectors by stacking them on top of one another. Now join them by placing them next to each other as columns.

# Two parallel character vectors: each animal and the sound it makes.
animals <- c("cat", "dog", "cow", "bird")
sounds <- c("meow", "woof", "moo", "chirp")
# Stack the vectors as rows: a 2 x 4 character matrix, one row per vector.
rbind(animals, sounds)
        [,1]   [,2]   [,3]  [,4]   
animals "cat"  "dog"  "cow" "bird" 
sounds  "meow" "woof" "moo" "chirp"
cbind(animals, sounds)  # bind as columns: one row per animal/sound pair
     animals sounds 
[1,] "cat"   "meow" 
[2,] "dog"   "woof" 
[3,] "cow"   "moo"  
[4,] "bird"  "chirp"

Question 4.2

Create a third vector that has the numbers 1,2,3,4 and name this numbers. Join numbers with animals. Show that the numbers vector stays numeric and the animals vector stays as a factor.

# c() with plain numeric literals creates a double ("numeric") vector.
numbers <- c(1, 2, 3, 4)
# At this point `animals` is still a character vector, not a factor.
class(animals)
[1] "character"
animals <- as.factor(animals)  # convert the character vector to a factor
class(animals)  # confirm the conversion
[1] "factor"
# A data frame keeps each column's own type, unlike cbind()/rbind() on
# vectors, which produce a matrix with a single type.
res <- data.frame(numbers = numbers, animals = animals)
res
  numbers animals
1       1     cat
2       2     dog
3       3     cow
4       4    bird
str(res)  # check each column's type after combining
'data.frame':   4 obs. of  2 variables:
 $ numbers: num  1 2 3 4
 $ animals: Factor w/ 4 levels "bird","cat","cow",..: 2 4 3 1