getwd() # print current working directory
[1] "/Users/xycao/Desktop/EAS538/EAS538-assignments/lab1"
# Load the rent price table; the path is relative to the working directory printed above.
rent.US <- read.csv("../data/lab1/price2.csv") # read data
Set your working directory and load any required libraries in this code chunk.
getwd() # print current working directory
[1] "/Users/xycao/Desktop/EAS538/EAS538-assignments/lab1"
# Load the rent price table; the path is relative to the working directory printed above.
rent.US <- read.csv("../data/lab1/price2.csv") # read data
Put any practice code in this code chunk.
class(rent.US) # type of the variable
[1] "data.frame"
dim(rent.US) # dimension (shape) of the data frame
[1] 12918 8
head(rent.US) # first few lines of the df
City Metro County State Population.Rank Jan.16 May.16
1 New York New York Queens NY 1 2335 2339
2 Los Angeles Los Angeles Los Angeles CA 2 2596 2662
3 Chicago Chicago Cook IL 3 1668 1686
4 Houston Houston Harris TX 4 1436 1446
5 Philadelphia Philadelphia Philadelphia PA 5 1196 1211
6 Phoenix Phoenix Maricopa AZ 6 1198 1236
Sep.16
1 2324
2 2723
3 1675
4 1438
5 1220
6 1238
str(rent.US) # structure of the df (variable type of each attribute)
'data.frame': 12918 obs. of 8 variables:
$ City : chr "New York" "Los Angeles" "Chicago" "Houston" ...
$ Metro : chr "New York" "Los Angeles" "Chicago" "Houston" ...
$ County : chr "Queens" "Los Angeles" "Cook" "Harris" ...
$ State : chr "NY" "CA" "IL" "TX" ...
$ Population.Rank: int 1 2 3 4 5 6 7 8 9 10 ...
$ Jan.16 : int 2335 2596 1668 1436 1196 1198 1204 1230 2360 1305 ...
$ May.16 : int 2339 2662 1686 1446 1211 1236 1225 1245 2428 1347 ...
$ Sep.16 : int 2324 2723 1675 1438 1220 1238 1228 1234 2442 1370 ...
Pick any 2 columns one at a time from the rent.US data. Do this using both the $ and the [,] notation.
# use $
# Extract two single columns by name; each result is a plain vector.
res1 <- rent.US$City
res2 <- rent.US$Jan.16
head(res1)
[1] "New York" "Los Angeles" "Chicago" "Houston" "Philadelphia"
[6] "Phoenix"
head(res2) # first six values of the second selected column (Jan.16)
[1] 2335 2596 1668 1436 1196 1198
# use [,]
# Extract the same two columns by position (1 = City, 6 = Jan.16).
res1 <- rent.US[, 1]
res2 <- rent.US[, 6]
head(res1)
[1] "New York" "Los Angeles" "Chicago" "Houston" "Philadelphia"
[6] "Phoenix"
head(res2) # first six values of the second selected column (column 6)
[1] 2335 2596 1668 1436 1196 1198
Pick any 2 rows one at a time from the rent.US data.
# Pull out the first and second rows; the empty column index keeps every column.
res1 <- rent.US[1, ]
res2 <- rent.US[2, ]
res1
City Metro County State Population.Rank Jan.16 May.16 Sep.16
1 New York New York Queens NY 1 2335 2339 2324
res2 # print the second selected row
City Metro County State Population.Rank Jan.16 May.16
2 Los Angeles Los Angeles Los Angeles CA 2 2596 2662
Sep.16
2 2723
Select any 3 columns all at once using the [,] notation.
# Select columns 1, 4 and 6 (City, State, Jan.16) in a single call.
res <- rent.US[, c(1, 4, 6)]
head(res)
City State Jan.16
1 New York NY 2335
2 Los Angeles CA 2596
3 Chicago IL 1668
4 Houston TX 1436
5 Philadelphia PA 1196
6 Phoenix AZ 1198
Select any 3 rows all at once using the [,] notation.
# Select the first three rows in a single call, keeping every column.
res <- rent.US[c(1, 2, 3), ]
res
City Metro County State Population.Rank Jan.16 May.16
1 New York New York Queens NY 1 2335 2339
2 Los Angeles Los Angeles Los Angeles CA 2 2596 2662
3 Chicago Chicago Cook IL 3 1668 1686
Sep.16
1 2324
2 2723
3 1675
What happens if you try to select an individual row and an individual column at the same time? Try doing this using the [,] notation.
# This gives the single element at the given row and column
rent.US[1, 1]
[1] "New York"
Now try selecting 3 rows and 2 columns at once using the [,] notation.
# Rows 1-3 of columns 1 and 6 (City, Jan.16); the result is still a data frame.
rent.US[c(1, 2, 3), c(1, 6)]
City Jan.16
1 New York 2335
2 Los Angeles 2596
3 Chicago 1668
Sort your data using the order function from least to greatest rent from May 2016. Save this sorted data frame as a new object called rent_May_ord.
# Row indices that sort May.16 from least to greatest (decreasing is FALSE by default)
i.order <- order(rent.US$May.16)
rent_May_ord <- rent.US[i.order, ] # reorder the rows to get the sorted df
head(rent_May_ord)
City Metro County State Population.Rank Jan.16
4083 Beecher Flint Genesee MI 4084 516
7981 West End-Cobb Town Anniston Calhoun AL 8194 579
224 Flint Flint Genesee MI 224 545
614 Youngstown Youngstown Mahoning OH 614 550
12048 Leavittsburg Youngstown Trumbull OH 12261 547
1456 Warren Youngstown Trumbull OH 1457 541
May.16 Sep.16
4083 548 532
7981 552 552
224 558 546
614 572 547
12048 588 633
1456 592 614
Now sort your data using the order function from greatest to least Population.Rank and save this as a new object called rent_pop_rev.
# Row indices that sort Population.Rank from greatest to least.
i.order <- order(rent.US$Population.Rank, decreasing = TRUE)
rent_pop_rev <- rent.US[i.order, ]
head(rent_pop_rev)
City Metro County State Population.Rank Jan.16
12918 Lebanon Borough New York Hunterdon NJ 13131 1959
12917 Angels Calaveras CA 13130 1469
12916 Urbana Corning Steuben NY 13129 1574
12915 Town of Wrightstown Green Bay Brown WI 13128 1028
12914 Highland Township Gettysburg Adams PA 13127 1433
12913 Plymptonville DuBois Clearfield PA 13126 939
May.16 Sep.16
12918 1982 1929
12917 1528 1529
12916 1611 1619
12915 1133 1047
12914 1440 1395
12913 990 980
Use subset() and order() to identify the top 3 most expensive cities in Michigan (using September 2016 rent prices).
# Keep only the Michigan rows, then rank them by September 2016 rent, highest first.
rent.MI <- subset(rent.US, State == 'MI')
i.order <- order(rent.MI$Sep.16, decreasing = TRUE)
head(rent.MI[i.order, ], 3) # the three most expensive cities
City Metro County State Population.Rank Jan.16 May.16
7884 Franklin Detroit Oakland MI 8097 3725 3758
7065 Grosse Pointe Shores Detroit Wayne MI 7278 3474 3691
9166 Orchard Lake Detroit Oakland MI 9379 2885 2973
Sep.16
7884 3760
7065 3623
9166 2984
Subset the data to include cities in CA, OR and WA that have rent less than 1500 on January 2016.
# Cities in CA, OR or WA whose January 2016 rent is below 1500 (both conditions must hold).
res <- subset(rent.US, State %in% c('CA', 'OR', 'WA') & Jan.16 < 1500)
head(res)
City Metro County State Population.Rank Jan.16
34 Fresno Fresno Fresno CA 34 1197
35 Sacramento Sacramento Sacramento CA 35 1400
64 Bakersfield Bakersfield Kern CA 64 1349
71 Stockton Stockton San Joaquin CA 71 1274
77 Modesto Modesto Stanislaus CA 77 1242
90 San Bernardino Riverside San Bernardino CA 90 1330
May.16 Sep.16
34 1214 1221
35 1459 1489
64 1342 1330
71 1322 1334
77 1293 1326
90 1369 1391
Subset the data to include cities in PA or cities that have rent less than 1000 on September 2016.
# Cities that are in PA, or whose September 2016 rent is below 1000 (either condition suffices).
res <- subset(rent.US, State == 'PA' | Sep.16 < 1000)
head(res)
City Metro County State Population.Rank Jan.16 May.16
5 Philadelphia Philadelphia Philadelphia PA 5 1196 1211
17 Detroit Detroit Wayne MI 17 750 752
19 Memphis Memphis Shelby TN 19 848 849
44 Tulsa Tulsa Tulsa OK 44 968 983
45 Cleveland Cleveland Cuyahoga OH 45 833 842
52 Wichita Wichita Sedgwick KS 52 924 927
Sep.16
5 1220
17 746
19 846
44 983
45 834
52 897
Create a new vector that has cat,dog,cow,bird and name it animals. Next create a new vector that has meow,woof,moo,chirp and name it sounds. Join these two vectors by stacking them on top of one another. Now join them by placing them next to each other as columns.
# Two character vectors of equal length.
animals <- c('cat', 'dog', 'cow', 'bird')
sounds <- c('meow', 'woof', 'moo', 'chirp')
# Stack the vectors on top of one another: each vector becomes one row.
rbind(animals, sounds)
[,1] [,2] [,3] [,4]
animals "cat" "dog" "cow" "bird"
sounds "meow" "woof" "moo" "chirp"
cbind(animals, sounds) # place the vectors next to each other as columns
animals sounds
[1,] "cat" "meow"
[2,] "dog" "woof"
[3,] "cow" "moo"
[4,] "bird" "chirp"
Create a third vector that has the numbers 1,2,3,4 and name this numbers. Join numbers with animals. Show that the numbers vector stays numeric and the animals vector stays as a factor.
numbers <- c(1, 2, 3, 4)
class(animals) # animals is initially a character vector, not a factor
[1] "character"
animals <- as.factor(animals) # convert the character vector to a factor
class(animals)
[1] "factor"
# Join as a data frame so each column keeps its own type
# (cbind would coerce both vectors to one common type).
res <- data.frame(numbers, animals)
res
numbers animals
1 1 cat
2 2 dog
3 3 cow
4 4 bird
str(res) # show each column's type: numbers stays num, animals stays Factor
'data.frame': 4 obs. of 2 variables:
$ numbers: num 1 2 3 4
$ animals: Factor w/ 4 levels "bird","cat","cow",..: 2 4 3 1