Skip to content

Commit e5047d9

Browse files
committed
fd-update
1 parent 843278d commit e5047d9

39 files changed

+820
-713
lines changed

Manifest.toml

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ version = "1.0.6+1"
4747

4848
[[CSV]]
4949
deps = ["CategoricalArrays", "DataFrames", "Dates", "FilePathsBase", "Mmap", "Parsers", "PooledArrays", "Tables", "Unicode", "WeakRefStrings"]
50-
git-tree-sha1 = "177e58c6965e6a0f544593b490c38a6c267bd8ce"
50+
git-tree-sha1 = "dd7861a31c6a2a9404186a80e5dec919a25b3abb"
5151
uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
52-
version = "0.5.24"
52+
version = "0.5.25"
5353

5454
[[CategoricalArrays]]
5555
deps = ["Compat", "DataAPI", "Future", "JSON", "Missings", "Printf", "Reexport", "Statistics", "Unicode"]
@@ -319,10 +319,10 @@ uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
319319
version = "1.3.7"
320320

321321
[[GR]]
322-
deps = ["Base64", "DelimitedFiles", "LinearAlgebra", "Printf", "Random", "Serialization", "Sockets", "Test"]
323-
git-tree-sha1 = "10633436bc2fc836347bda5073b7b6f06dcdc5e6"
322+
deps = ["Base64", "DelimitedFiles", "LinearAlgebra", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"]
323+
git-tree-sha1 = "41dd1395d4dc559f1c2cb558cba784ef37b561fe"
324324
uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
325-
version = "0.46.0"
325+
version = "0.47.0"
326326

327327
[[GaussianMixtures]]
328328
deps = ["Arpack", "Clustering", "Compat", "DelimitedFiles", "Distributed", "Distributions", "FileIO", "JLD2", "LinearAlgebra", "Logging", "PDMats", "Printf", "RDatasets", "Random", "ScikitLearnBase", "SpecialFunctions", "Statistics", "StatsBase", "Test"]
@@ -338,15 +338,15 @@ version = "0.11.2"
338338

339339
[[GeometryTypes]]
340340
deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "StaticArrays"]
341-
git-tree-sha1 = "62344139ee47be1fe862a946d41ead9314b1fb21"
341+
git-tree-sha1 = "480f30bd8e67240636e6d64e062c83745ee45b88"
342342
uuid = "4d00f742-c7ba-57c2-abde-4428a4b178cb"
343-
version = "0.7.8"
343+
version = "0.7.9"
344344

345345
[[HTTP]]
346346
deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets"]
347-
git-tree-sha1 = "003d2e773b019fccb93a447bb0517256fda1a71e"
347+
git-tree-sha1 = "8d9bdd55c9d0d6ddf08f8b5229f90b7f274b6777"
348348
uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
349-
version = "0.8.10"
349+
version = "0.8.12"
350350

351351
[[IRTools]]
352352
deps = ["InteractiveUtils", "MacroTools", "Test"]
@@ -435,10 +435,9 @@ uuid = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
435435
version = "0.4.0"
436436

437437
[[LaTeXStrings]]
438-
deps = ["Compat"]
439-
git-tree-sha1 = "7ab9b8788cfab2bdde22adf9004bda7ad9954b6c"
438+
git-tree-sha1 = "de44b395389b84fd681394d4e8d39ef14e3a2ea8"
440439
uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
441-
version = "1.0.3"
440+
version = "1.1.0"
442441

443442
[[LearnBase]]
444443
deps = ["LinearAlgebra", "SparseArrays", "StatsBase", "Test"]
@@ -549,10 +548,16 @@ deps = ["Base64"]
549548
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
550549

551550
[[MbedTLS]]
552-
deps = ["BinaryProvider", "Dates", "Libdl", "Random", "Sockets"]
553-
git-tree-sha1 = "85f5947b53c8cfd53ccfa3f4abae31faa22c2181"
551+
deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"]
552+
git-tree-sha1 = "709797fadd2d3b7e3f040a9f6e12726a6d6d2bf5"
554553
uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
555-
version = "0.7.0"
554+
version = "1.0.0"
555+
556+
[[MbedTLS_jll]]
557+
deps = ["Libdl", "Pkg"]
558+
git-tree-sha1 = "066a4467008745eed36dad973ceb66405785a621"
559+
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
560+
version = "2.16.0+1"
556561

557562
[[Measures]]
558563
git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f"
@@ -698,9 +703,9 @@ version = "0.6.3"
698703

699704
[[Plots]]
700705
deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryTypes", "JSON", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "Reexport", "Requires", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"]
701-
git-tree-sha1 = "5655c2e4d1296448afbec71a1b1c8716c83420cc"
706+
git-tree-sha1 = "305a6ceea7c6bf063fce224e4bcc0e0e6ce10ced"
702707
uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
703-
version = "0.29.2"
708+
version = "0.29.3"
704709

705710
[[PooledArrays]]
706711
deps = ["DataAPI"]
@@ -796,10 +801,16 @@ uuid = "ae029012-a4dd-5104-9daa-d747884805df"
796801
version = "1.0.1"
797802

798803
[[Rmath]]
799-
deps = ["BinaryProvider", "Libdl", "Random", "Statistics"]
800-
git-tree-sha1 = "2bbddcb984a1d08612d0c4abb5b4774883f6fa98"
804+
deps = ["Random", "Rmath_jll"]
805+
git-tree-sha1 = "86c5647b565873641538d8f812c04e4c9dbeb370"
801806
uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
802-
version = "0.6.0"
807+
version = "0.6.1"
808+
809+
[[Rmath_jll]]
810+
deps = ["Libdl", "Pkg"]
811+
git-tree-sha1 = "1660f8fefbf5ab9c67560513131d4e933012fc4b"
812+
uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f"
813+
version = "0.2.2+0"
803814

804815
[[SHA]]
805816
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
@@ -962,9 +973,9 @@ version = "0.6.2"
962973

963974
[[WoodburyMatrices]]
964975
deps = ["LinearAlgebra", "SparseArrays"]
965-
git-tree-sha1 = "bbb9f7fd6fbdd9582e77c0b698312c543de5eb71"
976+
git-tree-sha1 = "68f000f67654d07318d734b364a31233e465f49a"
966977
uuid = "efce3f68-66dc-5838-9240-27a6d6f5f9b6"
967-
version = "0.5.0"
978+
version = "0.5.1"
968979

969980
[[XGBoost]]
970981
deps = ["BinaryProvider", "Libdl", "Printf", "Random", "SparseArrays", "Statistics", "Test"]
Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
11
# This file was generated, do not modify it. # hide
2-
select!(df, Not([:yr_renovated, :sqft_basement, :zipcode]));
2+
plt.figure(figsize=(8,6))
3+
plt.hist(df.price, color = "blue", edgecolor = "white", bins=50,
4+
density=true, alpha=0.5)
5+
plt.xlabel("Price", fontsize=14)
6+
plt.ylabel("Frequency", fontsize=14)
7+
plt.savefig(joinpath(@OUTPUT, "hist_price.svg")) # hide
Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# This file was generated, do not modify it. # hide
22
plt.figure(figsize=(8,6))
3-
plt.hist(df.price, color = "blue", edgecolor = "white", bins=50,
4-
density=true)
3+
plt.hist(df.price[df.isrenovated .== true], color="blue", density=true,
4+
edgecolor="white", bins=50, label="renovated", alpha=0.5)
5+
plt.hist(df.price[df.isrenovated .== false], color="red", density=true,
6+
edgecolor="white", bins=50, label="unrenovated", alpha=0.5)
57
plt.xlabel("Price", fontsize=14)
68
plt.ylabel("Frequency", fontsize=14)
7-
plt.savefig(joinpath(@OUTPUT, "hist_price.svg")) # hide
9+
plt.legend(fontsize=12)
10+
plt.savefig(joinpath(@OUTPUT, "hist_price2.svg")) # hide
Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# This file was generated, do not modify it. # hide
2-
plt.figure(figsize=(8,6))
3-
plt.hist(df.price[df.isrenovated .== true], color="blue", density=true,
4-
edgecolor="white", bins=50, label="renovated", alpha=0.5)
5-
plt.hist(df.price[df.isrenovated .== false], color="red", density=true,
6-
edgecolor="white", bins=50, label="unrenovated", alpha=0.5)
7-
plt.xlabel("Price", fontsize=14)
8-
plt.ylabel("Frequency", fontsize=14)
9-
plt.legend(fontsize=12)
10-
plt.savefig(joinpath(@OUTPUT, "hist_price2.svg")) # hide
2+
@load DecisionTreeRegressor
3+
4+
y, X = unpack(df, ==(:price), col -> true)
5+
train, test = partition(eachindex(y), 0.7, shuffle=true, rng=5)
6+
7+
tree = machine(DecisionTreeRegressor(), X, y)
8+
9+
fit!(tree, rows=train);
Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,2 @@
11
# This file was generated, do not modify it. # hide
2-
@load DecisionTreeRegressor
3-
4-
y, X = unpack(df, ==(:price), col -> true)
5-
train, test = partition(eachindex(y), 0.7, shuffle=true, rng=5)
6-
7-
tree = machine(DecisionTreeRegressor(), X, y)
8-
9-
fit!(tree, rows=train);
2+
rms(y[test], predict(tree, rows=test))
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# This file was generated, do not modify it. # hide
2-
rms(y[test], predict(tree, rows=test))
2+
@load RandomForestRegressor pkg=ScikitLearn
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# This file was generated, do not modify it. # hide
2-
@load RandomForestRegressor pkg=ScikitLearn
2+
coerce!(X, Finite => Count);
Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
# This file was generated, do not modify it. # hide
2-
coerce!(X, Finite => Count);
2+
rf_mdl = RandomForestRegressor()
3+
rf = machine(rf_mdl, X, y)
4+
fit!(rf, rows=train)
5+
6+
rms(y[test], predict(rf, rows=test))
Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
# This file was generated, do not modify it. # hide
2-
rf_mdl = RandomForestRegressor()
3-
rf = machine(rf_mdl, X, y)
4-
fit!(rf, rows=train)
5-
6-
rms(y[test], predict(rf, rows=test))
2+
cv3 = CV(; nfolds=3)
3+
res = evaluate(rf_mdl, X, y, resampling=CV(shuffle=true),
4+
measure=rms, verbosity=0)
Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
# This file was generated, do not modify it. # hide
2-
cv3 = CV(; nfolds=3)
3-
res = evaluate(rf_mdl, X, y, resampling=CV(shuffle=true),
4-
measure=rms, verbosity=0)
2+
@load XGBoostRegressor
Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,8 @@
11
# This file was generated, do not modify it. # hide
2-
@load XGBoostRegressor
2+
coerce!(X, Count => Continuous)
3+
4+
xgb = XGBoostRegressor()
5+
xgbm = machine(xgb, X, y)
6+
fit!(xgbm, rows=train)
7+
8+
rms(y[test], predict(xgbm, rows=test))
Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
11
# This file was generated, do not modify it. # hide
2-
coerce!(X, Count => Continuous)
3-
4-
xgb = XGBoostRegressor()
5-
xgbm = machine(xgb, X, y)
6-
fit!(xgbm, rows=train)
7-
8-
rms(y[test], predict(xgbm, rows=test))
2+
r1 = range(xgb, :max_depth, lower=3, upper=10)
3+
r2 = range(xgb, :num_round, lower=1, upper=25);
Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
# This file was generated, do not modify it. # hide
2-
r1 = range(xgb, :max_depth, lower=3, upper=10)
3-
r2 = range(xgb, :num_round, lower=1, upper=25);
2+
tm = TunedModel(model=xgb, tuning=Grid(resolution=7),
3+
resampling=CV(rng=11), ranges=[r1,r2],
4+
measure=rms)
5+
mtm = machine(tm, X, y)
6+
fit!(mtm, rows=train)
7+
8+
rms(y[test], predict(mtm, rows=test))
Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,2 @@
11
# This file was generated, do not modify it. # hide
2-
tm = TunedModel(model=xgb, tuning=Grid(resolution=7),
3-
resampling=CV(rng=11), ranges=[r1,r2,r3,r4,r5,r6,r7],
4-
measure=rms)
5-
mtm = machine(tm, X, y)
6-
fit!(mtm, rows=train)
7-
8-
rms(y[test], predict(mtm, rows=test))
2+
PyPlot.close_figs() # hide
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This file was generated, do not modify it. # hide
22
coerce!(df, :zipcode => Multiclass)
3-
df.isrenovated = @. !ismissing(df.yr_renovated)
4-
df.has_basement = @. !ismissing(df.sqft_basement)
3+
df.isrenovated = @. !iszero(df.yr_renovated)
4+
df.has_basement = @. !iszero(df.sqft_basement)
55
schema(df)
Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,2 @@
11
# This file was generated, do not modify it. # hide
2-
for col in names(df)
3-
nmissings = sum(ismissing, df[!,col])
4-
if nmissings > 0
5-
println(rpad("$col has ", 25), nmissings, " missings")
6-
end
7-
end
2+
select!(df, Not([:yr_renovated, :sqft_basement, :zipcode]));
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
nothing
1+
179.70137523672759
Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,16 @@
1-
179.96112573828466
1+
RandomForestRegressor(
2+
n_estimators = 100,
3+
criterion = "mse",
4+
max_depth = nothing,
5+
min_samples_split = 2,
6+
min_samples_leaf = 1,
7+
min_weight_fraction_leaf = 0.0,
8+
max_features = "auto",
9+
max_leaf_nodes = nothing,
10+
min_impurity_decrease = 0.0,
11+
bootstrap = true,
12+
oob_score = false,
13+
n_jobs = nothing,
14+
random_state = nothing,
15+
verbose = 0,
16+
warm_start = false) @ 8…80
Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1 @@
1-
RandomForestRegressor(
2-
n_estimators = 100,
3-
criterion = "mse",
4-
max_depth = nothing,
5-
min_samples_split = 2,
6-
min_samples_leaf = 1,
7-
min_weight_fraction_leaf = 0.0,
8-
max_features = "auto",
9-
max_leaf_nodes = nothing,
10-
min_impurity_decrease = 0.0,
11-
bootstrap = true,
12-
oob_score = false,
13-
n_jobs = nothing,
14-
random_state = nothing,
15-
verbose = 0,
16-
warm_start = false) @ 4…24
1+
nothing
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
nothing
1+
137.22983495128528
Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
1-
137.65885615237326
1+
┌───────────┬───────────────┬────────────────────────────────────────────┐
2+
│ _.measure │ _.measurement │ _.per_fold │
3+
├───────────┼───────────────┼────────────────────────────────────────────┤
4+
│ rms │ 133.0 │ [134.0, 125.0, 126.0, 144.0, 132.0, 140.0] │
5+
└───────────┴───────────────┴────────────────────────────────────────────┘
6+
_.per_observation = [missing]
Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,36 @@
1-
┌───────────┬───────────────┬────────────────────────────────────────────┐
2-
│ _.measure │ _.measurement │ _.per_fold │
3-
├───────────┼───────────────┼────────────────────────────────────────────┤
4-
│ rms │ 136.0 │ [148.0, 138.0, 123.0, 127.0, 143.0, 134.0] │
5-
└───────────┴───────────────┴────────────────────────────────────────────┘
6-
_.per_observation = [missing]
1+
XGBoostRegressor(
2+
num_round = 1,
3+
booster = "gbtree",
4+
disable_default_eval_metric = 0,
5+
eta = 0.3,
6+
gamma = 0.0,
7+
max_depth = 6,
8+
min_child_weight = 1.0,
9+
max_delta_step = 0.0,
10+
subsample = 1.0,
11+
colsample_bytree = 1.0,
12+
colsample_bylevel = 1.0,
13+
lambda = 1.0,
14+
alpha = 0.0,
15+
tree_method = "auto",
16+
sketch_eps = 0.03,
17+
scale_pos_weight = 1.0,
18+
updater = "auto",
19+
refresh_leaf = 1,
20+
process_type = "default",
21+
grow_policy = "depthwise",
22+
max_leaves = 0,
23+
max_bin = 256,
24+
predictor = "cpu_predictor",
25+
sample_type = "uniform",
26+
normalize_type = "tree",
27+
rate_drop = 0.0,
28+
one_drop = 0,
29+
skip_drop = 0.0,
30+
feature_selector = "cyclic",
31+
top_k = 0,
32+
tweedie_variance_power = 1.5,
33+
objective = "reg:linear",
34+
base_score = 0.5,
35+
eval_metric = "rmse",
36+
seed = 0) @ 8…21

0 commit comments

Comments
 (0)