From bdb365db21efb1f5e03f4e438e21fd1481313779 Mon Sep 17 00:00:00 2001
From: Sean Griffin
Date: Tue, 23 Oct 2018 14:04:35 -0600
Subject: [PATCH 1/3] Rate limit the crate publish endpoint
This adds a restriction to the number of crates that can be published
from a single IP address in a single time period. This is done per IP
instead of per-user, since that's what we can do with nginx alone. We
may want to do additional rate limiting per token to force spammers to
register additional accounts, and not just change their IP.
This will use what's called the "leaky bucket" strategy for rate
limiting. Basically every user has a bucket of tokens they use to
publish crates. They start with 10 tokens available. Every time they hit
this endpoint, they use one of the tokens. We give them a new token each
minute.
What this means is that you can upload 1 crate per minute, but we allow
you to upload up to 10 in a short period of time before we start
enforcing the rate limit.
When someone does hit the rate limit, they will receive a 429 response
(which is TOO MANY REQUESTS but since it's a newer status code I'm not
sure if nginx actually knows that's the text description). We could also
allow it to instead just slow down the requests, refusing to process
them until a token is available (queueing a max of 10 requests).
This reserves 10 megabyte of memory for the IP table, which means we can
hold about 80000 IPs. When the table is full, it will attempt to drop
the oldest record, and if that doesn't give enough space, it'll give a
503. Keep in mind this is all in memory, not shared between our servers.
This means that it is possible (but not guaranteed) that someone can
upload 20 crates, and then send 2 requests per minute.
On Heroku and any system that involves proxies, the remote_addr of the
request becomes useless, as it will be the proxy forwarding your request
rather than the client itself. Most proxies will set or append an
`X-Forwarded-For` header, putting whatever remote_addr it received at
the end of the the header. So if there are 3 proxies, the header will
look like
real_ip, proxy1, proxy2
However, if we're not careful this makes us vulnerable to IP spoofing. A
lot of implementations just look at the first value in the header. All
the proxies just append the header though (they don't know if they're
getting traffic from the origin or a proxy), so you end up with
spoofed_ip, real_ip, proxy1, proxy2
nginx, Rails, and many other implementations avoid this by requiring an
allowlist of trusted IPs. With this configuration, the realip only kicks
in if remote_addr is a trusted proxy, and then it will replace it with
the last non-trusted IP from the header.
---
.buildpacks | 2 +-
config/nginx.conf.erb | 28 ++++++++++++++++++++--------
src/middleware/block_ips.rs | 4 ++--
src/middleware/log_request.rs | 2 +-
4 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/.buildpacks b/.buildpacks
index 476f00b3e80..3a458c9402e 100644
--- a/.buildpacks
+++ b/.buildpacks
@@ -1,5 +1,5 @@
https://github.com/Starkast/heroku-buildpack-cmake#a243c67
https://github.com/emk/heroku-buildpack-rust#578d630
https://codon-buildpacks.s3.amazonaws.com/buildpacks/heroku/emberjs.tgz
-https://github.com/travis-ci/nginx-buildpack.git#2fbde35
+https://github.com/heroku/heroku-buildpack-nginx.git#fbc49cd
https://github.com/sgrif/heroku-buildpack-diesel#f605edd
diff --git a/config/nginx.conf.erb b/config/nginx.conf.erb
index d10e5d586b6..50469392455 100644
--- a/config/nginx.conf.erb
+++ b/config/nginx.conf.erb
@@ -9,6 +9,11 @@ events {
}
http {
+ set_real_ip_from 10.0.0.0/8;
+ set_real_ip_from 127.0.0.0/24;
+ real_ip_header X-Forwarded-For;
+ real_ip_recursive on;
+
gzip on;
gzip_comp_level 2;
gzip_proxied any;
@@ -28,6 +33,8 @@ http {
client_body_timeout 30;
client_max_body_size 50m;
+ limit_req_zone $remote_addr zone=publish:10m rate=1r/m;
+
upstream app_server {
server localhost:8888 fail_timeout=0;
}
@@ -38,22 +45,27 @@ http {
keepalive_timeout 5;
location ~ ^/assets/ {
- add_header Strict-Transport-Security "max-age=31536000" always;
add_header X-Content-Type-Options nosniff;
add_header Cache-Control public;
root dist;
expires max;
}
+ add_header Strict-Transport-Security "max-age=31536000" always;
+ add_header Vary 'Accept, Accept-Encoding, Cookie';
+ proxy_set_header Host $http_host;
+ proxy_set_header X-Real-Ip $remote_addr;
+ proxy_redirect off;
+
location / {
- add_header Strict-Transport-Security "max-age=31536000" always;
- add_header Vary 'Accept, Accept-Encoding, Cookie';
- proxy_set_header Host $http_host;
- proxy_redirect off;
- if ($http_x_forwarded_proto != 'https') {
- rewrite ^ https://$host$request_uri? permanent;
- }
proxy_pass http://app_server;
}
+
+ location ~ ^/api/v./crates/new$ {
+ proxy_pass http://app_server;
+
+ limit_req zone=publish burst=10 nodelay;
+ limit_req_status 429;
+ }
}
}
diff --git a/src/middleware/block_ips.rs b/src/middleware/block_ips.rs
index 442ed3093e1..91914c28187 100644
--- a/src/middleware/block_ips.rs
+++ b/src/middleware/block_ips.rs
@@ -29,10 +29,10 @@ impl Handler for BlockIps {
fn call(&self, req: &mut dyn Request) -> Result> {
let has_blocked_ip = req
.headers()
- .find("X-Forwarded-For")
+ .find("X-Real-Ip")
.unwrap()
.iter()
- .any(|v| v.split(", ").any(|ip| self.ips.iter().any(|x| x == ip)));
+ .any(|ip| self.ips.iter().any(|v| v == ip));
if has_blocked_ip {
let body = format!(
"We are unable to process your request at this time. \
diff --git a/src/middleware/log_request.rs b/src/middleware/log_request.rs
index 2958a224024..ed9b72abc1c 100644
--- a/src/middleware/log_request.rs
+++ b/src/middleware/log_request.rs
@@ -38,7 +38,7 @@ impl Handler for LogRequests {
level = level,
method = req.method(),
path = FullPath(req),
- ip = request_header(req, "X-Forwarded-For"),
+ ip = request_header(req, "X-Real-Ip"),
time_ms = response_time,
user_agent = request_header(req, "User-Agent"),
referer = request_header(req, "Referer"), // sic
From 41c44b489cee319a68ccc8dead521b6a548422eb Mon Sep 17 00:00:00 2001
From: Sean Griffin
Date: Wed, 16 Jan 2019 15:42:22 -0700
Subject: [PATCH 2/3] Re-add https rewriting
This was not intended to be deleted
---
config/nginx.conf.erb | 3 +++
1 file changed, 3 insertions(+)
diff --git a/config/nginx.conf.erb b/config/nginx.conf.erb
index 50469392455..95a52a34209 100644
--- a/config/nginx.conf.erb
+++ b/config/nginx.conf.erb
@@ -56,6 +56,9 @@ http {
proxy_set_header Host $http_host;
proxy_set_header X-Real-Ip $remote_addr;
proxy_redirect off;
+ if ($http_x_forwarded_proto != 'https') {
+ rewrite ^ https://$host$request_uri? permanent;
+ }
location / {
proxy_pass http://app_server;
From ac4d090c245dd5d4dba1bc745416aecd49d96703 Mon Sep 17 00:00:00 2001
From: Sean Griffin
Date: Mon, 28 Jan 2019 14:01:28 -0700
Subject: [PATCH 3/3] Add policy around automated tools claiming names
---
app/templates/policies.hbs | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/app/templates/policies.hbs b/app/templates/policies.hbs
index 63ab418e7ae..c6025d666af 100644
--- a/app/templates/policies.hbs
+++ b/app/templates/policies.hbs
@@ -25,6 +25,12 @@ them. If necessary, the team may reach out to inactive maintainers and help
mediate the process of ownership transfer.
+
+Using an automated tool to claim ownership of a large number of package names
+is not permitted. We reserve the right to block traffic or revoke ownership
+of any package we determine to have been claimed by an automated tool.
+
+