Updated Varnish WordPress VCL

THIS VARNISH CONFIG HAS BEEN UPDATED AND IS AVAILABLE HERE

I have been tweaking a Varnish VCL config for WordPress for quite some time and I wanted to share it. Thanks to everyone (especially DocWilco) in #varnish on Linpro IRC for helping.

Features:

  1. Load balancing
  2. Probing
  3. Does not cache wp-admin
  4. Puts all uploads/content requests onto one server
  5. Purging
  6. Long timeout for file uploads
  7. XML RPC support
  8. Custom 404 and 500 message
  9. Forwards user IP address for comments

First of all.. Let’s define some backends..

// BACKEND CONFIGS
// First WordPress origin server.
backend server1 {
    .host = "server1.example.com";
    .port = "8080";

    // Generous timeouts so that slow file uploads are not cut off.
    .connect_timeout = 600s;
    .first_byte_timeout = 600s;
    .between_bytes_timeout = 600s;

    // Health probe: fetch "/" every 5 seconds, waiting at most 1 second,
    // evaluated over a window of 5 probes with a threshold of 3.
    .probe = {
        .url = "/";
        .interval = 5s;
        .timeout = 1s;
        .window = 5;
        .threshold = 3;
    }
}

// Second WordPress origin server; configured identically to server1.
backend server2 {
    // The closing quote was missing here, which left the string literal
    // unterminated and made the whole VCL fail to compile.
    .host = "server2.example.com";
    .port = "8080";

    // Health probe: fetch "/" every 5 seconds, waiting at most 1 second,
    // evaluated over a window of 5 probes with a threshold of 3.
    .probe = {
        .url = "/";
        .interval = 5s;
        .timeout = 1s;
        .window = 5;
        .threshold = 3;
    }

    // we include time outs so uploads don't time out
    .connect_timeout = 600s;
    .first_byte_timeout = 600s;
    .between_bytes_timeout = 600s;
}

// Round-robin director that alternates requests between both backends.
director cluster round-robin {
    { .backend = server1; }
    { .backend = server2; }
}

// Hosts that are allowed to send PURGE requests (the WordPress servers).
acl purge {
    "server1.example.com";
    "server2.example.com";
}

// Decide how long fetched objects may stay in the cache.
sub vcl_fetch {
    // Cache responses for our domains, but never anything under wp-admin.
    // The two host tests are parenthesised because && binds tighter than ||;
    // without the parentheses the wp-admin exclusion only applied to
    // ourotherdomain.com and wp-admin pages on ourdomain.com were cached.
    if ((req.http.host ~ "ourdomain.com"
         || req.http.host ~ "ourotherdomain.com")
        && req.url !~ "wp-admin")
    {
        // we cache these domains for 8 hours unless they are purged.
        set beresp.ttl = 8h;
        set beresp.grace = 600s;

        // don't cache 404 or 500 errors
        if (beresp.status == 404 || beresp.status >= 500) {
            set beresp.ttl = 0s;
        }
    }

    // tell all of the files to use server1
    // (this previously named a backend called "server", which is not
    // declared anywhere in this configuration)
    if (req.url ~ "files") {
        set req.backend = server1;
        set beresp.ttl = 8h;
    }
}

// Request entry point: handles purges, picks a backend and normalises the
// request before the cache lookup.
sub vcl_recv {
    // Purge WordPress requests for purge
    if (req.request == "PURGE") {
        // Only hosts listed in the "purge" ACL may purge.
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }
        purge("req.url == " req.url " && req.http.host == " req.http.host);
        error 200 "Purged.";
    }

    // forward the client IP so comments show up properly
    set req.http.X-Forwarded-For = client.ip;

    // Default backend choice. If the request is for our domain and not for
    // wp-admin, load balance it across the cluster; otherwise send it to
    // server1. This is done first so that the more specific rules below can
    // override it (previously it ran last and silently undid them). The host
    // tests are parenthesised because && binds tighter than ||.
    if ((req.http.host ~ "ourdomain.com"
         || req.http.host ~ "ourotherdomain.com")
        && req.url !~ "wp-admin")
    {
        set req.backend = cluster;
    } else {
        set req.backend = server1;
    }

    // let server2 handle all feeds
    if (req.url ~ "/feed/") {
        set req.backend = server2;
    }

    // server1 must handle file uploads
    if (req.url ~ "media-upload.php"
        || req.url ~ "file.php"
        || req.url ~ "async-upload.php")
    {
        set req.backend = server1;
        return (pass);
    }

    // server1 can serve all files.
    if (req.url ~ "/files/") {
        set req.backend = server1;
    }

    // do not cache xmlrpc.php, and strip cookies from GET requests to it.
    // The cookie stripping used to sit in a second, unreachable `if` with a
    // missing opening brace; it is merged here so it actually runs.
    if (req.url ~ "xmlrpc.php") {
        if (req.request == "GET") {
            remove req.http.cookie;
        }
        // NOTE(review): xmlrpc used to return before any backend was
        // selected; pinning it to server1 assumes that was the effective
        // default backend - confirm.
        set req.backend = server1;
        return (pass);
    }

    // Normalise Accept-Encoding so the cache holds fewer variants.
    if (req.http.Accept-Encoding) {
        if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|lzma|tbz)(\?.*|)$") {
            // Already-compressed formats: do not ask for compression.
            remove req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            // Unknown algorithm.
            remove req.http.Accept-Encoding;
        }
    }

    // Remove cookies and query string for real static files
    if (req.url ~ "^/[^?]+\.(jpeg|jpg|png|gif|ico|js|css|txt|gz|zip|lzma|bz2|tgz|tbz|html|htm)(\?.*|)$") {
        unset req.http.cookie;
        set req.url = regsub(req.url, "\?.*$", "");
    }

    // Remove cookies from front page
    if (req.url ~ "^/$") {
        unset req.http.cookie;
    }
}

// Custom error message
// Builds a synthetic HTML page: one wording for 404s, another for every
// other error status. The markup had been mangled (XML declaration turned
// into a comment, html/head/title/body tags lost, stray </pre> and <pre>);
// it is restored to a well-formed page with the visible text unchanged.
sub vcl_error {
    set obj.http.Content-Type = "text/html; charset=utf-8";

    if (obj.status == 404) {
        set obj.ttl = 0s;
        synthetic {"<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>"} obj.status " " obj.response {"</title>
</head>
<body>
<div style="background-color: white;"><center>
 <img src="http://whatever.com/heavyload.jpg" alt="" width="600px" /></center>
<h1>This page is unavailable</h1>
If you are seeing this page, either maintenance is being
performed or you are trying to access a file that doesn't exist. Please <a href="http://whatever.com/contact.html">contact us</a> if you believe this is an error
<h2>Error "} obj.status " " obj.response {"</h2>
"} obj.response {" on server "} req.backend {"
<address><a href="http://whatever.com/">Us.</a></address></div>
</body>
</html>
"};
        // An unreachable `error 404 "Not found";` used to follow this
        // return; it has been dropped because it could never run and would
        // have raised an error from inside the error handler.
        return (deliver);
    } else {
        synthetic {"<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>"} obj.status " " obj.response {"</title>
</head>
<body>
<div style="background-color: white;"><center>
 <img src="http://whatever.com/heavyload.jpg" alt="" width="600px" /></center>
<h1>This website is unavailable</h1>
If you are seeing this page, either maintenance is being
performed
 or something really bad has happened. Try returning in a few
minutes. If you still see this error in a few minutes please <a href="http://whatever.com/contact.html">contact us</a>
<h2>Error "} obj.status " " obj.response {"</h2>
"} obj.response {" on server "} req.backend {"
<address><a href="http://whatever.com/">Us.</a></address></div>
</body>
</html>
"};
        return (deliver);
    }
}

How to put the latest CBBC news on your WordPress blog

Step 1. Log into your blog
Step 2. Click Appearance-> Widgets
Step 3. Drag RSS over to your sidebar.
Step 4. Paste http://newsrss.bbc.co.uk/rss/cbbc_news/homepage/rss.xml in where it asks for RSS feed URL
Step 5. Give the widget a title. *optional

Note: PrimaryBlogger users can just drag the CBBC Widget onto their sidebar.

5 reasons you may regret using posterous

Actually this is more of a WordPress vs posterous..

1. Posterous is a closed source platform. If you don’t get closed vs open source yet then I recommend you do some reading, because open source is the cornerstone of 99% of the successful publishing platforms used long term on the internet.

2. You can’t export your posterous blog to an XML file; you can get your blog posts but you can’t export comments etc., so if you move you will lose data.

3. Did you even know you can post to a wordpress blog via email? Just saying… Cos y’know most people don’t…

4. Applications have a natural growth pattern, posterous will eventually want to compete with the big boys and in turn will become a complex beast which is probably the reason you avoided other blog platforms..

5. You can’t assign plugins or widgets ie the fantastic CBBC news feed, Primary Games Arena or BrainPOP UK widgets.

Basically posterous is a short term solution that might get you started blogging but I warn you not to invest too much time into it because when you decide to move away you are going to lose data.

I believe in a) open source and b) teachers using long term solutions to problems so as to reduce headaches in the future. So yep, I’m biased as I run a WordPress blog site but I also feel like no one has properly addressed this issue. It is really easy to move from posterous to a WordPress blog provider such as PrimaryBlogger; just use the import tool.

What the last 4 days have taught me.

Disclaimer: These are my own words and do not reflect the view of my employer.

Some teachers are really supportive.  Huge thanks to @pimmsSmith, @LouMeethongsai, @janwebb21, @idletim, @deputymitchell, @enomilie, @purplelady1979, @anne_neal, @whorwe, @sraff79, @missbrownsword and @primaryt for your support and help.  The fact that you all got in touch asking for help and tested logins is great, we need testers just as much as we need developers or system admins so my hat goes off to you..

Some teachers are not quite as nice (actually just one). Even though PrimaryBlogger is a free service I got an email saying “how disappointed they were with the support.” To this teacher I remind you that PrimaryBlogger is unblocked in every UK LA, allows embeds, is open source, doesn’t enforce HTTPS only, is spam free, ad free and not publicly funded, and each blog post is reviewed by a CRB checked human from Primary Technology. To combat this I think a community approach is needed; a Wikipedia model of moderators should help.

PrimaryBlogger as a service is spread across multiple servers and sites and isn’t run by monkeys or unskilled professionals; it takes a lot of expertise to deliver a scaled WordPress deployment. Now you probably don’t care about how big it is, but the fact is if we were selling PrimaryBlogger at £200 a school per year we would be making £400,000 a year. Pretty good money, right? But that would mean the community wouldn’t grow and that would suck. PrimaryBlogger will remain a free blog service; last year we made £100 in total in sales of domains and additional storage purchases. We thrive from the success of school blogs and we are rewarded by “inbound links” which in turn allow us to promote other Primary Technology products. I have to justify the cost of PrimaryBlogger to our sales director and I do it based on the amount of respect it gets us inside of the teaching community, something not easily bought and something we don’t take for granted.

WordPress is open source and free and there are a number of hosts you can move your blog to.  I suggest everyone periodically backs their blog up anyway.  It’s really easy to do.  Tools -> Export..

If you really want to help PrimaryBlogger, buy a domain for your blog through us; it’s only £50 per year and it will be an improvement to your blog. I would like to employ someone full time to work on it and to build a community, so if we can sell 400 (one fifth of the number of blogs on PrimaryBlogger) domains, that is one person’s full time wage covered. It would be way better though if we can encourage a community to become vocal, and over the next few months that will be my goal.

I also want to comment quickly about how all of our findings/problems were put into the public domain.  How many other companies do you know that show you their inner workings and explain why a problem came about, how it was resolved and who resolved it?

So to reflect on what I have learned:

  1. Always trim old/stale blogs from the database
  2. Tell teachers that they are out of order if they are out of order
  3. Don’t do any development work on a live server the day before we’re due to have all of our staff on holiday
  4. Give PrimaryBloggers (and users of our other services) a community site they can go to to share experiences and knowledge
  5. We as a community need to discuss the benefits of having a blog site that has blogs with “search engines disabled”

PrimaryBlogger is all about giving school pupils and teachers a voice, we want you to be heard.

PrimaryBlogger – why it was down..

So I’m writing this post on PrimaryPad because PrimaryBlogger is currently down.. It’s been a nightmare, it really has.. Thankfully it’s currently 1/2 term in schools so activity is down 20% or so across the site.

We have 3 layers of backup for school blogs, our database is large and the file system is very large. None of this is surprising for a site like PrimaryBlogger but what is surprising is that 2 levels of our backups failed..

The problem stemmed from me playing with a plugin that is built to replicate one site to another, I noticed that it was playing up so I disabled it and didn’t proceed any further. The next day I was informed some people were getting a white screen when trying to access their blog..

I looked through the plugin’s source code (some bits I had written) and realized there was potential it could have been dropping the wrong tables from the database, no biggy.. I will just restore the database.. I had done a full backup of the file system and database just before I started playing with the plugin.. Now bear in mind I’m backing up 660GB here which takes a little while so I set it going and went off to play some Bad Company 2..

I came back an hour later and it had finished backing up so I proceeded to play with the plugin.. Things broke so I figured I would just restore from backup only to realize that the backup I had taken was completely useless.. It was 10MB! “What the deuce” I pondered.. mysqldump didn’t output any errors during backup so what is going on?! I checked the replication and I couldn’t use that as a backup source as the replication servers had replicated the error.. So I was left with only one option..

We take daily brick level backups off site, these brick level backups take a .sql backup of our current mysql state but because it was off site it took a long time to transfer, during that time I quickly brought up a local VM to the .sql file and went ahead at trying to restore primaryblogger’s database locally just to make sure the servers were up to the job.

The first thing I noticed is that WordPress really doesn’t like the base domain being changed after install, so I had to backtrack and begin installation w/ the correct base set.

The next thing I noticed is that my mysql reads were giving an error: ERROR 1153 (08S01) at line 218227: Got a packet bigger than ‘max_allowed_packet’ bytes. I fixed this by increasing the max_allowed_packet in my.cnf

I also have a problem when I do “use blog;” (blog is the name of my database) I get a delay and “Reading table information….” notification which takes a few minutes to get past..

When restoring to a fresh database I noticed that mysqldump is not dumping table data and is just dumping the table structure. I’m not sure why but I need to investigate this further too at some point.. Note: I was using “-d database” name like a tool.. In the future I know not to make this mistake.. It is easy to make though…

Another problem I had was that my admin password keeps resetting itself. There is no logic to this, I used a mysql update statement to update the password then check it using a select statement yet after I try to login it changes back to an unknown md5 hash. I think it’s due to the SALT values in wp-config but I may be wrong.

While I was watching the database file dump back into the database I noticed just how much crap WordPress puts into each blog.. I mean most of each blog’s contents is WordPress guffing the space.. I reckon that 40% of my entire blog database contents is WordPress putting links back to itself and documentation into each blog site. Not cool…

Usually I test plugins off site and this was no exception but this specific plugin needed to iterate over an array of 1000’s of blogs and I didn’t have that many records locally. The specific bug with the plugin was quickly isolated but the fallout was 12 hours of unavailability of Primary School blogs across the whole of the UK.

The only way I could have really avoided this is if I had local snapshots in the form of a VM, but even then recovering from a snapshot would have given me all sorts of database and file system inconsistencies and headaches.

Anyway a few things caused the problem, it was literally down to one row (out of millions) in our database and that’s why it took so long to diagnose and resolve.. I also had to completely restore the entire themes folder as for some reason this was empty..

So my apologies, I had extremely bad luck but worked my butt off over the weekend and early today to restore stability. This error could have happened to anyone and it’s very lucky we have a load of backups in place to restore all of the sites. At no point was any site’s data or content at risk; credit is due to our remote backup service that saved the day.